**Tired of working with notebooks but having to manually make copies of cells where you define functions so other notebooks or other scripts can use them?  The solution is in this notebook.**

This notebook contains the code for "scraper.jl", a facility that periodically (every 2 secs) scans jupyter notebooks in a directory, looking within them for code cells that start with a line "#@include_me filename". The code in that cell is copied into that filename. (If multiple cells in a notebook use the same #@include_me and filename, they are all appended into that filename.)

The system maintains a database of when it last scraped each notebook. A notebook is scraped only if it has been modified more recently than its last scraping, which happens infrequently, so the CPU load is very light.

To run the system, put it in a directory, make sure that this notebook itself has been scraped into the file scraper.jl (you could run `scrape_notebook("Scraper.ipynb"; verbose=true)` to do that), and then, within that directory, run

shell>  julia scraper.jl &

This will do the periodic scraping in the background. All you have to do is to remember to include the first line #@include_me  filename  in cells that you want put into files, the rest is taken care of automatically.


<h1 id="tocheading">TABLE OF CONTENTS</h1>
<div id="toc"></div>

**Updates to the table of contents are periodic, but run the cell below to first start or force an update.**

In [None]:
# Non-standard string literal: javascript"..." passes the raw text of the literal to
# `display` with MIME type "text/javascript". Note that `display` is called inside the
# macro body itself, so it runs while the literal is being expanded, and the macro's
# result is whatever `display` returns.
macro javascript_str(s) display("text/javascript", s); end

# Ask the notebook front end to fetch and run an external script (presumably the one
# that fills in the table-of-contents element defined in the markdown above).
# `$` is not interpolated inside a non-standard string literal, so `$.getScript` is
# passed through verbatim.
javascript"""
$.getScript('https://sites.google.com/site/brodylabhome/files/make_table_of_contents.js')
"""

In [None]:
#@include_me   scraper.jl

using JSON

# Function that scrapes a single notebook for julia code

In [2]:
#@include_me   scraper.jl

# Scrape a notebook for julia code that should be written into an indicated file

# Return the complete text of a cell's "source" field. The notebook format allows
# "source" to be either one string or a list of line strings (each carrying its own
# trailing newline), so both shapes are normalised to a single string here.
_cell_text(src::AbstractString) = src
_cell_text(src) = join(src)

"""
    filenames_written = scrape_notebook(notebook_filename; verbose=false, includemagic="#@include_me")

Read the ipynb file `notebook_filename` and look for code cells whose first line starts
with `includemagic`, followed by whitespace, followed by a non-blank string (the target
filename). Everything after the first line of such a cell is written into that file,
preceded by a "DON'T MODIFY THIS FILE" header naming the notebook and followed by three
newlines.

If more than one cell names the same file, the first one starts the file afresh and each
subsequent cell is appended to it (every cell gets its own header).

# Keywords
- `verbose`: when true, print one line per cell written or appended.
- `includemagic`: the marker that must open the first line of a cell. It is matched
  literally, not as a regular expression.

# Returns
An array with the filenames that were written, in order of first appearance.

# Throws
Whatever `open` or `JSON.parse` throw if the notebook cannot be read or parsed, or if a
target file cannot be opened for writing.
"""
function scrape_notebook(notebook_filename; verbose=false, includemagic="#@include_me")
    filenames = []    # files already started during this call (left untyped, as before)

    # The marker must open the line (optional indentation allowed) and be separated from
    # the filename by whitespace. \Q...\E makes the regex engine take the marker literally,
    # so characters such as '.' or '+' in a custom marker cannot alter the pattern.
    magic = Regex("^\\s*\\Q" * includemagic * "\\E\\s+(?<filename>\\S+)")

    notebook = open(JSON.parse, notebook_filename)

    for cell in notebook["cells"]
        cell["cell_type"] == "code" || continue

        # parts[1] is the first line (without its newline); parts[2], when present, is
        # the rest of the cell. An empty cell yields a single empty string: no match.
        parts = split(_cell_text(cell["source"]), '\n'; limit=2)
        m = match(magic, parts[1])
        m === nothing && continue

        target = string(m["filename"])
        if target in filenames
            mode = "a"                 # file already started by an earlier cell
            verbose && println("Appending to file ", target)
        else
            mode = "w"                 # first cell for this file: start it afresh
            push!(filenames, target)
            verbose && println("Writing out file ", target)
        end

        body = length(parts) > 1 ? parts[2] : ""

        # The do-block form closes the file even if one of the writes throws.
        open(target, mode) do f
            print(f, "# DON'T MODIFY THIS FILE -- the source is in file ",
                  notebook_filename, "\n\n")
            print(f, body)
            print(f, "\n\n\n")
        end
    end

    return filenames
end

scrape_notebook

### Example:

In [3]:
scrape_notebook("Scraper.ipynb"; verbose=true)

Writing out file scraper.jl
Appending to file scraper.jl
Appending to file scraper.jl
Appending to file scraper.jl
Appending to file scraper.jl


1-element Array{Any,1}:
 "scraper.jl"

# Reading the database of scraped notebooks

In [None]:
#@include_me   scraper.jl

# Get database of scraped files

"""
    latest = latest_scrapedict(; scrapedir=".scrapedir", scrapefile="scrapelist")

Return a dictionary that maps notebook filenames to strings representing when they were
last scraped. The information is read from the human-readable text file
`scrapedir/scrapefile`, which holds one `timestamp, filename` pair per line.

The directory `scrapedir` is created if it does not exist. If the file is missing or
cannot be read, an empty dictionary is returned. Lines that do not contain a comma, or
that have an empty filename, are skipped. If a filename occurs more than once, the first
occurrence wins.

# Returns
A `Dict{String,String}` of `filename => timestamp string`.
"""
function latest_scrapedict(; scrapedir=".scrapedir", scrapefile="scrapelist")
    # Make sure the directory exists so the database file can later be written into it.
    isdir(scrapedir) || mkpath(scrapedir)
    sfile = joinpath(scrapedir, scrapefile)

    answer = Dict{String,String}()
    isfile(sfile) || return answer

    rows = try
        readlines(sfile)
    catch
        # Best effort: an unreadable database only means every notebook looks unscraped.
        return answer
    end

    for row in rows
        # Split at the FIRST comma only: timestamps never contain one, filenames may.
        fields = split(row, ','; limit=2)
        length(fields) == 2 || continue
        stamp = string(strip(fields[1]))
        name = string(lstrip(fields[2]))    # drop the blank that follows the comma
        isempty(name) && continue
        get!(answer, name, stamp)           # keep the first entry for a repeated name
    end
    return answer
end


"""
write_scrapedict(latest; scrapedir=".scrapedir", scrapefile="scrapelist")

Writes a dictionary containing filename-latest_scrape_time_string pairs into a file

"""
function write_scrapedict(latest; scrapedir=".scrapedir", scrapefile="scrapelist")
    # Writes one "timestamp, filename" line per entry of `latest` (a dictionary of
    # filename => timestamp string) into scrapedir/scrapefile, replacing any previous
    # contents of that file. Returns nothing.

    # Create the directory when needed, so this also works if nothing has created it yet.
    mkpath(scrapedir)
    sfile = scrapedir * "/" * scrapefile

    # The do-block form closes the file even if one of the writes throws.
    open(sfile, "w") do sf
        for (name, stamp) in latest
            print(sf, stamp, ", ", name, "\n")
        end
    end

    return nothing
end

### Example

In [None]:
latest_scrapedict()

# Iterate over all notebooks in a directory

In [None]:
#@include_me   scraper.jl

# Go through all notebooks in directory and scrape them if they've been modified after their last
# scrape time.

# True when notebook `f` has no usable scrape record in `latest`, or when the file was
# modified after the recorded time. Both sides of the comparison are in UTC.
function _needs_scrape(latest, f)
    haskey(latest, f) || return true
    scraped_at = try
        Dates.DateTime(latest[f])
    catch
        # Unreadable timestamp: rescrape this notebook rather than abort the whole scan.
        return true
    end
    return scraped_at < Dates.unix2datetime(stat(f).mtime)
end

"""
    rescraped = scrape_all_notebooks(; scrapedir=".scrapedir", scrapefile="scrapelist", verbose=false)

Scan the current working directory for files ending in `.ipynb` and call
`scrape_notebook` on each one that has no entry in the scrape database or that was
modified after its recorded scrape time. The database (`scrapedir/scrapefile`) is then
updated with the new scrape times; it is only rewritten when something was scraped.

Scrape times are recorded in UTC so that they can be compared directly with file
modification times, whatever the local time zone. An entry written earlier in local time
is simply compared as if it were UTC; it is replaced by a UTC stamp the next time that
notebook is scraped.

# Keywords
- `scrapedir`, `scrapefile`: location of the scrape database.
- `verbose`: when true, print a line for every notebook that is looked into and pass
  `verbose=true` on to `scrape_notebook`.

# Returns
A vector with the names of the notebooks that were scraped during this call.
"""
function scrape_all_notebooks(; scrapedir=".scrapedir", scrapefile="scrapelist", verbose=false)

    latest = latest_scrapedict(scrapedir=scrapedir, scrapefile=scrapefile)

    rescraped = String[]
    for f in filter(x -> endswith(x, ".ipynb"), readdir())
        _needs_scrape(latest, f) || continue
        verbose && println("Will look into notebook ", f)

        # Take the stamp BEFORE scraping: if the notebook is saved again while it is being
        # scraped, its modification time ends up later than the stamp and the next scan
        # picks the change up.
        stamp = string(Dates.now(Dates.UTC))
        scrape_notebook(f; verbose=verbose)
        latest[f] = stamp
        push!(rescraped, f)
    end

    # Nothing scraped means nothing changed in the database: skip the rewrite.
    isempty(rescraped) || write_scrapedict(latest; scrapedir=scrapedir, scrapefile=scrapefile)

    return rescraped
end



### Example

In [None]:
scrape_all_notebooks(; verbose=true)

# Periodically run yourself

In [None]:
#@include_me  scraper.jl

"""
    scraperobot(; interval=2)

Call `scrape_all_notebooks()` over and over, sleeping `interval` seconds between scans.
This function never returns; it runs until the process is interrupted or a scan throws.

# Keywords
- `interval`: pause between two scans, in seconds (default 2).
"""
function scraperobot(; interval=2)
    while true
        scrape_all_notebooks()
        sleep(interval)
    end
end

# Start polling as soon as this file is run (e.g. `julia scraper.jl &`).
scraperobot()