How to find questions leading up to jupyter cells?

How to read markdown leading up to code cells?


In [16]:
// TODO: use the m flag option for regexp?

/**
 * Pair every code cell with the markdown cells that lead up to it.
 *
 * Markdown sources are buffered until the next code cell is reached; the
 * buffer is then attached to that code cell and reset. Cells that are
 * neither markdown nor code are ignored, and markdown that is not followed
 * by a code cell is dropped.
 *
 * @param {Array<Object>} cells - notebook cells; each is read for
 *     `cell_type`, `source` (array of strings or a single string) and
 *     `language`.
 * @returns {Array<Object>} one entry per code cell with
 *     `code` (joined source), `markdown` (array of preceding markdown
 *     sources), `from`/`to` (position of the code cell in `cells`, usable
 *     with `cells.slice(from, to)`) and `language`.
 */
function accumulateMarkdown(cells) {
    // The notebook format allows `source` to be either a list of lines or
    // one multi-line string, so normalise both (and a missing source) here.
    const readSource = (cell) => {
        if (Array.isArray(cell.source)) {
            return cell.source.join('');
        }
        return cell.source == null ? '' : String(cell.source);
    };

    const accumulated = [];
    let pendingMarkdown = [];
    cells.forEach((cell, index) => {
        if (cell.cell_type === 'markdown') {
            pendingMarkdown.push(readSource(cell));
            return;
        }
        if (cell.cell_type !== 'code') {
            return;
        }
        accumulated.push({
            code: readSource(cell),
            markdown: pendingMarkdown,
            from: index,
            to: index + 1,
            language: cell.language
        });
        // every code cell starts with a fresh markdown buffer
        pendingMarkdown = [];
    });
    return accumulated;
}

module.exports = accumulateMarkdown;



[Function: accumulateMarkdown]

This is a complex question to answer.  Perhaps using IBM Watson language alchemy?

How to find functions in jupyter notebooks?

How to cache notebook questions in fuse?



In [None]:
// TODO: interpret markdown leading up to code results and find the resulting function in the list
// for now, the boring solution is to assume all markdown output is a question?
var path = require('path');
var fs = require('fs');
// Matches every whole line that contains a question mark: `m` makes ^ and $
// anchor at line boundaries, `g` lets exec() walk through all matching lines.
var MATCH_QUESTIONS = (/^.*\?.*$/igm);
//var re2 = new RegExp('(^|\\n)//.*\\?([\\n\\s]+|$)', 'ig')

// How to store code markdown results for later use?
// cellCache is the flat, searchable list: cacheCells pushes one
// {id, question} entry per question found for a cell.
// NOTE(review): the typeof guards presumably keep the existing cache alive
// when this cell is re-run inside a long-lived notebook kernel — confirm.
if (typeof cellCache === 'undefined') {
    var cellCache = [];
}
// cacheIds maps a cell id ("<notebook file name>[<index>]") to the full cell
// descriptor stored by cacheCells.
if (typeof cacheIds === 'undefined') {
    var cacheIds = {};
}

// How to convert a string to an Array of RegEx matches
/**
 * Collect one capture group from every match of a regular expression.
 *
 * @param {RegExp} ex - expression to run; normally carries the `g` flag.
 *     Without `g` (or `y`) only the first match can be found, so at most one
 *     element is returned.
 * @param {string} str - text to search (coerced to a string by exec()).
 * @param {number} [i=0] - index of the capture group to collect; 0 is the
 *     whole match.
 * @returns {Array<string|undefined>} the selected group of each match, in
 *     order of appearance.
 */
function regexToArray(ex, str, i = 0) {
    const matches = [];
    // A shared /g regex remembers where its last search stopped; always
    // start from the beginning so earlier use cannot hide matches.
    ex.lastIndex = 0;
    let m;
    while ((m = ex.exec(str)) !== null) {
        matches.push(m[i]);
        if (!ex.global && !ex.sticky) {
            // exec() never advances without g/y: looping would never end
            break;
        }
        if (m[0] === '') {
            // an empty match does not move lastIndex; step over it manually
            ex.lastIndex++;
        }
    }
    return matches;
}

/**
 * Cache the questions of a notebook's cells, or look up a cached cell by id.
 *
 * - `cacheCells(id)` returns the full descriptor stored for that cell id.
 * - `cacheCells(cells, notebook)` replaces everything cached for the
 *   notebook with entries derived from `cells` and returns the shared
 *   question cache.
 * - `cacheCells([], '')` returns the shared question cache untouched.
 *
 * @param {Array<Object>|string} cells - raw notebook cells to cache, or a
 *     cell id of the form "<notebook file name>[<index>]".
 * @param {string} [notebook] - path of the notebook the cells came from.
 * @returns {Object|Array<Object>} the cell descriptor in lookup mode,
 *     otherwise the shared array of `{id, question}` entries.
 * @throws {Error} when no notebook is given and `cells` is neither empty
 *     nor a cached cell id.
 */
function cacheCells(cells, notebook) {
    if (typeof notebook === 'undefined') {
        if (typeof cells === 'string'
            && Object.prototype.hasOwnProperty.call(cacheIds, cells)
            && cacheIds[cells]) {
            return cacheIds[cells];
        }
        if (cells.length === 0) {
            return cellCache;
        }
        // previously this surfaced as a TypeError from `notebook.length`
        throw new Error(typeof cells === 'string'
            ? 'No cached cell found for id ' + JSON.stringify(cells)
            : 'cacheCells needs a notebook path to cache new cells');
    }
    if (cells.length === 0 || notebook.length === 0) {
        return cellCache;
    }

    const filename = path.basename(notebook);
    const belongsToNotebook = (id) => id.split('[')[0] === filename;

    // Drop everything cached for this notebook before re-adding it. The
    // array is edited in place because callers keep the reference returned
    // by cacheCells([], ''); walking backwards keeps splice() from skipping
    // the entry that follows a removed one.
    for (let idx = cellCache.length - 1; idx >= 0; idx--) {
        if (belongsToNotebook(cellCache[idx].id)) {
            cellCache.splice(idx, 1);
        }
    }
    // Descriptors of cells that no longer exist must not stay resolvable.
    Object.keys(cacheIds).forEach((id) => {
        if (belongsToNotebook(id)) {
            delete cacheIds[id];
        }
    });

    const toQuestions = (text) => regexToArray(MATCH_QUESTIONS, text)
        .map(r => r.replace(/how to|\?/ig, '').trim());

    const newCache = accumulateMarkdown(cells);
    // fall back to "now" for notebooks that do not exist on disk
    const mtime = fs.existsSync(notebook)
        ? fs.statSync(notebook).mtime.getTime()
        : (new Date()).getTime();

    newCache.forEach((c, i) => {
        const id = filename + '[' + i + ']';
        // Join markdown cells with newlines so the last line of one cell and
        // the first line of the next are not merged into a single question.
        const markdownText = Array.isArray(c.markdown) ? c.markdown.join('\n') : c.markdown;
        // NOTE(review): accumulateMarkdown stores the code under `code`, not
        // `source`, so this second scan finds nothing for its output; confirm
        // whether questions inside code comments were meant to be indexed.
        let questions = toQuestions(markdownText).concat(toQuestions(c.source));
        if (questions.length === 0) {
            // keep the cell reachable by id even without any question
            questions = [''];
        }
        cacheIds[id] = Object.assign({}, c, {
            id: id,
            filename: notebook,
            questions: questions,
            notebook: filename,
            fresher: c.code,
            mtime: mtime
        });
        questions.forEach((q) => cellCache.push({
            id: id,
            question: q
        }));
    });
    return cellCache;
}
module.exports = cacheCells;


How to display interpreted results in markdown?



In [11]:
// How to represent search results in markdown?
/**
 * Render one list of matching cell ids as markdown.
 *
 * The output starts with a "<n> match(es) found" line listing the ids. When
 * there is at least one id, the markdown and the fenced code of the first
 * id's cached cell are appended.
 *
 * @param {Array<string>} res - matching cell ids, best match first.
 * @returns {string} the markdown fragment.
 */
function resultMarkdown(res) {
    const total = res.length;
    const suffix = total !== 1 ? 'es' : '';
    let output = '\n\n\n' + total + ' match' + suffix
        + ' found: ' + res.join(' , ') + '\n\n\n';
    if (total > 0) {
        // only the best (first) match is shown in full
        const top = cacheCells(res[0]);
        output += '\n\n\n' + top.markdown.join('\n') + '\n\n\n';
        output += '```\n\n\n' + top.code + '\n\n\n```\n\n\n';
    }
    return output;
}

/**
 * Render search results as markdown.
 *
 * @param {Array<string>|Array<Array<string>>} results - either one flat list
 *     of cell ids, or a list of such lists (one per query).
 * @returns {string} the rendering of the flat list, or the renderings of all
 *     nested lists concatenated in order.
 */
function interpretMarkdown(results) {
    const first = results[0];
    const isFlatList = typeof first === 'undefined' || typeof first === 'string';
    if (isFlatList) {
        return resultMarkdown(results);
    }
    let combined = '';
    results.forEach((group) => {
        combined += resultMarkdown(group);
    });
    return combined;
}

module.exports = interpretMarkdown;


[Function: interpretMarkdown]

How to search notebook questions?

How to answer questions using jupyter notebooks and simple search?

How to search jupyter notebooks for questions and answers?


In [1]:
// install fuse
var path = require('path');
var fs = require('fs');
var Fuse = require('fuse.js');
var importer = require('../Core');

// Options handed to Fuse.js for fuzzy-matching the cached questions.
var FUSE_CONFIG = {
    caseSensitive: false,
    findAllMatches: true,
    distance: 100,
    threshold: 0.5,
    tokenize: false,
    shouldSort: true,
    // each cache entry is matched on its `question` field
    keys: ['question'],
    // NOTE(review): presumably makes search() return the entries' `id`
    // values instead of the entries themselves (fuseSearch treats the
    // results as id strings) — confirm against the installed Fuse.js version
    id: 'id'
}
// Build the index over the shared question cache: cacheCells([], '') returns
// the cache array itself without adding anything to it.
var fuse = new Fuse(cacheCells([], ''), FUSE_CONFIG);

/**
 * Keep only the cell ids that belong to one notebook.
 *
 * @param {Array<string>} arr - cell ids such as "notebook.ipynb[3]".
 * @param {string} fname - notebook file name the ids must start with,
 *     immediately followed by "[".
 * @returns {Array<string>} the ids of `arr` that start with `fname + '['`.
 */
function filterFilename(arr, fname) {
    return arr.filter((id) => {
        const expectedPrefix = fname + '[';
        const actualPrefix = id.slice(0, fname.length + 1);
        return actualPrefix === expectedPrefix;
    });
}

/**
 * Resolve a search query to cached notebook cells.
 *
 * Only the base name of `queries` is inspected for structure:
 * - "notebook[term]": a numeric term selects the cell with exactly that id,
 *   any other term is fuzzy-searched and restricted to the notebook;
 * - a name containing ".ipynb", or one that is the file name of cached
 *   cells, selects every cell of that notebook ordered by cell index;
 * - anything else is fuzzy-searched as a whole.
 *
 * @param {string} queries - search text, notebook name/path or cell id.
 * @returns {Object|Array<Object>} all matching cell descriptors for a
 *     whole-notebook search, otherwise the descriptor of the best match.
 * @throws {Error} when nothing matches.
 */
function fuseSearch(queries) {
    const splitBrackets = (text) => text.split(/[\[\]]/ig);
    const cellNumber = (id) => parseInt(splitBrackets(id)[1]);

    const cellIds = cacheCells([], '').map((entry) => entry.id);
    const query = splitBrackets(path.basename(queries));
    const notebookName = query[0];
    const files = filterFilename(cellIds, notebookName)
        .sort((left, right) => cellNumber(left) - cellNumber(right));
    const isFileSearch = query.length === 1
        && (notebookName.indexOf('.ipynb') > -1 || files.length > 0);

    let searchResults;
    if (query.length === 3) {
        // TODO: return all code cells and perform a new fuse search for the notebook
        const term = query[1];
        // the term is a cell index only if it survives an integer round trip
        const isNumeric = parseInt(term) + '' === term;
        let candidates;
        if (isNumeric) {
            candidates = cellIds.filter((id) => id === path.basename(queries));
        } else {
            candidates = fuse.search(term);
        }
        searchResults = filterFilename(candidates, notebookName);
    } else if (isFileSearch) {
        searchResults = files;
    } else {
        searchResults = fuse.search(queries);
    }

    // the cache holds one entry per question, so ids can repeat
    const unique = searchResults.filter((id, position, all) => all.indexOf(id) === position);
    if (unique.length === 0) {
        throw new Error('Nothing found for ' + JSON.stringify(queries).substr(0, 200));
    }
    if (isFileSearch) {
        return interpretObject(unique);
    }
    return interpretObject([unique[0]])[0];
}

/**
 * Return a copy of a cached cell, reloading its code from disk first when
 * the notebook file changed after the cell was cached.
 *
 * The refresh is best effort: if the notebook cannot be stat'ed, read or
 * parsed, the previously cached values are returned unchanged.
 *
 * @param {Object} cache - cached cell descriptor; read for `filename`,
 *     `mtime`, `from` and `to`, and updated in place (`fresher`, `mtime`)
 *     after a successful reload.
 * @returns {Object} a shallow copy of `cache`.
 */
function getFresher(cache) {
    try {
        const mtime = fs.statSync(cache.filename).mtime.getTime();
        if (mtime > (cache.mtime || 0)) {
            const cells = importer
                .getCells(cache.filename, ['*', 'markdown', 'code'])
                .slice(cache.from, cache.to);
            cache.fresher = accumulateMarkdown(cells)[0].code;
            // Remember the new mtime only once the reload worked, so that a
            // failed read is retried instead of leaving stale code in place.
            cache.mtime = mtime;
        }
    } catch (err) {
        // Deliberately ignored: a notebook that is missing or unreadable
        // right now must not break the search; the cached code is used.
    }
    return Object.assign({}, cache);
}

/**
 * Turn cell ids into up-to-date cell descriptors.
 *
 * @param {Array<string>|Array<Array<string>>} results - a flat list of cell
 *     ids, or a list of such lists.
 * @returns {Array<Object>|Array<Array<Object>>} the descriptors, in the same
 *     (flat or nested) shape as `results`.
 */
function interpretObject(results) {
    // look the id up in the cache, then refresh it from disk if needed
    const resolve = (id) => getFresher(cacheCells(id));
    const first = results[0];
    if (typeof first === 'undefined' || typeof first === 'string') {
        return results.map((id) => resolve(id));
    }
    return results.map((group) => group.map((id) => resolve(id)));
}

module.exports = fuseSearch;


[Function: interpret]

How to test that the interpreter works?


In [None]:

// do nothing in cell run mode, describe is injected by test runner.ipynb
// The fallback below is a no-op that never invokes its callback, so none of
// the specs run (and `it` is never looked up) when this cell is executed
// directly.
if(typeof describe === 'undefined') {
    var describe = (() => {});
}



// Specification outline for the search service. All specs are placeholders:
// the bodies are empty or commented out.
// NOTE(review): `it` is presumably provided by the same test runner that
// injects `describe` — confirm.
describe('importer search service', () => {
    it('should require a codebase using nothing but search terms', () => {
        // TODO: not implemented yet
    })
    
    it('should match entire notebooks', () => {
        // TODO: not implemented yet
    })
    
    it('should match code cells by numeric index', () => {
        // TODO: not implemented yet
    })
    
    it('should return the full cell descriptor from a list of IDs', () => {
        // TODO: not implemented yet
    })
    
    it('should store searchable content for later lookup', () => {
        // Disabled draft: caches three test cells and checks the id of the
        // first cache entry.
        /*
        var assert = require('assert');
        var testNotebook = 'test import.ipynb';
        var testCells = [
            {cell_type: 'code', source: []},
            {cell_type: 'markdown', source: ['how to test for errors line 2?']},
            {cell_type: 'code', source: ['(function (err) { throw error; })']}
        ];
        global[testNotebook + '[1]']
            = eval(testCells[testCells.length - 1].source.join('').trim());
        var markdownCache = cacheCells(testCells, testNotebook);
        assert(markdownCache[0].id == testNotebook + '[1]');
        */
    })
    
})

