
TL;DR: How to use fuse.js to implement a simple search?

# How to interpret questions in jupyter notebooks?

How to turn jupyter notebooks into modules?

How to answer questions using jupyter notebooks and simple search?

How to search jupyter notebooks for questions and answers?



In [1]:
// install fuse
// Set up the shared search state: the list of searchable questions, the
// lookup table of cached cells, and the Fuse index built over the list.
var path = require('path');

// cellCache is the array handed to Fuse below. The typeof guard only creates
// it when no value is visible yet, so an already populated cache is kept.
if (typeof cellCache === 'undefined') {
    var cellCache = [];
}
// cacheIds is an object used as a lookup table; fuseSearch reads its keys as
// the list of known cell ids.
if (typeof cacheIds === 'undefined') {
    var cacheIds = {};
}
var Fuse = require('fuse.js');
// Index the `question` field of every cellCache entry.
// NOTE(review): `id: 'id'` presumably makes search() return each match's `id`
// string rather than the entry itself (fuseSearch treats the results as id
// strings) - confirm against the installed fuse.js version.
var fuse = new Fuse(cellCache, {
    shouldSort: true,
    keys: ['question'],
    id: 'id'
});

/**
 * Filter query results by filename.
 *
 * Keeps the ids that start with `fname` immediately followed by `[`, i.e. the
 * ids of the form `<fname>[...]`.
 *
 * @param {string[]} arr - cell ids to filter
 * @param {string} fname - notebook filename, without the bracketed cell part
 * @returns {string[]} the ids from `arr` that start with `fname + '['`
 */
var filterFilename = (arr, fname) => {
    // startsWith replaces the deprecated String.prototype.substr comparison.
    const prefix = fname + '[';
    return arr.filter((id) => id.startsWith(prefix));
};

/**
 * Resolve a search message to a list of cached cell ids.
 *
 * The base name of `message` is split on `[` and `]`:
 *  - `name[<n>]` with a numeric `<n>`: the cached id equal to that exact text;
 *  - `name[<text>]`: fuzzy search for `<text>`, restricted to ids of `name`;
 *  - a name containing `.ipynb`, or a bare name that prefixes cached ids:
 *    every cached id of that notebook;
 *  - anything else: fuzzy search for the whole message.
 *
 * @param {string} message - search text or `notebook[cell]` reference
 * @returns {string[]} matching cell ids
 */
var fuseSearch = (message) => {
    const name = path.basename(message);
    const knownIds = Object.keys(cacheIds);
    const parts = name.split(/[\[\]]/ig);
    const notebook = parts[0];
    const notebookIds = filterFilename(knownIds, notebook);

    if (parts.length === 3) {
        const cellRef = parts[1];
        // Numeric only when the text round-trips through parseInt unchanged.
        const isNumeric = String(parseInt(cellRef, 10)) === cellRef;
        const candidates = isNumeric
            ? knownIds.filter((id) => id === name)
            : fuse.search(cellRef);
        return filterFilename(candidates, notebook);
    }

    const namesNotebook = notebook.indexOf('.ipynb') > -1;
    const prefixesKnownIds = parts.length === 1 && notebookIds.length > 0;
    if (namesNotebook || prefixesKnownIds) {
        return notebookIds;
    }
    return fuse.search(message);
};
module.exports = fuseSearch;


[Function: interpret]

How to cache notebook questions in fuse?

In [2]:
var path = require('path');
var getCells = require('../Core').getCells;

/**
 * Interpret a notebook by importing its cells into the cache.
 *
 * Reads the notebook's cells with `getCells` and hands them to `cacheCells`
 * together with the notebook path.
 *
 * @param {string} notebook - notebook path, forwarded to `getCells` and `cacheCells`
 * @returns {*} whatever `cacheCells` returns
 */
var cacheNotebook = (notebook) => {
    return cacheCells(getCells(notebook, ['*', 'markdown', 'code']), notebook);
};
module.exports = cacheNotebook;

[Function: interpretNotebook]

How to find questions leading up to jupyter cells?

How to read markdown leading up to code cells?


In [16]:
// Matches every whole line that contains a question mark. The `m` flag is
// already set here, so ^ and $ anchor at each line; `g` makes exec() walk
// through all matches and keeps its position in lastIndex between calls.
var re = (/^.*\?.*$/igm);
// Matches a `//` comment that starts at the beginning of the input or right
// after a newline and contains a `?` followed by whitespace or end of input.
// TODO: use the m flag option for regexp? (this pattern emulates line starts
// with `(^|\n)`; it is not referenced by the other visible cells)
var re2 = new RegExp('(^|\\n)//.*\\?([\\n\\s]+|$)', 'ig')

/**
 * Pair every code cell with the markdown cells that lead up to it.
 *
 * Markdown cells are collected until the next code cell, which then receives
 * them as its `markdown` list; cells of any other type are skipped without
 * resetting the collected markdown. Markdown after the last code cell is dropped.
 *
 * @param {Array<{cell_type: string, source: (string|string[]), language: *}>} cells
 *     notebook cells in document order
 * @returns {Array<{code: string, markdown: string[], from: number, to: number, language: *}>}
 *     one entry per code cell; `from`/`to` are the slice bounds of the code
 *     cell itself within `cells`
 */
var accumulateMarkdown = (cells) => {
    var pendingMarkdown = [];
    return cells.reduce((acc, cell, index) => {
        // The notebook format allows `source` to be one string or a list of
        // line strings; accept both instead of throwing on a plain string.
        var source = typeof cell.source === 'string'
            ? cell.source
            : cell.source.join('');
        if (cell.cell_type === 'markdown') {
            pendingMarkdown.push(source);
            return acc;
        }
        if (cell.cell_type !== 'code') {
            return acc;
        }
        // TODO: improve the range so it also covers the leading markdown cells
        acc.push({
            code: source,
            markdown: pendingMarkdown,
            from: index,
            to: index + 1,
            language: cell.language
        });
        pendingMarkdown = [];
        return acc;
    }, []);
};

// How to convert a string to an Array of RegEx matches
/**
 * Collect one capture group from every match of a regular expression.
 *
 * @param {RegExp} ex - pattern to apply
 * @param {string} str - text to search
 * @param {number} [i=0] - capture group to collect (0 is the whole match)
 * @returns {Array} group `i` of each match, in order of appearance
 */
var regexToArray = (ex, str, i = 0) => {
    var matches = [];
    var m;
    // Without the g or y flag exec() never advances, so repeating it would
    // loop forever on the first match; return that single match instead.
    if (!ex.global && !ex.sticky) {
        m = ex.exec(str);
        return m === null ? matches : [m[i]];
    }
    // A shared regex may carry a lastIndex left over from an earlier call.
    ex.lastIndex = 0;
    while ((m = ex.exec(str)) !== null) {
        matches.push(m[i]);
        // An empty match does not move lastIndex; step over it manually.
        if (m[0] === '') {
            ex.lastIndex++;
        }
    }
    return matches;
};



[Function: accumulateMarkdown]



This is a complex question to answer.  Perhaps using IBM Watson language alchemy?

How to find functions in jupyter notebooks?



In [None]:
// TODO: interpret markdown leading up to code results and find the resulting function in the list
// for now, the boring solution is to assume all markdown output is a question?
// How to store code markdown results for later use?
if (typeof cellCache === 'undefined') {
    var cellCache = [];
}
if (typeof cacheIds === 'undefined') {
    var cacheIds = {};
}

/**
 * Store the questions found in a notebook's cells for later lookup.
 *
 * Every code cell (as grouped by `accumulateMarkdown`) gets the id
 * `<basename>[<index>]`. Its descriptor is stored in `cacheIds` under that id,
 * and one `{id, question}` entry per question line is pushed onto `cellCache`.
 * Entries left over from an earlier import of the same notebook are removed first.
 *
 * @param {Array} cells - notebook cells, passed to `accumulateMarkdown`
 * @param {string} notebook - notebook path; its base name prefixes the ids
 * @returns {Array<{id: string, question: string}>} the shared `cellCache` array
 */
var cacheCells = ((cells, notebook) => {
    var filename = path.basename(notebook);
    var prefix = filename + '[';

    // update cells when files change
    // Walk backwards and splice in place: removing items while iterating
    // forwards skips the element after each removal, and the array object
    // itself must survive because other code may hold a reference to it.
    // Match on the `<filename>[` prefix so a notebook whose name merely
    // contains this filename is left alone.
    for (var k = cellCache.length - 1; k >= 0; k--) {
        if (cellCache[k].id.startsWith(prefix)) {
            cellCache.splice(k, 1);
        }
    }
    // Forget descriptors of cells that may no longer exist in the notebook.
    Object.keys(cacheIds)
        .filter((id) => id.startsWith(prefix))
        .forEach((id) => {
            delete cacheIds[id];
        });

    var toQuestion = (line) => line.replace(/how to|\?/ig, '').trim();
    accumulateMarkdown(cells).forEach((c, i) => {
        var id = prefix + i + ']';
        // Join with newlines so the last line of one markdown cell cannot
        // merge with the first line of the next one.
        var questions = regexToArray(re, c.markdown.join('\n'))
            .map(toQuestion)
            // NOTE(review): cells built by accumulateMarkdown carry `code`,
            // not `source`, so this second scan currently finds nothing -
            // confirm whether scanning `c.code` was intended.
            .concat(regexToArray(re, c.source).map(toQuestion));
        cacheIds[id] = Object.assign({}, c, {
            id: id,
            filename: notebook,
            questions: questions,
            notebook: filename,
        });
        questions.forEach((q) => cellCache.push({
            id: id,
            question: q
        }));
    });
    return cellCache;
});

How to interpret a jupyter {directory}?

How to display interpreted results in markdown?



In [11]:
// How to represent search results in markdown?
/**
 * Render one list of matching cell ids as markdown.
 *
 * Produces a "<n> match(es) found" line listing the ids, followed by the
 * markdown and the fenced code of the first match when there is one.
 *
 * @param {string[]} res - matching cell ids
 * @returns {string} markdown text
 */
var resultMarkdown = (res) => {
    var summary = '\n\n\n' + res.length + ' match'
        + (res.length !== 1 ? 'es' : '')
        + ' found: ' + res.join(' , ') + '\n\n\n';
    var first = res.length > 0 ? cacheIds[res[0]] : undefined;
    // An id that is not in cacheIds has nothing to show; keep the summary
    // instead of throwing on the missing descriptor.
    if (typeof first === 'undefined') {
        return summary;
    }
    return summary
        + '\n\n\n' + first.markdown.join('\n') + '\n\n\n'
        + '```\n\n\n' + first.code + '\n\n\n```\n\n\n';
};

/**
 * Render search results as markdown.
 *
 * @param {Array} results - either a flat list of cell ids, or a list of such
 *     lists (detected by the first element being defined and not a string)
 * @returns {string} markdown for the list, or for every inner list concatenated
 */
var interpretMarkdown = (results) =>
    (typeof results[0] !== 'undefined' && typeof results[0] !== 'string'
        ? results.reduce((str, res) => str + resultMarkdown(res), '')
        : resultMarkdown(results));
module.exports = interpretMarkdown;

[Function: interpretMarkdown]

In [10]:
var getCells = require('../Core').getCells;

/**
 * Attach the current code of a cached cell as `cache.fresher`.
 *
 * Re-reads the cells of `cache.filename`, takes the slice
 * `cache.from`..`cache.to` and stores the code of the first cell that
 * `accumulateMarkdown` returns for it. This is best effort: when anything in
 * that chain fails, the descriptor is returned without a `fresher` value.
 *
 * @param {Object|undefined} cache - cached cell descriptor (mutated in place)
 * @returns {Object|undefined} the same `cache` value that was passed in
 */
var getFresher = (cache) => {
    if (typeof cache === 'undefined') {
        return cache;
    }
    try {
        const cells = getCells(cache.filename, ['*', 'markdown', 'code'])
            .slice(cache.from, cache.to);
        cache.fresher = accumulateMarkdown(cells)[0].code;
    } catch (err) {
        // Deliberately ignored: callers always get the descriptor back, with
        // or without refreshed code. (Replaces a `return` inside `finally`,
        // which discarded the error just as silently but less visibly.)
    }
    return cache;
};

/**
 * Turn cell ids into their cached descriptors, refreshed through `getFresher`.
 *
 * @param {Array} results - either a flat list of cell ids, or a list of such
 *     lists (detected by the first element being defined and not a string)
 * @returns {Array} the same shape as `results`, with every id replaced by
 *     `getFresher(cacheIds[id])`
 */
var interpretObject = (results) => {
    const describeCell = (id) => getFresher(cacheIds[id]);
    const first = results[0];
    const isNested = typeof first !== 'undefined' && typeof first !== 'string';
    if (isNested) {
        return results.map((group) => group.map((id) => describeCell(id)));
    }
    return results.map((id) => describeCell(id));
};
module.exports = interpretObject;


[Function: interpretObject]

How to test the interpreter works?


In [None]:

// do nothing in cell run mode, describe is injected by test runner.ipynb
// The fallback below ignores its arguments, so the suite callback (and every
// `it` inside it) never runs unless a real `describe` is already defined.
if(typeof describe === 'undefined') {
    var describe = (() => {});
}

// Placeholder suite: the test names list the intended behaviour, but the
// bodies are still empty.
describe('importer search service', () => {
    it('should require a codebase using nothing but search terms', () => {
        // TODO: not implemented
    })
    
    it('should match entire notebooks', () => {
        // TODO: not implemented
    })
    
    it('should match code cells by numeric index', () => {
        // TODO: not implemented
    })
    
    it('should return the full cell descriptor from a list of IDs', () => {
        // TODO: not implemented
    })
    
    it('should store searchable content for later lookup', () => {
        // Disabled draft of a cacheCells test, kept for reference.
        // NOTE(review): the sample cell throws `error` although its parameter
        // is named `err` - fix before re-enabling.
        /*
        var assert = require('assert');
        var testNotebook = 'test import.ipynb';
        var testCells = [
            {cell_type: 'code', source: []},
            {cell_type: 'markdown', source: ['how to test for errors line 2?']},
            {cell_type: 'code', source: ['(function (err) { throw error; })']}
        ];
        global[testNotebook + '[1]']
            = eval(testCells[testCells.length - 1].source.join('').trim());
        var markdownCache = cacheCells(testCells, testNotebook);
        assert(markdownCache[0].id == testNotebook + '[1]');
        */
    })
    
})

