
### import notebook


In [None]:

def import_notebook(query_str):
    """Search the Whoosh index and return a module built from the first hit.

    Args:
        query_str: Query string passed unchanged to ``Core.search_whoosh``.

    Returns:
        A new ``types.ModuleType`` named ``"imported_notebook"`` whose
        namespace was populated by executing the ``"code"`` entry of the
        first search result.

    Raises:
        ImportError: If the search returns no results.
    """
    # This cell has no module-level ``import types``, so bring it into scope
    # here; the Core lookup stays inside the function as in the other cells.
    import types

    from Core import search_whoosh

    results = search_whoosh(query_str)
    if not results:
        raise ImportError(f"No matching notebook found for query: {query_str}")

    module = types.ModuleType("imported_notebook")
    # NOTE(security): this executes whatever code the index returned for the
    # query; only use it with notebooks you trust.
    exec(results[0]["code"], module.__dict__)
    return module

# Export table for this cell: maps each public name to the object it names.
# NOTE(review): this is a dict rather than the conventional list of names —
# presumably read by the project's notebook loader; confirm before changing.
__all__ = {
  "import_notebook": import_notebook,
}



### accumulate markdown


In [None]:
import os
import json
import re

def get_questions(source, markdown):
    """Collect question-like lines from a cell's markdown and source text.

    Every markdown line containing ``?`` is kept; source lines containing
    ``?`` are kept only when they also contain "how to" (case-insensitive).
    Each kept line has "how to", ``?`` and runs of ``#`` removed and is
    stripped of surrounding whitespace.

    Returns:
        The cleaned lines ordered by length, followed by a repeat of the
        shortest one; ``['']`` when no line qualified.
    """
    question_line = re.compile(r'^.*\?.*$', re.IGNORECASE | re.MULTILINE)

    def _clean(line):
        # Drop the "how to" prefix words, question marks and heading markers.
        return re.sub(r'how to|\?|#+', '', line, flags=re.IGNORECASE).strip()

    found = []
    for line in question_line.findall(markdown):
        found.append(_clean(line))
    for line in question_line.findall(source):
        if 'how to' in line.lower():
            found.append(_clean(line))

    if not found:
        return ['']

    # sorted() is stable, so ties keep their discovery order.
    ordered = sorted(found, key=len)
    return ordered + [ordered[0]]

def accumulate_markdown(cells):
    """Pair every code cell with the text of the cells that precede it.

    Args:
        cells: Sequence of notebook cell dicts, each with a ``"cell_type"``
            and a ``"source"`` (list of strings or a string).

    Returns:
        One dict per code cell, in notebook order, with:
        ``"from"``  index of the first cell after the previous code cell
                    (0 for the first code cell),
        ``"to"``    index of the code cell itself,
        ``"markdown"`` sources of ``cells[from:to]`` joined with newlines,
        ``"code"``  the code cell's joined source.
    """
    result = []
    from_idx = 0

    # Track positions while iterating instead of calling cells.index(), which
    # compares by equality and returns the wrong position when two cells have
    # identical content (and makes the loop quadratic).
    for to_idx, cell in enumerate(cells):
        if cell["cell_type"] != "code":
            continue
        markdown = "\n".join("".join(m["source"]) for m in cells[from_idx:to_idx])
        code = "".join(cell["source"])
        result.append({"from": from_idx, "to": to_idx, "markdown": markdown, "code": code})
        from_idx = to_idx + 1

    return result

def cache_cells(filename):
    """Build one cache entry per code cell of a Jupyter notebook.

    Args:
        filename: Path to the notebook; it is made absolute before use.

    Returns:
        A list of dicts, one per entry produced by ``accumulate_markdown``.
        Each dict holds ``"id"`` (``"<basename>[<i>]"`` where ``i`` is the
        entry's position in that list), ``"filename"`` (absolute path),
        ``"mtime"`` (the file's modification time), ``"questions"`` (from
        ``get_questions``), ``"notebook"`` (basename) plus the entry's own
        ``"from"``, ``"to"``, ``"markdown"`` and ``"code"`` keys.
    """
    # Core is imported inside the function, as in the other cells of this file.
    from Core import get_cells

    filename = os.path.abspath(filename)
    mtime = os.path.getmtime(filename)
    notebook = os.path.basename(filename)

    cells = get_cells(filename)
    new_cache = accumulate_markdown(cells)

    return [{
        "id": f"{notebook}[{i}]",
        "filename": filename,
        "mtime": mtime,
        "questions": get_questions(c["code"], c["markdown"]),
        "notebook": notebook,
        **c,
    } for i, c in enumerate(new_cache)]


# Export table for this cell: maps each public name to the object it names.
# NOTE(review): this is a dict rather than the conventional list of names —
# presumably read by the project's notebook loader; confirm before changing.
__all__ = {
  "cache_cells": cache_cells,
  "accumulate_markdown": accumulate_markdown,
  "get_questions": get_questions,
}



### initialize database



In [None]:
import os
import json
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser, MultifieldParser

# Schema of the Whoosh index; every field is stored so that search hits can
# return its value.
schema = Schema(questions=TEXT(stored=True), filename=ID(stored=True), code=TEXT(stored=True))

# Open the index in "indexdir" (relative to the current working directory),
# creating the directory and/or the index when either is missing. Checking
# exists_in() rather than only the directory means a directory that exists but
# holds no index is initialised instead of making open_dir() fail.
from whoosh.index import exists_in, open_dir

os.makedirs("indexdir", exist_ok=True)
if exists_in("indexdir"):
    index = open_dir("indexdir")
else:
    index = create_in("indexdir", schema)

def store_in_whoosh(cells):
    """Add the given cells to the Whoosh index in a single commit.

    Args:
        cells: Iterable of dicts. Entries without a ``"code"`` key are
            skipped; the others must provide ``"questions"``, ``"filename"``
            and ``"code"``.

    Documents are only added, never replaced, so storing the same cells
    twice leaves two copies in the index.
    """
    # Used as a context manager, the writer commits when the block succeeds
    # and cancels when it raises, so a failing cell does not leave the index
    # write lock held.
    with index.writer() as writer:
        for cell in cells:
            if 'code' not in cell:
                continue
            writer.add_document(
                questions=cell["questions"],
                filename=cell["filename"],
                code=cell["code"],
            )

def search_whoosh(question):
    """Re-index the Core notebooks, then search the index for ``question``.

    The ``../Core`` directory next to this file is scanned on every call
    before searching. The query is parsed against the ``filename`` and
    ``questions`` fields and at most 10 hits are returned.

    Returns:
        A list of ``{"filename": ..., "code": ...}`` dicts, one per hit.
    """
    core_dir = os.path.join(os.path.dirname(__file__), '../Core')
    scan_directory(core_dir)

    query_parser = MultifieldParser(["filename", "questions"], schema=index.schema)
    parsed_query = query_parser.parse(question)

    hits = []
    with index.searcher() as searcher:
        # Copy the stored fields out while the searcher is still open.
        for hit in searcher.search(parsed_query, limit=10):
            hits.append({"filename": hit["filename"], "code": hit["code"]})
    return hits

def scan_directory(directory):
    """Recursively scan a directory for notebooks and index their cells.

    Every ``*.ipynb`` file under ``directory`` whose name does not start with
    a dot is passed to ``Core.cache_cells``; the collected entries are stored
    with ``store_in_whoosh`` and the number stored is printed.

    Args:
        directory: Root directory to walk.
    """
    # Core is imported inside the function, as in the other cells of this file.
    from Core import cache_cells

    all_cells = []

    for root, _, files in os.walk(directory):
        for file in files:
            # Skip hidden files even when they carry the notebook extension.
            if file.startswith(".") or not file.endswith(".ipynb"):
                continue
            all_cells.extend(cache_cells(os.path.join(root, file)))

    store_in_whoosh(all_cells)
    print(f"Stored {len(all_cells)} cells in Whoosh index.")


# Export table for this cell: maps each public name to the object it names.
# NOTE(review): this is a dict rather than the conventional list of names —
# presumably read by the project's notebook loader; confirm before changing.
__all__ = {
  "scan_directory": scan_directory,
  "search_whoosh": search_whoosh,
}




### get_cells(notebook_path)


In [None]:
import json
import os

def get_cells(notebook_path, types=('*', 'code')):
    """Load a Jupyter notebook and return its cells with extra metadata.

    Args:
        notebook_path: Path to the ``.ipynb`` file; made absolute before use.
        types: Cell types to keep. If it contains ``'*'`` (as the default
            does) every cell is kept; otherwise only cells whose
            ``cell_type`` is listed.

    Returns:
        A list of dicts, each a copy of the cell's own fields plus:
        ``"language"``  the cell's ``metadata.vscode.languageId``, else the
                        notebook's ``kernelspec.language``, else its
                        ``language_info.name``, else ``""``;
        ``"filename"``  the absolute notebook path;
        ``"id"``        ``"<basename>[<i>]"`` where ``i`` is the cell's
                        position in the notebook (counted before filtering).
    """
    notebook_path = os.path.abspath(notebook_path)

    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    # The notebook-level fallback is the same for every cell, so resolve it once.
    metadata = notebook.get('metadata', {})
    notebook_language = (metadata.get('kernelspec', {}).get("language") or
                         metadata.get("language_info", {}).get("name", ""))
    notebook_name = os.path.basename(notebook_path)
    include_all = '*' in types

    return [
        {
            **cell,
            "language": (cell.get("metadata", {}).get("vscode", {}).get("languageId") or
                         notebook_language),
            "filename": notebook_path,
            "id": f"{notebook_name}[{i}]",
        }
        for i, cell in enumerate(notebook.get("cells", []))
        if include_all or cell.get("cell_type") in types
    ]

# Export table for this cell: maps each public name to the object it names.
# NOTE(review): this is a dict rather than the conventional list of names —
# presumably read by the project's notebook loader; confirm before changing.
__all__ = {
  "get_cells": get_cells
}



### run()

run python cells?



In [None]:
import json
import os
import sys
import types

def _map_cli_args(params, inputs):
    """Assign CLI arguments to parameters: ``--name=value`` by name, the rest by position."""
    named = {}
    positional = []
    for arg in inputs:
        name, sep, value = arg[2:].partition("=") if arg.startswith("--") else ("", "", "")
        # Only the first ``--name=`` for a known parameter counts as named;
        # partition keeps any further "=" inside the value.
        if sep and name in params and name not in named:
            named[name] = value
        else:
            positional.append(arg)

    remaining = iter(positional)
    # Parameters without a named value take the next positional, or None.
    return [named[p] if p in named else next(remaining, None) for p in params]


def _coerce_args(func, params, values):
    """Convert values with the parameters' annotations where that is possible."""
    annotations = getattr(func, "__annotations__", {})
    coerced = list(values)
    for i, param in enumerate(params):
        converter = annotations.get(param)
        # Missing arguments stay None, and non-callable annotations (for
        # example string annotations) cannot be used as converters.
        if coerced[i] is not None and callable(converter):
            coerced[i] = converter(coerced[i])
    return coerced


def run():
    """Run the first callable of a notebook cell with command-line arguments.

    Reads ``sys.argv``: the first argument is the query handed to
    ``Core.import_notebook``, the remaining ones are the callable's
    arguments, given positionally or as ``--<param>=<value>``. Values are
    converted with the parameter annotations when those are callable, and
    the call's result is printed.

    Exits with status 1 after printing a message when fewer than two
    arguments are given or the imported module contains no callable.
    """
    # This cell has no module-level ``import inspect``; Core is imported
    # inside the function as in the other cells of this file.
    import inspect

    from Core import import_notebook

    if len(sys.argv) < 3:
        print("Usage: python script.py <notebook_path> <function_args>")
        sys.exit(1)

    notebook_path = sys.argv[1]
    inputs = sys.argv[2:]

    # Import the notebook as a module
    module = import_notebook(notebook_path)

    # dir() is sorted by name, so this picks the alphabetically first callable.
    func = next(
        (attr for attr in (getattr(module, name) for name in dir(module)) if callable(attr)),
        None,
    )
    if func is None:
        print("No function found in the notebook.")
        sys.exit(1)

    # The call below is positional, so only positional parameters are mapped.
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    params = [
        name
        for name, param in inspect.signature(func).parameters.items()
        if param.kind in positional_kinds
    ]

    mapped_inputs = _coerce_args(func, params, _map_cli_args(params, inputs))

    # Execute the function
    result = func(*mapped_inputs)
    print(result)

# NOTE(review): Python itself sets __name__ to "__main__" for a script that is
# run directly, never to "__run__", so this guard only fires if whatever loads
# this cell sets __name__ to "__run__" — confirm that is intended.
if __name__ == "__run__":
    run()

# Export table for this cell: maps each public name to the object it names.
# NOTE(review): this is a dict rather than the conventional list of names —
# presumably read by the project's notebook loader; confirm before changing.
__all__ = {
  "run": run
}
