In [None]:
import os
import subprocess

import nbformat
from nbconvert import HTMLExporter
from nbconvert.preprocessors import ClearOutputPreprocessor, ExecutePreprocessor
from nbconvert.preprocessors import CellExecutionError

# Sketching out notebook testing and cleaning

## Get filenames of notebooks in the git repo

* Calling `git ls-files` through `subprocess` returns only the files/notebooks that git tracks (i.e. those that have been staged or committed).

In [None]:
def save_html(nb, notebook_filename, html_directory):
    """Export a notebook node to HTML using the 'classic' template.

    The HTML file is written to ``html_directory`` joined with
    ``notebook_filename`` whose extension has been swapped for ``.html``.
    Missing parent directories of the destination are created.

    Parameters
    ----------
    nb : notebook node
        The notebook to export; passed to ``HTMLExporter.from_notebook_node``.
    notebook_filename : str
        Path of the source notebook; used only to derive the output name.
    html_directory : str
        Directory under which the HTML file is written.
    """
    html_exporter = HTMLExporter()
    html_exporter.template_name = 'classic'

    # Swap only the file extension; str.replace would also rewrite any
    # '.ipynb' that happens to appear earlier in the path.
    html_filename = os.path.splitext(notebook_filename)[0] + '.html'
    html_destination = os.path.join(html_directory, html_filename)

    # Render before opening the file so a failed export does not leave an
    # empty file behind.
    html_out, _resources = html_exporter.from_notebook_node(nb)

    parent_directory = os.path.dirname(html_destination)
    if parent_directory:
        os.makedirs(parent_directory, exist_ok=True)

    # Explicit UTF-8 so non-ASCII notebook content does not depend on the
    # platform's default encoding.
    with open(html_destination, 'w', encoding='utf-8') as f:
        f.write(html_out)

In [None]:
def get_tags_flat(nb):
    """Return all cell tags of a notebook as one flat list.

    Walks ``nb['cells']`` in order, looks up ``'tags'`` in each cell's
    ``'metadata'``, and concatenates every non-empty tag collection.
    Cells without tags (or with an empty tag list) contribute nothing.
    """
    tags_flat = []

    for cell in nb['cells']:
        cell_tags = cell['metadata'].get('tags')
        if cell_tags:
            tags_flat.extend(cell_tags)

    return tags_flat


def process_notebook(notebook_filename, html_directory = 'notebook-html'):
    """Execute a notebook, optionally export it to HTML, then strip its outputs.

    Steps:
      1. Read the notebook (as nbformat version 4).
      2. Execute it with ``ExecutePreprocessor`` (600 s timeout, ``python3``
         kernel).
      3. Whether or not execution succeeded: if any cell carries the
         ``save_html`` tag, export the notebook to ``html_directory``; then
         clear all outputs and overwrite ``notebook_filename`` in place.

    Parameters
    ----------
    notebook_filename : str
        Path to the ``.ipynb`` file. The file is overwritten with its
        outputs cleared.
    html_directory : str
        Directory passed to ``save_html`` for tagged notebooks.

    Raises
    ------
    CellExecutionError
        Propagated from the execution step when a cell fails. The export and
        clean-up steps still run before the exception leaves this function.
    """
    # Read as UTF-8 to match the encoding used when writing the file back.
    with open(notebook_filename, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
    clear = ClearOutputPreprocessor()

    try:
        # Check that the notebook runs
        ep.preprocess(nb, {'metadata': {'path': ''}})
    finally:
        # Runs on success and on failure; any execution error continues to
        # propagate once this clean-up has finished.
        tags_flat = get_tags_flat(nb)

        if 'save_html' in tags_flat:
            save_html(nb, notebook_filename, html_directory)

        # Clear notebook outputs and save as .ipynb
        clear.preprocess(nb, {})
        with open(notebook_filename, mode='w', encoding='utf-8') as f:
            nbformat.write(nb, f)

    # Only reached when execution succeeded.
    print(f"Processed {notebook_filename}")
    

        
def process_all_notebooks(remove_fail_test=True):
    """Run ``process_notebook`` on every ``.ipynb`` file listed by ``git ls-files``.

    Parameters
    ----------
    remove_fail_test : bool
        When true, the notebook named ``'notebook-fails.ipynb'`` is skipped.
        It is not an error if that notebook is not in the listing.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``git ls-files`` command exits with a non-zero status.
    """
    # Get all files included in the git repository.
    # -z emits NUL-separated, unquoted paths, so unusual file names are
    # returned verbatim; the argument list avoids going through a shell.
    git_output = subprocess.check_output(["git", "ls-files", "-z"]).decode('utf-8')
    git_files = git_output.split('\0')

    # Get just the notebooks from the git files (de-duplicated, order kept)
    notebooks = list(dict.fromkeys(fn for fn in git_files if fn.endswith(".ipynb")))

    # Remove the notebook that's supposed to fail
    if remove_fail_test:
        notebooks = [fn for fn in notebooks if fn != 'notebook-fails.ipynb']

    # Test each notebook, save it to HTML with outputs, and clear the outputs from the .ipynb file
    for notebook in notebooks:
        process_notebook(notebook)

In [None]:
def get_tags_flat(nb):
    """Flatten the per-cell ``tags`` metadata of a notebook into a single list.

    Cells whose metadata has no ``'tags'`` entry, or an empty one, are skipped.
    """
    # NOTE(review): this redefines the identical get_tags_flat that appears
    # earlier in this file; the later definition shadows the earlier one.
    return [
        tag
        for cell in nb['cells']
        for tag in (cell['metadata'].get('tags') or ())
    ]
