In [1]:
import nbformat as nbf
from copy import deepcopy
import numpy as np
import json
from glob import glob
import os.path as op
import os
from datetime import datetime

# Load the template notebook once; its cells are copied for every converted file.
# NO_CONVERT keeps the template in whatever nbformat version it was saved as.
master_ntbk = nbf.read('./template_notebook.ipynb', as_version=nbf.NO_CONVERT)

In [2]:
def lines_to_sections(lines):
    """Split the lines of an Rmd file into front matter, text and R chunks.

    Parameters
    ----------
    lines : list of str
        Lines of the file, each keeping its trailing newline (the form
        returned by ``readlines``). The list is not modified.

    Returns
    -------
    sections : list of list of str
        ``sections[0]`` is the front matter: the lines from the first
        ``---`` line through the second one (inclusive), searched only in
        the text that precedes the first R chunk. It is empty when fewer
        than two such lines are found.
        ``sections[1]`` is the text between the front matter and the first
        R chunk (possibly empty).
        The remaining entries follow in file order: every R chunk, including
        its opening and closing fence lines, and every non-empty run of text
        between or after chunks.
    """
    in_chunk = False
    start = 0      # index of the first line of the section being collected
    head = None    # everything before the first R chunk (front matter + intro)
    body = []      # chunks and text sections that come after `head`
    for ii, line in enumerate(lines):
        if in_chunk:
            if line.startswith('```'):
                # Closing fence: the chunk keeps both of its fence lines.
                body.append(lines[start: ii + 1])
                in_chunk = False
                start = ii + 1
        elif line.startswith('```{r'):
            text = lines[start:ii]
            if head is None:
                head = text
            elif text:
                # Back-to-back chunks have no text between them; skip the gap
                # rather than emitting an empty section.
                body.append(text)
            in_chunk = True
            start = ii

    # Whatever is left is either text after the last chunk or a chunk whose
    # closing fence is missing; keep it instead of silently dropping it.
    tail = lines[start:]
    if head is None:
        # No R chunks at all: the whole file is the head.
        head = tail
    elif tail:
        body.append(tail)

    # Split apart the metadata and first section
    ix_meta = [jj for jj, line in enumerate(head) if line.startswith('---\n')]
    if len(ix_meta) >= 2:
        meta = head[ix_meta[0]: ix_meta[1] + 1]
        first = head[ix_meta[1] + 1:]
    else:
        # No complete front-matter block: treat all of the head as text.
        meta, first = [], head
    return [meta, first] + body

def sections_to_notebook(sections):
    """Build a notebook holding one cell per section, based on the template.

    Parameters
    ----------
    sections : list of list of str
        Groups of lines. A group whose first line starts with ```` ```{r ````
        becomes a copy of the template's second cell; any other group,
        including an empty one, becomes a copy of the template's first cell.

    Returns
    -------
    ntbk : deep copy of `master_ntbk`
        Its cells are replaced by the newly built ones, in the order of
        `sections`. `master_ntbk` itself is left untouched.
    """
    ntbk = deepcopy(master_ntbk)
    ntbk.cells = []
    for section in sections:
        # An empty section has no first line to inspect; treat it as text so
        # that cell positions still line up with `sections`.
        is_chunk = bool(section) and section[0].startswith('```{r')
        # NOTE(review): assumes the template's cell 0 is a markdown cell and
        # cell 1 is a code cell -- confirm against template_notebook.ipynb.
        this_cell = deepcopy(master_ntbk['cells'][1 if is_chunk else 0])
        # When the lines still carry their trailing newline, this join puts a
        # blank line between them; fix_metadata splits the first cell on
        # '\n\n', so the separator is kept as is.
        this_cell['source'] = '\n'.join(section)
        ntbk['cells'].append(this_cell)
    return ntbk

def fix_metadata(notebook, custom_cells=None):
    """Modify the metadata of an Rmd header so it works with python.

    Parameters
    ----------
    notebook : instance of nbf NotebookNode.
        The source of its first cell is read as header entries separated by
        blank lines (the form produced by joining newline-terminated lines
        with ``'\\n'``).
    custom_cells : dictionary | None
        Maps a header key to the text that should replace that key's value.
        A header line is matched when the text before its first ``:`` equals
        the key exactly. Only string values are applied. If None, the value
        of ``modified`` is replaced by a Python date-formatting expression,
        which is inserted as text and not evaluated here.

    Returns
    -------
    notebook : the same object that was passed in
        Its first cell's source is rewritten in place with the entries
        joined by single newlines.
    """
    if custom_cells is None:
        custom_cells = {'modified': "'{:%Y-%m-%d}'.format(datetime.now())"}
    meta = notebook['cells'][0]['source']
    lines = meta.split('\n\n')
    for ii, line in enumerate(lines):
        # Split on the first colon only, so values that themselves contain
        # ': ' are replaced as a whole and 'key:value' keeps its key.
        name, colon, _ = line.partition(':')
        key = name.rstrip()
        if not colon or key not in custom_cells:
            continue

        value = custom_cells[key]
        if isinstance(value, str):
            line = name + ': ' + value
        lines[ii] = line.strip()
    notebook['cells'][0]['source'] = '\n'.join(lines)
    return notebook

In [10]:
# Convert every Rmd file in the lesson folder to a notebook plus a metadata file.
# Sorting makes the processing order the same on every run.
rfiles = sorted(glob('../../_posts/python/earth-analytics/week-5/in-class/*.Rmd'))
for ifile in rfiles:
    path, filename = op.split(ifile)
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(ifile, 'r', encoding='utf-8') as ff:
        lines = ff.readlines()
    # Break up by ```r sections
    sections = lines_to_sections(lines)

    # Create a notebook with these sections
    notebook = sections_to_notebook(sections)

    # Rewrite the header entries held in the first cell
    notebook = fix_metadata(notebook)

    # Save in a `python_templates` folder next to the source file.
    # splitext drops only the final extension, so dots elsewhere in the
    # file name are preserved.
    newfilename = op.splitext(filename)[0] + '.ipynb'
    folderpath = op.join(path, 'python_templates')
    os.makedirs(folderpath, exist_ok=True)

    # Write the metadata file (the first cell's text) as `<name>.ipynb-meta`
    with open(op.join(folderpath, newfilename + '-meta'), 'w', encoding='utf-8') as ff:
        ff.write(notebook['cells'][0]['source'])

    # Now remove the first cell and write the rest
    notebook['cells'].pop(0)
    nbf.write(notebook, op.join(folderpath, newfilename))