In [21]:
from html.parser import HTMLParser
from math import ceil, log
from os.path import basename, splitext
import re
from urllib.parse import quote_plus, quote
from nbformat import read
from nbconvert import MarkdownExporter
from nbconvert.writers import FilesWriter
from nbconvert.nbconvertapp import NbConvertApp
from nbconvert.preprocessors import Preprocessor
from bs4 import BeautifulSoup

In [22]:
# Notebook to convert (path relative to the working directory).
notebook = './1-MonteCarlo-Resource-Assessments-part1.ipynb'

# Directory that receives the converted output.
output_dir = "/Users/dom/Downloads/nbconvert_output"

# File name of the notebook without its directory and extension.
notebook_name = splitext(
    basename(notebook)
)[0]

In [23]:
# Parse the notebook file as an nbformat version-4 notebook.
with open(notebook, 'r', encoding='utf-8') as f:
    nb = read(f, as_version=4)

# Build the output base name from the notebook metadata as
# "<date>-<title>", URL-quoted with '/' left untouched.
try:
    output_name = quote_plus(
        nb.metadata.date + '-' + nb.metadata.title, safe='/'
    )
except (AttributeError, TypeError):
    # AttributeError: the metadata has no `date` or `title` entry.
    # TypeError: one of them is not a string and cannot be concatenated.
    # Either way fall back to an empty name instead of aborting.
    output_name = ''

In [24]:
def format_number(a, digits=3, e_lim=1e6):
    """Format a number as a string with ',' as thousands separator.

    The number of decimals is chosen from the magnitude of ``a``:

    * NaN is returned as ``str(a)`` (``'nan'`` for a float) because it
      has no magnitude to derive a precision from.
    * ``a == 0`` is rendered without decimals.
    * ``abs(a) >= e_lim`` is rendered in scientific notation with one
      decimal.
    * ``1 <= abs(a) < e_lim`` is rendered in fixed-point notation with
      ``max(0, digits - ceil(log10(abs(a)) - 1))`` decimals, so larger
      numbers get fewer decimals.
    * ``abs(a) < 1`` is rendered in fixed-point notation with
      ``digits - ceil(log10(abs(a)))`` decimals, so leading zeros after
      the decimal point do not consume the requested digits.

    Parameters
    ----------
    a : int or float
        Value to format.
    digits : int, optional
        Number of digits to preserve (see the rules above).
    e_lim : float, optional
        Absolute value from which scientific notation is used.

    Returns
    -------
    str
        The formatted number.
    """
    # NaN is the only value that differs from itself. Without this guard
    # it would reach ceil(log(nan)) below, which raises ValueError.
    if a != a:
        return str(a)

    if a == 0:
        fmt = '.0f'
    elif abs(a) >= e_lim:
        fmt = '.1e'
    elif abs(a) >= 1:
        digits = max(0, digits - ceil(log(abs(a), 10) - 1))
        fmt = '.{}f'.format(digits)
    else:
        fmt = '.{}f'.format(-ceil(log(abs(a), 10)) + digits)

    # The nested replacement field injects the precision/type computed above.
    return '{:,{}}'.format(a, fmt)



class CustomPreprocess(Preprocessor):
    """Preprocessor that adapts notebook cells for the markdown blog export.

    Cells with a truthy ``exclude`` metadata entry and cells with an empty
    source are dropped. Markdown cells get their level-1 headings removed,
    display equations wrapped in ``<div class="equation">`` markup, image
    sources redirected to the post's asset folder and the "Figure N: "
    prefix stripped from figure captions. Numeric ``<td>`` entries in the
    HTML outputs of code cells are reformatted with ``format_number``.

    The original ``src`` of every rewritten image is appended to the
    module-level ``files`` list.
    """

    def preprocess(self, nb, resources):
        """Apply the clean-up to every cell of ``nb``.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted; it is modified in place.
        resources : dict
            Resources passed along by nbconvert; returned unchanged.

        Returns
        -------
        tuple
            ``(nb, resources)``.
        """
        global files
        # Empty the shared list in place rather than rebinding the name,
        # so an object that already holds a reference to this list (for
        # example after `md_writer.files = files`) sees the collected
        # paths. Create the list only when it does not exist yet.
        try:
            files.clear()
        except NameError:
            files = []

        # Markup wrapped around display equations.
        pre = """<div class="equation">\n\t<div>"""
        post = """</div>\n<div class="equation_dots"></div>\n"""
        post += "<div></div>\n"
        post += "</div>\n"

        # URL prefix under which the post's images are published.
        img_pre = '{{ site.url }}' + 'assets/images/posts/'
        img_pre += quote(output_name) + '_files/'

        kept = []
        for cell in nb.cells:
            # Skip cells explicitly flagged for exclusion and empty cells.
            if cell.get('metadata', {}).get('exclude'):
                continue
            if cell.source == '':
                continue
            kept.append(cell)

            if cell.cell_type == 'markdown':
                cell.source = self._rewrite_markdown(
                    cell.source, pre, post, img_pre, files
                )
            elif cell.cell_type == 'code':
                self._format_output_tables(cell)

        # Replace the contents of the existing list object in one step.
        nb.cells[:] = kept

        # Record the notebook name unless the metadata already carries one.
        if 'notebook' not in nb.metadata:
            nb.metadata.notebook = notebook_name

        return nb, resources

    def _rewrite_markdown(self, source, pre, post, img_pre, collected):
        """Return the cleaned-up source of one markdown cell.

        ``collected`` receives the original ``src`` of every image whose
        source is rewritten.
        """
        # Drop level-1 headings together with the blank line after them.
        s = re.sub(r"^# .+\n\n", '', source, flags=re.MULTILINE)
        # A doubled backslash before '[' or ']' delimits a display
        # equation: wrap it in the equation markup, keeping one backslash.
        s = re.sub(r'\\\\\[', pre + r'\\[', s)
        s = re.sub(r'\\\\\]', r'\\]' + post, s)

        soup = BeautifulSoup(s, 'html.parser')

        for img in soup.find_all('img'):
            src = img.get('src')
            if src is None:
                # Nothing to redirect for an image without a source.
                continue
            collected.append(src)
            img['src'] = img_pre + basename(src)
            del img['width']
            img['class'] = 'scaled'

        # Links that wrap an image point at the (rewritten) image itself.
        for a in soup.find_all('a'):
            inner = a.img
            if inner is not None and inner.has_attr('src'):
                a['href'] = inner['src']

        # Strip the "Figure N: " prefix from captions.
        for caption in soup.find_all('figcaption'):
            caption.string = re.sub(r'^Figure \d+: ', '', caption.get_text())

        # replace > for md quotes
        return re.sub('^&gt; ', '> ', str(soup), flags=re.MULTILINE)

    def _format_output_tables(self, cell):
        """Reformat numeric ``<td>`` entries in a code cell's HTML outputs."""
        for output in cell.outputs:
            html = output.get('data', {}).get('text/html')
            if not isinstance(html, str):
                # Output without an HTML representation: leave untouched.
                continue

            soup = BeautifulSoup(html, 'html.parser')
            for td in soup.find_all('td'):
                try:
                    val = float(td.get_text())
                except ValueError:
                    # Non-numeric cell text stays as it is.
                    continue
                if val != val:
                    # NaN parses as a float but has no magnitude to
                    # format; keep the cell's original text.
                    continue
                td.string = format_number(val, 3)
            output.data['text/html'] = str(soup)

In [25]:
# Markdown exporter rendering through the local Jinja template, with
# CustomPreprocess registered as a preprocessor.
# Note: a named template could be selected instead of a template file
# with template_name="blog-markdown".
md_exporter = MarkdownExporter(template_file='./index.md.j2',
                               preprocessors=[CustomPreprocess])

In [26]:
# Writer whose build directory is `output_dir`.
md_writer = FilesWriter(build_directory=output_dir)


In [27]:
# Module-level list that CustomPreprocess fills with image paths.
files = []

# Conversion app using `output_name` as output base, wired to the
# exporter and writer configured above.
app = NbConvertApp(output_base=output_name)
app.exporter = md_exporter
app.writer = md_writer

In [28]:
# Hand the module-level `files` list to the writer before converting.
# NOTE(review): presumably so the writer copies the referenced images next
# to the output — confirm against the FilesWriter documentation.
md_writer.files = files
# Run the conversion of the configured notebook.
app.convert_single_notebook(notebook)

[NbConvertApp] Converting notebook ./1-MonteCarlo-Resource-Assessments-part1.ipynb to 


In [29]:
# Display the image paths collected in the module-level `files` list.
files

['./assets/1-sensitivity.png']