# Convert Jupyter Folder to epub

Pressbooks uses EPUB v2.0 files, while my Mac uses v3.0 files. Use the `pressbook` flag to switch between the two file types. For more detailed information, see:

https://en.wikipedia.org/wiki/EPUB#Open_Container_Format_2.0.1



[MSU OER website](https://openbooks.lib.msu.edu/)

In [1]:
from ebooklib import epub

# Output-format switch: when True the book is laid out for Pressbooks
# (content folder renamed to 'OEBPS' below, and chapter file names / spine
# entries are built differently in the conversion loop).
pressbook = True

# Folder of notebooks to convert; uncomment one of the alternatives to switch.
#infolder = 'CMSE401_Examples'
infolder = 'Examples'
#infolder = "MTH314_STUDENT"


book = epub.EpubBook()

# Name of the .epub file written at the end of the notebook.
ebookfilename = 'mth314.epub'

# set metadata
# (the identifier previously ended in a stray double quote: 'id123456"')
book.set_identifier('id123456')
book.set_title('Linear Algebra with Computational Applications')
book.set_language('en')

book.add_author('Dirk Colbry')
book.add_author('Ming Yan', uid='coauthor')

if pressbook:
    # Store the content under 'OEBPS' instead of the library's default folder.
    book.FOLDER_NAME = 'OEBPS'

In [2]:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from pathlib import Path
from traitlets.config import Config
import os
from bs4 import BeautifulSoup
from nbconvert import HTMLExporter

In [3]:
import re
# Inline ($...$) or display ($$...$$) math whose delimiters are not escaped
# with a backslash.  Group 1 captures the LaTeX between the delimiters.
_LATEX_PATTERN = re.compile(r'(?<!\\)\$\$?([^\$]*)(?<!\\)\$\$?')


def fixLaTeXTag(text):
    """Convert dollar-delimited LaTeX in *text* to Pressbooks shortcodes.

    Every ``$...$`` or ``$$...$$`` span whose delimiters are not preceded by
    a backslash is rewritten as ``[latex]...[/latex]``.  Text without such a
    span is returned unchanged.

    Parameters
    ----------
    text : str
        Markdown/HTML text taken from a notebook cell.

    Returns
    -------
    str
        The text with each math span wrapped in ``[latex]``/``[/latex]``.

    Notes
    -----
    The closing delimiter must be a dollar sign.  The earlier pattern used
    the character class ``[\\$|\\$\\$]``, which also accepted a literal ``|``
    as a closer, so ``"$5 | tax"`` was mistaken for math and lost its pipe.
    The content is taken from the capture group rather than by slicing a
    fixed number of characters, so unbalanced delimiters such as ``$$x$``
    no longer drop the formula.
    """
    # A callable replacement inserts the captured LaTeX verbatim, so the
    # backslashes in the formula are never interpreted as template escapes.
    return _LATEX_PATTERN.sub(
        lambda match: '[latex]' + match.group(1) + '[/latex]', text
    )

In [4]:
def ipynb2html(text):
    """Execute a notebook supplied as JSON text and export it to HTML.

    Parameters
    ----------
    text : str
        Contents of an ``.ipynb`` file.

    Returns
    -------
    tuple
        ``(body, resources)`` exactly as produced by
        ``HTMLExporter.from_notebook_node``.
    """
    # Parse the raw text as a version-4 notebook.
    notebook = nbformat.reads(text, as_version=4)

    # Run the notebook in a python3 kernel before exporting.  allow_errors=True
    # is passed so that (per nbconvert) a failing cell should not abort the run.
    executor = ExecutePreprocessor(timeout=600, kernel_name='python3', allow_errors=True)
    executor.preprocess(notebook)

    # Export with the `basic` template.
    exporter = HTMLExporter()
    exporter.template_file = 'basic'

    html, extras = exporter.from_notebook_node(notebook)
    return (html, extras)


In [5]:
def cleanhtml(body):
    """Post-process exported notebook HTML for import into Pressbooks.

    Steps, in order:

    1. Strip every ``&#182;`` entity (the pilcrow character).
    2. For each rendered-markdown ``div``, re-parse its prettified markup,
       first passing it through ``fixLaTeXTag`` when the module-level
       ``pressbook`` flag is true.
    3. Re-serialise and re-parse the whole document.
    4. For the parent of each ``<code>`` tag, swap ``<code>``/``</code>`` for
       ``[code]``/``[/code]``.
    5. For the parent of each ``<a>`` tag whose markup contains ``.ipynb``,
       remove that substring and print the rewritten markup.
    6. Serialise with ``prettify()`` and replace every newline with a space.

    Parameters
    ----------
    body : str
        HTML text to clean.

    Returns
    -------
    str
        The cleaned HTML on a single line.

    NOTE(review): the rewrites are applied by assigning a ``BeautifulSoup``
    object to ``Tag.contents``; whether that assignment is reflected in the
    final ``prettify()`` output presumably depends on the installed bs4
    version -- confirm before refactoring.
    """
    # Remove weird paragraph marks
    body = body.replace(r'&#182;','')
    
    # NOTE(review): no parser is named here, so bs4 chooses one -- confirm
    # the choice is stable across the machines this runs on.
    tree = BeautifulSoup(body)
    # Rendered markdown cells: convert LaTeX delimiters (Pressbooks mode only).
    for div in tree.find_all('div', class_='text_cell_render border-box-sizing rendered_html'):
        contents = div.prettify()
        if contents:
            if pressbook:
                contents = fixLaTeXTag(contents)
            div.contents = BeautifulSoup(contents)     
    body = tree.prettify()
    
    # Parse the serialised result again before the tag rewrites below.
    tree = BeautifulSoup(body)
    
    #Find all code tags and replace with pressbook format.
    for code in tree.find_all('code'):
        # The replacement runs on the parent's markup, not on the tag itself.
        parent = code.parent
        contents = parent.prettify()
        if contents:
            contents = contents.replace(r'<code>',r'[code]')
            contents = contents.replace(r'</code>',r'[/code]')
            parent.contents = BeautifulSoup(contents)  
    # Links: drop '.ipynb' so references to other notebooks lose the extension.
    # (The loop variable is still called `code`, but it holds an <a> tag here.)
    for code in tree.find_all('a'):
        parent = code.parent
        contents = parent.prettify()
        if contents:
            if r'.ipynb' in contents:
                # Plain text replace over the parent's whole markup, so link
                # text and any other occurrence are changed along with href.
                contents = contents.replace(r'.ipynb',r'')
                print(f'fixing links {contents}')
                parent.contents = BeautifulSoup(contents)
    body = tree.prettify()
    
    # Collapse the document onto one line.
    body = body.replace('\n',' ')
    return body

In [6]:
# Convert every notebook in `infolder` to cleaned XHTML, add it to `book` as a
# chapter, and also write a standalone copy to a sibling '<infolder>_html' folder.
outfolder = f'../{infolder}_html'

# Both lists start with the 'nav' placeholder; chapters are appended below.
toc = ['nav']
spine = ['nav']

# Debug output, kept so the cell's printed output is unchanged.
print(type(toc))
# NOTE(review): id_num is not referenced anywhere in the visible notebook.
id_num = 0

# Remember the starting directory so it can be restored even if a notebook
# fails to convert part-way through (previously the cwd stayed inside
# `infolder`, and re-running the cell then failed).
startdir = os.getcwd()
os.chdir(infolder)
try:
    p = Path('.')
    outpath = Path(outfolder)

    outpath.mkdir(exist_ok=True)
    files = sorted(p.glob('*.ipynb'))

    for filename in files:
        print(filename)
        # read_text closes the file handle; an explicit UTF-8 avoids
        # depending on the platform's default encoding.
        text = filename.read_text(encoding='utf-8')

        (body, resources) = ipynb2html(text)
        body = cleanhtml(body)

        ## FOR EPUB on mac remove .. in file_name
        if pressbook:
            c1 = epub.EpubHtml(title=filename.stem, file_name=f"../{filename.stem}", lang='en', media_type="application/xhtml+xml")
        else:
            c1 = epub.EpubHtml(title=filename.stem, file_name=f"{filename.stem}.xhtml", lang='en', media_type="application/xhtml+xml")
        print(filename.stem)
        c1.content = body
        #c1.properties.append('rendition:layout-pre-paginated rendition:orientation-landscape rendition:spread-none')

        book.add_item(c1)
        toc.append(c1)
        # Pressbooks mode records the chapter's id in the spine; otherwise
        # the chapter object itself is appended.
        if pressbook:
            spine.append(c1.get_id())
        else:
            spine.append(c1)

        # Standalone copy of the cleaned chapter next to the input folder.
        outfile = outfolder+'/'+filename.stem+'.xhtml'
        print(outfile)
        with open(outfile, "w", encoding='utf-8') as file:
            file.write(body)
finally:
    os.chdir(startdir)

<class 'list'>
0000--Jupyter-Getting-Started-Guild.ipynb
fixing links <div class="output_html rendered_html output_subarea">
 <a href="0000--Jupyter-Getting-Started-Guild" target="_blank">
  0000--Jupyter-Getting-Started-Guild
 </a>
 <a href="03--Linear_Equations-pre-class-assignment" target="_blank">
  03--Linear_Equations-pre-class-assignment
 </a>
</div>

0000--Jupyter-Getting-Started-Guild
../Examples_html/0000--Jupyter-Getting-Started-Guild.xhtml
03--Linear_Equations-pre-class-assignment.ipynb
03--Linear_Equations-pre-class-assignment
../Examples_html/03--Linear_Equations-pre-class-assignment.xhtml
03-Linear_Equations-in-class-assignment.ipynb
fixing links <p>
 <a href="03--Linear_Equations-pre-class-assignment">
  03--Linear_Equations-pre-class-assignment
 </a>
</p>

03-Linear_Equations-in-class-assignment
../Examples_html/03-Linear_Equations-in-class-assignment.xhtml
08--Robotics-pre-class-assignment.ipynb
08--Robotics-pre-class-assignment
../Examples_html/08--Robotics-pre-class

In [7]:
# Table of contents: a tuple built from the list of entries collected so far.
book.toc = tuple(toc)

# Register the default NCX and Nav documents, in that order.
for item_cls in (epub.EpubNcx, epub.EpubNav):
    book.add_item(item_cls())

# Cover image support is currently disabled:
#coverfile='./Examples/Jupiter_poll.jpg'
#book.set_cover(coverfile, open(coverfile, 'r').read())
#book.set_cover(file_name='./Examples/Jupiter_poll.jpg', content='test')

# Stylesheet stored in the book as style/nav.css.
style = 'BODY {color: white;}'
nav_css = epub.EpubItem(
    uid="style_nav",
    file_name="style/nav.css",
    media_type="text/css",
    content=style,
)
book.add_item(nav_css)

# Reading order, as collected while the chapters were added.
book.spine = spine

# Write the finished book to disk with no extra writer options.
epub.write_epub(ebookfilename, book, {})

In [8]:
# ## Convert EPUB to OEBPS 

# from zipfile import ZipFile, ZIP_DEFLATED
# source = ZipFile(f'{ebookfilename}', 'r')
# target = ZipFile('target.epub', 'w', ZIP_DEFLATED)

# for file in source.filelist:
#     if 'nav.xhtml' in file.filename:
#         navfile = source.read(file.filename)
#         navfile.replace('.xhtml','/')
#         target.writestr(file.filename, navfile)
#     else:
#         target.writestr(file.filename, source.read(file.filename))
# target.close()
# source.close()

# !mv target.epub mth314.epub

In [9]:
# ## Convert EPUB to OEBPS 
# ## Didn't seem to work; needed to manually edit META-INF/container.xml to use OEBPS
# ## and rezip using "zip -r target.epub *" in a temp folder

# from zipfile import ZipFile, ZIP_DEFLATED
# source = ZipFile(f'{ebookfilename}', 'r')
# target = ZipFile('target.epub', 'w', ZIP_DEFLATED)
# for file in source.filelist:
#     if file.filename.startswith('EPUB/'):
#         newfilename = file.filename.replace('EPUB/','OEBPS/')
#         target.writestr(newfilename, source.read(file.filename))
#     else:
#         target.writestr(file.filename, source.read(file.filename))
# target.close()
# source.close()

In [10]:
#!mv target.epub mth314.epub

In [11]:
!open mth314.epub

In [None]:
!ls

[Go to the openbooks site at MSU](https://openbooks.lib.msu.edu)

In [None]:
# print(filename)
# text = open(filename, 'r').read()

# nb = nbformat.reads(text, as_version=4)

# ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
# ep.preprocess(nb)

# for cell in nb.cells:
#     if cell['cell_type'] == 'markdown':
#         cell['source'] = fixLaTeXTag(cell['source'])
#     elif cell['cell_type'] == 'code':
#         for out in cell['outputs']:
#             if 'data' in out:
#                 print(out['data']['text/plain'])
#                 if 'YouTubeVideo' in out['data']['text/plain']:
#                     out['data']['text/plain'] = 'IPython.core.display.HTML object>'

In [None]:
# # 2. Instantiate the exporter. We use the `basic` template for now; we'll get into more details
# # later about how to customize the exporter further.
# html_exporter = HTMLExporter()
# html_exporter.template_file = 'basic'

# # 3. Process the notebook we loaded earlier
# (body, resources) = html_exporter.from_notebook_node(nb)

# outfile = 'test.html'
# print(outfile)
# with open(outfile, "w") as file:
#     file.write(body)

https://openbooks.lib.msu.edu/testbook3/chapter/linear-algebra-with-computational-applications/08--Robotics-pre-class-assignment
https://openbooks.lib.msu.edu/testbook3/chapter/08-robotics-pre-class-assignment/