In [None]:
#| default_exp squeeze

# Run `nbsqueeze` from the command line 

> Squeeze all notebooks in your project at once 

For all my coding work, I use the awesome Python package [nbdev](https://nbdev.fast.ai/). Thanks to this software, I learned how to use GitHub efficiently and how to publish my own Python packages (like this one!) with beautiful documentation, all rendered from a bunch of Jupyter notebooks. Ultimate freedom of expression! Now, I am pushing the nbdev machinery to its limits due to the large number of plots with high-resolution images that I need to embed in my documentation.  

In my workflow, I need to squeeze the figures out of all Jupyter notebooks in a project's notebooks folder simultaneously with a single command. For this reason I created a console command `nbsqueeze` that can be executed from the terminal (a.k.a. command prompt) in the root folder of a project. Simply type this: 

```{bash}
$ nbsqueeze
```

The command assumes that your current working directory contains a folder named either `nbs` or `notebooks` that contains all your Jupyter notebooks. For each notebook, the command first identifies the image files that are still linked from that notebook and deletes all other, obsolete image files belonging to it. Subsequently, it squeezes all figures out of the notebook. 


In [None]:
#|export 

import glob 
from nbsqueeze import squeeze_this_nb, make_imdir 
import os
import re 

In [None]:
#|export 

def find_links(nb_path):
    '''Find existing links to png images in notebook `nb_path`.

    The notebook file is scanned line by line for `src=\\"....png\\"`
    attributes, i.e. image links whose quotes are backslash-escaped the way
    they appear inside the JSON text of an .ipynb file.

    Parameters
    ----------
    nb_path : str or None
        Path to the notebook file. If None, the path is taken from
        `ipynb_path.get()`.

    Returns
    -------
    list of str
        All linked png paths in order of appearance. A line that contains
        several image links contributes all of them.
    '''

    if nb_path is None:
        # NOTE(review): `ipynb_path` is not imported in this module, so this
        # fallback raises NameError unless the name is provided elsewhere --
        # confirm where it should come from.
        nb_path = ipynb_path.get()

    # `[^"]*` keeps a match inside a single attribute value, and `findall`
    # collects every link on a line instead of only the last one.
    link_ptrn = re.compile(r'src=\\"([^"]*\.png)\\"')

    links = []

    # Notebook files are UTF-8 encoded JSON; do not rely on the platform default.
    with open(nb_path, encoding='utf-8') as fh:
        for line in fh:
            links.extend(link_ptrn.findall(line))

    return links


def clean_imdir(nb_path, verbose=False): 
    '''Remove orphaned png files of notebook `nb_path` that are no longer linked.

    The images directory is obtained from `make_imdir(nb_path)`. A file in
    that directory is a removal candidate only if its name starts with the
    notebook's file name (without the `.ipynb` extension) and ends with
    `.png`. Candidates whose file name is not among the links returned by
    `find_links(nb_path)` are deleted from disk.

    Parameters
    ----------
    nb_path : str
        Path to the notebook file.
    verbose : bool
        If True, print which files were removed (or that none were found).
    '''

    imdir_path = make_imdir(nb_path)

    # notebook file name without extension, e.g. 'demo' for 'nbs/demo.ipynb'
    prefix = os.path.splitext(os.path.basename(nb_path))[0]

    # Select only png files that start with the notebook prefix. A substring
    # test would also pick up (and delete) images of other notebooks whose
    # names merely contain this notebook's name.
    # NOTE(review): assumes image file names begin with the notebook name --
    # confirm against the naming used by `squeeze_this_nb`.
    nb_pngs = {fname for fname in os.listdir(imdir_path)
               if fname.startswith(prefix) and fname.endswith('.png')}

    # Compare on bare file names so that links written as './images/x.png',
    # 'images/x.png', etc. are all recognized as linked.
    link_filenames = {os.path.basename(link) for link in find_links(nb_path)}

    # sorted for a deterministic removal order and report
    orphants = sorted(nb_pngs - link_filenames)

    for orph in orphants: 
        os.remove(os.path.join(imdir_path, orph)) 

    if verbose:  
        if len(orphants) > 0: 
            print(f'    Removing {len(orphants)} orphant image files:')
            print(orphants)
        else: 
            print('  (No orphant image files found)')

            
def squeeze(glob_ptrn='n*b*s/*.ipynb', verbose=False, overwrite=True): 
    '''Clean up and squeeze all notebooks that match `glob_ptrn`.

    For every matching notebook, `clean_imdir` is called first to remove
    image files that are no longer linked, and then `squeeze_this_nb` is
    called to do the actual squeezing.

    Parameters
    ----------
    glob_ptrn : str
        Glob pattern, relative to the current working directory, that selects
        the notebooks to process. The default matches `*.ipynb` files in
        folders such as `nbs` or `notebooks`.
    verbose : bool
        If True, print progress messages; also forwarded to `clean_imdir`
        and `squeeze_this_nb`.
    overwrite : bool
        Forwarded unchanged to `squeeze_this_nb`.
    '''
    # locate notebooks; honor the caller's pattern and sort because the
    # order returned by glob is arbitrary
    notebooks = sorted(glob.glob(glob_ptrn))
    n = len(notebooks) 

    # clean and squeeze each notebook 
    for i, nb in enumerate(notebooks): 

        if verbose:
            print('-'*50) 
            print(f'Processing notebook {i+1}/{n}: {nb}')
            print('- Cleaning images folder...') 

        # first get rid of all orphaned images 
        clean_imdir(nb, verbose=verbose) 

        # do the actual squeezing 
        if verbose: 
            print('- Extracting inline images...')

        squeeze_this_nb(nb, overwrite=overwrite, verbose=verbose) 
                