# utils

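> Utility functions for parsing GFF annotation files, extracting feature attributes, and building the glyphs of the genome browser plot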

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp utils

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import numpy as np
import pandas as pd
import io

from bokeh.plotting import figure
from bokeh.models.tools import BoxZoomTool
from bokeh.models import HoverTool, NumeralTickFormatter, LabelSet
from bokeh.models.glyphs import Patches
from bokeh.models import (
    CustomJS,
    Range1d,
    ColumnDataSource,
)

from collections import defaultdict
import warnings

In [None]:
#| export
def parse_gff(gff_path)->pd.DataFrame:
    """Read the feature lines of a GFF file into a DataFrame.

    Lines starting with ``#`` (comments and directives) and blank lines are
    skipped. Reading stops at a ``##FASTA`` directive, or at the first line
    starting with ``>``, because what follows is sequence data rather than
    tab-separated features.

    Returns a DataFrame with the columns seq_id, source, type, start, end,
    score, strand, phase and attributes. When the file holds no feature line
    the DataFrame is empty but still has these columns.
    """
    columns = ["seq_id", "source", "type", "start", "end", "score", "strand", "phase", "attributes"]

    # Copy the feature lines to an in-memory buffer so pandas parses them in one pass
    file_buffer = io.StringIO()
    with open(gff_path, "r") as gff_file:
        for line in gff_file:
            if line.startswith("##FASTA") or line.startswith(">"):
                # Start of the embedded sequence section: no more features after this
                break
            if line.startswith("#") or not line.strip():
                continue
            file_buffer.write(line)

    # pandas raises on an empty buffer, so handle "no feature" explicitly
    if file_buffer.tell() == 0:
        return pd.DataFrame(columns=columns)

    # Reset the file pointer to the beginning of the buffer before parsing
    file_buffer.seek(0)
    df = pd.read_csv(file_buffer, sep="\t", header=None)
    df.columns = columns
    return df

In [None]:
#| export
from genomenotebook.data import get_example_data_dir
import os

In [None]:
# Parse the example GFF3 file found in the directory returned by get_example_data_dir()
data_path = get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
df=parse_gff(gff_path)
# isinstance also accepts DataFrame subclasses, unlike an exact type comparison
assert isinstance(df, pd.DataFrame)
df.head()

Unnamed: 0,seq_id,source,type,start,end,score,strand,phase,attributes
0,U00096.3,Genbank,region,1,4641652,.,+,.,ID=U00096.3:1..4641652;Dbxref=taxon:511145;Is_...
1,U00096.3,Genbank,gene,190,255,.,+,.,"ID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:E..."
2,U00096.3,Genbank,CDS,190,255,.,+,0,ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni...
3,U00096.3,Genbank,gene,337,2799,.,+,.,"ID=gene-b0002;Dbxref=ASAP:ABE-0000008,ECOCYC:E..."
4,U00096.3,Genbank,CDS,337,2799,.,+,0,ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni...


In [None]:
#| export
def set_positions(annotation: pd.DataFrame, # an annotation DataFrame extracted from a gff file
                            ):
    """Return a copy of `annotation` with `left`, `right` and `middle` columns added.

    `left` and `right` hold the original start and end values. For features on
    the "-" strand, `start` and `end` are swapped so that `start` is the greater
    coordinate; rows with any other strand keep their start and end unchanged.
    `middle` is the midpoint between `left` and `right`.
    """
    result = annotation.copy()

    # Remember the coordinates as read from the file before touching start/end
    result["left"] = result["start"].to_numpy()
    result["right"] = result["end"].to_numpy()

    # Only reverse-strand rows need their start/end swapped
    on_minus = result["strand"] == "-"
    result.loc[on_minus, "start"] = result.loc[on_minus, "right"].values
    result.loc[on_minus, "end"] = result.loc[on_minus, "left"].values

    result["middle"] = (result["right"] + result["left"]) / 2
    return result

In [None]:
set_positions(df.head())

Unnamed: 0,seq_id,source,type,start,end,score,strand,phase,attributes,left,right,middle
0,U00096.3,Genbank,region,1,4641652,.,+,.,ID=U00096.3:1..4641652;Dbxref=taxon:511145;Is_...,1,4641652,2320826.5
1,U00096.3,Genbank,gene,190,255,.,+,.,"ID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:E...",190,255,222.5
2,U00096.3,Genbank,CDS,190,255,.,+,0,ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni...,190,255,222.5
3,U00096.3,Genbank,gene,337,2799,.,+,.,"ID=gene-b0002;Dbxref=ASAP:ABE-0000008,ECOCYC:E...",337,2799,1568.0
4,U00096.3,Genbank,CDS,337,2799,.,+,0,ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni...,337,2799,1568.0


In [None]:
#| export
def get_genome_annotations(gff_path: str, seq_id: str=None):
    """Parse the GFF file at `gff_path` and add the position columns computed by `set_positions`.

    When `seq_id` is given, only the rows whose seq_id equals it are kept, and a
    warning is issued if no row is left.
    """
    parsed = parse_gff(gff_path)
    if not seq_id:
        # No filtering requested: keep every sequence of the file
        return set_positions(parsed)

    selected = parsed.loc[parsed.seq_id == seq_id]
    if selected.empty:
        warnings.warn("The annotation DataFrame is empty. Check that the fasta and gff files have the same sequence id")

    return set_positions(selected)

In [None]:
# Load the annotations of sequence "U00096.3" from the example GFF3 file and show the first rows
data_path = get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
annotation=get_genome_annotations(gff_path, "U00096.3")
annotation.head()

Unnamed: 0,seq_id,source,type,start,end,score,strand,phase,attributes,left,right,middle
0,U00096.3,Genbank,region,1,4641652,.,+,.,ID=U00096.3:1..4641652;Dbxref=taxon:511145;Is_...,1,4641652,2320826.5
1,U00096.3,Genbank,gene,190,255,.,+,.,"ID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:E...",190,255,222.5
2,U00096.3,Genbank,CDS,190,255,.,+,0,ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni...,190,255,222.5
3,U00096.3,Genbank,gene,337,2799,.,+,.,"ID=gene-b0002;Dbxref=ASAP:ABE-0000008,ECOCYC:E...",337,2799,1568.0
4,U00096.3,Genbank,CDS,337,2799,.,+,0,ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni...,337,2799,1568.0


In [None]:
#| hide
#Testing mistake in seq_id
annotation=get_genome_annotations(gff_path, "U00097.3") #mistake in seq_id



In [None]:
#| export
import re

In [None]:
#| export
def extract_attribute(input_str:str, #attribute string to parse
                      attr_name:str, #name of the attribute to extract
                     ) -> str:
    """Extracts the attribute called attr_name from the GFF attributes string.

    The first letter of `attr_name` is matched case-insensitively, the rest of
    the name exactly. The name must start the string or directly follow a `;`,
    so that e.g. "transcript_id" is not found inside "orig_transcript_id".
    Returns the text up to the next `;`, or None when the attribute is absent,
    `attr_name` is empty, or `input_str` is not a string (e.g. NaN).
    """
    if not attr_name or not isinstance(input_str, str):
        return None

    first, rest = attr_name[0], attr_name[1:]
    # Escape the name so characters such as "-" or "." are matched literally
    pattern = (
        r"(?:^|;)\s*"
        f"[{re.escape(first.lower())}{re.escape(first.upper())}]"
        f"{re.escape(rest)}=([^;]+)"
    )
    match = re.search(pattern, input_str)
    return match.group(1) if match else None

In [None]:
input_str = 'ID=cds-ATV02827.1;Parent=gene-SaO11_00001;Dbxref=NCBI_GP:ATV02827.1;Name=ATV02827.1;gbkey=CDS;gene=dnaA;locus_tag=SaO11_00001;product=Chromosomal replication initiator protein DnaA;protein_id=ATV02827.1;transl_table=11'
extract_attribute(input_str,"gene")

'dnaA'

In [None]:
# Keep a subset of feature types and extract the protein_id attribute of each remaining row
annotation=get_genome_annotations(gff_path, "U00096.3")
genes = annotation[annotation.type.isin(["CDS", "repeat_region", "ncRNA", "rRNA", "tRNA"])    ]
genes.attributes.apply(extract_attribute,attr_name='protein_id')

2        AAC73112.1
4        AAC73113.1
6        AAC73114.1
8        AAC73115.1
10       AAC73116.1
            ...    
10112    AAC77352.1
10114    AAC77353.1
10116    AAC77354.1
10118    AAC77355.1
10120    AAC77356.1
Name: attributes, Length: 5256, dtype: object

In [None]:
#| export
def extract_all_attributes(input_str:str)->dict:
    """Extracts all attributes from the GFF attributes column.

    Returns a dict mapping each `key=value` pair found in `input_str` (values
    run up to the next `;`). When a key appears several times the last value
    wins. An input without any pair gives an empty dict.
    """
    # Raw string: "\w" in a normal literal is an invalid escape sequence
    pattern = r"(?P<key>\w+[-\w]*)=(?P<value>[^;]+)"
    # findall returns (key, value) tuples because the pattern has two groups
    return dict(re.findall(pattern, input_str))

In [None]:
extract_all_attributes(input_str)

defaultdict(None,
            {'ID': 'cds-ATV02827.1',
             'Parent': 'gene-SaO11_00001',
             'Dbxref': 'NCBI_GP:ATV02827.1',
             'Name': 'ATV02827.1',
             'gbkey': 'CDS',
             'gene': 'dnaA',
             'locus_tag': 'SaO11_00001',
             'product': 'Chromosomal replication initiator protein DnaA',
             'protein_id': 'ATV02827.1',
             'transl_table': '11'})

In [None]:
#| export
def available_feature_types(gff_path):
    """Return the set of distinct values found in the `type` column of the GFF file at `gff_path`."""
    type_column = parse_gff(gff_path)["type"]
    return set(type_column.values)

In [None]:
available_feature_types(gff_path)

{'CDS',
 'exon',
 'gene',
 'mobile_genetic_element',
 'ncRNA',
 'origin_of_replication',
 'pseudogene',
 'rRNA',
 'recombination_feature',
 'region',
 'repeat_region',
 'sequence_feature',
 'tRNA'}

In [None]:
#| export
def available_attributes(gff_path):
    """Return the list of attribute names used in the GFF file at `gff_path`.

    Each name appears once, in the order in which it is first encountered in
    the file, so the result is the same from one run to the next.
    """
    features = parse_gff(gff_path)
    # A dict is used as an ordered set: keys keep their first insertion position
    seen = {}
    for attr_string in features["attributes"]:
        seen.update(dict.fromkeys(extract_all_attributes(attr_string)))
    return list(seen)

In [None]:
print(available_attributes(gff_path))

['transl_except', 'mobile_element_type', 'gene_synonym', 'mol_type', 'product', 'Note', 'gene_biotype', 'Is_circular', 'part', 'gbkey', 'orig_transcript_id', 'Parent', 'gene', 'rpt_type', 'pseudo', 'substrain', 'recombination_class', 'protein_id', 'transl_table', 'orig_protein_id', 'strain', 'locus_tag', 'ID', 'Dbxref', 'genome', 'Name', 'exception']


In [None]:
#| export
def attributes_to_columns(features: pd.DataFrame):
    """Return a copy of `features` with one extra column per attribute name.

    Every `key=value` pair of the `attributes` column becomes a value in the
    column named `key`; rows that lack the attribute get None. New columns are
    added in the order in which the attribute names are first encountered.
    A column that already exists under an attribute's name is overwritten.
    """
    attr_dicts = [extract_all_attributes(attr_string) for attr_string in features["attributes"]]
    # A dict is used as an ordered set so the column order is reproducible
    all_keys = dict.fromkeys(key for d in attr_dicts for key in d)

    features = features.copy()
    for key in all_keys:
        features[key] = [d.get(key) for d in attr_dicts]

    return features
    

In [None]:
attributes_to_columns(annotation.head())

Unnamed: 0,seq_id,source,type,start,end,score,strand,phase,attributes,left,...,protein_id,genome,Is_circular,strain,Name,gbkey,locus_tag,transl_table,ID,gene
0,U00096.3,Genbank,region,1,4641652,.,+,.,ID=U00096.3:1..4641652;Dbxref=taxon:511145;Is_...,1,...,,chromosome,True,K-12,ANONYMOUS,Src,,,U00096.3:1..4641652,
1,U00096.3,Genbank,gene,190,255,.,+,.,"ID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:E...",190,...,,,,,thrL,Gene,b0001,,gene-b0001,thrL
2,U00096.3,Genbank,CDS,190,255,.,+,0,ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni...,190,...,AAC73112.1,,,,AAC73112.1,CDS,b0001,11.0,cds-AAC73112.1,thrL
3,U00096.3,Genbank,gene,337,2799,.,+,.,"ID=gene-b0002;Dbxref=ASAP:ABE-0000008,ECOCYC:E...",337,...,,,,,thrA,Gene,b0002,,gene-b0002,thrA
4,U00096.3,Genbank,CDS,337,2799,.,+,0,ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni...,337,...,AAC73113.1,,,,AAC73113.1,CDS,b0002,11.0,cds-AAC73113.1,thrA


In [None]:
#| hide
# The attribute string has no "gene" entry, so nothing should be extracted
input_str = 'locus_tag=SaO11_00001;product=Chromosomal replication initiator protein DnaA;protein_id=ATV02827.1;transl_table=11'
assert extract_attribute(input_str,"gene") is None

In [None]:
#| export
attributes=["gene", "locus_tag", "product"]
def get_features_from_annotation(annotation: pd.DataFrame, #annotation DataFrame extracted from GFF file
                                 feature_types: list = ["CDS", "repeat_region", "ncRNA", "rRNA", "tRNA"], # list of feature types to extract
                                 attributes: list = attributes # list of attributes to extract
                                ):
    """Filters annotations to keep only features whose type is in `feature_types`.
       Extracts each attribute from `attributes` as a column.

       Rows of type "repeat_region" get "REP" as their `gene`. A `gene_or_locus`
       column is added: it holds `gene`, falling back to the first entry of
       `attributes` and then to the locus_tag when the previous value is missing.
    """
    features = annotation[annotation.type.isin(feature_types)].copy()

    for attr in attributes:
        features[attr] = features.attributes.apply(extract_attribute,attr_name=attr)

    # Also creates the "gene" column when "gene" is not part of `attributes`
    features.loc[features.type == "repeat_region", "gene"] = "REP"

    gene_or_locus = features["gene"]
    if len(attributes) > 0:
        # With the default attributes this is gene itself, hence the locus_tag fallback below
        gene_or_locus = gene_or_locus.fillna(features[attributes[0]])

    if "locus_tag" in features.columns:
        locus_tags = features["locus_tag"]
    else:
        # locus_tag was not requested as a column: read it from the attribute string
        locus_tags = features["attributes"].apply(extract_attribute, attr_name="locus_tag")
    features['gene_or_locus'] = gene_or_locus.fillna(locus_tags)

    return features

In [None]:
features=get_features_from_annotation(annotation)
features.head()

Unnamed: 0,seq_id,source,type,start,end,score,strand,phase,attributes,left,right,middle,gene,locus_tag,product,gene_or_locus
2,U00096.3,Genbank,CDS,190,255,.,+,0,ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni...,190,255,222.5,thrL,b0001,thr operon leader peptide,thrL
4,U00096.3,Genbank,CDS,337,2799,.,+,0,ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni...,337,2799,1568.0,thrA,b0002,fused aspartate kinase/homoserine dehydrogenase 1,thrA
6,U00096.3,Genbank,CDS,2801,3733,.,+,0,ID=cds-AAC73114.1;Parent=gene-b0003;Dbxref=Uni...,2801,3733,3267.0,thrB,b0003,homoserine kinase,thrB
8,U00096.3,Genbank,CDS,3734,5020,.,+,0,ID=cds-AAC73115.1;Parent=gene-b0004;Dbxref=Uni...,3734,5020,4377.0,thrC,b0004,threonine synthase,thrC
10,U00096.3,Genbank,CDS,5234,5530,.,+,0,ID=cds-AAC73116.1;Parent=gene-b0005;Dbxref=Uni...,5234,5530,5382.0,yaaX,b0005,DUF2502 domain-containing protein YaaX,yaaX


In [None]:
#| export
from collections import defaultdict

In [None]:
#| export
gene_y_range = (-1.5, -1)

def arrow_coordinates(feature):
    """Return the (xs, ys) vertices of a five-point arrow going from feature.start to feature.end.

    The arrow head is at most 100 units long and never longer than the
    feature. Only features on the "-" strand point towards lower coordinates;
    any other strand value (including "." and "?") is drawn like "+", so the
    arrow base always stays inside the feature.
    """
    feature_size = feature.right - feature.left
    head_length = np.minimum(feature_size, 100)

    if feature.strand == "-":
        # Reverse strand: end is the lower coordinate, the base sits above it
        arrow_base = feature.end + head_length
    else:
        arrow_base = feature.end - head_length

    xs=(feature.start,
        feature.start,
        arrow_base,
        feature.end,
        arrow_base
       )

    y_min, y_max = gene_y_range
    # The tip of the arrow is vertically centred between y_min and y_max
    ys = (y_min, y_max, y_max, (y_max + y_min) / 2, y_min)
    return xs, ys

def box_coordinates(feature):
    """Return the (xs, ys) corners of a rectangle spanning feature.left to feature.right.

    The vertical extent is taken from the module-level `gene_y_range`.
    """
    bottom, top = gene_y_range
    x_low, x_high = feature.left, feature.right
    # Corners are listed bottom-left, top-left, top-right, bottom-right
    return (x_low, x_low, x_high, x_high), (bottom, top, top, bottom)

# Feature types without an explicit entry fall back to the same arrow glyph as CDS etc.
default_glyphs = defaultdict(lambda: ("arrow", ("purple", "orange")))
default_glyphs.update(
    dict.fromkeys(["CDS", "ncRNA", "rRNA", "tRNA"], ("arrow", ("purple", "orange")))
)
# Repeat regions are drawn as a box with a single colour
default_glyphs["repeat_region"] = ("box", ("grey",))

def get_patch_coordinates(feature, # row of a pandas DataFrame extracted from a GFF file
                          patch_dict: dict = default_glyphs # a dictionnary containing as key a feature type and as value a patch definition.
                         ):
    """Return ((xs, ys), color) for the glyph representing `feature`.

    Patches are defined with a tuple: (patch_type, (patch_color_plus, patch_color_minus)).
    Different colors can be specified depending on the strand. When a single
    color is given it is used for every strand; when two are given the second
    one is used for the "-" strand and the first one for any other strand.

    Raises ValueError if the patch type is neither "arrow" nor "box".
    """
    patch_type, patch_colors = patch_dict[feature.type]

    # Strands other than "-" (including "." and "?") use the first colour
    if len(patch_colors) > 1 and feature.strand == "-":
        color = patch_colors[1]
    else:
        color = patch_colors[0]

    if patch_type == "arrow":
        coordinates = arrow_coordinates(feature)
    elif patch_type == "box":
        coordinates = box_coordinates(feature)
    else:
        # Fail here rather than returning None, which callers cannot unpack
        raise ValueError(
            f"Unknown patch type {patch_type!r} for feature type {feature.type!r}: expected 'arrow' or 'box'"
        )

    return coordinates, color
    
    

In [None]:
# Compute the patch of every feature row, then split the results into coordinates and colors
coordinates, colors = zip(*features.apply(get_patch_coordinates,axis=1))
coordinates[:5], colors[:5]

((((190, 190, 190, 255, 190), (-1.5, -1, -1, -1.25, -1.5)),
  ((337, 337, 2699, 2799, 2699), (-1.5, -1, -1, -1.25, -1.5)),
  ((2801, 2801, 3633, 3733, 3633), (-1.5, -1, -1, -1.25, -1.5)),
  ((3734, 3734, 4920, 5020, 4920), (-1.5, -1, -1, -1.25, -1.5)),
  ((5234, 5234, 5430, 5530, 5430), (-1.5, -1, -1, -1.25, -1.5))),
 ('purple', 'purple', 'purple', 'purple', 'purple'))

In [None]:
#| export
def get_feature_patches(features: pd.DataFrame, #DataFrame of the features 
                        left: int, #left limit
                        right: int, #right limit
                        patch_dict: dict = default_glyphs, #glyphs to use for each feature type
                        attributes: list = attributes, #list of attributes to display when hovering
                        name: str = attributes[0] #attribute to be displayed as the feature name
                       ):
    """Build the glyph data of the features overlapping the window (left, right).

    Returns a dict with the patch coordinates (`xs`, `ys`), their `color`, the
    label position `pos`, the labels (`names`, `hover_names`, taken from the
    `name` column with missing values replaced by "") and one list per entry
    of `attributes`.
    """
    overlaps_window = (features["right"] > left) & (features["left"] < right)
    visible = features.loc[overlaps_window]

    if len(visible) == 0:
        # Nothing to draw in this window
        colors = []
        xs, ys = [], []
    else:
        patches = visible.apply(get_patch_coordinates, patch_dict=patch_dict, axis=1)
        coordinates, colors = zip(*patches)
        xs, ys = zip(*coordinates)

    labels = list(visible[name].fillna("").values)
    patch_data = {
        "xs": xs,
        "ys": ys,
        "color": colors,
        "pos": visible["middle"].values,
        "names": labels,
        "hover_names": labels,
    }
    patch_data.update(visible[attributes].to_dict(orient='list'))
    return patch_data

 

In [None]:
default_glyphs

defaultdict(<function __main__.<lambda>()>,
            {'CDS': ('arrow', ('purple', 'orange')),
             'ncRNA': ('arrow', ('purple', 'orange')),
             'rRNA': ('arrow', ('purple', 'orange')),
             'tRNA': ('arrow', ('purple', 'orange')),
             'repeat_region': ('box', ('grey',))})

In [None]:
get_feature_patches(features,10000,15000, patch_dict=default_glyphs, name="gene")

{'xs': ((10494, 10494, 10028, 9928, 10028),
  (11356, 11356, 10743, 10643, 10743),
  (10830, 10830, 11215, 11315, 11215),
  (11786, 11786, 11482, 11382, 11482),
  (12163, 12163, 13979, 14079, 13979),
  (14168, 14168, 15198, 15298, 15198)),
 'ys': ((-1.5, -1, -1, -1.25, -1.5),
  (-1.5, -1, -1, -1.25, -1.5),
  (-1.5, -1, -1, -1.25, -1.5),
  (-1.5, -1, -1, -1.25, -1.5),
  (-1.5, -1, -1, -1.25, -1.5),
  (-1.5, -1, -1, -1.25, -1.5)),
 'color': ('orange', 'orange', 'purple', 'orange', 'purple', 'purple'),
 'pos': array([10211. , 10999.5, 11072.5, 11584. , 13121. , 14733. ]),
 'names': ['satP', 'yaaW', 'mbiA', 'yaaI', 'dnaK', 'dnaJ'],
 'hover_names': ['satP', 'yaaW', 'mbiA', 'yaaI', 'dnaK', 'dnaJ'],
 'gene': ['satP', 'yaaW', 'mbiA', 'yaaI', 'dnaK', 'dnaJ'],
 'locus_tag': ['b0010', 'b0011', 'b0012', 'b0013', 'b0014', 'b0015'],
 'product': ['acetate/succinate:H(+) symporter',
  'putative enzyme-specific chaperone YaaW',
  'uncharacterized protein MbiA',
  'DUF2541 domain-containing protein YaaI

In [None]:
#| export
def sort_list_dict(d: dict[list], #a dictionnary for which all values are of type list
                   ref_list="xs", #key of the list to use as a reference for sorting
                   func=lambda x: x, #A custom function can be supplied to customize the sort order. Default is the identity function.
                  ):
    """Sort all the lists of `d` together, following the order of `d[ref_list]`.

    The i-th elements of all lists are kept together; they are ordered by
    `func` applied to the i-th element of the reference list. Returns a new
    dict with the same keys and sorted lists as values. Empty lists give a dict
    with the same keys and empty lists. Raises ValueError if `ref_list` is not
    a key of `d`.
    """
    ks = list(d.keys())
    ref_list_ix = ks.index(ref_list)

    # Each row holds the i-th element of every list, in the order of ks
    rows = sorted(zip(*(d[k] for k in ks)), key=lambda row: func(row[ref_list_ix]))

    if not rows:
        # Unzipping no rows would yield no columns and drop every key
        return {k: [] for k in ks}

    # Convert the sorted rows back into one list per key
    return {k: list(column) for k, column in zip(ks, zip(*rows))}


In [None]:
d={'xs':[[2,5],[3,1]],'a':[1,3],'b':["c","d"]}
sort_list_dict(d, ref_list='xs', func= lambda x: x[1]) #sort according to the second element of each element of list 'xs'

{'xs': [[3, 1], [2, 5]], 'a': [3, 1], 'b': ['d', 'c']}

In [None]:
#| export
# (y_min, y_max) of the genome browser plot
Y_RANGE = (-2, 2)
def get_y_range() -> tuple:
    """Return the module-level `Y_RANGE`, the (min, max) Y range of the genome browser plot."""
    return Y_RANGE


In [None]:
#| export

def get_all_glyphs(genes,bounds:tuple) -> dict:
    """Return the glyph data of the features located between bounds[0] and bounds[1].

    The data is built by `get_feature_patches` and every list is sorted by the
    first x coordinate of the patches. All values of the returned dict are
    lists, including when no feature falls within the bounds.
    """
    all_glyphs = get_feature_patches(genes, bounds[0], bounds[1])

    if len(all_glyphs["xs"]) == 0:
        # Nothing to sort: keep every key so that callers still get all the columns
        return {k: list(v) for k, v in all_glyphs.items()}

    return sort_list_dict(all_glyphs, ref_list="xs", func=lambda xs: xs[0])

In [None]:
#| export
def create_genome_browser_plot(glyphSource, x_range, attributes=attributes, **kwargs):
    """Create the bokeh figure showing the annotation track.

    glyphSource: data source used by the patches, the labels and the hover
        tool. It is read through the columns "xs", "ys", "color", "pos",
        "names" and one column per entry of `attributes`.
    x_range: range used as the x range of the figure.
    attributes: names shown in the hover tooltips.
    kwargs: optional settings `plot_height` (default 150), `label_angle`
        (default 45), `text_font_size` (default "10pt") and `output_backend`
        (default "webgl").

    Returns the figure.
    """
    plot_height = kwargs.get("plot_height", 150)
    label_angle = kwargs.get("label_angle", 45)
    text_font_size = kwargs.get("text_font_size", "10pt")
    output_backend = kwargs.get("output_backend", "webgl")
    
    y_min, y_max = get_y_range()
    # Zooming and panning are restricted to the x axis
    p_annot = figure(
        tools = "xwheel_zoom,xpan,save",
        active_scroll = "xwheel_zoom",
        height = plot_height,
        x_range = x_range,
        y_range = Range1d(y_min, y_max),
        output_backend=output_backend,
    )
    # Add tool
    p_annot.add_tools(BoxZoomTool(dimensions="width"))

    # Format x axis values
    p_annot.xaxis[0].formatter = NumeralTickFormatter(format="0,0")
    # Hide grid
    p_annot.xgrid.visible = False
    p_annot.ygrid.visible = False
    # Hide axis
    p_annot.yaxis.visible = False
    # One patch per feature, filled with the color stored in the data source
    glyph = p_annot.add_glyph(
        glyphSource, Patches(xs="xs", ys="ys", fill_color="color")
    )
    # gene labels in the annotation track
    # This seems to be necessary to show the labels
    p_annot.scatter(x="pos", y=0, size=0, source=glyphSource)
    labels = LabelSet(
        x="pos",
        y=-0.9,
        text="names",
        level="glyph",
        angle=label_angle,
        text_font_size=text_font_size,
        x_offset=-5,
        y_offset=0,
        source=glyphSource,
        text_align='left',
    )

    p_annot.add_layout(labels)
    # One tooltip line per attribute, reading the column of the same name
    tooltips=[(attr,f"@{attr}") for attr in attributes]
    # The hover tool is attached to the feature patches only
    p_annot.add_tools(
        HoverTool(
            renderers=[glyph],
            tooltips=tooltips,
        )
    )
    return p_annot

In [None]:
from bokeh.models import (
    Range1d,
    ColumnDataSource,
)
from bokeh.plotting import show

In [None]:
# Extract the features, then display those located in the initial window 2000-4000
features = get_features_from_annotation(annotation)
x_range = Range1d(
            2000, 4000, 
            bounds=(0,5000), 
            max_interval=100000,
            min_interval=40
        )

# Build the glyph data for the window and wrap it in a data source for the plot
feature_patches = get_feature_patches(features, x_range.start, x_range.end)
glyph_source = ColumnDataSource(feature_patches)
p = create_genome_browser_plot(glyph_source, x_range)
show(p)

In [None]:
#| export
def split_string(string, max_length=10):
    """Break `string` into lines of at most `max_length` characters, joined by newlines.

    Each line is cut at the last space found within its first `max_length + 1`
    characters (the space itself is dropped, together with any whitespace that
    follows it). When there is no such space the line is cut after exactly
    `max_length` characters. Strings that already fit are returned unchanged.

    Raises ValueError if the string has to be split and `max_length` is not positive.
    """
    if len(string) <= max_length:
        return string
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    # Iterative so that very long strings do not hit the recursion limit
    lines = []
    remaining = string
    while len(remaining) > max_length:
        # Last space at an index between 1 and max_length (inclusive)
        cut = remaining.rfind(" ", 1, max_length + 1)
        if cut == -1:
            cut = max_length  # If no suitable breaking point found, split at max_length
        lines.append(remaining[:cut])
        remaining = remaining[cut:].lstrip()
    lines.append(remaining)
    return "\n".join(lines)



In [None]:
# Example usage
long_string = "This is a very long string that needs to be split into multiple lines because it exceeds 50 characters."

split_result = split_string(long_string, max_length=50)
print(split_result)


This is a very long string that needs to be split
into multiple lines because it exceeds 50
characters.


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()