In [None]:
# default_exp navigator

In [None]:
#hide
# Mount Google Drive at /content/drive; `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#hide
# Install the notebook tooling (nbdev for export/docs, fastcore for the `test_eq` helpers imported below).
# NOTE(review): versions are not pinned, so a rerun may pick up a newer release.
!pip install nbdev
!pip install fastcore

In [None]:
#hide
# Switch the working directory to the project folder on the mounted Drive;
# later cells open the pickled test fixtures by relative file name.
% cd /content/drive/My\ Drive/fa_convnav

/content/drive/My Drive/fa_convnav


In [None]:
#hide
#not deps but we need them to use nbdev and run tests
from nbdev import * 
from nbdev.showdoc import *
from fastcore.test import *

In [None]:
#hide
# Install fastai2, which the example-learner cell below imports from.
# NOTE(review): version is not pinned.
!pip install fastai2

In [None]:
#hide
from fastai2.basics import *
from fastai2.callback.all import *
from fastai2.vision.all import *
from torch import torch

# Build an example image-classification learner (resnet18 on the PETS images).
# Category labels are parsed from the file name with the regex below.
# NOTE(review): the '.' before 'jpg' in the pattern is unescaped, so it matches any character - confirm intended.
pets = DataBlock(blocks=(ImageBlock, CategoryBlock), 
                 get_items=get_image_files, 
                 splitter=RandomSplitter(),
                 get_y=RegexLabeller(pat = r'/([^/]+)_\d+.jpg$'),
                 item_tfms=Resize(460),
                 batch_tfms=[*aug_transforms(size=224, max_rotate=30, min_scale=0.75), Normalize.from_stats(*imagenet_stats)])

# Downloads/unpacks the PETS archive (via untar_data) and creates dataloaders with batch size 128.
dls = pets.dataloaders(untar_data(URLs.PETS)/"images",  bs=128)

# The architecture is passed as a callable; cnn_learner builds the model from it.
model = resnet18
# NOTE(review): `lr=slice(3e-3)` is handed to the optimizer factory here - confirm Adam accepts a slice for `lr`.
learn = cnn_learner(
    dls, 
    model, 
    opt_func=partial(Adam, lr=slice(3e-3), wd=0.01, eps=1e-8), 
    metrics=error_rate, 
    config=cnn_config(ps=0.33)).to_fp16()

In [None]:
#hide
def get_test_vars():
  """Return the test fixtures as the tuple `(test_learner, test_summary, test_df)`.

  Each fixture is taken from the notebook/module globals when a non-None value
  with that name already exists there; otherwise it is unpickled from the
  gzip-compressed file `<name>_resnet18` in the current working directory.

  Raises whatever `gzip.open` / `pickle.load` raise (e.g. `FileNotFoundError`)
  when a fixture is neither cached nor present on disk.
  """
  # Local import so the helper also works if it is called before the
  # notebook's `#export` import cell has been run.
  import gzip, pickle

  def _cached_or_load(name):
    # The previous version probed bare names inside try/except. Because those
    # names are assigned in the function they were always unbound locals, so
    # every call re-read the files. Look the names up in globals() instead.
    cached = globals().get(name)
    if cached is not None: return cached
    # NOTE(review): pickle.load can execute arbitrary code - only load trusted fixture files.
    with gzip.open(f"{name}_resnet18", "rb") as f:
      return pickle.load(f)

  return tuple(_cached_or_load(name) for name in ("test_learner", "test_summary", "test_df"))

# Navigator

> CNN viewer and navigator

In [None]:
#export
import gzip, pickle
from fa_convnav.models import models
from fa_convnav.core import *
from pandas import DataFrame, option_context

In [None]:
#export
class CNDFView:
  "Class to view a CNDF dataframe"

  def copy_layerinfo(self, df):
    """Copy layer information and block/layer counts across from hidden columns to displayed columns.

    Only cells of a displayed column that hold the empty string are filled.
    `df` is modified in place and also returned.
    """
    display_to_hidden = (
      ('Division', 'div_id'),
      ('Container_child', 'chd_id'),
      ('Container_block', 'blk_id'),
      ('Output_dimensions', 'out_dim'),
      ('Currently', 'current'),
    )
    for shown, hidden in display_to_hidden:
      df.loc[df[shown] == '', shown] = df[hidden]
    return df

  def check_view_args(self, df, truncate, verbose):
    """Check arguments given to view function, `df`, `truncate` and `verbose` are valid.

    Raises `AssertionError` when `df` is not a DataFrame with a 'Module_name'
    column, `truncate` is not an int in [-10, 10] or `verbose` is not an int in [1, 5].
    """
    assert isinstance(df, DataFrame) and 'Module_name' in df.columns, "Not a valid convnav dataframe"
    assert isinstance(truncate, int) and -10 <= truncate <= 10, "Argument 'truncate' must be an integer between -10 (show more cols) and +10 (show fewer cols)"
    # The accepted range is 1-5; the message previously claimed "1 2 or 3".
    assert isinstance(verbose, int) and 1 <= verbose <= 5, "Argument 'verbose' must be an integer between 1 and 5"

  def view(self, df=None, verbose=3, tight=True, truncate=0, align_cols='left', top=False, show=True, return_df=False):
    """Display dataframe `df` with optional arguments and styling.

    `df`: frame to display; the instance frame `self._cndf` is used when None.
    `verbose`: 1-5; any value other than 3 overrides `truncate`, and 4 also fills
      the display columns from the hidden ones (see `copy_layerinfo`).
    `tight`: use the pandas Styler output; forced off for frames under 10 rows and when `top` trims rows.
    `truncate`: shifts how many trailing columns are hidden (11 + `truncate`).
    `align_cols`: CSS `text-align` value applied in the tight layout.
    `top`: show only the first 10 rows followed by a count of the remaining rows.
    `show`: when False nothing is displayed and None is returned.
    `return_df`: return the displayed (index renamed to 'Index') frame, only when `df` was given.

    The frame passed in is never modified; all adjustments happen on copies.
    NOTE(review): relies on a `display` callable being available (IPython provides one) - confirm for non-notebook use.
    """
    if not show: return None
    _df = df if df is not None else self._cndf
    self.check_view_args(_df, truncate, verbose)

    if not isinstance(tight, bool): tight = True
    if len(_df) < 10: tight = False
    if verbose != 3: truncate = (10, 4, 0, 0, -10)[verbose-1]
    # copy first: copy_layerinfo writes into the frame it is given
    if verbose == 4: _df = self.copy_layerinfo(_df.copy())

    post_msg = ''
    if top and len(_df) > 10:
      post_msg = f'...{len(_df)-10} more layers'
      _df = _df.iloc[:10]
      tight = False

    if len(_df) == 0:
      print('No data to display')
      return None

    # rename_axis returns a new frame, so the caller's index name is left untouched
    _df = _df.rename_axis('Index')
    visible = _df.iloc[:, :-(11+truncate)]
    with option_context("display.max_rows", 1000):
      # build the Styler only when it is actually shown
      display(visible.style.set_properties(**{'text-align': align_cols}) if tight else visible)
    print(post_msg)
    if return_df and df is not None: return _df

  def copy_view(self, df, **kwargs):
    "Copy over layer information (in place, see `copy_layerinfo`) then call `view` to display dataframe"
    df = self.copy_layerinfo(df)
    self.view(df=df, **kwargs)

In [None]:
# Render the signature and docstring of `CNDFView.view` for the documentation page.
show_doc(CNDFView.view)

<h4 id="CNDFView.view" class="doc_header"><code>CNDFView.view</code><a href="__main__.py#L19" class="source_link" style="float:right">[source]</a></h4>

> <code>CNDFView.view</code>(**`df`**=*`None`*, **`verbose`**=*`3`*, **`tight`**=*`True`*, **`truncate`**=*`0`*, **`align_cols`**=*`'left'`*, **`top`**=*`False`*, **`show`**=*`True`*, **`return_df`**=*`False`*)

Display dataframe `df` with optional arguments and styling



*   `df`: CNDF dataframe to be displayed. Displays instance dataframe `self._cndf` if None (default)
*   `verbose`: 1 = Index and Layer_name columns only; 2 = Model structure; 3 = Model Structure and layer_info (output dims, params and frozen/unfrozen) (default);  4 = fill in container columns with layer_info; 5 = expose hidden columns.
*   `tight`: False = normal row spacing; True = tight layout with minimal space between rows (best for large models with many rows to display). The default is True but dataframes with fewer than 10 rows are automatically displayed with normal spacing.
*   `truncate`: truncate the number of displayed columns by an integer value between -10 and 10. 0 = default. Negative values reveal hidden columns. Overridden when the verbose argument is set to a non-default setting.
*   `align_cols`: 'left' (default); 'right' alignment of column data
*   `top`: display first 10 rows only followed by a count of undisplayed rows
*   `show`: True (default)/False show/hide cell output.
*   `return_df`: return the formatted df to the caller if True. False returns None (default). 




In [None]:
#export
class CNDFSearch:
  "Class to search a CNDF dataframe, display the results in a dataframe and return the matching module object(s)"

  # Columns searched, in this order, for plain string searchterms.
  # A tuple (not a set) keeps the search order deterministic between runs.
  _str_search_cols = ('Module_name', 'Torch_class', 'Division', 'Container_child', 'Container_block', 'Layer_description')

  def _col_mask(self, df, col, term, exact):
    "Boolean mask of rows in `df` where `col` equals `term` (`exact=True`) or contains `term` (`exact=False`, pandas regex match)"
    if exact: return df[col] == term
    # na=False: non-string cells count as 'no match' instead of poisoning the mask with NaN
    return df[col].str.contains(term, na=False)

  def _find_layer(self, df, searchterm, exact):
    """Searches `df` for `searchterm`, returning exact matches only if `exact=True` otherwise any match.

    `searchterm` may be:
    - int: positional row number in `df` (AssertionError if outside 0..len(df)-1)
    - float: converted with `str()` and handled as a string
    - dict: `{column: value}`; rows matching ANY of the pairs are returned
      (AssertionError for an unknown column, empty frame for an empty dict)
    - str: leading/trailing spaces, backslashes and dots are stripped, then the
      columns in `_str_search_cols` are tried in order and the matches from the
      first column with any match are returned

    Returns a new DataFrame (possibly empty). Raises `TypeError` for any other type.
    """
    if isinstance(searchterm, int):
      # valid positions are 0..len(df)-1 (the old check also let len(df) through, which then failed in iloc)
      assert 0 <= searchterm < len(df), f'Layer ID out of range: min 0, max {len(df)-1}'
      return df.iloc[[searchterm]].copy()

    #if searchterm is a float assume it is a layer name (i.e. format 0.0.1) and convert to string
    if isinstance(searchterm, float): searchterm = str(searchterm)

    if isinstance(searchterm, dict):
      mask = None
      for col, s in searchterm.items():
        assert col in df.columns, f'{col} not a valid column identifier. Valid column names are {df.columns}'
        col_mask = self._col_mask(df, col, s, exact)
        mask = col_mask if mask is None else (mask | col_mask)
      return df.iloc[0:0].copy() if mask is None else df[mask].copy()

    if isinstance(searchterm, str):
      searchterm = searchterm.strip(' \\.')
      matches = df.iloc[0:0].copy()
      for col in self._str_search_cols:
        matches = df[self._col_mask(df, col, searchterm, exact)].copy()
        if not matches.empty: break
      return matches

    raise TypeError('Unrecognizable searchterm')

  def search(self, searchterm, df=None, exact=True, show=True):
    """Search `df` for single or combination of modules and layers. If df = None, searches instance dataframe `self._cndf` (default).

    `searchterm`: int, float, str or dict as accepted by `_find_layer`; a list
    of such terms (logical OR); or a tuple of such terms (logical AND, each term
    searched within the matches of the previous one).

    When `show` is True the outcome is printed and matches are displayed with `self.view`.
    Returns the list of values in the 'lyr_obj' column for the matching rows, or None
    when nothing matches. Raises `TypeError` for an unsupported searchterm type.
    """
    # local import: `concat` is not among this module's visible top-level pandas imports
    from pandas import concat

    source = self._cndf if df is None else df
    _df = source.copy()

    if isinstance(searchterm, float): searchterm = str(searchterm)

    if isinstance(searchterm, int):
      _df = self._find_layer(_df, searchterm, True)

    elif isinstance(searchterm, str):
      _df = self._find_layer(_df, searchterm, exact)

    elif isinstance(searchterm, (dict, list)):
      #logical 'OR': search every term against the full frame, then combine the hits once
      terms = [{col: s} for col, s in searchterm.items()] if isinstance(searchterm, dict) else searchterm
      hits = [self._find_layer(source, term, exact) for term in terms]
      hits = [hit for hit in hits if not hit.empty]
      _df = concat(hits, axis=0, ignore_index=False).drop_duplicates('Module_name') if hits else DataFrame()

    elif isinstance(searchterm, tuple):
      #logical 'AND': each term narrows the matches of the previous term
      for s in searchterm:
        if _df.empty: break  #nothing left to narrow
        _df = self._find_layer(_df, s, exact)

    else: raise TypeError('Unrecognizable searchterm')

    #show matches and return corresponding modules
    if _df is not None and not _df.empty:
      if show:
        print(f'{len(_df)} layers found matching searchterm(s): {searchterm}\n')
        self.view(df=_df)
      return _df['lyr_obj'].tolist()
    if show: print(f'No matches for searchterm(s): {searchterm}\n')
    return None

In [None]:
# Render the signature and docstring of `CNDFSearch.search` for the documentation page.
show_doc(CNDFSearch.search)

<h4 id="CNDFSearch.search" class="doc_header"><code>CNDFSearch.search</code><a href="__main__.py#L41" class="source_link" style="float:right">[source]</a></h4>

> <code>CNDFSearch.search</code>(**`searchterm`**, **`df`**=*`None`*, **`exact`**=*`True`*, **`show`**=*`True`*)

Search 'df` for single or combination of modules and layers. If df = None, searches instance dataframe `self._cndf` (default)

`Searchterm` can be:

*   `int` : the module with Index number `int` is returned
*   `float`: module(s) where `str(float)` matches the Layer_name are returned
*   `str`: module(s) with `str` in one of 'Layer_name', 'Torch_class', 'Division', 'Module', 'Block', 'Layer_description' are returned. Columns are searched in this order with the search ending with the first column to make a match/matches. 
*   `dict`, e.g. `{'col': 'str'}` matches `str` in column `col`

Searchterms can also be combined as follows:

*   `[101, 102, 105]` logical OR of rows matching indexes `101`, `102` plus `105`
*   `('0.5', 'conv2d')` logical AND of rows matching `0.5` in Layer_name and `conv2d` in `Layer_description`
*   `{'col1': 'str1', 'col2': 'str2'}` logical OR of matches `str1` in `col1` plus `str2` in `col2`.

Return only exact matches between searchterm and column entry with exact = True (default).
<br />



.

In [None]:
# Tests for CNDFSearch.search against the saved resnet18 fixture frame.
_, _, test_df = get_test_vars()
# `search` reads the 'lyr_obj' column; add a placeholder column
# (presumably the fixture was saved without it - see the commented-out save cell).
test_df['lyr_obj'] = None
cndf_test = CNDFSearch()

# int: a single row selected by position
test_eq(len(cndf_test.search(12, df=test_df, show=False)), 1)
# str, exact match
test_eq(len(cndf_test.search('0.6.1.conv2', df=test_df, show=False)), 1)
# float is converted to the string '0.6'; non-exact match
test_eq(len(cndf_test.search(0.6, df=test_df, exact=False, show=False)), 16)
# dict: matches of each column/value pair are combined (logical OR)
test_eq(len(cndf_test.search({'Module_name': '0.6', 'Layer_description':'Conv2d'}, df=test_df, exact=True, show=False)), 1)
# list: logical OR of the individual searches
test_eq(len(cndf_test.search(['0.6', '0.5'], df=test_df, exact=False, show=False)), 32)
# tuple: logical AND; no row matches both terms, so None is returned
test_eq(cndf_test.search(('0.6', '0.5'), df=test_df, exact=False, show=False), None)
del(cndf_test)

In [None]:
#export
class ConvNav(CNDF, CNDFSearch, CNDFView):
  "Class to view fastai supported CNNs, search and select module(s) and layer(s) for further investigation. Automatically builds a CNDF dataframe from Learner and Learner.summary()"
  # NOTE(review): attributes such as `_cndf`, `model_name`, `model_type`, `model_info`, `input_sizes`,
  # `output_dimensions`, `frozen_to` and `num_param_groups` are not set in this class - presumably provided by `CNDF`.
  def __init__(self, learner, learner_summary):
    super().__init__(learner, learner_summary)

  def __len__(self):
    "Number of rows in the CNDF dataframe"
    return len(self._cndf)

  def __str__(self) -> str:
    return self.model_info

  def __call__(self):
    "Display the first 10 rows of the CNDF dataframe"
    # `view` has no `head` parameter (passing it raised TypeError); `top=True` shows the first 10 rows
    self.view(top=True)

  def __contains__(self, s):
    "Supports `s in cn`: runs `search(s)` (which also displays any matches); truthy when at least one match is found"
    return self.search(s)

  @property
  def head(self):
    "Print `model` head summary info and modules"
    df = self._cndf.copy()
    df = df[df['Module_name'].str.startswith('1')]
    if df.empty:
      print("Model has no head")
      return
    res = f"{self.model_type.capitalize()}: {self.model_name.capitalize()}\n"
    res += f"Input shape: {self._cndf.iloc[1]['out_dim']} (bs, filt, h, w)\n"
    res += f"Output features: {self.output_dimensions} (bs, classes)\n" 
    print(res)
    self.view(df, truncate=1)

  @property
  def body(self):
    "Print `model` body summary info and modules"
    df = self._cndf.copy()
    df = df.loc[df['Module_name'].str.startswith('0')]
    if df.empty:
      print("Model body has no contents")
      return
    res = f"{self.model_type.capitalize()}: {self.model_name.capitalize()}\n"
    res += f"Input shape: {self.input_sizes} (bs, ch, h, w)\n"
    res += f"Output dimensions: {df.iloc[-1]['Output_dimensions']} (bs, filt, h, w)\n"
    res += f"Currently frozen to parameter group {self.frozen_to} out of {self.num_param_groups}\n" 
    print(res)
    self.view(df)

  @property
  def divs(self):
    "Print Summary information from `model` head and body"
    cndf = self._cndf
    df = cndf[(cndf['Module_name'] == '0') | (cndf['Module_name'] == '1')].copy()

    def _set(pos, col, value):
      # Write through `.loc` on the frame itself. The previous `df.iloc[pos][col] = value`
      # is chained assignment and may only update a temporary row copy.
      # assumes the index labels of `df` are unique - TODO confirm
      df.loc[df.index[pos], col] = value

    # Iterate over the division rows actually present instead of assuming both
    # '0' (body) and '1' (head) exist.
    for pos, div in enumerate(df['Module_name'].tolist()):
      df_div = cndf.loc[cndf['div_id'] == div]
      _set(pos, 'Model', self.model_name)
      for col in ('Container_child', 'Container_block', 'Layer_description'):
        # count of non-empty entries in this division
        _set(pos, col, len(df_div[df_div[col] != '']))
      # only plain int entries are summed; any other value is skipped
      _set(pos, 'Parameters', sum(p for p in df_div['Parameters'].values if isinstance(p, int)))

    df['Output_dimensions'] = df['out_dim']
    if len(df): _set(0, 'Currently', df.iloc[0]['current'])

    df = df.rename(columns={'Container_child': 'Child modules', 'Container_block': 'Blocks', 'Layer_description': 'Layers'})
    print(f"{self.model_name.capitalize()}\nDivisions:  body (0), head (1)\n")
    self.view(df, tight=False)

  @property
  def dim_transitions(self):
    """Finds layers with different input and output dimensions. These are useful points to apply hooks and callbacks for investigating model activity.

    Walks the rows whose 'Torch_class' contains 'Conv2d' and keeps each row whose last
    output dimension differs from that of the previously kept row. Displays the kept rows
    and returns their 'lyr_obj' values as a list.
    """
    df = self._cndf[self._cndf['Torch_class'].str.contains('Conv2d')].copy()

    keep = []
    old_dims = 0
    for pos, (_, row) in enumerate(df.iterrows()):
      new_dims = row['Output_dimensions'].rstrip(']').split(' x ')[-1]
      if new_dims != old_dims:
        keep.append(pos)
        old_dims = new_dims
    # copy so that copy_view's in-place fill does not write into a slice of `df`
    df = df.iloc[keep].copy()

    print(f"{self.model_name.capitalize()}\nLayer dimension changes\n")
    self.copy_view(df, tight=False)
    return df['lyr_obj'].tolist()

  @property
  def linear_layers(self):
    "Prints and returns all linear layers in the `model`"
    df = self._cndf[self._cndf['Torch_class'].str.contains('Linear')].copy()
    df['Division'] = df['div_id']

    print(f"{self.model_name.capitalize()} linear layers\n")
    self.view(df, truncate=1, tight=False)
    return df['lyr_obj'].tolist()


```
cn = ConvNav(Learner, Learner.summary())
```

In [None]:
# Build a ConvNav from the saved resnet18 learner and summary, then check the shape of the resulting CNDF frame.
test_learner, test_summary, _ = get_test_vars()
cn_test = ConvNav(test_learner, test_summary)

test_eq(type(cn_test._cndf), DataFrame)    
test_eq(len(cn_test._cndf), 79)             # rows
test_eq(len(cn_test._cndf.columns), 22)     # columns
del(cn_test)


View and search the CNDF dataframe to select modules and layers of interest.
```
cn.view()
cn.search('conv2d')
```

Additional methods:

In [None]:
# Render the docstring of the `ConvNav.head` property for the documentation page.
show_doc(ConvNav.head)

<h4 id="ConvNav.head" class="doc_header"><code>ConvNav.head</code><a href="" class="source_link" style="float:right">[source]</a></h4>

Print `model` head summary info and modules

In [None]:
# Render the docstring of the `ConvNav.body` property for the documentation page.
show_doc(ConvNav.body)

<h4 id="ConvNav.body" class="doc_header"><code>ConvNav.body</code><a href="" class="source_link" style="float:right">[source]</a></h4>

Print `model` body summary info and modules

In [None]:
# Render the docstring of the `ConvNav.divs` property for the documentation page.
show_doc(ConvNav.divs)

<h4 id="ConvNav.divs" class="doc_header"><code>ConvNav.divs</code><a href="" class="source_link" style="float:right">[source]</a></h4>

Print Summary information from `model` head and body

In [None]:
# Render the docstring of the `ConvNav.dim_transitions` property for the documentation page.
show_doc(ConvNav.dim_transitions)

<h4 id="ConvNav.dim_transitions" class="doc_header"><code>ConvNav.dim_transitions</code><a href="" class="source_link" style="float:right">[source]</a></h4>

Finds layers with different input and output dimensions. These are useful points to apply hooks and callbacks for investigating model activity.

In [None]:
# Render the docstring of the `ConvNav.linear_layers` property for the documentation page.
show_doc(ConvNav.linear_layers)

<h4 id="ConvNav.linear_layers" class="doc_header"><code>ConvNav.linear_layers</code><a href="" class="source_link" style="float:right">[source]</a></h4>

Prints and returns all linear layers in the `model`

## Saving and loading CNDF dataframes

In [None]:
#export
def save_cndf(cn, filename, path='', with_modules=False):
  """Saves a CNDF dataframe of the ConvNav instance `cn` to persistent storage at `path` with `filename`, gzip compressed.

  `cn`: object exposing the dataframe as `cn._cndf`; the frame itself is not modified.
  `filename`: name of the file to write.
  `path`: directory to write into, with or without a trailing separator ('' = current directory).
  `with_modules`: keep the 'lyr_obj' column (the module objects) in the saved frame when True.
  """
  import os  # local import keeps this block self-contained
  df = cn._cndf.copy()
  if not with_modules:
    # Drop the module objects by name; fall back to the previous "drop the last column" rule
    # when the frame has no 'lyr_obj' column.
    df = df.drop(columns='lyr_obj') if 'lyr_obj' in df.columns else df.iloc[:,:-1]
  # os.path.join instead of `path+filename`, so a `path` without a trailing separator still works
  with gzip.open(os.path.join(path, filename), "wb") as f:
    pickle.dump(df, f, pickle.HIGHEST_PROTOCOL)

In native format, CNDF dataframes include the module objects in a 'lyr_obj' column and the combined size of the module objects can be quite large, 100-200 MB for a complex model such as a densenet or xresnet. Thus, by default, module objects are removed from the dataframe before saving. To save the module objects as well, check you have enough space in the download location and set `with_modules` to True. Dataframes are gzip compressed. 

In [None]:
#export
def load_cndf(filename, path=''):
  """Loads a CNDF dataframe from persistent storage at `path`+`filename` and unzips it.

  `filename`: name of the gzip-compressed pickle file.
  `path`: directory containing the file, with or without a trailing separator ('' = current directory).
  Returns the unpickled object.
  """
  import os  # local import keeps this block self-contained
  # NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.
  # os.path.join instead of `path+filename`, so a `path` without a trailing separator still works
  with gzip.open(os.path.join(path, filename), "rb") as f:
    return pickle.load(f)

In [None]:
#hide
#save example ConvNav Learner, Learner.summary() and df to use in testing and examples
# with gzip.open("test_learner_resnet18", "wb") as f:
#     pickle.dump(learn, f, pickle.HIGHEST_PROTOCOL)

# with gzip.open("test_summary_resnet18", "wb") as f:
#     pickle.dump(learn.summary(), f, pickle.HIGHEST_PROTOCOL)
    
# df_test = cn_test._cndf.iloc[:,:-1]
# with gzip.open("test_df_resnet18", "wb") as f:
#     pickle.dump(df_test, f, pickle.HIGHEST_PROTOCOL)

In [None]:
#hide
# Reset the fixture globals so the next cell reloads them from disk.
test_df = None
test_learner = None
test_summary = None

In [None]:
#hide
#download the example Convnav object and df
# Each fixture is a gzip-compressed pickle in the current working directory.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted fixture files.
with gzip.open("test_df_resnet18", "rb") as f:
    test_df = pickle.load(f)

with gzip.open("test_summary_resnet18", "rb") as f:
    test_summary = pickle.load(f)

with gzip.open("test_learner_resnet18", "rb") as f:
    test_learner = pickle.load(f)