In [None]:
import sys
sys.path.append("..")

import numpy as np
import pandas as pd
import seaborn as sns

import ast
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import re

In [None]:
# Normalised measurement of saliency ranking out of total number of neurons
def measure_concept_relevance(row_index, total_neurons):
    """Map a neuron's saliency rank to a normalised relevance score.

    Ranks are grouped into buckets of 100 rows (bucket 1 holds rows 0-99).
    The bucket number is min-max scaled between bucket 1 and
    ``total_neurons / 100``, so the first bucket scores 1.0 and the score
    shrinks towards 0 as ``row_index`` approaches ``total_neurons``.

    Parameters
    ----------
    row_index : int
        Zero-based position of the neuron in the saliency ranking.
    total_neurons : int
        Total number of ranked neurons.

    Returns
    -------
    float
        Absolute value of the scaled bucket number.
    """
    worst_bucket = total_neurons / 100
    best_bucket = 1
    # Bucket number is 1-based: rows 0-99 -> 1, rows 100-199 -> 2, ...
    bucket = row_index // 100 + 1
    return abs((bucket - worst_bucket) / (best_bucket - worst_bucket))

# Detect concept lists containing any non-alphabetic word (e.g. raw byte escapes such as '\x80')
def has_word_starting_with_backslash(word_list):
    """Return True if any entry of ``word_list`` is not purely alphabetic.

    Despite the name, the check is ``str.isalpha()``: digits, punctuation,
    escape sequences and empty strings all count as a hit. An empty list
    yields False.
    """
    return any(not word.isalpha() for word in word_list)

# Convert into grid of 7 * 768 neurons with saliency values
def build_heatmap_data(p, total_neurons, neurons_per_layer=768):
    """Arrange per-neuron saliency scores into a (layer x neuron index) grid.

    Parameters
    ----------
    p : pandas.DataFrame
        Needs a ``'neuron-id'`` column (flat neuron id) and a
        ``'current_concepts'`` column holding the string form of a list of
        ``(word, <anything>)`` pairs. Each row's index label is used as that
        neuron's saliency rank.
    total_neurons : int
        Total number of neurons. Determines how many layer rows the grid has
        and is forwarded to ``measure_concept_relevance``.
    neurons_per_layer : int, optional
        Number of neurons in one layer, i.e. the grid width (default 768).

    Returns
    -------
    pandas.DataFrame
        Grid with one row per layer and one column per neuron index. Cells
        are NaN unless the neuron appears in ``p`` with an all-alphabetic
        concept list, in which case they hold its relevance score.
    """
    # Ceiling division: size the grid from total_neurons so every layer gets a
    # row up front, instead of relying on .loc enlargement to append a missing
    # layer only when one of its neurons happens to pass the filter.
    n_layers = int(-(-total_neurons // neurons_per_layer))
    heatdf = pd.DataFrame(
        float('NaN'),
        index=range(n_layers),
        columns=range(neurons_per_layer),
    )

    for rank, nid, concepts in zip(p.index, p['neuron-id'], p['current_concepts']):
        # 'current_concepts' is stored as text; parse it back into pairs.
        words = [word for word, _ in ast.literal_eval(concepts)]
        if has_word_starting_with_backslash(words):
            continue
        # Flat neuron id -> (layer row, column within the layer).
        layer_id, neuron_index = divmod(nid, neurons_per_layer)
        heatdf.loc[layer_id, neuron_index] = measure_concept_relevance(rank, total_neurons)
    return heatdf

def build_heatmap_figure(df, name, save=False):
    """Plot the full saliency grid as a wide heatmap.

    ``df`` is the grid to draw, ``name`` is the output file stem, and
    ``save`` controls whether the figure is written to ``<name>.pdf``.
    """
    # Diverging palette; masked/invalid cells are drawn in black.
    palette = mpl.colormaps['coolwarm']
    palette.set_bad('black')

    # Wide canvas so every neuron column of the grid fits on one figure.
    plt.figure(figsize=(30, 10))
    sns.heatmap(data=df, cmap=palette)

    plt.xlabel("Neuron index")
    plt.ylabel("Layer")

    if not save:
        return
    plt.savefig(f'{name}.pdf', bbox_inches='tight')
        
def build_zoom_heatmap_figure(df, name, save=False):
    """Plot a zoomed-in heatmap of neuron columns 360-390 of the grid.

    ``df`` is the full grid, ``name`` is the output file stem, and ``save``
    controls whether the figure is written to ``<name>.pdf``. The colour
    scale is pinned to [0, 1].
    """
    # Diverging palette; masked/invalid cells are drawn in black.
    palette = mpl.colormaps['coolwarm']
    palette.set_bad('black')

    # Keep every row but only the column window of interest.
    window = df.iloc[:, 360:391]

    # Small square canvas for the cropped view.
    plt.figure(figsize=(5, 5))
    sns.heatmap(data=window, cmap=palette, vmin=0, vmax=1)

    plt.xlabel("Neuron index")
    plt.ylabel("Layer")

    if not save:
        return
    plt.savefig(f'{name}.pdf')

Create saliency heatmaps for the entire base model, a zoomed-in view of the base model, and a zoomed-in view of the final retrained model.

In [None]:
from src import BASE_CONCEPT_PATH

# Load the base model's concept table and keep only the columns the grid needs.
base_df = pd.read_csv(BASE_CONCEPT_PATH)
base_set_df = base_df.loc[:, ['neuron-id', 'current_concepts']]

# Full-width heatmap of the base model, saved as base_orig_heatmap.pdf.
base_heatdf = build_heatmap_data(p=base_set_df, total_neurons=7 * 768)
build_heatmap_figure(df=base_heatdf, name="base_orig_heatmap", save=True)

In [None]:
from src import BASE_CONCEPT_PATH

# Load the base model's concept table and keep only the columns the grid needs.
base_df = pd.read_csv(BASE_CONCEPT_PATH)
base_set_df = base_df.loc[:, ['neuron-id', 'current_concepts']]

# Zoomed-in heatmap of the base model, saved as base_orig_heatmap_zoom.pdf.
base_heatdf = build_heatmap_data(p=base_set_df, total_neurons=7 * 768)
build_zoom_heatmap_figure(df=base_heatdf, name="base_orig_heatmap_zoom", save=True)

In [None]:
from src import PROCESSED_4

# Load the retrained model's concept table and keep only the columns the grid needs.
retrain_4_df = pd.read_csv(PROCESSED_4)
retrain_4_set_df = retrain_4_df.loc[:, ['neuron-id', 'current_concepts']]

# Zoomed-in heatmap of the retrained model, saved as retrain_4_heatmap_zoom.pdf.
retrain_4_heatdf = build_heatmap_data(p=retrain_4_set_df, total_neurons=7 * 768)
build_zoom_heatmap_figure(df=retrain_4_heatdf, name="retrain_4_heatmap_zoom", save=True)