In [None]:
# For Development and debugging:
# Reload modules without restarting the kernel, so that edits made to the
# imported libraries are picked up by the notebook.
%load_ext autoreload
%autoreload 2

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
pd.options.display.max_columns = None
import os
import sys
import matplotlib.pyplot as plt
import json
import copy
import time

# Add EXTERNAL_LIBS_PATH to sys paths (for loading libraries)
# NOTE(review): this is an absolute, machine-specific path; it has to be
# adjusted before the notebook can run anywhere else.
EXTERNAL_LIBS_PATH = '/home/hhughes/Documents/Master_Thesis/Project/workspace/libs'
# Inserted at index 1, i.e. right after the first sys.path entry.
sys.path.insert(1, EXTERNAL_LIBS_PATH)

# Load custom libs (presumably located in EXTERNAL_LIBS_PATH - verify)
import NN_interpretability as nn_inter
import Data_augmentation as data_aug

# Disable GPU
# NOTE(review): this variable is set after `import tensorflow` has already run;
# presumably TensorFlow has not initialised its devices yet at this point - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [None]:
# Set parameters
# Notebook configuration: data locations, model directory, CMA identifier and
# the cells to inspect.
params = {
    "input_data_dir": "/home/hhughes/Documents/Master_Thesis/Project/workspace/Interpretability/Cells",
    'base_path': '/home/hhughes/Documents/Master_Thesis/Project/workspace/Interpretability',
    'model_dir': 'XC',
    'CMA': 'CMA_0',
    # mapobject_id_cell values, kept as strings because they are also used to
    # build file names and dictionary keys further down.
    'cells': ['340547', '307720', '321021', '232615', '205760', '379184'],
}

# Directory from which the score maps of the selected model directory are read
score_maps_dir = os.path.join(params['base_path'], 'Score_maps', params['model_dir'])

# 1.- Load Data

## 1.1.- Load general data (independent of the model)

In [None]:
# Load metadata
# The filtered metadata table is read from <base_path>/Metadata.
metadata_path = os.path.join(params['base_path'], 'Metadata', 'filtered_metadata.csv')
with open(metadata_path, 'r') as metadata_file:
    metadata_df = pd.read_csv(metadata_file)
# Preview the first rows of the table
metadata_df.head()

In [None]:
# Load parameters
# JSON file with the model parameters; its 'input_channels' entry is used
# below to select the input channels.
parameters_path = os.path.join(params['base_path'], 'Metadata', 'parameters.json')
with open(parameters_path, 'r') as parameters_file:
    model_params = json.load(parameters_file)
#model_params.keys()

In [None]:
# Load Channels
channels_path = os.path.join(params['base_path'], 'Metadata', 'channels.csv')
with open(channels_path, 'r') as channels_file:
    channels_df = pd.read_csv(channels_file)

# Get input channel ids: rows whose name is listed in model_params['input_channels']
is_input_channel = channels_df['name'].isin(model_params['input_channels'])
input_ids = channels_df.loc[is_input_channel, 'channel_id'].values

# Get output channel id: first row named '00_EU'
is_output_channel = channels_df['name'] == '00_EU'
output_id = channels_df.loc[is_output_channel, 'channel_id'].values[0]

# Get normalization values, ordered by channel_id
norm_vals = channels_df.sort_values(by='channel_id')['normalization_vals'].values
channels_df

## 1.2.- Load Model Data

In [None]:
# Load models
# Every entry of models_path is treated as one model directory; the saved model
# is read from <models_path>/<entry>/<params['CMA']> and stored under the entry name.
models = {}
models_path = os.path.join(params['base_path'], 'Models', params['model_dir'])
# os.listdir returns entries in arbitrary order; sorting makes the loading
# order (and therefore the model summarized below) the same on every run.
for model in sorted(os.listdir(models_path)):
    print('Loading model: ', model)
    models[model] = tf.keras.models.load_model(os.path.join(models_path, model, params['CMA']))
print('')
# Summarize the last loaded model. The guard avoids a NameError on the loop
# variable when models_path contains no entries.
if models:
    models[model].summary()
else:
    print('No models found in: ', models_path)

In [None]:
# Load model_predictions
# Builds one table with the columns of the first model's targets file plus one
# 'y_hat_<model>' column per model, joined on 'mapobject_id_cell'.
models_path = os.path.join(params['base_path'], 'Models', params['model_dir'])
targets_df = pd.DataFrame()
for model_idx, model in enumerate(os.listdir(models_path)):
    print('Reading predicted values for model: ', model)
    targets_path = os.path.join(models_path, model, 'targets_'+params['CMA']+'.csv')
    with open(targets_path, 'r') as targets_file:
        model_targets = pd.read_csv(targets_file)

    # Drop the residual column and move the prediction into a model-specific column
    model_targets = model_targets.drop(columns=['y - y_hat'])
    prediction_name = 'y_hat'+'_'+model
    model_targets[prediction_name] = model_targets['y_hat']
    model_targets = model_targets.drop(columns=['y_hat'])

    if model_idx == 0:
        # The first model provides the base table (all remaining columns)
        targets_df = model_targets.copy()
        continue

    # Later models only contribute their prediction column
    model_predictions = model_targets[['mapobject_id_cell', prediction_name]]
    targets_df = targets_df.merge(model_predictions,
                                  on='mapobject_id_cell',
                                  how='left')
targets_df.head()

## 1.3.- Load cells

In [None]:
# Show the target/prediction rows of the cells listed in params['cells']
selected_ids = np.array(params['cells'], dtype=np.int64)
mask = targets_df['mapobject_id_cell'].isin(selected_ids)
targets_df.loc[mask]

In [None]:
# Load the selected cells, normalize them and preview them side by side.
# For every id in params['cells'] two entries are stored in `cells`:
#   '<id>_img'  - normalized image restricted to the input channels (float32)
#   '<id>_mask' - the 'mask' array stored in the same .npz file
cells = {}
n_cells = len(params['cells'])
plt.figure(figsize=(n_cells*11,10))

for i, cell in enumerate(params['cells'], 1):
    temp_path = os.path.join(params['base_path'], 'Cells', cell+'.npz')
    # np.load returns an NpzFile that keeps the archive open until it is
    # closed; the context manager releases the file handle once both arrays
    # have been read into memory.
    with np.load(temp_path) as temp_cell:
        # Normalize cell (the division already produces a new array, so no
        # extra copy is needed)
        normalized_img = temp_cell['img'] / norm_vals
        cells[cell+'_mask'] = temp_cell['mask']
    # filter accordingly to the input channels
    cells[cell+'_img'] = normalized_img[:,:,input_ids].astype(np.float32)

    # Plot cells: scale every channel by its maximum over the first two axes
    # and show channels 10 to 12 of the filtered image
    temp_img = (cells[cell+'_img'] / np.max(cells[cell+'_img'], axis=(0,1)))[:,:,10:13]
    plt.subplot(1, n_cells, i)
    nn_inter.plot_cell(img=temp_img, title=cell)

In [None]:
# Sanity check: compute y_hat given the image and using loaded models
# For every model and selected cell, print the stored target, the stored
# prediction and a prediction recomputed from the loaded image.
for model in os.listdir(models_path):
    print('Model: ', model)
    for cell in params['cells']:
        print('\tmapobject_id_cell: '+cell)

        # Add a leading axis so the single image is passed as a batch of one
        image_batch = tf.expand_dims(cells[cell+'_img'], axis=0)
        cell_rows = targets_df[targets_df.mapobject_id_cell == int(cell)]
        y_true = round(cell_rows['y'].values[0], 2)
        y_hat = round(cell_rows['y_hat_'+model].values[0], 2)
        y_hat_sanity = round(float(models[model].predict(image_batch)[0][0]), 2)

        print('\t\ty_true: {}, y_hat: {}, y_hat_sanity: {}'.format(y_true, y_hat, y_hat_sanity))

# 2.- Get Score Matrix for each cell (VarGrad IG)

## 2.2.- Load VarGrad IG

### Load Score Maps

In [None]:
# Load every '.npy' file found in score_maps_dir into VarGrad_IG, keyed by the
# file name without its extension (e.g. 'Run_1_379184.npy' -> 'Run_1_379184').
VarGrad_IG = {}
for file_name in os.listdir(score_maps_dir):
    # Compare the real extension instead of searching for the substring '.npy',
    # so that names such as 'x.npy.bak' are skipped rather than loaded under a
    # truncated key.
    map_name, extension = os.path.splitext(file_name)
    if extension != '.npy':
        continue

    # Only announce files that are actually loaded
    print('Loading Score map: '+file_name)
    VarGrad_IG[map_name] = np.load(os.path.join(score_maps_dir, file_name))

### Plot score maps for both models and compare

In [None]:
# Larger view of a single cell, restricted to channels 13 and 23, comparing
# the score maps stored under 'Run_1_<cell>' and 'Run_2_<cell>'.
cell = '379184'
cell_img = cells[cell+'_img']
cell_mask = cells[cell+'_mask']
run_1_map = VarGrad_IG['Run_1_'+cell]
run_2_map = VarGrad_IG['Run_2_'+cell]
nn_inter.plot_VarGrad_IG_with_control(
    img=cell_img,
    img_mask=cell_mask,
    score_map_1=run_1_map,
    score_map_2=run_2_map,
    top_percent=0.4,
    channels_df=channels_df,
    img_size=(14,14),
    score_map_same_sacale=False,
    channels=[13, 23],
)

### 100%

In [None]:
# Plot score maps for both models
# Options shared by every cell in this section (top_percent=1)
plot_options = dict(top_percent=1,
                    channels_df=channels_df,
                    img_size=(7,7),
                    score_map_same_sacale=False)
for cell in params['cells']:
    print('Plotting cell: '+cell)
    img_key, mask_key = cell+'_img', cell+'_mask'
    nn_inter.plot_VarGrad_IG_with_control(img=cells[img_key],
                                          img_mask=cells[mask_key],
                                          score_map_1=VarGrad_IG['Run_1_'+cell],
                                          score_map_2=VarGrad_IG['Run_2_'+cell],
                                          **plot_options)

### 50%

In [None]:
# Plot score maps for both models with top_percent=0.5
plot_options = dict(top_percent=0.5,
                    channels_df=channels_df,
                    img_size=(7,7),
                    score_map_same_sacale=False)
for cell in params['cells']:
    print('Plotting cell: '+cell)
    img_key, mask_key = cell+'_img', cell+'_mask'
    nn_inter.plot_VarGrad_IG_with_control(img=cells[img_key],
                                          img_mask=cells[mask_key],
                                          score_map_1=VarGrad_IG['Run_1_'+cell],
                                          score_map_2=VarGrad_IG['Run_2_'+cell],
                                          **plot_options)

### 20%

In [None]:
# Plot score maps for both models with top_percent=0.2
plot_options = dict(top_percent=0.2,
                    channels_df=channels_df,
                    img_size=(7,7),
                    score_map_same_sacale=False)
for cell in params['cells']:
    print('Plotting cell: '+cell)
    img_key, mask_key = cell+'_img', cell+'_mask'
    nn_inter.plot_VarGrad_IG_with_control(img=cells[img_key],
                                          img_mask=cells[mask_key],
                                          score_map_1=VarGrad_IG['Run_1_'+cell],
                                          score_map_2=VarGrad_IG['Run_2_'+cell],
                                          **plot_options)

### 10%

In [None]:
# Plot score maps for both models with top_percent=0.1
plot_options = dict(top_percent=0.1,
                    channels_df=channels_df,
                    img_size=(7,7),
                    score_map_same_sacale=False)
for cell in params['cells']:
    print('Plotting cell: '+cell)
    img_key, mask_key = cell+'_img', cell+'_mask'
    nn_inter.plot_VarGrad_IG_with_control(img=cells[img_key],
                                          img_mask=cells[mask_key],
                                          score_map_1=VarGrad_IG['Run_1_'+cell],
                                          score_map_2=VarGrad_IG['Run_2_'+cell],
                                          **plot_options)

### 5%

In [None]:
# Plot score maps for both models with top_percent=0.05
plot_options = dict(top_percent=0.05,
                    channels_df=channels_df,
                    img_size=(7,7),
                    score_map_same_sacale=False)
for cell in params['cells']:
    print('Plotting cell: '+cell)
    img_key, mask_key = cell+'_img', cell+'_mask'
    nn_inter.plot_VarGrad_IG_with_control(img=cells[img_key],
                                          img_mask=cells[mask_key],
                                          score_map_1=VarGrad_IG['Run_1_'+cell],
                                          score_map_2=VarGrad_IG['Run_2_'+cell],
                                          **plot_options)