Sensitivity Grad-CAM - add a random node at a random visit

In [None]:
import os

# The current working directory needs to be in explainable_TGCNN, i.e. the
# parent of the directory this notebook is started from.
# os.pardir ('..') is used rather than a hard-coded '..\\': the backslash form
# is only a valid parent-directory path on Windows, whereas os.pardir resolves
# to the parent directory on every platform.
print(os.getcwd())
os.chdir(os.pardir)
print(os.getcwd())

import pandas as pd
from src import whole_model_demographics_gradcam, graph_plot, gc, utils, create_fake_patients
import tensorflow as tf
import numpy as np

In [None]:
# ---- Run identification -----------------------------------------------------
hip_or_knee = 'hip'
run_name = 'hip_1999_to_one_year_advance_model'
years_in_advance = "5"

# ---- Model switches ---------------------------------------------------------
demo = True
include_drugs = True
second_TGCNN_layer = True

# ---- Temporal graph / convolution geometry ----------------------------------
max_timesteps = 100
filter_size = 4
stride = 1

# The number of event codes (graph nodes) is larger when drug codes are included.
max_event_codes = 518 if include_drugs else 512

# Fake mapping DataFrame for the Read Codes and their corresponding descriptions.
read_code_map_df = pd.read_csv('fake_read_code_descriptions.csv')

# Build the TGCNN model and then restore its trained weights from disk.
model = whole_model_demographics_gradcam.TGCNN_Model(
    num_filters=16,
    num_nodes=max_event_codes,
    num_time_steps=max_timesteps,
    filter_size=filter_size,
    variable_gamma=True,
    exponential_scaling=True,
    dropout_rate=0.7,
    lstm_units=64,
    fcl1_units=128,
    LSTM_ablation=False,
    stride=stride,
    activation_type='LeakyReLU',
    no_timestamp=False,
    second_TGCNN_layer=second_TGCNN_layer,
    num_labels=1,
)
model.load_weights('hip_1999_to_one_year_advance_model1_CNN_layer')

# Synthetic patients used for the sensitivity analysis.
# NOTE(review): the second argument (99) is passed positionally; its meaning is
# defined by create_fake_patient_df and is not visible here -- confirm.
num_pats = 5
cv_patients = create_fake_patients.create_fake_patient_df(num_pats, 99, max_event_codes)

In [None]:
# Loop through each patient: run the original graph once, then loop through the
# modified graphs (each with a pseudo node added) and compare, at the visit where
# the pseudo node was added, the percentage timestep influence with and without it.


def _read_code_influence_df(input_3d, input_4d, demo_tensor, only_pos):
    """Run the model on one patient graph and return its read-code influence DataFrame.

    Uses the notebook-level ``model``, ``stride``, ``filter_size``,
    ``max_timesteps`` and ``read_code_map_df``.

    Args:
        input_3d: sparse 3D patient tensor, as returned by ``utils.return_pat_from_df``.
        input_4d: model input for the same patient, from ``utils.return_pat_from_df``.
        demo_tensor: demographics input passed to the model alongside ``input_4d``.
        only_pos: forwarded to ``gc.calc_local_map`` as ``only_pos``.

    Returns:
        The DataFrame produced by ``gc.map_read_code_labels``; the caller reads
        its ``'x'`` and ``'perc_timestep_infl'`` columns.
    """
    # Densify, reorder the axes and reverse the first axis before building the edge list.
    dense_tensor = tf.sparse.to_dense(input_3d)
    dense_tensor = tf.transpose(dense_tensor, perm=[2, 1, 0])
    dense_tensor = np.flip(dense_tensor, axis=0)

    # The forward pass must happen before the gradients are read from the model.
    # NOTE(review): presumably the call populates model.dy_du_branch1 -- confirm
    # in TGCNN_Model. The returned logits themselves are not needed here.
    model(input_4d, demo_tensor, training=False)
    grads = model.dy_du_branch1

    # Get the entire patient's history in a DataFrame
    edges_df = graph_plot.create_edges_df_gc(dense_tensor)

    # Get the node positions for the graph
    pos_df = graph_plot.create_position_df_gc(edges_df)
    pos_list = graph_plot.generate_pos_sequence(pos_df['max_codes_per_visit'].max())
    pos_df = graph_plot.map_y_coord_to_node(pos_df, pos_list)

    # Localisation map -> per-timestep weights -> weights attached to read codes
    l_map = gc.calc_local_map(model, grads, only_pos=only_pos, filt_num=None)
    timestep_ave_grad_df = gc.calc_timestep_weights(stride, filter_size, l_map, max_timesteps)
    return gc.map_read_code_labels(pos_df, read_code_map_df, timestep_ave_grad_df)


relu = False

num_random_node = 10

sensitivity_list = []
for pat in range(num_pats):

    # Original (unmodified) graph: only needs to be explained once per patient
    input_3d, input_4d, demo_tensor, _, _ = utils.return_pat_from_df(
        cv_patients, max_event_codes, hip_or_knee, pat, max_timesteps)
    read_code_pos_df = _read_code_influence_df(input_3d, input_4d, demo_tensor, relu)

    v_mod_list, v_orig_list = [], []

    for _ in range(num_random_node):
        # Generate the same patient with a pseudo node added; visit_num is the
        # visit that received the pseudo node.
        input_3d, input_4d, demo_tensor, _, _, visit_num = utils.return_pat_from_df(
            cv_patients, max_event_codes, hip_or_knee, pat, max_timesteps, add_p_node=True)
        p_read_code_pos_df = _read_code_influence_df(input_3d, input_4d, demo_tensor, relu)

        # Percentage influence on that visit with and without the pseudo node.
        # .iloc[0] takes the first row for the visit and raises IndexError if
        # either graph has no row with x == visit_num.
        mod_visit_infl = p_read_code_pos_df[p_read_code_pos_df['x'] == visit_num]['perc_timestep_infl']
        orig_visit_infl = read_code_pos_df[read_code_pos_df['x'] == visit_num]['perc_timestep_infl']
        v_mod_list.append(mod_visit_infl.iloc[0])
        v_orig_list.append(orig_visit_infl.iloc[0])

    # Average absolute change in visit influence over the random modifications
    l1_norm = np.sum(np.abs(np.array(v_mod_list) - np.array(v_orig_list)))
    ave_l1_norm = l1_norm / num_random_node
    sensitivity_list.append(ave_l1_norm)

    # Progress report every 10th patient (never for the first one)
    if (pat % 10) == 0 and (pat != 0):
        print(f"Patient number: {pat}")
        print(f"{(((pat+1)/num_pats)*100):.2f}% Complete")
        # '\\pm' prints a literal '\pm'; a bare '\p' is an invalid escape sequence
        print(f"Sensitivity mean +- std: {np.mean(sensitivity_list)}$\\pm${np.std(sensitivity_list)}")
print(f"Sensitivity mean +- std: {np.mean(sensitivity_list)}$\\pm${np.std(sensitivity_list)}")