In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
import time
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
import gc
import cv2
import shap

from tqdm import tqdm

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import ConfusionMatrixDisplay


# from tensorflow import keras
# from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model
# from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

from matplotlib import pyplot as plt


notebook_dir = os.getcwd()
target_path = os.path.abspath(os.path.join(notebook_dir, '../Python_Code'))
sys.path.append(target_path)
from dataGenerator import DataGenerator, read_mat, DataGeneratorUnified

import util

# Constants
# Activity codes evaluated in this notebook, mapped to a human-readable name.
activities = {
  'A': 'Push forward',
  'C': 'Hands up and down',
  'P': 'Reading',
  'S': 'Writing',
}
# Label codes handed to the data helpers as their `filter_labels` argument.
filter_labels = list(activities)
batchsize = 32
# Full label vocabulary; a predicted class index is mapped to its label code
# by indexing into `labels_array`.
labels = list('ABCDEFGHIJKLMNOPQRST')
labels_array = np.asarray(labels)

2025-06-07 10:08:47.718094: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-07 10:08:47.733813: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-07 10:08:47.737252: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-07 10:08:47.747197: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Experiment configuration read by process() and create_background().
experiment = dict(
  description='Classroom-M2. Extract Grad-CAM, SHAP and Embeddings.',
  output_dir='../Output/CompareFineGrainedModelsSingleVsMultipleWithSHAPClassroomM2',
  # CSV(s) the SHAP background samples are drawn from, how many samples to
  # draw, and the seed used for that draw.
  shap_background=['../Data/fine_grained/Classroom/80MHz/3mo/m2/Slots/Train/train_set.csv'],
  shap_background_count=100,
  shap_background_random_seed=42,
  # One entry per trained model: the model file name and the CSV(s) it is
  # evaluated on.
  work=[
    dict(
      model='fine_grained_trainedon_Classroom_m2_242.h5',
      evaluate_on=['../Data/fine_grained/Classroom/80MHz/3mo/m2/Slots/Test/test_set.csv'],
    ),
    dict(
      model='fine_grained_trainedon_ClassroomOffice_m1m2m3_242-fixed.h5',
      evaluate_on=['../Data/fine_grained/Classroom/80MHz/3mo/m2/Slots/Test/test_set.csv'],
    ),
  ],
)

In [3]:
# Utility functions
def create_datagenerators(list_csvs, filter_labels, NoOfClasses=20, NoOfSubcarrier=242, window_size=50, batchsize=48):
  """Build two unshuffled DataGeneratorUnified instances over the same CSV files.

  Both generators receive the same arguments (the directory of each CSV, the
  CSV paths, the subcarrier/class counts, an input shape of
  (window_size, NoOfSubcarrier, 2) and the label filter) and differ only in
  the `batchsize` they are constructed with.

  Returns:
    A tuple (batches, samples): `batches` is constructed with
    batchsize=`batchsize`, `samples` with batchsize=1.
  """
  csv_dirs = [os.path.dirname(path) for path in list_csvs]
  input_shape = (window_size, NoOfSubcarrier, 2)

  def build(step_size):
    # Everything except the batch size is shared between the two generators.
    return DataGeneratorUnified(
      csv_dirs,
      list_csvs,
      NoOfSubcarrier,
      NoOfClasses,
      input_shape,
      batchsize=step_size,
      shuffle=False,
      filter_labels=filter_labels,
    )

  # Tuple elements are evaluated left to right: batched generator first.
  return build(batchsize), build(1)

def read_csv_to_dataframe(list_csvs, filter_labels=None):
  """Load one or more CSV files into a single DataFrame.

  Every row is tagged with the path of the file it came from in a 'source'
  column. Files are concatenated in the order given.

  Args:
    list_csvs: Iterable of CSV file paths; must contain at least one path.
    filter_labels: Optional collection of label values. When given, only rows
      whose 'label' column is in this collection are kept.

  Returns:
    The combined (and optionally filtered) DataFrame with a fresh 0..n-1 index.

  Raises:
    ValueError: If `list_csvs` is empty.
  """
  list_csvs = list(list_csvs)
  if not list_csvs:
    raise ValueError("read_csv_to_dataframe requires at least one CSV path")

  # Collect the frames first and concatenate once.
  frames = []
  for csv_file_path in list_csvs:
    frame = pd.read_csv(csv_file_path)
    # Record which CSV file each row came from.
    frame['source'] = csv_file_path
    frames.append(frame)
  combined = pd.concat(frames, ignore_index=True)

  # Filter the combined frame, not just the last file that was read.
  if filter_labels is not None:
    combined = combined[combined['label'].isin(filter_labels)]

  return combined.reset_index(drop=True)

def plot_confusion_matrix(conf_matrix, labels, filename='confusion_matrix.png'):
  """Draw a confusion matrix as an annotated heatmap and save it to `filename`.

  Args:
    conf_matrix: Matrix of integer counts (cells are annotated with fmt='d').
    labels: Tick labels used for both axes.
    filename: Path of the image file to write.

  The figure is closed after saving, so nothing is displayed inline.
  """
  fig, ax = plt.subplots(figsize=(32, 32))
  sns.heatmap(
    conf_matrix,
    cmap=plt.cm.Greens,
    annot=True,
    fmt='d',
    square=True,
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
  )
  ax.set_title("Confusion Matrix")
  ax.set_ylabel('Actual', fontsize=20)
  ax.set_xlabel('Predicted', fontsize=20)
  fig.savefig(filename, bbox_inches='tight', dpi=300)
  # Close explicitly so repeated calls do not accumulate open figures.
  plt.close(fig)

def get_embeddings(X, model, layer_name="flatten", input_name="input_layer"):
  """Return the output of an intermediate layer of `model` for the inputs `X`.

  A sub-model sharing `model`'s inputs and ending at the layer called
  `layer_name` is built and called on `X` in inference mode (training=False).

  Args:
    X: Batch of inputs to feed to the model.
    model: Keras model containing a layer named `layer_name`.
    layer_name: Name of the layer whose output is used as the embedding.
    input_name: Key under which `X` is passed to the sub-model; the default
      matches the input layer name the models in this notebook are called with.

  Returns:
    Whatever the sub-model returns for `X` (the selected layer's output).
  """
  embeddings_layer = model.get_layer(layer_name)
  embedding_model = tf.keras.Model(inputs=model.inputs, outputs=embeddings_layer.output)
  return embedding_model({input_name: X}, training=False)

def create_background(list_csvs, filter_labels, NoOfClasses=20, NoOfSubcarrier=242, window_size=50, random_state=experiment['shap_background_random_seed'], n_samples=None):
  """Select the background samples handed to the SHAP explainer.

  Args:
    list_csvs: CSV files describing the candidate samples.
    filter_labels: Label values to keep; passed to the generator and used to
      filter the label column.
    NoOfClasses, NoOfSubcarrier, window_size: Forwarded to DataGeneratorUnified
      (input shape is (window_size, NoOfSubcarrier, 2)).
    random_state: Seed forwarded to the index-sampling helper. The default is
      bound to the `experiment` dict as it was when this function was defined.
    n_samples: Number of samples to request from the sampling helper. Defaults
      to experiment['shap_background_count'], read at call time.

  Returns:
    A list with one entry per selected index, each being the first element
    of the corresponding item of a batchsize=1 generator.
  """
  if n_samples is None:
    n_samples = experiment['shap_background_count']

  csv_dirs = [os.path.dirname(csv_file_path) for csv_file_path in list_csvs]
  samples = DataGeneratorUnified(csv_dirs, list_csvs, NoOfSubcarrier, NoOfClasses, (window_size, NoOfSubcarrier, 2), batchsize=1, shuffle=False, filter_labels=filter_labels)

  # NOTE(review): labels come from the FIRST csv only, while `samples` is built
  # over every csv in `list_csvs`. With more than one csv the chosen indices
  # would only cover rows of the first file -- confirm this is intended.
  label_frame = pd.read_csv(list_csvs[0])
  label_frame = label_frame[label_frame['label'].isin(filter_labels)].reset_index(drop=True)

  # Presumably a label-stratified draw (per the helper's name); verify in util.
  indices = util.amostragem_estratificada_indices(label_frame['label'], n_amostras=n_samples, random_state=random_state)

  # Collect the samples at the selected indices.
  return [samples[i][0] for i in indices]


In [4]:
def _explain_and_record(X, first_row, model, gradient_explainer, csv_output, chunks):
  """Explain one batch of inputs and store the results.

  Computes Grad-CAM heatmaps, embeddings and SHAP values for `X`, appends
  them to the lists in `chunks`, and writes the predicted label / score into
  `csv_output` rows first_row .. first_row + len(X) - 1.

  Returns:
    The number of samples in `X`.
  """
  gradcam_heatmap, top_pred_index, top_class_channel = util.batch_make_gradcam_heatmap(X, model)
  embedding = get_embeddings(X, model)
  shap_values_gradient = gradient_explainer(X)

  # Keep, for every sample, only the SHAP values of its top predicted class
  # (the class axis is the last one).
  top_pred_index_np = top_pred_index.numpy()
  idx = top_pred_index_np[:, None, None, None, None]
  shap_selected = np.take_along_axis(shap_values_gradient.values, idx, axis=-1)
  shap_selected = np.squeeze(shap_selected, axis=-1)

  chunks['gradcam_heatmaps'].append(np.asarray(gradcam_heatmap))
  chunks['embeddings'].append(np.asarray(embedding))
  chunks['shap_gradients_heatmaps'].append(shap_selected)

  n_rows = len(X)
  last_row = first_row + n_rows - 1  # .loc label slices include both ends
  csv_output.loc[first_row:last_row, 'predicted_label'] = labels_array[top_pred_index_np]
  csv_output.loc[first_row:last_row, 'predicted_prob'] = top_class_channel.numpy()
  return n_rows


def process(experiment):
  """Evaluate every model of an experiment and save explanations and metrics.

  For each entry of experiment['work'] the model is loaded from
  ../trained_models/, run over the 'evaluate_on' CSVs, and the following
  files are written to experiment['output_dir']:
    * <model>_output.csv                  -- input rows plus predicted_label,
                                             predicted_prob, right_prediction
    * <model>_gradcam_and_embeddings.npz  -- gradcam_heatmaps, embeddings,
                                             shap_gradients_heatmaps
    * <model>_confusion_matrix.png
  A summary.txt with accuracy, weighted F1 per model and the elapsed time is
  written at the end.

  Reads the module-level `filter_labels`, `batchsize`, `labels` and
  `labels_array`.

  Args:
    experiment: Dict with the keys 'description', 'output_dir',
      'shap_background' and 'work' (list of dicts with 'model' and
      'evaluate_on').

  Raises:
    ValueError: If a work item yields no samples to evaluate.
  """
  start_time = time.time()
  summary = {}

  print(f"Processing experiment: {experiment['description']}")
  print(f"Output directory: {experiment['output_dir']}")
  os.makedirs(experiment['output_dir'], exist_ok=True)

  background = create_background(experiment['shap_background'], filter_labels)
  # shap.GradientExplainer interprets a Python list as "one array per model
  # input", so a list of per-sample arrays would not be used as a set of
  # background samples. Stack them along the batch axis into a single array.
  if isinstance(background, (list, tuple)):
    background = np.concatenate([np.asarray(sample) for sample in background], axis=0)

  for current_work in experiment['work']:
    print(f"Evaluating model: {current_work['model']}")

    batches, samples = create_datagenerators(current_work['evaluate_on'], filter_labels=filter_labels, batchsize=batchsize)
    csv_output = read_csv_to_dataframe(current_work['evaluate_on'], filter_labels=filter_labels)
    model = load_model(f"../trained_models/{current_work['model']}")
    gradient_explainer = shap.GradientExplainer(model, background)

    # Per-batch results are collected in lists and concatenated once at the
    # end; concatenating inside the loop re-copies everything on every step.
    chunks = {'gradcam_heatmaps': [], 'embeddings': [], 'shap_gradients_heatmaps': []}
    csv_output['predicted_label'] = None
    csv_output['predicted_prob'] = None

    # Batches
    n_done = 0
    for batch_index in tqdm(range(len(batches))):
      X, _ = batches[batch_index]
      n_done += _explain_and_record(X, n_done, model, gradient_explainer, csv_output, chunks)

    # Remaining samples (those not covered by the batched generator)
    for sample_index in tqdm(range(n_done, len(samples))):
      X, _ = samples[sample_index]
      _explain_and_record(X, sample_index, model, gradient_explainer, csv_output, chunks)

    if not chunks['embeddings']:
      raise ValueError(f"No samples to evaluate for model {current_work['model']}")

    # Free GPU memory (the explainer also holds a reference to the model)
    del model, gradient_explainer
    gc.collect()
    tf.keras.backend.clear_session()

    csv_output['right_prediction'] = csv_output['label'] == csv_output['predicted_label']

    # Output filenames
    csv_output_filename = f"{experiment['output_dir']}/{current_work['model']}_output.csv"
    gradcam_and_embeddings_filename = f"{experiment['output_dir']}/{current_work['model']}_gradcam_and_embeddings.npz"
    confusion_matrix_filename = f"{experiment['output_dir']}/{current_work['model']}_confusion_matrix.png"

    # Some performance metrics
    accuracy = accuracy_score(csv_output['label'], csv_output['predicted_label'])
    f1 = f1_score(csv_output['label'], csv_output['predicted_label'], average='weighted')
    conf_matrix = confusion_matrix(csv_output['label'], csv_output['predicted_label'], labels=labels)
    summary[f"{current_work['model']}_accuracy"] = accuracy
    summary[f"{current_work['model']}_f1score"] = f1

    # Save outputs
    csv_output.to_csv(csv_output_filename, index=False)
    np.savez_compressed(
      gradcam_and_embeddings_filename,
      **{name: np.concatenate(parts, axis=0) for name, parts in chunks.items()},
    )
    plot_confusion_matrix(conf_matrix, labels, filename=confusion_matrix_filename)

  end_time = time.time()
  elapsed_time = end_time - start_time

  summary['elapsed_time_seconds'] = elapsed_time

  # Save summary to a file
  summary_file = os.path.join(experiment['output_dir'], 'summary.txt')
  save_summary = "\n".join([f"{key}: {value}" for key, value in summary.items()])

  with open(summary_file, 'w') as f:
    f.write(f"Experiment: {experiment['description']}\n\n")
    f.write(save_summary)
  print(f"Summary saved to {summary_file}")

  

In [6]:
# Run the pipeline for the configured experiment; process() writes its results
# under experiment['output_dir'].
process(experiment)

Processing experiment: Classroom-M2. Extract Grad-CAM, SHAP and Embeddings.
Output directory: ../Output/CompareFineGrainedModelsSingleVsMultipleWithSHAPClassroomM2
Evaluating model: fine_grained_trainedon_Classroom_m2_242.h5


I0000 00:00:1749236059.015186 1386041 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1749236059.183057 1386041 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1749236059.189516 1386041 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1749236059.195937 1386041 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

Evaluating model: fine_grained_trainedon_ClassroomOffice_m1m2m3_242-fixed.h5


Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(50, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs=['Tensor(shape=(32, 50, 242, 2))']
Expected: input_layer
Received: inputs

Summary saved to ../Output/CompareFineGrainedModelsSingleVsMultipleWithSHAPClassroomM2/summary.txt


In [4]:
# List the contents of the experiment's output directory.
!ls -lah {experiment['output_dir']}

total 808M
drwxrwxr-x 2 diogo diogo 4,0K jun  6 20:25 .
drwxrwxr-x 6 diogo diogo 4,0K jun  6 15:54 ..
-rw-rw-r-- 1 diogo diogo 376K jun  6 18:09 fine_grained_trainedon_Classroom_m2_242.h5_confusion_matrix.png
-rw-rw-r-- 1 diogo diogo 409M jun  6 18:09 fine_grained_trainedon_Classroom_m2_242.h5_gradcam_and_embeddings.npz
-rw-rw-r-- 1 diogo diogo 473K jun  6 18:08 fine_grained_trainedon_Classroom_m2_242.h5_output.csv
-rw-rw-r-- 1 diogo diogo 373K jun  6 20:25 fine_grained_trainedon_ClassroomOffice_m1m2m3_242-fixed.h5_confusion_matrix.png
-rw-rw-r-- 1 diogo diogo 397M jun  6 20:25 fine_grained_trainedon_ClassroomOffice_m1m2m3_242-fixed.h5_gradcam_and_embeddings.npz
-rw-rw-r-- 1 diogo diogo 455K jun  6 20:24 fine_grained_trainedon_ClassroomOffice_m1m2m3_242-fixed.h5_output.csv
-rw-rw-r-- 1 diogo diogo  424 jun  6 20:25 summary.txt


In [None]:
# Load the saved data
ROOT_DIR = experiment['output_dir']
models = [work['model'] for work in experiment['work']]

# data[i] holds the saved arrays and the output CSV of models[i].
data = {}

for model_index, model in enumerate(models):
    # np.load on an .npz returns an NpzFile that keeps the archive open and
    # reads each array on access; the `with` block closes the file once the
    # arrays have been pulled into memory.
    with np.load(f'{ROOT_DIR}/{model}_gradcam_and_embeddings.npz') as saved:
        data[model_index] = {
            'gradcam': saved['gradcam_heatmaps'],
            'embeddings': saved['embeddings'],
            'shap_gradients': saved['shap_gradients_heatmaps'],
        }
    data[model_index]['csv'] = pd.read_csv(f'{ROOT_DIR}/{model}_output.csv')

    print(f'Loaded data for model {model_index}: {model}')
    print('Grad-CAM shape:', data[model_index]['gradcam'].shape)
    print('Embeddings shape:', data[model_index]['embeddings'].shape)
    print('SHAP Gradients shape:', data[model_index]['shap_gradients'].shape)
    print('CSV shape:', data[model_index]['csv'].shape)
    print()

Loaded data for model 0: fine_grained_trainedon_Classroom_m2_242.h5
Grad-CAM shape: (4195, 25, 121)
Embeddings shape: (4195, 92928)
SHAP Gradients shape: (4195, 50, 242, 2)
CSV shape: (4195, 6)

Loaded data for model 1: fine_grained_trainedon_ClassroomOffice_m1m2m3_242-fixed.h5
Grad-CAM shape: (4195, 25, 121)
Embeddings shape: (4195, 92928)
SHAP Gradients shape: (4195, 50, 242, 2)
CSV shape: (4195, 6)

