# Interpretability

Experimenting Application of SHAP values for model interpretability

## Setup and Imports

In [1]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
import warnings
warnings.filterwarnings('ignore')

# Machine learning libraries
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_auc_score
import scipy.sparse as sp

# Deep learning libraries
import torch
import torch.nn as nn

# Interpretability
import shap

# Visualization settings
plt.style.use('ggplot')
sns.set(style='whitegrid')
%matplotlib inline

## Load Models and Test Data

In [6]:
from google.colab import drive
drive.mount('/content/drive')
data_dir = '/content/drive/MyDrive/Colab Notebooks/is5126/final-project/data'
features_dir = '/content/drive/MyDrive/Colab Notebooks/is5126/final-project/data/features'
models_dir = '/content/drive/MyDrive/Colab Notebooks/is5126/final-project/models'
results_dir = '/content/drive/MyDrive/Colab Notebooks/is5126/final-project/results'

# Load pre-split datasets
train_df = pd.read_csv(os.path.join(data_dir, 'twitter_training_clean.csv'))
val_df = pd.read_csv(os.path.join(data_dir, 'twitter_validation_clean.csv'))
test_df = pd.read_csv(os.path.join(data_dir, 'twitter_testing_clean.csv'))

# Extract labels
y_train = train_df['sentiment'].values
y_val = val_df['sentiment'].values
y_test = test_df['sentiment'].values

# Define a function to load features
def load_features(feature_type, feature_dir):
  if feature_type in ['bow', 'tfidf']:
    return (
      sp.load_npz(os.path.join(feature_dir, f'{feature_type}_features_train.npz')),
      sp.load_npz(os.path.join(feature_dir, f'{feature_type}_features_val.npz')),
      sp.load_npz(os.path.join(feature_dir, f'{feature_type}_features_test.npz'))
    )
  else:
    return (
      np.load(os.path.join(feature_dir, f'{feature_type}_features_train.npy')),
      np.load(os.path.join(feature_dir, f'{feature_type}_features_val.npy')),
      np.load(os.path.join(feature_dir, f'{feature_type}_features_test.npy'))
    )

# Load features using the function
X_bow_train, X_bow_val, X_bow_test = load_features('bow', features_dir)
X_tfidf_train, X_tfidf_val, X_tfidf_test = load_features('tfidf', features_dir)
X_word2vec_train, X_word2vec_val, X_word2vec_test = load_features('word2vec', features_dir)
X_glove_train, X_glove_val, X_glove_test = load_features('glove', features_dir)
X_bert_train, X_bert_val, X_bert_test = load_features('bert', features_dir)
print("Features loaded successfully.")

# Load model comparison results
model_results = pd.read_csv(os.path.join(results_dir, 'model_comparison_results.csv'))

# Get top performing models
top_models = model_results  # Get top 5 models
print("Top 5 performing models:")
top_models

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Features loaded successfully.
Top 5 performing models:


Unnamed: 0,Model,Feature,Accuracy,F1 Score
0,BERT,Fine-tuned,0.95662,0.956656
1,Random Forest,BoW,0.954449,0.95451
2,Random Forest,TF-IDF,0.953906,0.953967
3,BiLSTM (Optimized),BERT,0.903967,0.904289
4,Random Forest,BERT,0.903289,0.903277
5,Random Forest,Word2Vec,0.889537,0.889533
6,Random Forest,GloVe,0.873434,0.873449
7,Logistic Regression,BoW,0.757181,0.757226
8,Logistic Regression,TF-IDF,0.755417,0.755646
9,Logistic Regression,BERT,0.578685,0.580091


In [4]:
# Load label encoder
with open(os.path.join(models_dir, 'label_encoder.pkl'), 'rb') as f:
    label_encoder = pickle.load(f)

# Display label encoding
print("Label Encoding:")
for i, label in enumerate(label_encoder.classes_):
    print(f"{i} -> {label}")

Label Encoding:
0 -> Irrelevant
1 -> Negative
2 -> Neutral
3 -> Positive


## Model Interpretation with SHAP (Logistic Regression)

In [7]:
# Function to interpret sklearn model with SHAP
def interpret_sklearn_model(model, X_train, X_test, feature_names, model_name, feature_type):
    print(f"\nGenerating SHAP values for {model_name} with {feature_type}...")

    # For large sparse matrices, sample a subset for speed
    if isinstance(X_train, sp.spmatrix) and X_train.shape[1] > 1000:
        # Limit to top 1000 features
        X_train_sample = X_train[:, :1000]
        X_test_sample = X_test[:, :1000]
        feature_names = feature_names[:1000] if feature_names is not None else None
    else:
        X_train_sample = X_train
        X_test_sample = X_test

    # Convert sparse matrices to dense if needed
    if isinstance(X_train_sample, sp.spmatrix):
        X_train_sample = X_train_sample.toarray()
    if isinstance(X_test_sample, sp.spmatrix):
        X_test_sample = X_test_sample.toarray()

    # Sample for speed if necessary
    if X_train_sample.shape[0] > 500:
        indices = np.random.choice(X_train_sample.shape[0], 500, replace=False)
        X_train_sample = X_train_sample[indices]

    # Use appropriate explainer
    if hasattr(model, 'predict_proba'):
        explainer = shap.KernelExplainer(model.predict_proba, X_train_sample)
        # Sample test set for speed if necessary
        if X_test_sample.shape[0] > 50:
            test_indices = np.random.choice(X_test_sample.shape[0], 50, replace=False)
            X_test_sample = X_test_sample[test_indices]

        shap_values = explainer.shap_values(X_test_sample)

        # Plot summary
        plt.figure(figsize=(12, 8))
        shap.summary_plot(shap_values, X_test_sample, feature_names=feature_names, show=False)
        plt.title(f"SHAP Summary Plot - {model_name} with {feature_type}")
        plt.tight_layout()
        plt.savefig(os.path.join(results_dir, f'shap_summary_{model_name.lower()}_{feature_type.lower()}.png'))
        plt.show()

        return shap_values
    else:
        # For models without predict_proba
        explainer = shap.KernelExplainer(model.predict, X_train_sample)
        if X_test_sample.shape[0] > 50:
            test_indices = np.random.choice(X_test_sample.shape[0], 50, replace=False)
            X_test_sample = X_test_sample[test_indices]

        shap_values = explainer.shap_values(X_test_sample)

        # Plot summary
        plt.figure(figsize=(12, 8))
        shap.summary_plot(shap_values, X_test_sample, feature_names=feature_names, show=False)
        plt.title(f"SHAP Summary Plot - {model_name} with {feature_type}")
        plt.tight_layout()
        plt.savefig(os.path.join(results_dir, f'shap_summary_{model_name.lower()}_{feature_type.lower()}.png'))
        plt.show()

        return shap_values

In [None]:
# Apply SHAP to the best models (only for sklearn models)
rows = model_results.iloc[[7, 8, 9, 10, 11]].to_dict(orient='records')

for result in rows:
    print(result)
    if result['Model'] != 'BiLSTM':  # Skip BiLSTM for now
        model_name = 'logistic_regression'
        feature_name = result['Feature']

        # Load the model
        model_path = os.path.join(models_dir, f'best_{model_name.lower()}_{feature_name.lower()}.pkl')
        model = load_sklearn_model(model_path)

        # Load appropriate features
        if feature_name == 'BoW':
            X_train_features = X_bow_train
            X_test_features = X_bow_test
            # Load vectorizer to get feature names
            with open(os.path.join(models_dir, 'bow_vectorizer.pkl'), 'rb') as f:
                vectorizer = pickle.load(f)
            feature_names = vectorizer.get_feature_names_out()
        elif feature_name == 'TF-IDF':
            X_train_features = X_tfidf_train
            X_test_features = X_tfidf_test
            # Load vectorizer to get feature names
            with open(os.path.join(models_dir, 'tfidf_vectorizer.pkl'), 'rb') as f:
                vectorizer = pickle.load(f)
            feature_names = vectorizer.get_feature_names_out()
        elif feature_name == 'Word2Vec':
            X_train_features = X_word2vec_train
            X_test_features = X_word2vec_test
            feature_names = [f"dim_{i}" for i in range(X_train_features.shape[1])]
        elif feature_name == 'GloVe':
            X_train_features = X_glove_train
            X_test_features = X_glove_test
            feature_names = [f"dim_{i}" for i in range(X_train_features.shape[1])]
        elif feature_name == 'BERT':
            X_train_features = X_bert_train
            X_test_features = X_bert_test
            feature_names = [f"dim_{i}" for i in range(X_train_features.shape[1])]

        # Interpret model
        interpret_sklearn_model(model, X_train_features, X_test_features, feature_names, model_name, feature_name)