In [19]:
import shap
from shapash.explainer.smart_explainer import SmartExplainer
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import joblib

In [20]:
class PitchVelocityModel(nn.Module):
    def __init__(self, num_features):
        super(PitchVelocityModel, self).__init__()
        # Fully connected layers
        self.fc1 = torch.nn.Linear(num_features, 1024)
        self.fc2 = torch.nn.Linear(1024, 512)
        self.fc3 = torch.nn.Linear(512, 256)
        self.fc4 = torch.nn.Linear(256, 128)
        self.fc5 = torch.nn.Linear(128, 64)
        self.fc6 = torch.nn.Linear(64, 32)
        self.fc7 = torch.nn.Linear(32, 1)
        
        # Relu activation function
        self.relu = torch.nn.ReLU()  
                
    def forward(self, x):
        # Forward pass
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.relu(self.fc5(x))
        x = self.relu(self.fc6(x))
        x = self.fc7(x)
        
        return x

In [21]:
def batchify(data, batch_size):
    batches = [data[i:i + batch_size] for i in range(0, data.size(0), batch_size)]
    return batches

In [22]:
# Load the scaled datasets
X_train_scaled = pd.read_csv('X_train_scaled.csv').values
X_val_scaled = pd.read_csv('X_val_scaled.csv').values
X_test_scaled = pd.read_csv('X_test_scaled.csv').values

# Convert the NumPy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Load the labels
y_train = pd.read_csv('y_train.csv').values.ravel()
y_val = pd.read_csv('y_val.csv').values.ravel()
y_test = pd.read_csv('y_test.csv').values.ravel()

# Load your trained model
model = PitchVelocityModel(X_train_scaled.shape[1])
model.load_state_dict(torch.load('pitch_velocity_model.pth'))
model.eval()

# If using GPU, move tensors to the same device as the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
X_train_tensor = X_train_tensor.to(device)
X_val_tensor = X_val_tensor.to(device)
X_test_tensor = X_test_tensor.to(device)

In [23]:
# Define the batch size
batch_size = 32  # Adjust as necessary

# Create batches from your test tensor
X_test_batches = batchify(X_test_tensor, batch_size)

# Use DeepExplainer for each batch and aggregate the results
explainer = shap.DeepExplainer(model, X_train_tensor)
shap_values = [explainer.shap_values(batch) for batch in X_test_batches]

scaler = joblib.load('C:/Users/adam.bloebaum/Documents/GitHub/biomech/deeplearning/scaler.pk1')
preprocessing = {'scaler': scaler}

# Use Shapash
xpl = SmartExplainer()

# Generate model predictions for Shapash
model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)
    y_pred_shapash = y_pred_tensor.cpu().numpy()

xpl.compile(x=X_test_scaled, model=model, preprocessing=preprocessing, y_pred=y_pred_shapash)
xpl.to_pandas().head()

# Summary plot
xpl.plot.features_importance()
plt.savefig('C:/Users/adam.bloebaum/Documents/GitHub/biomech/deeplearning/outputs') # Save the plot as a PNG file

RuntimeError: The size of tensor a (64) must match the size of tensor b (32) at non-singleton dimension 1

In [None]:
xpl.run_app(title_story='Model Explanations')