In [5]:
import numpy as np
import pandas as pd
# import pytorch_lightning as pl
# from pytorch_lightning.callbacks import ModelCheckpoint
# from pytorch_lightning.loggers import TensorBoardLogger
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from collections import Counter
import os
from lstm_model import LSTMModel
from dino_dataset import DINOFeatureDataset

In [None]:
# Load annotations
annotation_path = "/home/henry/robo/cis5810/final/updated_dataset.pkl"
df = pd.read_pickle(annotation_path)
clip_label = df[['seq', 'gait_pat']].drop_duplicates()
clip_angle = df[['seq', 'cam_view']].drop_duplicates()
label_mapping = {row['seq']: row['gait_pat'] for idx, row in clip_label.iterrows()}
angle_mapping = {row['seq']: row['cam_view'] for idx, row in clip_angle.iterrows()}

# Load all sequence directories
feature_dir = "/home/henry/robo/cis5810/final/dino_features"
sequence_dirs = [d.split("_")[0] for d in os.listdir(feature_dir) if d.startswith('c')]
labels = [label_mapping[seq] for seq in sequence_dirs]

# Count occurrences of each label
label_counts = Counter(labels)

# Keep only classes with at least 2 samples
valid_classes = {label for label, count in label_counts.items() if count > 1}
filtered_dirs = [seq for seq, label in zip(sequence_dirs, labels) if label in valid_classes]
filtered_labels = [label for label in labels if label in valid_classes]

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(filtered_labels)
# Create a mapping of labels to their encodings
label_to_encoding = {label: idx for idx, label in enumerate(label_encoder.classes_)}
encoding_to_label = {idx: label for label, idx in label_to_encoding.items()}
encoded_label_mapping = {idx: label_to_encoding[label] for idx, label in label_mapping.items()}

# Stratified split
train_dirs, temp_dirs, train_labels, temp_labels = train_test_split(
    filtered_dirs, y_encoded, stratify=y_encoded, test_size=0.35, random_state=42
)
val_dirs, test_dirs, val_labels, test_labels = train_test_split(
    temp_dirs, temp_labels, stratify=temp_labels, test_size=0.5, random_state=42)

print(label_encoder.classes_)

['cljwt275b001o3n6lx021hys6', 'cll9wg28l004r3o6lvje4jtk1', 'cljxl1oc900113n6larkl504p', 'cljxlxyc0003v3n6l0if3b0i7', 'cllam7xgc000z3o6lwanrl4ie', 'cljwrzfhq004t3n6lpnogqw2y', 'cljo6pd2x000k3n6lzncvjn3t', 'cljo39ok9002h3n6ldr0w5sey', 'cljxlzffo00413n6le9f85cjl', 'cljs4fg5q00533n6lva2cwo7m', 'cll74q59g00283o6lp8wjejom', 'cll8lc6c0000e3o6lgu3uicr8', 'cljo8g74m006g3n6l6kuxy9cf', 'cll9v50ee004y3o6lj9djtcfk', 'cljr6q6hi000s3n6l92mb8yqa', 'cljap0c1m003o3n6lgmq5uhwn', 'cll8yhohu003u3o6loak9t1uh', 'cljws24xx00563n6l1lvi70og', 'cll8e7fwo000a3o6lx7vbhve7', 'cll9vgqli000x3o6lnyfwj9yf', 'cllc2jyqh003u3o6lmte5fdzt', 'cll44yz75006d3o6l4a0v6ovq', 'cll7p6iq200793o6lyzrndvi1', 'clk7gxjve00123n6l2advgzrw', 'cll44uuy8005d3o6lp580ed3c', 'cll7pe1vf000u3o6l4kz90gxi', 'cll7os8xa00533o6lf7ezb4xb', 'cll8kj4xu007w3o6ll27qmd7n', 'cll8gkpjk003x3o6lj30cbt8l', 'cll7qekvj005t3o6ln160mgm7', 'cll7nivjk00043o6llmtvg69b', 'cll9wbp04004b3o6ldbbhuvwx', 'cljwd6kxa001i3n6lm1mk8bg8', 'cljwcz4en000d3n6lf1g0j311', 'cll8k1pmt005

In [4]:
# Initialize Dataset
train_dataset = DINOFeatureDataset(
    clips=train_dirs,
    feature_dir=feature_dir,
    label_mapping=encoded_label_mapping,
    angle_mapping=angle_mapping,
    sequence_length=20,
    downsample_factor=3
)
# Initialize Dataset
val_dataset = DINOFeatureDataset(
    clips=val_dirs,
    feature_dir=feature_dir,
    label_mapping=encoded_label_mapping,
    angle_mapping=angle_mapping,
    sequence_length=20,
    downsample_factor=3
)
# Initialize Dataset
test_dataset = DINOFeatureDataset(
    clips=test_dirs,
    feature_dir=feature_dir,
    label_mapping=encoded_label_mapping,
    angle_mapping=angle_mapping,
    sequence_length=20,
    downsample_factor=3
)

# Make data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [42]:
# Model parameters
input_size = 768  # Number of features, assuming landmark_11 to landmark_32 (22 landmarks)
hidden_size = 128  # Size of the LSTM's hidden layer
num_layers = 2     # Number of LSTM layers
output_size = 12    # Number of output units (change this based on your task)

# Instantiate the model
model = LSTMModel(input_size, hidden_size, num_layers, output_size)


In [49]:
# Loss function
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_encoded), y=y_encoded)
class_weights = torch.tensor(class_weights, dtype=torch.float)
print(class_weights)

tensor([ 0.1997,  4.3476,  2.3776,  0.6587,  6.6159,  0.8270,  0.5229,  3.2376,
        30.4333,  3.9017,  2.0289,  2.1738])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import wandb 
from sklearn.utils.class_weight import compute_class_weight


# Initialize wandb
wandb.login(key='af968dcc6f77f75c5bcd9bfc39807a51bcecab9d')
wandb.init(project='3d-cnn-gait-analysis')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss function
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_encoded), y=y_encoded)
class_weights = torch.tensor(class_weights, dtype=torch.float)

class_weights = class_weights.to(device)

# Use in loss
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Track the best validation loss
best_val_loss = float('inf')
best_model_weights = None  # To hold the best model's weights

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for batch in train_loader:
        inputs, targets, _, _ = batch  # Assuming train_loader returns inputs and targets
        inputs, targets = inputs.to(device), targets.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total

    # Log the validation loss and accuracy to wandb
    wandb.log({"epoch":epoch, "train_loss": train_loss, "train_accuracy": train_accuracy})
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")

    # Validation step
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels, _, _ in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)

            outputs = model(sequences)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / total

    # Log the validation loss and accuracy to wandb
    wandb.log({"epoch":epoch, "val_loss": val_loss, "val_accuracy": val_accuracy})
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    # Save the model if the validation loss improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss

        # Save the model checkpoint
        torch.save(model.state_dict(), f"dinomodel_20_3.pth")
        print("Model saved with improved validation loss.")




0,1
epoch,▁▁██
train_accuracy,█▁
train_loss,▁█
val_accuracy,▁█
val_loss,█▁

0,1
epoch,1.0
train_accuracy,99.23077
train_loss,0.02306
val_accuracy,87.10317
val_loss,1.25673


Train Loss: 0.8484, Train Accuracy: 67.04%
Val Loss: 1.0142, Val Accuracy: 70.71%
Model saved with improved validation loss.
Train Loss: 0.4118, Train Accuracy: 82.21%
Val Loss: 1.1166, Val Accuracy: 72.89%
Train Loss: 0.3146, Train Accuracy: 86.22%
Val Loss: 0.8087, Val Accuracy: 78.75%
Model saved with improved validation loss.
Train Loss: 0.2480, Train Accuracy: 89.00%
Val Loss: 0.8337, Val Accuracy: 80.26%
Train Loss: 0.1662, Train Accuracy: 92.09%
Val Loss: 0.7771, Val Accuracy: 83.95%
Model saved with improved validation loss.
Train Loss: 0.1334, Train Accuracy: 93.45%
Val Loss: 0.7629, Val Accuracy: 83.31%
Model saved with improved validation loss.
Train Loss: 0.1713, Train Accuracy: 92.99%
Val Loss: 0.9316, Val Accuracy: 81.15%
Train Loss: 0.1556, Train Accuracy: 93.66%
Val Loss: 0.7664, Val Accuracy: 84.20%
Train Loss: 0.0917, Train Accuracy: 95.58%
Val Loss: 0.8567, Val Accuracy: 81.85%
Train Loss: 0.1217, Train Accuracy: 94.37%
Val Loss: 0.8550, Val Accuracy: 85.17%
Train Lo

In [None]:
torch.save(model.state_dict(), f"final_dinomodel_20_3.pth")

Model saved with improved validation loss.


In [46]:
# Test the model
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for batch in test_loader:
        inputs, targets, _, _ = batch
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

        # Get predictions
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 90.56%


In [51]:
from sklearn.metrics import confusion_matrix
from collections import defaultdict
import numpy as np

# Extract true labels, clip IDs, and predictions
y_true = [x[1] for x in test_dataset]
clip_ids = [x[2] for x in test_dataset]
x_test = torch.stack([x[0] for x in test_dataset]).to(device)

# Predict using the model
y_pred = torch.argmax(model(x_test), dim=1).cpu().numpy()

# Group predictions and true labels by video (clip ID)
clip_predictions = defaultdict(list)
clip_labels = {}

for pred, true_label, clip in zip(y_pred, y_true, clip_ids):
    clip_predictions[clip].append(pred)
    clip_labels[clip] = true_label  # True label for each video

# Determine majority vote prediction and true label per video
video_preds = []
video_labels = []

for clip, preds in clip_predictions.items():
    majority_vote = np.bincount(preds).argmax()  # Most common prediction
    video_preds.append(majority_vote)
    video_labels.append(clip_labels[clip])

# Generate the per-video confusion matrix
per_video_conf_matrix = confusion_matrix(video_labels, video_preds)
print("Per-Video Confusion Matrix:\n", per_video_conf_matrix)

# Assuming you have a label encoder to get class names
print("Class Labels:", label_encoder.classes_)


Per-Video Confusion Matrix:
 [[131   0   0   2   0   0   1   0   0   0   0   0]
 [  0   6   0   0   0   0   0   0   0   0   0   0]
 [  2   0   9   0   0   0   0   0   0   0   0   0]
 [  2   0   0  38   0   0   0   0   0   0   0   0]
 [  1   0   0   0   3   0   0   0   0   0   0   0]
 [  1   0   0   1   0  31   0   0   0   0   0   0]
 [  2   0   2   2   0   0  44   1   0   0   0   0]
 [  2   0   0   0   0   0   0   6   0   0   0   0]
 [  0   0   0   1   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   7   0   0]
 [  2   0   0   0   0   0   0   1   0   0  10   0]
 [  0   0   0   0   0   0   0   0   0   0   0  12]]
Class Labels: ['abnormal' 'antalgic' 'cerebral palsy' 'exercise' 'inebriated'
 'myopathic' 'normal' 'parkinsons' 'pregnant' 'prosthetic' 'stroke'
 'style']


In [50]:
from sklearn.metrics import confusion_matrix

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", conf_matrix)

print(label_encoder.classes_)


Confusion Matrix:
 [[1741    1    0   19    0   10   11    5    0    0    1    3]
 [   0   72    0    0    0    3    0    0    0    0    0    0]
 [  39    0  127    0    0    0    2    0    0    0    0    0]
 [  34    0    0  484    0    0    5    0    0    7    0    4]
 [   6    0    0    0   39    0    0    0    0    0    0    0]
 [  17    1    0    9    0  292    0    0    0    0    0    0]
 [  18    0   23   77    0    0  331   11    0    0    0    0]
 [  36    0    0    0    0    0    0   96    0    0    0    0]
 [   1    0    2    6    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0  114    0    0]
 [  27    0    0    3    0    0    0    9    0    0  375    0]
 [   0    0    0    0    0    0    0    0    0    0    0   71]]
['abnormal' 'antalgic' 'cerebral palsy' 'exercise' 'inebriated'
 'myopathic' 'normal' 'parkinsons' 'pregnant' 'prosthetic' 'stroke'
 'style']


In [None]:
import numpy as np

# Calculate per-class accuracy
class_accuracy = {}
num_classes = conf_matrix.shape[0]

for i in range(num_classes):
    true_positives = conf_matrix[i, i]  # Correctly predicted instances for class i
    total_instances = np.sum(conf_matrix[i, :])  # Total instances for class i
    if total_instances > 0:
        class_accuracy[label_encoder.classes_[i]] = true_positives / total_instances * 100  # Percent correct
    else:
        class_accuracy[label_encoder.classes_[i]] = 0.0  # Handle cases with no instances of class i

#print("Original labels:", label_encoder.classes_)

# Print per-class accuracy
for class_label, accuracy in class_accuracy.items():
    print(f"Class {class_label}: {accuracy:.2f}%")


Class abnormal: 93.35%
Class antalgic: 67.31%
Class cerebral palsy: 48.43%
Class exercise: 79.62%
Class inebriated: 87.50%
Class myopathic: 85.11%
Class normal: 91.87%
Class parkinsons: 64.38%
Class pregnant: 100.00%
Class prosthetic: 70.18%
Class stroke: 72.29%
Class style: 94.20%


In [None]:
import shap

x_test = torch.stack([x[0] for x in test_dataset]).to(device)
bg = x_test[:100]

class ModelWrapper:
    """Wrap the model to handle PyTorch tensor inputs and outputs for SHAP."""
    def __init__(self, model):
        self.model = model

    def __call__(self, inputs):
        with torch.no_grad():
            outputs = self.model(inputs)
        return outputs.cpu().numpy()

explainer = shap.DeepExplainer(ModelWrapper(model), x_test)

# Generate SHAP values
shap_values = explainer.shap_values(x_test)

# Visualize SHAP values for a single prediction
sample_idx = 0  # Index of the sample you want to explain
shap.summary_plot(shap_values[0][sample_idx], x_test[sample_idx].cpu().numpy())

2024-11-27 21:07:13.220669: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732741633.248764  448753 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732741633.255844  448753 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-27 21:07:13.282642: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  if dtype.type == np.bool:


ValueError: <class '__main__.ModelWrapper'> is not currently a supported model type!