In [1]:
import json
import numpy as np
import os
import random

import torch
import torch.nn as nn
import torch.optim as optim

## Balance dataset

In [28]:
import os

def find_minimum_class_size(data_directory):
    class_sizes = {}
    for class_dir in os.listdir(data_directory):
        class_path = os.path.join(data_directory, class_dir)
        if os.path.isdir(class_path):
            class_sizes[class_dir] = len([name for name in os.listdir(class_path) if name.endswith('.npy')])
    min_size = min(class_sizes.values())
    return min_size, class_sizes

data_directory = 'splitted_datapoints_shot/val'
min_size, class_sizes = find_minimum_class_size(data_directory)
print("Minimum class size:", min_size)
print("Class sizes:", class_sizes)


Minimum class size: 400
Class sizes: {'Shot': 400, 'Background': 867}


In [29]:
import shutil

def balance_classes(data_directory, output_directory, min_size):
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for class_dir in os.listdir(data_directory):
        class_path = os.path.join(data_directory, class_dir)
        if os.path.isdir(class_path):
            files = [name for name in os.listdir(class_path) if name.endswith('.npy')]
            np.random.shuffle(files)  # Shuffle to avoid bias
            selected_files = files[:min_size]  # Select a subset based on the minimum size
            
            # Create a directory for the current class in the output directory
            output_class_path = os.path.join(output_directory, class_dir)
            if not os.path.exists(output_class_path):
                os.makedirs(output_class_path)
                
            # Copy selected files to the new directory
            for file_name in selected_files:
                src_file_path = os.path.join(class_path, file_name)
                dst_file_path = os.path.join(output_class_path, file_name)
                shutil.copy(src_file_path, dst_file_path)
    print("Data balanced and copied to:", output_directory)

# Specify the output directory where balanced data will be stored
output_directory = 'balanced_splitted_datapoints_shot/val'
balance_classes(data_directory, output_directory, min_size)


Data balanced and copied to: balanced_splitted_datapoints_shot/val


## Data loader

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

class NPYDataset(Dataset):
    def __init__(self, main_directory):
        super().__init__()
        self.main_directory = main_directory
        self.filenames = self._load_filenames()

    def _load_filenames(self):
        # Traverse subdirectories to find all .npy files
        filenames = []
        for root, _, files in os.walk(self.main_directory):
            for file in files:
                if file.endswith('.npy'):
                    full_path = os.path.join(root, file)
                    filenames.append(full_path)
        return filenames

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        file_path = self.filenames[idx]
        data = np.load(file_path, allow_pickle=True)
        keypoints = np.ascontiguousarray(data['data'], dtype=np.float32)
        label = np.float32(data['label'])
        # Convert to tensors
        keypoints = torch.tensor(keypoints)
        keypoints = keypoints[:, :, :-1, :]
        label = torch.tensor(label)
        return keypoints, label


In [3]:


# DataLoader setup
data_directory = '7-segment_datasets/highRel_splitted_dataset_backall_included/train'
dataset = NPYDataset(data_directory)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

val_dataset = NPYDataset('7-segment_datasets/highRel_splitted_dataset_backall_included/val')
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)


In [4]:
len(dataloader)

2170

In [5]:
len(val_loader)

544

In [6]:
for keypoints, labels in dataloader:
    # keypoints = keypoints.squeeze(0)
    print(keypoints.shape, labels)

torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 7, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 7, 33, 2]) ten

In [7]:
# len(val_loader)

## Model

In [8]:
class ConvLSTMModel(nn.Module):
    def __init__(self, input_shape, hidden_dim, num_classes):
        super(ConvLSTMModel, self).__init__()
        self.segment_length, self.keypoints, self.coords = input_shape
        
        self.conv1d = nn.Conv1d(in_channels=self.keypoints * self.coords, out_channels=32, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(input_size=32, hidden_size=hidden_dim, batch_first=True)
        # Change to output a single value for binary classification
        self.fc = nn.Linear(hidden_dim, 1)  # Only one output neuron for binary classification

    def forward(self, x):
        x = x.view(x.size(0), self.keypoints * self.coords, self.segment_length)
        x = self.conv1d(x)
        x = torch.relu(x)
        x = x.transpose(1, 2)
        x, (hn, cn) = self.lstm(x)
        x = x[:, -1, :]
        x = self.fc(x)
        x = torch.sigmoid(x)  # Sigmoid activation for binary classification
        return x


In [9]:

# Assume 'model' is an instance of your model class
model = ConvLSTMModel(input_shape=(7, 33, 3), hidden_dim=64, num_classes=2)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [10]:
# Define a function for evaluating the model on the validation set
def evaluate(model, val_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    with torch.no_grad():  # No need to compute gradients for validation
        for keypoints, labels in val_loader:
            # Forward pass
            outputs = model(keypoints)  # outputs shape should be [batch_size, 1]
            
            # Compute loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            # Calculate accuracy
            predicted_labels = outputs.round()  # Round the outputs to get the final predictions for binary classification
            correct_predictions += (predicted_labels == labels).sum().item()
            total_predictions += labels.size(0)
    
    # Calculate average loss and accuracy
    avg_loss = running_loss / len(val_loader)
    accuracy = correct_predictions / total_predictions
    return avg_loss, accuracy

In [11]:
import torch
from torch.utils.tensorboard import SummaryWriter

# Initialize TensorBoard
writer = SummaryWriter(log_dir='./runs/convLSTM')
# Variables to keep track of the best validation accuracy
best_val_acc = 0.0
best_model_path = '7-segment_datasets/highRel_weights_z/best_model.pth'
last_model_path = '7-segment_datasets/highRel_weights_z/last_model.pth'
num_epochs = 500
model.train()  # Set the model to training mode
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for keypoints, labels in dataloader:
        optimizer.zero_grad()
        # Forward pass
        outputs = model(keypoints)  # outputs shape should be [batch_size, 1]
        
        # Compute loss
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        # Backward and optimize
        loss.backward()
        optimizer.step()
        # print('outputs', outputs)
        # Calculate accuracy
        predicted_labels = outputs.round()  # Round the outputs to get the final predictions for binary classification
        # print('predicted_labels',predicted_labels, 'True labels:', labels)
        correct_predictions += (predicted_labels == labels).sum().item()
        total_predictions += labels.size(0)

        # Calculate accuracy and loss for the epoch
    accuracy = correct_predictions / total_predictions * 100
    epoch_loss = running_loss / len(dataloader)

    # Perform validation
    val_loss, val_accuracy = evaluate(model, val_loader, criterion)
    
    # Log loss and accuracy to TensorBoard
    writer.add_scalar('Loss/train', epoch_loss, epoch)
    writer.add_scalar('Accuracy/train', accuracy, epoch)
    writer.add_scalar('Accuracy/val', val_accuracy, epoch)
    writer.add_scalar('Loss/val', val_loss, epoch)
    
    # Print validation metrics
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Train Accuracy: {accuracy:.2f}%')
    print(f'Epoch {epoch+1}/{num_epochs} - Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Save the last model weights
    torch.save(model.state_dict(), last_model_path)
    
    # Save the best model weights based on validation accuracy
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        torch.save(model.state_dict(), best_model_path)
        print(f'Best model saved at epoch {epoch+1} with val acc: {val_accuracy:.4f}')

print(f'Best validation accuracy: {best_val_acc:.4f}')
# Close the TensorBoard writer
writer.close()


Epoch [1/500], Train Loss: 0.6710, Train Accuracy: 58.15%
Epoch 1/500 - Validation Loss: 0.6854, Validation Accuracy: 0.5775
Best model saved at epoch 1 with val acc: 0.5775
Epoch [2/500], Train Loss: 0.6806, Train Accuracy: 55.96%
Epoch 2/500 - Validation Loss: 0.6908, Validation Accuracy: 0.5732
Epoch [3/500], Train Loss: 0.6783, Train Accuracy: 57.35%
Epoch 3/500 - Validation Loss: 0.6638, Validation Accuracy: 0.5732
Epoch [4/500], Train Loss: 0.6710, Train Accuracy: 57.72%
Epoch 4/500 - Validation Loss: 0.6735, Validation Accuracy: 0.5690
Epoch [5/500], Train Loss: 0.6728, Train Accuracy: 58.57%
Epoch 5/500 - Validation Loss: 0.6645, Validation Accuracy: 0.5732
Epoch [6/500], Train Loss: 0.6747, Train Accuracy: 56.98%
Epoch 6/500 - Validation Loss: 0.6645, Validation Accuracy: 0.5754
Epoch [7/500], Train Loss: 0.6669, Train Accuracy: 57.35%
Epoch 7/500 - Validation Loss: 0.6803, Validation Accuracy: 0.5732
Epoch [8/500], Train Loss: 0.6713, Train Accuracy: 59.05%
Epoch 8/500 - Vali

In [12]:
model.load_state_dict(torch.load('7-segment_datasets/highRel_weights_z/best_model.pth'))

  model.load_state_dict(torch.load('7-segment_datasets/highRel_weights_z/best_model.pth'))


<All keys matched successfully>

In [13]:
import torch
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Define a function for evaluating the model on the validation set
def evaluate_with_confusion_matrix(model, val_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    all_labels = []
    all_predictions = []
    
    with torch.no_grad():  # No need to compute gradients for validation
        for keypoints, labels in val_loader:
            # Forward pass
            outputs = model(keypoints)  # outputs shape should be [batch_size, 1]
            # outputs = outputs.squeeze()
            # labels = labels.squeeze().long()
            # Compute loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            # Get predictions
            predicted_labels = outputs.round()  # This should be along dimension 1 for batch data
            all_labels.append(labels.tolist()[0][0])
            all_predictions.append(predicted_labels.tolist()[0][0])
            
        # Calculate average loss
    avg_loss = running_loss / len(val_loader)
    
    # Calculate confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)
    accuracy = np.trace(cm) / np.sum(cm)
    precision = precision_score(all_labels, all_predictions, average='binary')
    recall = recall_score(all_labels, all_predictions, average='binary')
    f1 = f1_score(all_labels, all_predictions, average='binary')
    print('Precision: ', precision)
    print('Recall: ', recall)
    print('f1-score: ', f1)
    return avg_loss, accuracy, cm

# Use the function
avg_loss, accuracy, cm = evaluate_with_confusion_matrix(model, val_loader, criterion)
print(f"Average Loss: {avg_loss}, Accuracy: {accuracy}")
print("Confusion Matrix:\n", cm)


Precision:  0.9491525423728814
Recall:  0.6222222222222222
f1-score:  0.7516778523489933
Average Loss: 0.4636344749425121, Accuracy: 0.7643312101910829
Confusion Matrix:
 [[192   9]
 [102 168]]


In [13]:
175/(26+175)

0.8706467661691543

In [14]:
178/(92+178)

0.6592592592592592

## Convert to ONNX

In [12]:
# torch_model = MyModel()
torch_input = torch.randn(1, 20, 33, 2)
onnx_program = torch.onnx.dynamo_export(model, torch_input)

Skipping constant folding for op SequenceConstruct with multiple outputs.
Skipping constant folding for op SequenceConstruct with multiple outputs.


Applied 20 of general pattern rewrite rules.


In [13]:
onnx_program.save("shot_classifier_weights_balanced/shot_best.onnx")

In [14]:
import onnx
onnx_model = onnx.load("shot_classifier_weights_balanced/shot_best.onnx")
onnx.checker.check_model(onnx_model)

## Model simple LSTM

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define LSTM classifier model
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = x.view(x.size(0), x.size(1), -1)
        
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [18]:
# # Generate sample data
# X_train = np.random.rand(100, 10, 50)  # 100 sequences of length 10 with 50 features each
# y_train = np.random.randint(2, size=(100,))  # Binary labels (0 or 1)

# # Convert data to PyTorch tensors
# X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
# y_train_tensor = torch.tensor(y_train, dtype=torch.long)

# Define model parameters
input_size = 33*2
hidden_size = 64
num_layers = 2
output_size = 2


In [19]:
# Instantiate the model
model = LSTMClassifier(input_size, hidden_size, num_layers, output_size)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [None]:
# Train the model
num_epochs = 500
for epoch in range(num_epochs):
    correct = 0
    total = 0
    running_loss = 0.0
    for keypoints, labels in dataloader:
        # Squeeze unnecessary dimensions
        keypoints = keypoints.squeeze(0)  # Now keypoints shape should be [1, 20, 17, 2]
        labels = labels.squeeze(0)        # Now labels shape should be [1]
        # print('keypoints:',keypoints.shape,'labels:', labels.shape)
        optimizer.zero_grad()
        outputs = model(keypoints)
        # print('outputs before',outputs, 'labels before',labels)

        outputs = outputs.squeeze()
        labels = labels.squeeze().long()
        # print('outputs after',outputs, 'labels after',labels)
        # break
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.data, 0)
        # Total number of labels
        total += 1
        
        # Total correct predictions
        correct += (predicted == labels).sum().item()
    accuracy =  correct / total * 100
    epoch_loss = running_loss / len(dataloader)        
            # Print Loss
    print(' Loss: {}. Accuracy: {}'.format( epoch_loss, accuracy))


 Loss: 0.7066145324707032. Accuracy: 46.0
 Loss: 0.646399582028389. Accuracy: 74.0
 Loss: 0.6253186723589897. Accuracy: 62.0
 Loss: 0.6004764226451517. Accuracy: 66.0
 Loss: 0.5673007851839066. Accuracy: 64.0
 Loss: 0.5666879870928824. Accuracy: 68.0
 Loss: 0.5529879846051335. Accuracy: 68.0
 Loss: 0.5546136916801333. Accuracy: 70.0
 Loss: 0.5598513038922102. Accuracy: 68.0
 Loss: 0.5483754526730626. Accuracy: 70.0
 Loss: 0.5462060511251912. Accuracy: 68.0
 Loss: 0.5345488362945616. Accuracy: 74.0
 Loss: 0.6796253682486713. Accuracy: 60.0
 Loss: 0.5924330477602779. Accuracy: 68.0
 Loss: 0.5473819950129837. Accuracy: 70.0
 Loss: 0.5350996433291584. Accuracy: 72.0
 Loss: 0.5204374854825438. Accuracy: 74.0
 Loss: 0.6699774176906794. Accuracy: 68.0
 Loss: 0.5793360046669841. Accuracy: 68.0
 Loss: 0.5573308748006821. Accuracy: 66.0
 Loss: 0.5372575928643346. Accuracy: 76.0
 Loss: 0.5082164804823697. Accuracy: 74.0
 Loss: 0.5007441286928952. Accuracy: 74.0
 Loss: 0.5154678407032043. Accuracy

In [27]:
import torch
from torch.utils.tensorboard import SummaryWriter

# Initialize TensorBoard
writer = SummaryWriter(log_dir='./runs/convLSTM')
num_epochs = 100
for epoch in range(num_epochs):
    correct = 0
    total = 0
    running_loss = 0.0
    
    for keypoints, labels in dataloader:
        # Squeeze unnecessary dimensions
        keypoints = keypoints.squeeze(0)  # Now keypoints shape should be [1, 20, 17, 2]
        labels = labels.squeeze(0)        # Now labels shape should be [1]
        # print('keypoints:',keypoints.shape,'labels:', labels.shape)
        
        optimizer.zero_grad()
        outputs = model(keypoints)
        # print('outputs before',outputs, 'labels before',labels)

        outputs = outputs.squeeze()
        labels = labels.squeeze().long()
        # print('outputs after',outputs, 'labels after',labels)
        # break
        
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        _, predicted = torch.max(outputs.data, 0)
        # Total number of labels
        total += 1
        
        # Total correct predictions
        correct += (predicted == labels).sum().item()
    
    # Calculate accuracy and loss for the epoch
    accuracy = correct / total * 100
    epoch_loss = running_loss / len(dataloader)
    
    # Log loss and accuracy to TensorBoard
    writer.add_scalar('Loss/train', epoch_loss, epoch)
    writer.add_scalar('Accuracy/train', accuracy, epoch)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%')

# Close the TensorBoard writer
writer.close()


RuntimeError: The size of tensor a (2) must match the size of tensor b (32) at non-singleton dimension 0

## Updated ConvLSTM

In [7]:
import torch.nn.functional as F

class ConvLSTMModel(nn.Module):
    def __init__(self, input_shape, hidden_dim, num_classes):
        super(ConvLSTMModel, self).__init__()
        self.segment_length, self.keypoints, self.coords = input_shape

        # Convolutional layers
        self.conv1d_1 = nn.Conv1d(in_channels=self.keypoints * self.coords, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.conv1d_2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(32)

        # Adaptive pooling
        self.adaptive_pool = nn.AdaptiveMaxPool1d(output_size=8)

        # Dropout
        self.dropout = nn.Dropout(p=0.5)  # Moderate dropout rate

        # LSTM layer
        self.lstm_input_size = 32  # Output channels from last Conv1d layer
        self.lstm = nn.LSTM(input_size=self.lstm_input_size, hidden_size=hidden_dim, batch_first=True)

        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, 1) 

    def forward(self, x):
        # Reshape input
        x = x.view(x.size(0), self.keypoints * self.coords, self.segment_length)

        # Convolutional layers
        x = F.relu(self.bn1(self.conv1d_1(x)))
        x = F.relu(self.bn2(self.conv1d_2(x)))

        # Apply adaptive pooling to ensure output size is appropriate
        x = self.adaptive_pool(x)

        # Flatten the features
        x = x.transpose(1, 2)  # (batch_size, seq_len, num_features)

        # LSTM layer
        x, (hn, cn) = self.lstm(x)

        # Use only the output of the last time step
        x = x[:, -1, :]

        # Apply dropout manually
        x = self.dropout(x)

        # Fully connected layer
        x = self.fc(x)
        x = torch.sigmoid(x)
        return x


In [8]:

# Assume 'model' is an instance of your model class
model = ConvLSTMModel(input_shape=(7, 33, 2), hidden_dim=64, num_classes=2)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [9]:
# Define a function for evaluating the model on the validation set
def evaluate(model, val_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    with torch.no_grad():  # No need to compute gradients for validation
        for keypoints, labels in val_loader:
            # Forward pass
            outputs = model(keypoints)  # outputs shape should be [batch_size, 1]
            
            # Compute loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            # Calculate accuracy
            predicted_labels = outputs.round()  # Round the outputs to get the final predictions for binary classification
            correct_predictions += (predicted_labels == labels).sum().item()
            total_predictions += labels.size(0)
    
    # Calculate average loss and accuracy
    avg_loss = running_loss / len(val_loader)
    accuracy = correct_predictions / total_predictions
    return avg_loss, accuracy

In [10]:
import torch
from torch.utils.tensorboard import SummaryWriter

# Initialize TensorBoard
writer = SummaryWriter(log_dir='./runs/convLSTM')
# Variables to keep track of the best validation accuracy
best_val_acc = 0.0
best_model_path = '7-segment_datasets/highRel_weights_back_all/best_model.pth'
last_model_path = '7-segment_datasets/highRel_weights_back_all/last_model.pth'
num_epochs = 500
model.train()  # Set the model to training mode
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for keypoints, labels in dataloader:
        optimizer.zero_grad()
        # Forward pass
        outputs = model(keypoints)  # outputs shape should be [batch_size, 1]
        
        # Compute loss
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        # Backward and optimize
        loss.backward()
        optimizer.step()
        # print('outputs', outputs)
        # Calculate accuracy
        predicted_labels = outputs.round()  # Round the outputs to get the final predictions for binary classification
        # print('predicted_labels',predicted_labels, 'True labels:', labels)
        correct_predictions += (predicted_labels == labels).sum().item()
        total_predictions += labels.size(0)

        # Calculate accuracy and loss for the epoch
    accuracy = correct_predictions / total_predictions * 100
    epoch_loss = running_loss / len(dataloader)

    # Perform validation
    val_loss, val_accuracy = evaluate(model, val_loader, criterion)
    
    # Log loss and accuracy to TensorBoard
    writer.add_scalar('Loss/train', epoch_loss, epoch)
    writer.add_scalar('Accuracy/train', accuracy, epoch)
    writer.add_scalar('Accuracy/val', val_accuracy, epoch)
    writer.add_scalar('Loss/val', val_loss, epoch)
    
    # Print validation metrics
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Train Accuracy: {accuracy:.2f}%')
    print(f'Epoch {epoch+1}/{num_epochs} - Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Save the last model weights
    torch.save(model.state_dict(), last_model_path)
    
    # Save the best model weights based on validation accuracy
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        torch.save(model.state_dict(), best_model_path)
        print(f'Best model saved at epoch {epoch+1} with val acc: {val_accuracy:.4f}')

print(f'Best validation accuracy: {best_val_acc:.4f}')
# Close the TensorBoard writer
writer.close()


Epoch [1/500], Train Loss: 0.6417, Train Accuracy: 61.43%
Epoch 1/500 - Validation Loss: 0.5752, Validation Accuracy: 0.7206
Best model saved at epoch 1 with val acc: 0.7206
Epoch [2/500], Train Loss: 0.5104, Train Accuracy: 73.59%
Epoch 2/500 - Validation Loss: 0.4633, Validation Accuracy: 0.7812
Best model saved at epoch 2 with val acc: 0.7812
Epoch [3/500], Train Loss: 0.4754, Train Accuracy: 75.58%
Epoch 3/500 - Validation Loss: 0.4745, Validation Accuracy: 0.7684
Epoch [4/500], Train Loss: 0.4580, Train Accuracy: 77.70%
Epoch 4/500 - Validation Loss: 0.4668, Validation Accuracy: 0.7757
Epoch [5/500], Train Loss: 0.4534, Train Accuracy: 76.73%
Epoch 5/500 - Validation Loss: 0.4714, Validation Accuracy: 0.7684
Epoch [6/500], Train Loss: 0.4403, Train Accuracy: 78.11%
Epoch 6/500 - Validation Loss: 0.4228, Validation Accuracy: 0.7978
Best model saved at epoch 6 with val acc: 0.7978
Epoch [7/500], Train Loss: 0.4329, Train Accuracy: 78.48%
Epoch 7/500 - Validation Loss: 0.4008, Valida

In [9]:
model.load_state_dict(torch.load('7-segment_datasets/highRel_weights/best_model.pth'))

  model.load_state_dict(torch.load('7-segment_datasets/highRel_weights/best_model.pth'))


<All keys matched successfully>

In [10]:
import torch
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Define a function for evaluating the model on the validation set
def evaluate_with_confusion_matrix(model, val_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    all_labels = []
    all_predictions = []
    
    with torch.no_grad():  # No need to compute gradients for validation
        for keypoints, labels in val_loader:
            # Forward pass
            outputs = model(keypoints)  # outputs shape should be [batch_size, 1]
            # outputs = outputs.squeeze()
            # labels = labels.squeeze().long()
            # Compute loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            # Get predictions
            predicted_labels = outputs.round()  # This should be along dimension 1 for batch data
            all_labels.append(labels.tolist()[0][0])
            all_predictions.append(predicted_labels.tolist()[0][0])
            
        # Calculate average loss
    avg_loss = running_loss / len(val_loader)
    
    # Calculate confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)
    accuracy = np.trace(cm) / np.sum(cm)
    precision = precision_score(all_labels, all_predictions, average='binary')
    recall = recall_score(all_labels, all_predictions, average='binary')
    f1 = f1_score(all_labels, all_predictions, average='binary')
    print('Precision: ', precision)
    print('Recall: ', recall)
    print('f1-score: ', f1)
    return avg_loss, accuracy, cm

# Use the function
avg_loss, accuracy, cm = evaluate_with_confusion_matrix(model, val_loader, criterion)
print(f"Average Loss: {avg_loss}, Accuracy: {accuracy}")
print("Confusion Matrix:\n", cm)


Precision:  0.9425287356321839
Recall:  0.9111111111111111
f1-score:  0.9265536723163842
Average Loss: 0.36838574798661033, Accuracy: 0.9171974522292994
Confusion Matrix:
 [[186  15]
 [ 24 246]]


with z point:
Precision:  0.9277566539923955
Recall:  0.9037037037037037
f1-score:  0.9155722326454033
Average Loss: 0.43734880255015096, Accuracy: 0.9044585987261147
Confusion Matrix:
 [[182  19]
 [ 26 244]]

without ball points:
Precision:  0.9427480916030534
Recall:  0.9148148148148149
f1-score:  0.9285714285714286
Average Loss: 0.427513430891732, Accuracy: 0.9193205944798302
Confusion Matrix:
 [[186  15]
 [ 23 247]]

with Ball points and no z point:
Precision:  0.9425287356321839
Recall:  0.9111111111111111
f1-score:  0.9265536723163842
Average Loss: 0.36838574798661033, Accuracy: 0.9171974522292994
Confusion Matrix:
 [[186  15]
 [ 24 246]]
 