## Imports

In [18]:
%pip install wandb

Collecting wandb
  Using cached wandb-0.19.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting sentry-sdk>=2.0.0 (from wandb)
  Downloading sentry_sdk-2.24.1-py2.py3-none-any.whl.metadata (10 kB)
Collecting setproctitle (from wandb)
  Using cached setproctitle-1.3.5-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Using cached wandb-0.19.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.8 MB)
Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Downloading sentry_sdk-2.24.1-py2.py3-none-any.whl (336 kB)
Using cached setproctitle-1.3.5-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31 kB)
Installing collected packages: setproctitle, sentry-sdk, docker-pycreds, wandb
Successfully installed docker-

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import random
import wandb

In [2]:
# Luka's code
#Path
dataset_path = '/exchange/dspro2/silent-speech/full-dataset/'

#Load the dataset
X = np.load(dataset_path + "X_landmarks.npy")
y = np.load(dataset_path + "y_labels.npy")

#Ensure all labels are in uppercase before encoding
y = np.array([label.upper() for label in y])

#Split into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)  # 70% Train
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)  # 15% Validation, 15% Test
print(X_train.shape, X_temp.shape, y_train.shape, y_temp.shape)
print(X_val.shape, X_test.shape, y_val.shape, y_test.shape)
print(f'X_train0  {X_train[0]}')

#Convert labels to numbers (A-Z → 0-26)
label_encoder = LabelEncoder()
label_encoder.fit(np.concatenate((y_train, y_val, y_test)))  # Fit on full dataset
y_train = label_encoder.transform(y_train)
y_val = label_encoder.transform(y_val)
y_test = label_encoder.transform(y_test)
print(f'Encoder: {y_train[0]}')

# Normalize the landmarks (Standardization: mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))  # Flatten before fitting
X_val = scaler.transform(X_val.reshape(X_val.shape[0], -1))
X_test = scaler.transform(X_test.reshape(X_test.shape[0], -1))
# No need to normalize y sets, since it is categorical data.
print(f'X_train0  {X_train[0]}')

#Convert to PyTorch tensors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# To check encoder
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

print("Label Mapping (Letter → Number):")
print(label_mapping)
print(len(label_mapping))

(103313, 63) (44277, 63) (103313,) (44277,)
(22138, 63) (22139, 63) (22138,) (22139,)
X_train0  [ 4.74841505e-01  8.52257133e-01  1.46229922e-06  5.43130815e-01
  7.87584066e-01 -7.35012516e-02  5.51918805e-01  6.86782658e-01
 -1.08694024e-01  4.63680536e-01  6.33455217e-01 -1.39162436e-01
  3.75922471e-01  6.09890461e-01 -1.65406048e-01  5.25304854e-01
  5.12442410e-01 -5.95139042e-02  5.63664794e-01  3.69527698e-01
 -1.12695187e-01  5.84739983e-01  2.74222910e-01 -1.47435576e-01
  5.97175360e-01  1.98159873e-01 -1.66424483e-01  4.38691258e-01
  5.25739908e-01 -5.98610975e-02  3.80971014e-01  3.72085869e-01
 -1.20647617e-01  3.46378207e-01  2.70861030e-01 -1.59385324e-01
  3.15379590e-01  1.83500409e-01 -1.74398318e-01  3.74877930e-01
  5.80185652e-01 -6.73106834e-02  3.61128092e-01  5.29071808e-01
 -1.56928211e-01  4.08112168e-01  6.01952732e-01 -1.73931554e-01
  4.32187468e-01  6.50748730e-01 -1.55184045e-01  3.29831362e-01
  6.52131617e-01 -7.87678286e-02  3.25864255e-01  6.0419452

In [24]:
import numpy as np
dataset_path = '/exchange/dspro2/silent-speech/full-dataset/'

X = np.load(dataset_path + "X_landmarks.npy")

X_train = np.load(dataset_path + "X_train.npy")
X_val = np.load(dataset_path + "X_val.npy")
X_test = np.load(dataset_path + "X_test.npy")
y_train = np.load(dataset_path + "y_train.npy")
y_val = np.load(dataset_path + "y_val.npy")
y_test = np.load(dataset_path + "y_test.npy")
y = np.load(dataset_path + "y_labels.npy")
y = np.array([label.upper() for label in y])
print(X.shape)
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)
print(X_train.shape[0]+X_val.shape[0]+ X_test.shape[0])
print(y[0], y.shape)

print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

# Normalize the landmarks (Standardization: mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))  # Flatten before fitting
X_val = scaler.transform(X_val.reshape(X_val.shape[0], -1))
X_test = scaler.transform(X_test.reshape(X_test.shape[0], -1))
# No need to normalize y sets, since it is categorical data.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)



(147590, 63)
(103313, 63)
(22138, 63)
(22139, 63)
147590
A (147590,)
(103313,)
(22138,)
(22139,)


In [25]:
print(X_train.shape)
print(X_train_tensor.shape)

print(X_train[0])
print(X_train_tensor[0])
print(len(torch.unique(y_train_tensor)))

(103313, 63)
torch.Size([103313, 63])
[-0.14409181  0.97627613  1.35064229  0.24285501  0.99964528 -0.5833287
  0.2953147   0.9316651  -0.47930831 -0.21967023  0.85458174 -0.42988094
 -0.69308043  0.74726016 -0.38630752  0.19540619  0.52824147 -0.17688858
  0.43654401 -0.05427969 -0.14882095  0.50030596 -0.70218994 -0.14920302
  0.49435292 -0.9513238  -0.14855248 -0.49333923  0.54173687 -0.14789362
 -0.88234453 -0.16789072 -0.1309146  -1.04753766 -0.89293535 -0.24910232
 -1.11704326 -1.16000137 -0.31373966 -0.85415464  0.71687904 -0.10420099
 -0.89206102  0.65014668 -0.35043247 -0.61472954  0.75129988 -0.47526191
 -0.42991866  0.78166519 -0.39982409 -0.91840378  0.89376961 -0.04297662
 -0.90769314  0.83231145 -0.29224907 -0.72728907  0.95339554 -0.24339432
 -0.59232106  0.98834825 -0.09789881]
tensor([-0.1441,  0.9763,  1.3506,  0.2429,  0.9996, -0.5833,  0.2953,  0.9317,
        -0.4793, -0.2197,  0.8546, -0.4299, -0.6931,  0.7473, -0.3863,  0.1954,
         0.5282, -0.1769,  0.4365, 

## CNN Model

In [26]:
import torch.nn as nn
import torch.nn.functional as F

class AslCNN(nn.Module):
    def __init__(self, num_classes):
        super(AslCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1)
        self.fc1 = nn.Linear(32 * 21 * 3, 128)  # Fully connected layer
        self.fc2 = nn.Linear(128, num_classes)  # Output layer
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # No activation since CrossEntropyLoss applies Softmax
        return x


In [27]:
# Reproducibility --> TODO CHECK IF WORKING
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f4e6018fe50>

### Attempt with shuffled y to search bias in the model

In [30]:
y_shuffled = y_train_tensor[torch.randperm(y_train_tensor.size(0))]  # Shuffle labels

if not isinstance(y_shuffled, torch.Tensor):
    y_shuffled = torch.tensor(y_shuffled, dtype=torch.long).to(device)
    
# Create a new dataset with shuffled labels
shuffled_dataset = TensorDataset(X_train_tensor, y_shuffled)

# Load into DataLoader
shuffled_loader = DataLoader(shuffled_dataset, batch_size=32, shuffle=True)


In [34]:
import wandb
wandb.login()


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mshse13[0m ([33mshse13-doe[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Data loading

In [28]:
from torch.utils.data import TensorDataset, DataLoader
# Create DataLoaders
X_train_tensor = X_train_tensor.view(-1, 1, 21, 3)  # Adjust this based on expected shape (4D and not 2D)
X_val_tensor = X_val_tensor.view(-1, 1, 21, 3)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


## Training, validation, test loops

In [31]:
import torch.optim as optim


# Define Model, Loss, and Optimizer
num_classes = len(torch.unique(y_train_tensor)) # 27
model = AslCNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    
    for X_batch, y_batch in shuffled_loader: # to change
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

print("Training complete!")

Epoch 1, Loss: 3.2822
Epoch 2, Loss: 3.2807
Epoch 3, Loss: 3.2806
Epoch 4, Loss: 3.2804
Epoch 5, Loss: 3.2802
Epoch 6, Loss: 3.2799
Epoch 7, Loss: 3.2792
Epoch 8, Loss: 3.2780
Epoch 9, Loss: 3.2768
Epoch 10, Loss: 3.2752
Training complete!


In [15]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for X_batch, y_batch in val_loader:
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

Validation Accuracy: 99.96%


In [16]:
X_test_tensor = X_test_tensor.view(-1, 1, 21, 3) # already done above

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


model.eval()
correct = 0
total = 0

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

Validation Accuracy: 99.96%
