In [36]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
print(os.listdir('/kaggle/input/asl-signs'))
   

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

['sign_to_prediction_index_map.json', 'train.csv', 'train_landmark_files']


In [37]:
# Kaggle already includes these; no need to install
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")


In [38]:
BASE_DIR = "/kaggle/input/asl-signs"  # or your local path
ROWS_PER_FRAME = 543
NUM_CLASSES = 250
MAX_LEN = 384  # Frames to keep per sample
PAD = -100.0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [39]:
# Define landmark indices from MediaPipe
LIP = [61, 185, 40, 39, 37, 267, 269, 270, 409, 291, 146, 91, 181, 84, 17, 314, 405, 321, 375]
LHAND = np.arange(468, 489).tolist()
RHAND = np.arange(522, 543).tolist()
NOSE = [1, 2, 98, 327]
REYE = [33, 7, 163, 144, 145]
LEYE = [263, 249, 390, 373, 374]

POINT_LANDMARKS = LIP + LHAND + RHAND + NOSE + REYE + LEYE
NUM_NODES = len(POINT_LANDMARKS)
CHANNELS = 6 * NUM_NODES


In [40]:
df = pd.read_csv(f"{BASE_DIR}/train.csv")
le = LabelEncoder()
df['label'] = le.fit_transform(df['sign'])
sign_to_index = {s: i for i, s in enumerate(le.classes_)}


In [41]:
def torch_nan_mean(x, dim=0, keepdim=False):
    mask = ~torch.isnan(x)
    x = torch.where(mask, x, torch.zeros_like(x))
    count = mask.sum(dim=dim, keepdim=keepdim).clamp(min=1)
    return x.sum(dim=dim, keepdim=keepdim) / count

def torch_nan_std(x, center=None, dim=0, keepdim=False):
    if center is None:
        center = torch_nan_mean(x, dim=dim, keepdim=True)
    d = x - center
    variance = torch_nan_mean(d * d, dim=dim, keepdim=keepdim)
    return torch.sqrt(variance)


In [42]:
class Preprocess(nn.Module):
    def __init__(self, max_len=None, point_landmarks=None):
        super().__init__()
        self.max_len = max_len
        self.point_landmarks = point_landmarks

    def forward(self, inputs):
        if inputs.dim() == 3:
            x = inputs.unsqueeze(0)
        else:
            x = inputs

        mean = torch_nan_mean(x[:, :, [17], :], dim=(1, 2), keepdim=True)

        mean = torch.where(torch.isnan(mean), torch.tensor(0.5, device=x.device), mean)

        x = x[:, :, self.point_landmarks, :]
        std = torch_nan_std(x, mean, dim=(1, 2), keepdim=True)

        x = (x - mean) / std

        if self.max_len:
            x = x[:, :self.max_len]
        length = x.shape[1]

        x = x[..., :2]  # drop z

        dx = torch.cat([x[:, 1:] - x[:, :-1], torch.zeros_like(x[:, :1])], dim=1)
        dx2 = torch.cat([x[:, 2:] - x[:, :-2], torch.zeros_like(x[:, :2])], dim=1)

        x = torch.cat([
            x.reshape(-1, length, 2 * len(self.point_landmarks)),
            dx.reshape(-1, length, 2 * len(self.point_landmarks)),
            dx2.reshape(-1, length, 2 * len(self.point_landmarks)),
        ], dim=-1)

        return torch.nan_to_num(x, nan=0.0)


In [43]:
def preprocess(x, sign, max_len=MAX_LEN):
    x = torch.tensor(x, dtype=torch.float32)
    x = x.view(-1, ROWS_PER_FRAME, 3)

    if x.shape[0] < max_len:
        pad_len = max_len - x.shape[0]
        pad = torch.zeros((pad_len, ROWS_PER_FRAME, 3))
        x = torch.cat([x, pad], dim=0)
    else:
        x = x[:max_len]

    processor = Preprocess(max_len=max_len, point_landmarks=POINT_LANDMARKS)
    x = processor(x.unsqueeze(0)).squeeze(0)

    label = sign_to_index[sign]
    return x, label


In [44]:
class SignLanguageDataset(Dataset):
    def __init__(self, df, max_len=384,augment=False, transform=None):
        self.df = df
        self.max_len = max_len
        self.augment = augment
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        path = os.path.join(BASE_DIR, row['path'])
        data = pd.read_parquet(path)
        coords = data[['x', 'y', 'z']].values

        x, y = self.transform(coords, row['sign'], max_len=self.max_len)
        return x.float(), torch.tensor(y).long()


In [45]:
def get_pytorch_dataset(df, batch_size=64, max_len=384,augment=False, transform=None):
    dataset = SignLanguageDataset(df, max_len=max_len, transform=transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=2,pin_memory=True)


In [46]:
train_df, val_df = train_test_split(df, test_size=0.1, stratify=df['sign'], random_state=42)

train_loader = get_pytorch_dataset(train_df, batch_size=64, max_len=384, transform=preprocess)
val_loader = get_pytorch_dataset(val_df, batch_size=64, max_len=384, transform=preprocess)


BUILD THE 1D CNN + LSTM MODEL 

In [47]:
import torch.nn as nn

class CNNLSTMModel(nn.Module):
    def __init__(self, input_dim=708, num_classes=250, hidden_size=256):
        super(CNNLSTMModel, self).__init__()

        self.conv1 = nn.Conv1d(input_dim, 256, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.3)

        self.lstm = nn.LSTM(input_size=256, hidden_size=hidden_size, batch_first=True, bidirectional=True)

        self.fc1 = nn.Linear(hidden_size * 2, 256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        self.fc_out = nn.Linear(256, num_classes)

    def forward(self, x):  # x: (batch, seq_len, features)
        x = x.transpose(1, 2)  # → (batch, features, seq_len) for Conv1D
        x = self.dropout1(self.bn1(self.relu1(self.conv1(x))))
        x = x.transpose(1, 2)  # → (batch, seq_len, features) for LSTM

        lstm_out, _ = self.lstm(x)  # → (batch, seq_len, hidden*2)
        x = lstm_out[:, -1, :]  # Use last timestep

        x = self.dropout2(self.relu2(self.fc1(x)))
        return self.fc_out(x)


In [48]:
from sklearn.metrics import f1_score, accuracy_score

def compute_metrics(y_true, y_pred):
    y_true = y_true.cpu().numpy()
    y_pred = y_pred.cpu().numpy()

    preds = y_pred.argmax(axis=1)
    acc = accuracy_score(y_true, preds)
    f1 = f1_score(y_true, preds, average='macro')

    return {
        "accuracy": acc,
        "f1_score": f1,
        "score": (acc + f1) / 2  # Final metric
    }


In [49]:
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm

def train_model(model, dataloader, validloader, optimizer, criterion, epochs=10):
    best_score = 0
    scaler = GradScaler()  # For mixed precision

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        print(f"\nEpoch {epoch + 1}/{epochs}")
        for inputs, labels in tqdm(dataloader, desc="Training", leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with autocast():  # Mixed precision context
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

        print(f"Train Loss: {total_loss / len(dataloader):.4f}")

        # 🔍 Validation (no need for AMP here)
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in validloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                all_preds.append(outputs.cpu())
                all_labels.append(labels.cpu())

        all_preds = torch.cat(all_preds)
        all_labels = torch.cat(all_labels)

        metrics = compute_metrics(all_labels, all_preds)
        print(f"Validation Accuracy: {metrics['accuracy']:.4f}, F1 Score: {metrics['f1_score']:.4f}, Combined Score: {metrics['score']:.4f}")

        if metrics['score'] > best_score:
            best_score = metrics['score']
            torch.save(model.state_dict(), "best_model.pt")
            print("✅ New best model saved!")


In [51]:
# Load dataset
train_df, val_df = train_test_split(df, test_size=0.1, stratify=df['sign'], random_state=42)

train_loader = get_pytorch_dataset(train_df, batch_size=64, max_len=384, augment=True, transform=preprocess)
val_loader = get_pytorch_dataset(val_df, batch_size=64, max_len=384, augment=False, transform=preprocess)

# Initialize model
model = CNNLSTMModel(input_dim=450, num_classes=250).to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train
train_model(model, train_loader, val_loader, optimizer, criterion, epochs=3)



Epoch 1/3


                                                             

Train Loss: 5.4561




Validation Accuracy: 0.0093, F1 Score: 0.0011, Combined Score: 0.0052
✅ New best model saved!

Epoch 2/3


                                                             

Train Loss: 5.3724




Validation Accuracy: 0.0123, F1 Score: 0.0023, Combined Score: 0.0073
✅ New best model saved!

Epoch 3/3


                                                             

Train Loss: 5.3471




Validation Accuracy: 0.0130, F1 Score: 0.0033, Combined Score: 0.0081
✅ New best model saved!
