## 1. Single image classification

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import os
import numpy as np
import copy
import random

# --- CONFIGURATION ---
BASE_DIR = "/home/dad/Desktop/temporal_test_images/data/single_image"
TRAIN_DIR = os.path.join(BASE_DIR, "train")
VAL_DIR = os.path.join(BASE_DIR, "val")

BATCH_SIZE    = 65
LEARNING_RATE = 0.001
NUM_EPOCHS    = 10
NUM_CLASSES   = 3 
SEED          = 0  # Fixed Seed

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def set_seed(seed):
    """Fix all random seeds for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f"Random Seed set to: {seed}")

def calculate_weights(dataset):
    targets = dataset.targets
    counts = np.bincount(targets)
    print(f"  Train Class Counts: {counts}")
    counts = np.maximum(counts, 1) 
    weights = len(targets) / (len(counts) * counts)
    return torch.FloatTensor(weights).to(device)

def train_single_frame():
    set_seed(SEED) # <--- APPLY SEED HERE

    # Standard ResNet Transforms
    data_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    print("--- Loading Single-Frame Datasets ---")
    train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=data_transforms)
    val_dataset = datasets.ImageFolder(VAL_DIR, transform=data_transforms)
    
    # Dataloaders (Worker seeding is handled by PyTorch usually, but main seed is key)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    class_weights = calculate_weights(train_dataset)

    model = models.resnet18(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

    print(f"Starting Training on {len(train_dataset)} images...")

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Validation
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        train_acc = correct / total
        val_acc = val_correct / val_total
        print(f"Epoch {epoch+1}: Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), "best_single_frame_resnet18.pth")
            print(f"  >>> New Best Model Saved! (Acc: {best_acc:.4f})")

    print(f"Training Complete. Best Validation Accuracy: {best_acc:.4f}")

if __name__ == "__main__":
    train_single_frame()

Random Seed set to: 0
--- Loading Single-Frame Datasets ---
  Train Class Counts: [1885 2560 2917]
Starting Training on 7362 images...


Epoch 1/10: 100%|██████████| 148/148 [00:13<00:00, 10.92it/s]


Epoch 1: Train Acc: 0.7738 | Val Acc: 0.8414
  >>> New Best Model Saved! (Acc: 0.8414)


Epoch 2/10: 100%|██████████| 148/148 [00:13<00:00, 10.85it/s]


Epoch 2: Train Acc: 0.8898 | Val Acc: 0.8738
  >>> New Best Model Saved! (Acc: 0.8738)


Epoch 3/10: 100%|██████████| 148/148 [00:13<00:00, 10.86it/s]


Epoch 3: Train Acc: 0.9390 | Val Acc: 0.8770
  >>> New Best Model Saved! (Acc: 0.8770)


Epoch 4/10: 100%|██████████| 148/148 [00:13<00:00, 10.88it/s]


Epoch 4: Train Acc: 0.9666 | Val Acc: 0.8835
  >>> New Best Model Saved! (Acc: 0.8835)


Epoch 5/10: 100%|██████████| 148/148 [00:13<00:00, 10.89it/s]


Epoch 5: Train Acc: 0.9863 | Val Acc: 0.8867
  >>> New Best Model Saved! (Acc: 0.8867)


Epoch 6/10: 100%|██████████| 148/148 [00:13<00:00, 11.05it/s]


Epoch 6: Train Acc: 0.9928 | Val Acc: 0.8770


Epoch 7/10: 100%|██████████| 148/148 [00:13<00:00, 10.87it/s]


Epoch 7: Train Acc: 0.9973 | Val Acc: 0.8867


Epoch 8/10: 100%|██████████| 148/148 [00:13<00:00, 10.87it/s]


Epoch 8: Train Acc: 0.9978 | Val Acc: 0.8803


Epoch 9/10: 100%|██████████| 148/148 [00:13<00:00, 10.91it/s]


Epoch 9: Train Acc: 0.9992 | Val Acc: 0.8900
  >>> New Best Model Saved! (Acc: 0.8900)


Epoch 10/10: 100%|██████████| 148/148 [00:13<00:00, 10.88it/s]


Epoch 10: Train Acc: 0.9990 | Val Acc: 0.8835
Training Complete. Best Validation Accuracy: 0.8900


## 2. Early fusion classification (3 frames)

In [9]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm
import numpy as np
import copy
import random

# --- CONFIGURATION ---
BASE_DIR = "/home/dad/Desktop/temporal_test_images/data/temporal_images"
TRAIN_DIR = os.path.join(BASE_DIR, "train")
VAL_DIR = os.path.join(BASE_DIR, "val")

BATCH_SIZE = 65
LEARNING_RATE = 0.001
NUM_EPOCHS = 10
NUM_CLASSES = 3 
SEED = 0# Fixed Seed

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def set_seed(seed):
    """Fix all random seeds for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f"Random Seed set to: {seed}")

class TemporalStackedDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []
        self.labels = [] 
        self.class_to_idx = {'U': 0, 'D': 1, 'P': 2} 
        
        for class_name, class_idx in self.class_to_idx.items():
            class_path = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_path): continue
            
            t0_files = glob.glob(os.path.join(class_path, "*_t0.jpg"))
            for t0_path in t0_files:
                t1_path = t0_path.replace("_t0.jpg", "_t1.jpg")
                t2_path = t0_path.replace("_t0.jpg", "_t2.jpg")
                
                if os.path.exists(t1_path) and os.path.exists(t2_path):
                    self.samples.append((t0_path, t1_path, t2_path, class_idx))
                    self.labels.append(class_idx)

    def __len__(self): return len(self.samples)

    def __getitem__(self, idx):
        path_t0, path_t1, path_t2, label = self.samples[idx]
        img_t0 = Image.open(path_t0).convert('RGB')
        img_t1 = Image.open(path_t1).convert('RGB')
        img_t2 = Image.open(path_t2).convert('RGB')

        if self.transform:
            img_t0 = self.transform(img_t0)
            img_t1 = self.transform(img_t1)
            img_t2 = self.transform(img_t2)

        stacked_imgs = torch.cat([img_t0, img_t1, img_t2], dim=0)
        return stacked_imgs, label

def get_temporal_model(num_classes=3):
    model = models.resnet18(pretrained=True)
    original_weights = model.conv1.weight.data
    new_conv1 = nn.Conv2d(9, 64, kernel_size=7, stride=2, padding=3, bias=False)
    new_weights = torch.cat([original_weights, original_weights, original_weights], dim=1)
    new_conv1.weight.data = new_weights / 3.0
    model.conv1 = new_conv1
    
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)
    return model

def calculate_weights(dataset):
    counts = np.bincount(dataset.labels)
    print(f"  Train Class Counts: {counts}")
    counts = np.maximum(counts, 1)
    weights = len(dataset.labels) / (len(counts) * counts)
    return torch.FloatTensor(weights).to(device)

def train_temporal():
    set_seed(SEED) # <--- APPLY SEED HERE

    data_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 
    ])

    print("--- Loading Temporal Datasets ---")
    train_dataset = TemporalStackedDataset(TRAIN_DIR, transform=data_transforms)
    val_dataset = TemporalStackedDataset(VAL_DIR, transform=data_transforms)
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    class_weights = calculate_weights(train_dataset)

    model = get_temporal_model(NUM_CLASSES)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

    print(f"Starting Training on {len(train_dataset)} triplets...")

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        train_acc = correct / total
        val_acc = val_correct / val_total
        print(f"Epoch {epoch+1}: Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), "best_temporal_resnet18_9ch.pth")
            print(f"  >>> New Best Model Saved! (Acc: {best_acc:.4f})")

    print(f"Training Complete. Best Validation Accuracy: {best_acc:.4f}")

if __name__ == "__main__":
    train_temporal()

Random Seed set to: 0
--- Loading Temporal Datasets ---
  Train Class Counts: [2917 1885 2560]
Starting Training on 7362 triplets...


Epoch 1/10: 100%|██████████| 114/114 [00:18<00:00,  6.05it/s]


Epoch 1: Train Acc: 0.7460 | Val Acc: 0.8479
  >>> New Best Model Saved! (Acc: 0.8479)


Epoch 2/10: 100%|██████████| 114/114 [00:18<00:00,  6.08it/s]


Epoch 2: Train Acc: 0.8890 | Val Acc: 0.9191
  >>> New Best Model Saved! (Acc: 0.9191)


Epoch 3/10: 100%|██████████| 114/114 [00:18<00:00,  6.09it/s]


Epoch 3: Train Acc: 0.9321 | Val Acc: 0.9029


Epoch 4/10: 100%|██████████| 114/114 [00:18<00:00,  6.12it/s]


Epoch 4: Train Acc: 0.9612 | Val Acc: 0.9061


Epoch 5/10: 100%|██████████| 114/114 [00:18<00:00,  6.12it/s]


Epoch 5: Train Acc: 0.9766 | Val Acc: 0.8997


Epoch 6/10: 100%|██████████| 114/114 [00:18<00:00,  6.10it/s]


Epoch 6: Train Acc: 0.9908 | Val Acc: 0.9159


Epoch 7/10: 100%|██████████| 114/114 [00:18<00:00,  6.09it/s]


Epoch 7: Train Acc: 0.9954 | Val Acc: 0.9029


Epoch 8/10: 100%|██████████| 114/114 [00:18<00:00,  6.10it/s]


Epoch 8: Train Acc: 0.9974 | Val Acc: 0.9191


Epoch 9/10: 100%|██████████| 114/114 [00:18<00:00,  6.09it/s]


Epoch 9: Train Acc: 0.9977 | Val Acc: 0.9256
  >>> New Best Model Saved! (Acc: 0.9256)


Epoch 10/10: 100%|██████████| 114/114 [00:18<00:00,  6.07it/s]


Epoch 10: Train Acc: 0.9993 | Val Acc: 0.9256
Training Complete. Best Validation Accuracy: 0.9256


In [11]:
pwd

'/home/dad/Desktop/temporal_test_images'

In [12]:
import torch
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import glob
from tqdm import tqdm

# --- CONFIGURATION ---
# 1. Path to your Test Sets
SINGLE_TEST_DIR = "/home/dad/Desktop/temporal_test_images/data/single_image/test"
TEMPORAL_TEST_DIR = "/home/dad/Desktop/temporal_test_images/data/temporal_images/test"

# 2. Model Files (Must allow match the names you saved)
SINGLE_MODEL_PATH = "best_single_frame_resnet18.pth"
TEMPORAL_MODEL_PATH = "best_temporal_resnet18_9ch.pth"

BATCH_SIZE = 32
NUM_CLASSES = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- DATASET CLASSES (Must match training code) ---
class TemporalStackedDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []
        # Ensure exact same class mapping as training
        self.class_to_idx = {'U': 0, 'D': 1, 'P': 2}
        
        for class_name, class_idx in self.class_to_idx.items():
            class_path = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_path): continue
            
            t0_files = glob.glob(os.path.join(class_path, "*_t0.jpg"))
            for t0_path in t0_files:
                t1_path = t0_path.replace("_t0.jpg", "_t1.jpg")
                t2_path = t0_path.replace("_t0.jpg", "_t2.jpg")
                if os.path.exists(t1_path) and os.path.exists(t2_path):
                    self.samples.append((t0_path, t1_path, t2_path, class_idx))

    def __len__(self): return len(self.samples)

    def __getitem__(self, idx):
        path_t0, path_t1, path_t2, label = self.samples[idx]
        img_t0 = Image.open(path_t0).convert('RGB')
        img_t1 = Image.open(path_t1).convert('RGB')
        img_t2 = Image.open(path_t2).convert('RGB')
        
        if self.transform:
            img_t0 = self.transform(img_t0)
            img_t1 = self.transform(img_t1)
            img_t2 = self.transform(img_t2)
            
        return torch.cat([img_t0, img_t1, img_t2], dim=0), label

# --- MODEL LOADERS ---
def load_single_model(path):
    print(f"Loading Single-Frame Model from {path}...")
    model = models.resnet18(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
    model.load_state_dict(torch.load(path))
    model.to(device)
    model.eval()
    return model

def load_temporal_model(path):
    print(f"Loading Temporal Model from {path}...")
    model = models.resnet18(pretrained=False)
    # Rebuild 9-channel layer structure
    new_conv1 = nn.Conv2d(9, 64, kernel_size=7, stride=2, padding=3, bias=False)
    model.conv1 = new_conv1
    
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
    
    model.load_state_dict(torch.load(path))
    model.to(device)
    model.eval()
    return model

# --- EVALUATION FUNCTION ---
def evaluate(model, dataloader, name):
    correct = 0
    total = 0
    
    # Optional: Track per-class accuracy
    class_correct = list(0. for i in range(NUM_CLASSES))
    class_total = list(0. for i in range(NUM_CLASSES))
    
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc=f"Testing {name}"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Per class calculation
            c = (predicted == labels).squeeze()
            for i in range(len(labels)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    
    acc = 100 * correct / total
    print(f"\n>>> {name} Overall Accuracy: {acc:.2f}%")
    
    classes = ['Up (U)', 'Down (D)', 'Pass (P)']
    for i in range(NUM_CLASSES):
        if class_total[i] > 0:
            print(f"    {classes[i]} Accuracy: {100 * class_correct[i] / class_total[i]:.2f}%")
            
    return acc

# --- MAIN ---
if __name__ == "__main__":
    # Standard Transforms (Same as training)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # 1. Evaluate Single Frame
    if os.path.exists(SINGLE_MODEL_PATH):
        single_dataset = datasets.ImageFolder(SINGLE_TEST_DIR, transform=transform)
        single_loader = DataLoader(single_dataset, batch_size=BATCH_SIZE, shuffle=False)
        model_single = load_single_model(SINGLE_MODEL_PATH)
        acc_single = evaluate(model_single, single_loader, "Single-Frame")
    else:
        print(f"Error: Could not find {SINGLE_MODEL_PATH}")
        acc_single = 0

    # 2. Evaluate Temporal
    if os.path.exists(TEMPORAL_MODEL_PATH):
        temporal_dataset = TemporalStackedDataset(TEMPORAL_TEST_DIR, transform=transform)
        temporal_loader = DataLoader(temporal_dataset, batch_size=BATCH_SIZE, shuffle=False)
        model_temporal = load_temporal_model(TEMPORAL_MODEL_PATH)
        acc_temporal = evaluate(model_temporal, temporal_loader, "Temporal")
    else:
        print(f"Error: Could not find {TEMPORAL_MODEL_PATH}")
        acc_temporal = 0

    # 3. Final Report
    print("\n" + "="*40)
    print(f"FINAL TEST SET RESULTS")
    print(f"Single-Frame Model: {acc_single:.2f}%")
    print(f"Temporal Model:     {acc_temporal:.2f}%")
    print(f"Improvement:        {acc_temporal - acc_single:+.2f}%")
    print("="*40)



Loading Single-Frame Model from best_single_frame_resnet18.pth...


Testing Single-Frame: 100%|██████████| 10/10 [00:00<00:00, 21.02it/s]



>>> Single-Frame Overall Accuracy: 88.37%
    Up (U) Accuracy: 82.00%
    Down (D) Accuracy: 94.06%
    Pass (P) Accuracy: 89.00%
Loading Temporal Model from best_temporal_resnet18_9ch.pth...


Testing Temporal: 100%|██████████| 10/10 [00:01<00:00,  8.66it/s]


>>> Temporal Overall Accuracy: 89.37%
    Up (U) Accuracy: 83.00%
    Down (D) Accuracy: 90.00%
    Pass (P) Accuracy: 95.05%

FINAL TEST SET RESULTS
Single-Frame Model: 88.37%
Temporal Model:     89.37%
Improvement:        +1.00%



