In [543]:
import numpy as np
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import TensorDataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

from tqdm.auto import tqdm

import wandb

import logging
import sys
from torchinfo import summary

In [544]:
def setup_logger(name=__name__):
    """
    Return the logger registered under ``name``, configuring it on first use.

    A logger without handlers is set to INFO and given one stdout
    ``StreamHandler`` with a timestamped format. A logger that already has
    handlers is returned untouched, so re-running the cell does not stack
    duplicate handlers.
    """
    log = logging.getLogger(name)
    if log.handlers:
        # Already configured (for example, this cell was executed before).
        return log

    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    log.setLevel(logging.INFO)
    log.addHandler(console)
    return log

logger = setup_logger()

In [545]:
# Load the dataset
data_folder = "../data"
preped_folder = os.path.join(data_folder, "_preped")

# Each CSV row is unpacked below as an (image file name, label) pair.
train_data = pd.read_csv(os.path.join(data_folder, 'train_data.csv')).values.tolist()
test_data = pd.read_csv(os.path.join(data_folder, 'test_data.csv')).values.tolist()

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to consistent size
    transforms.ToTensor(),           # Convert to tensor [0, 1]
    transforms.Normalize(mean=[0.5], std=[0.5])  # Single channel: [0, 1] -> [-1, 1]
])

x_train = []
y_train = []

for img_name, label in train_data:
    img_path = os.path.join(preped_folder, img_name)
    try:
        # The context manager closes the file handle even if decoding fails;
        # convert() returns an independent, fully loaded image.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('L') # Convert to grayscale
        img_tensor = transform(img)
    except Exception as e:
        # A skipped sample silently shrinks the training set, so report it
        # as a warning instead of an informational message.
        logger.warning(f"Error loading {img_name}: {e}")
        continue
    x_train.append(img_tensor)
    y_train.append(label)

if not x_train:
    # torch.stack() on an empty list fails with an opaque message; fail with
    # the same exception type but say what actually went wrong.
    raise RuntimeError(f"No training images could be loaded from {preped_folder}")

# Stack into tensors
x_train_tensor = torch.stack(x_train)
logger.info(f"Training images shape: {x_train_tensor.shape}")

# Encode labels to integers. np.unique returns the distinct labels sorted;
# .tolist() turns them into native Python values so the mapping keys (and the
# logged mapping) are not numpy scalar wrappers.
label_to_idx = {label: idx for idx, label in enumerate(np.unique(y_train).tolist())}
y_train_encoded = [label_to_idx[label] for label in y_train]
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)

logger.info(f"Training labels shape: {y_train_tensor.shape}")
logger.info(f"Label mapping: {label_to_idx}")

2025-12-12 13:56:58,296 - INFO - Training images shape: torch.Size([241, 1, 224, 224])
2025-12-12 13:56:58,298 - INFO - Training labels shape: torch.Size([241])
2025-12-12 13:56:58,299 - INFO - Label mapping: {np.str_('1_Pronacio'): 0, np.str_('2_Neutralis'): 1, np.str_('3_Szupinacio'): 2}


In [546]:
# Load the test images with the same preprocessing as the training images.
x_test = []
y_test = []

for img_name, label in test_data:
    img_path = os.path.join(preped_folder, img_name)
    try:
        # The context manager closes the file handle even if decoding fails;
        # convert() returns an independent, fully loaded image.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('L') # Convert to grayscale
        img_tensor = transform(img)
    except Exception as e:
        # A skipped sample changes what the test metrics are computed on, so
        # report it as a warning instead of an informational message.
        logger.warning(f"Error loading {img_name}: {e}")
        continue
    x_test.append(img_tensor)
    y_test.append(label)

if not x_test:
    # torch.stack() on an empty list fails with an opaque message; fail with
    # the same exception type but say what actually went wrong.
    raise RuntimeError(f"No test images could be loaded from {preped_folder}")

x_test_tensor = torch.stack(x_test)
logger.info(f"Test images shape: {x_test_tensor.shape}")
# Reuse the mapping built from the training labels so class indices agree
# between the two splits; a label absent from that mapping raises KeyError.
y_test_encoded = [label_to_idx[label] for label in y_test]
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)

logger.info(f"Test labels shape: {y_test_tensor.shape}")

2025-12-12 13:57:04,754 - INFO - Test images shape: torch.Size([49, 1, 224, 224])
2025-12-12 13:57:04,755 - INFO - Test labels shape: torch.Size([49])


In [547]:
# Informational only: report which CUDA devices PyTorch can see.
if not torch.cuda.is_available():
    logger.info("CUDA not available")
else:
    logger.info(f"CUDA available: {torch.cuda.is_available()}")
    logger.info(f"Number of GPUs: {torch.cuda.device_count()}")
    gpu_count = torch.cuda.device_count()
    for i in range(gpu_count):
        # One name line per device, followed by its memory (bytes -> GiB)
        # and compute capability.
        logger.info(f"\nGPU {i}: {torch.cuda.get_device_name(i)}")
        props = torch.cuda.get_device_properties(i)
        logger.info(f"  Memory: {props.total_memory / 1024**3:.2f} GB")
        logger.info(f"  Compute Capability: {props.major}.{props.minor}")

2025-12-12 13:57:04,766 - INFO - CUDA available: True
2025-12-12 13:57:04,766 - INFO - Number of GPUs: 1
2025-12-12 13:57:04,767 - INFO - 
GPU 0: NVIDIA GeForce RTX 4060
2025-12-12 13:57:04,768 - INFO -   Memory: 8.00 GB
2025-12-12 13:57:04,769 - INFO -   Compute Capability: 8.9


In [548]:
# Training configuration
batch_size = 16
num_epochs = 70
# init_wandb() reads a notebook-level `lr`, which must exist before that
# function is first called. The value mirrors the learning rate passed to the
# Adam optimizer in the model cell; keep the two in sync.
lr = 0.01
# Fall back to the CPU so the notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Shuffle only the training batches; the test order stays fixed.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [549]:
# wandb login and init
# Load variables from a local .env file, then log in to wandb with the API key
# stored under "wandbKey" (os.getenv returns None when the variable is unset).
load_dotenv()
wandb.login(key=os.getenv("wandbKey"))

def init_wandb(learning_rate=None):
    """
    Start a wandb run in the "ankle-align" project and record the run config.

    Args:
        learning_rate: Value stored under "learning_rate" in the run config.
            When omitted, a notebook-level ``lr`` is used if one is defined,
            otherwise None is recorded. The previous version read the global
            ``lr`` unconditionally and raised NameError on a fresh kernel in
            which no cell had defined it.

    ``batch_size`` and ``num_epochs`` are read from the notebook namespace at
    call time.
    """
    if learning_rate is None:
        learning_rate = globals().get("lr")

    # Initialize wandb project
    wandb.init(
        project="ankle-align",
        config={
            "batch_size": batch_size,
            "num_epochs": num_epochs,
            "learning_rate": learning_rate,
            "architecture": "Custom CNN",
            "dataset": "AnkleAlign",
            "optimizer": "Adam"
        }
    )

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Win 10\_netrc


In [550]:
# Small CNN for single-channel 224x224 inputs: three stride-2 convolutions,
# global average pooling, then a three-layer fully connected head that emits
# 3 logits.
net0 = torch.nn.Sequential(
    # Feature extractor: each stride-2 convolution halves height and width.
    torch.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=2, padding=1),    # 224x224 -> 112x112
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1),   # 112x112 -> 56x56
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1),  # 56x56 -> 28x28
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d(output_size=1),  # 32x28x28 -> 32x1x1
    torch.nn.Flatten(),                         # 32x1x1 -> 32
    # Classifier head
    torch.nn.Linear(in_features=32, out_features=128),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=128, out_features=64),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=64, out_features=3),  # Output layer
)
net0 = net0.to(device)


def init_weights(m):
    """
    Initialise one module in place; intended for ``Module.apply``.

    Conv2d and Linear modules get Kaiming-uniform weights (fan-in mode, ReLU
    gain) and, when they have a bias, a zero bias. Any other module type is
    left unchanged.
    """
    if not isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        return
    torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu', mode='fan_in')
    if m.bias is None:
        # Layer was built with bias=False; nothing more to initialise.
        return
    torch.nn.init.zeros_(m.bias)

# Apply init_weights to every submodule of the network, then create the
# training objective and the optimizer.
net0.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net0.parameters(), lr=0.01)

# Last expression of the cell: renders the per-layer output-shape/parameter
# table for one batch of 1x224x224 inputs.
summary(net0, input_size=(batch_size, 1, 224, 224))


Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [16, 3]                   --
├─Conv2d: 1-1                            [16, 8, 112, 112]         80
├─ReLU: 1-2                              [16, 8, 112, 112]         --
├─Conv2d: 1-3                            [16, 16, 56, 56]          1,168
├─ReLU: 1-4                              [16, 16, 56, 56]          --
├─Conv2d: 1-5                            [16, 32, 28, 28]          4,640
├─ReLU: 1-6                              [16, 32, 28, 28]          --
├─AdaptiveAvgPool2d: 1-7                 [16, 32, 1, 1]            --
├─Flatten: 1-8                           [16, 32]                  --
├─Linear: 1-9                            [16, 128]                 4,224
├─ReLU: 1-10                             [16, 128]                 --
├─Linear: 1-11                           [16, 64]                  8,256
├─ReLU: 1-12                             [16, 64]                  --
├─L

In [551]:
# Sanity check: fit one fixed mini-batch over and over and watch the loss.
init_wandb()
one_batch = next(iter(train_loader))
images, labels = (t.to(device) for t in one_batch)

loss_values = []
net0.train()
for epoch in tqdm(range(num_epochs), desc='Training model'):
    # The same batch is used on every iteration, so "epoch" here counts
    # optimizer steps on that batch.
    optimizer.zero_grad()
    pred_logits = net0(images)
    loss = loss_fn(pred_logits, labels)
    loss.backward()
    optimizer.step()

    step_loss = loss.item()
    loss_values.append(step_loss)
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": step_loss
    })

wandb.finish()
print(loss_values)

Training model: 100%|██████████| 70/70 [00:00<00:00, 122.92it/s]
[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_loss,█▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▃▃▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▄

0,1
epoch,70.0
train_loss,0.62023


[1.0397367477416992, 1.3844525814056396, 0.9132133722305298, 0.9917631149291992, 0.9320054054260254, 0.8967487812042236, 0.8918147087097168, 0.8908231854438782, 0.8864966034889221, 0.866675853729248, 0.8517279028892517, 0.843503475189209, 0.8373140692710876, 0.8291265368461609, 0.820592999458313, 0.8140286207199097, 0.8116449117660522, 0.8017197251319885, 0.7955611944198608, 0.794366717338562, 0.7864888906478882, 0.7819048166275024, 0.7766679525375366, 0.7729988098144531, 0.7650701403617859, 0.7563906908035278, 0.748356819152832, 0.7373569011688232, 0.7254234552383423, 0.712177574634552, 0.6971168518066406, 0.689474880695343, 0.7014814019203186, 0.6802625060081482, 0.6465932130813599, 0.6284670829772949, 0.6367696523666382, 0.6178879141807556, 0.5825920104980469, 0.5750789046287537, 0.5960018634796143, 0.6158249378204346, 0.5524513721466064, 0.5541458129882812, 0.626255452632904, 0.5536177754402161, 0.5403266549110413, 0.5101431012153625, 0.4849310517311096, 0.48394930362701416, 0.4614

In [552]:
def _run_validation(network, loss_fn):
    """Return (mean per-batch loss, accuracy) of ``network`` on ``val_loader``.

    Reads the notebook-level ``val_loader`` and ``device``. Switches the
    network to eval mode and leaves it there.
    """
    network.eval()
    val_loss = 0.0
    val_batches = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, target_labels in val_loader:
            images = images.to(device)
            target_labels = target_labels.to(device)

            pred_logits = network(images)
            val_loss += loss_fn(pred_logits, target_labels).item()
            val_batches += 1

            # Predicted class = index of the largest logit in each row.
            predicted = pred_logits.argmax(dim=1)
            total += target_labels.size(0)
            correct += (predicted == target_labels).sum().item()

    return val_loss / val_batches, correct / total


def train_model(network, optimizer, loss_fn, enable_early_stopping=False, patience=5):
    """
    Train ``network`` on ``train_loader`` for up to ``num_epochs`` epochs.

    Per epoch, the mean of the per-batch training losses is logged to wandb
    and to the notebook logger. With early stopping enabled, validation loss
    and accuracy on ``val_loader`` are computed and logged as well, and
    training stops once the ``EarlyStopping`` helper sets ``early_stop``.

    Args:
        network: Model to train; updated in place.
        optimizer: Optimizer stepped once per training batch.
        loss_fn: Callable ``(logits, targets) -> scalar loss tensor``.
        enable_early_stopping: When True, run validation every epoch and use
            ``EarlyStopping``; on exit the weights held in its ``best_model``
            are loaded back into ``network`` (if it is not None).
        patience: Forwarded to ``EarlyStopping(patience=...)``.

    Reads ``num_epochs``, ``train_loader`` and ``device`` from the notebook
    namespace, plus ``val_loader`` and ``EarlyStopping`` when early stopping
    is enabled. NOTE(review): neither ``val_loader`` nor ``EarlyStopping`` is
    defined in the cells above this one; both must exist before this function
    is called with ``enable_early_stopping=True``.
    """
    torch.cuda.empty_cache()

    loss_values = []
    early_stopping = (
        EarlyStopping(patience=patience, verbose=True)
        if enable_early_stopping
        else None
    )

    network.train()
    for epoch in tqdm(range(num_epochs), desc='Training model'):
        # Validation (when enabled) leaves the network in eval mode, so switch
        # back to train mode at the start of every epoch.
        network.train()
        epoch_loss = 0.0
        num_batches = 0
        for images, target_labels in train_loader:
            images = images.to(device)
            target_labels = target_labels.to(device)

            pred_logits = network(images)
            loss = loss_fn(pred_logits, target_labels)
            epoch_loss += loss.item()
            num_batches += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        avg_train_loss = epoch_loss / num_batches
        loss_values.append(avg_train_loss)

        # Build the wandb payload and the log line once, extending both with
        # validation numbers only when early stopping is enabled.
        metrics = {
            "epoch": epoch + 1,
            "train_loss": avg_train_loss
        }
        message = f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}"
        if early_stopping is not None:
            avg_val_loss, val_accuracy = _run_validation(network, loss_fn)
            metrics["val_loss"] = avg_val_loss
            metrics["val_accuracy"] = val_accuracy
            message += f", Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}"
        wandb.log(metrics)
        logger.info(message)

        # Early stopping check
        if early_stopping is not None:
            early_stopping(avg_val_loss, network)
            if early_stopping.early_stop:
                logger.info("Early stopping triggered")
                # The best weights are restored once, after the loop, behind a
                # None check; the former extra reload here was unguarded.
                break

    # Load best model
    if early_stopping is not None and early_stopping.best_model is not None:
        network.load_state_dict(early_stopping.best_model)
        logger.info("Loaded best model weights")

    logger.info(loss_values)

In [553]:
def evaluate_model(network):
    """
    Score ``network`` on the test set, log the metrics, and close the wandb run.

    Predictions are the argmax over the network outputs for every batch of the
    notebook-level ``test_loader``. Ground-truth labels are taken from the
    same batches, so they stay aligned with the predictions regardless of how
    the loader is configured. Accuracy plus weighted precision, recall and F1
    are written to the notebook logger and to wandb, followed by
    ``wandb.finish()``.

    Args:
        network: Model to evaluate; switched to eval mode and left there.
    """
    # Test-set score
    true_labels = []
    predicted_labels = []
    network.eval()
    with torch.no_grad():
        for images, target_labels in test_loader:
            outputs = network(images.to(device))
            predicted_labels.extend(outputs.argmax(dim=1).cpu().tolist())
            true_labels.extend(target_labels.tolist())

    accuracy = float(np.mean(np.asarray(true_labels) == np.asarray(predicted_labels)))
    precision = precision_score(true_labels, predicted_labels, average='weighted')
    recall = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')

    logger.info(f"network accuracy: {accuracy * 100:.2f}%")
    logger.info(f"network precision: {precision * 100:.2f}%")
    logger.info(f"network recall: {recall * 100:.2f}%")
    logger.info(f"network F1 score: {f1 * 100:.2f}%")

    logger.info(f"Detailed Classification Report: \n{classification_report(true_labels, predicted_labels)}")

    # Log test metrics
    wandb.log({
        "test_accuracy": accuracy,
        "test_precision": precision,
        "test_recall": recall,
        "test_f1": f1
    })

    # Ends the active wandb run; a new run must be started before logging again.
    wandb.finish()