In [None]:
# Imports and config
import sys
import os

# Make the project's `src` package importable. The project root is taken to be
# the parent of the current working directory, so this only works when the
# kernel's working directory sits directly beneath the root — TODO confirm for
# other launch locations.
notebook_dir = os.getcwd()
project_root = os.path.dirname(notebook_dir)
sys.path.insert(0, project_root)

import torch
# Enable cuDNN benchmark mode. Per the PyTorch docs this lets cuDNN pick
# convolution algorithms by timing them, which can make runs non-deterministic.
torch.backends.cudnn.benchmark = True

# Project-local data pipeline, training loop and model definitions
# (resolved through the sys.path entry added above).
from src.data_loader import get_data_loaders
from src.train import train_model
from src.models.squeezenet import SqueezeNet
from src.models.mobilenet import MobileNetV1, MobileNetV2, MobileNetV3
from src.models.shiftnet import ShiftNet
from src.models.shufflenet import ShuffleNetV2

# Configuration — the knobs shared by every training cell below.
DATA_DIR = "../data/FER2013"  # dataset root, relative to the working directory
BATCH_SIZE = 64
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_EPOCHS = 40
LR = 1e-3  # default learning rate handed to train_model
SEED = 42

# Seed PyTorch's random number generators before any loader or model is built,
# so weight initialisation and torch-driven shuffling start from a known state.
# NOTE(review): cuDNN benchmark mode is enabled above, and any use of Python's
# `random` or NumPy inside the data pipeline is not covered by this call —
# results may still vary slightly between runs.
torch.manual_seed(SEED)

print("Device:", DEVICE)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Load data: get_data_loaders returns the (train, test) loader pair used by
# every training cell.
print("\nLoading data...")
train_loader, test_loader = get_data_loaders(DATA_DIR, batch_size=BATCH_SIZE)
print(f"Training samples: {len(train_loader.dataset)}")
print(f"Testing samples: {len(test_loader.dataset)}")

Device: cuda
PyTorch version: 2.5.1+cu121
CUDA available: True

Loading data...
Training samples: 28709
Testing samples: 7178


In [None]:
# Prepare model save folders — one per architecture family trained in this
# notebook. "shufflenet" was previously missing even though ShuffleNetV2 is
# trained and checkpointed below.
# NOTE(review): the checkpoint path used by train_model is not visible here;
# confirm it matches these folders.
for family in ("mobilenet", "squeezenet", "shiftnet", "shufflenet"):
    # exist_ok=True keeps the cell safe to re-run.
    os.makedirs(f"../models/{family}", exist_ok=True)

In [None]:
# SqueezeNet, trained from scratch: single-channel input, 7 output classes.
print("\nInitializing SqueezeNet model...")
squeezenet = SqueezeNet(in_channels=1, num_classes=7)

# Size report: count the trainable weights, then every weight.
trainable_params = sum(w.numel() for w in squeezenet.parameters() if w.requires_grad)
total_params = sum(w.numel() for w in squeezenet.parameters())
print(
    f"Total parameters: {total_params:,}",
    f"Trainable parameters: {trainable_params:,}",
    sep="\n",
)

# Run the shared training loop with the notebook-wide settings.
print("\nStarting training...")
squeezenet_trained = train_model(
    model=squeezenet,
    train_loader=train_loader,
    test_loader=test_loader,
    device=DEVICE,
    num_epochs=NUM_EPOCHS,
    lr=LR,
)

print("\nTraining complete!")

In [None]:
# MobileNetV1, trained from scratch: single-channel input, 7 output classes.
print("\nInitializing MobileNetV1 model...")
mobilenetv1 = MobileNetV1(in_channels=1, num_classes=7)

# Size report: overall weight count and the subset that receives gradients.
total_params, trainable_params = (
    sum(t.numel() for t in mobilenetv1.parameters()),
    sum(t.numel() for t in mobilenetv1.parameters() if t.requires_grad),
)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Run the shared training loop with the notebook-wide settings.
print("\nStarting training...")
mobilenetv1_trained = train_model(
    mobilenetv1, train_loader, test_loader,
    device=DEVICE, num_epochs=NUM_EPOCHS, lr=LR,
)

print("\nTraining complete!")

In [None]:
# MobileNetV2, trained from scratch: single-channel input, 7 output classes.
print("\nInitializing MobileNetV2 model...")
mobilenetv2 = MobileNetV2(in_channels=1, num_classes=7)

# Size report, printed as one two-line message.
trainable_params = sum(
    tensor.numel() for tensor in mobilenetv2.parameters() if tensor.requires_grad
)
total_params = sum(tensor.numel() for tensor in mobilenetv2.parameters())
print(
    f"Total parameters: {total_params:,}",
    f"Trainable parameters: {trainable_params:,}",
    sep="\n",
)

# Hand the model to the shared training loop.
print("\nStarting training...")
mobilenetv2_trained = train_model(
    mobilenetv2,
    train_loader,
    test_loader,
    device=DEVICE,
    num_epochs=NUM_EPOCHS,
    lr=LR,
)

print("\nTraining complete!")

In [None]:
# Finetuning MobileNetV3
# NOTE(review): no pretrained weights are loaded in this cell — confirm whether
# MobileNetV3 loads them internally, otherwise this is training from scratch.
print("\nInitializing MobileNetV3 model...")
mobilenetv3 = MobileNetV3(in_channels=1, num_classes=7)

# Size report: overall weight count and the subset that receives gradients.
total_params, trainable_params = (
    sum(w.numel() for w in mobilenetv3.parameters()),
    sum(w.numel() for w in mobilenetv3.parameters() if w.requires_grad),
)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# This model deliberately bypasses the shared LR constant and trains with 1e-4.
print("\nStarting training...")
mobilenetv3_trained = train_model(
    model=mobilenetv3,
    train_loader=train_loader,
    test_loader=test_loader,
    device=DEVICE,
    num_epochs=NUM_EPOCHS,
    lr=1e-4,
)

print("\nTraining complete!")

In [None]:
# ShiftNet, trained from scratch with 7 output classes.
# NOTE(review): unlike the other models in this notebook, no in_channels is
# passed — confirm ShiftNet's default matches the single-channel input.
print("\nInitializing ShiftNet model...")
shiftnet = ShiftNet(num_classes=7)

# Size report: count the trainable weights, then every weight.
trainable_params = sum(w.numel() for w in shiftnet.parameters() if w.requires_grad)
total_params = sum(w.numel() for w in shiftnet.parameters())
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Run the shared training loop with the notebook-wide settings.
print("\nStarting training...")
shiftnet_trained = train_model(
    model=shiftnet,
    train_loader=train_loader,
    test_loader=test_loader,
    device=DEVICE,
    num_epochs=NUM_EPOCHS,
    lr=LR,
)

print("\nTraining complete!")

In [None]:
# ShuffleNetV2 ("1.0x" size variant), trained from scratch: single-channel
# input, 7 output classes.
print("\nInitializing ShuffleNet model...")
shufflenet = ShuffleNetV2(model_size="1.0x", in_channels=1, num_classes=7)

# Size report, printed as one two-line message.
total_params, trainable_params = (
    sum(t.numel() for t in shufflenet.parameters()),
    sum(t.numel() for t in shufflenet.parameters() if t.requires_grad),
)
print(
    f"Total parameters: {total_params:,}",
    f"Trainable parameters: {trainable_params:,}",
    sep="\n",
)

# Hand the model to the shared training loop.
print("\nStarting training...")
shufflenet_trained = train_model(
    model=shufflenet,
    train_loader=train_loader,
    test_loader=test_loader,
    device=DEVICE,
    num_epochs=NUM_EPOCHS,
    lr=LR,
)

print("\nTraining complete!")


Initializing ShuffleNet model...
Total parameters: 1,260,347
Trainable parameters: 1,260,347

Starting training...

Starting Training: ShuffleNetV2


Epoch [1/40]


Training: 100%|██████████████████████████| 449/449 [03:28<00:00,  2.15it/s, loss=1.7312, acc=28.12%]
Validation: 100%|████████████████████████| 113/113 [00:48<00:00,  2.35it/s, loss=1.3935, acc=35.86%]



────────────────────────────────────────────────────────────
  Train Loss: 1.7588 | Train Acc: 28.12%
  Val Loss:   1.6191 | Val Acc:   35.86%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 35.86%

Epoch [2/40]


Training: 100%|██████████████████████████| 449/449 [03:10<00:00,  2.36it/s, loss=1.5789, acc=37.00%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.44it/s, loss=1.3495, acc=38.81%]



────────────────────────────────────────────────────────────
  Train Loss: 1.6000 | Train Acc: 37.00%
  Val Loss:   1.5331 | Val Acc:   38.81%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 38.81%

Epoch [3/40]


Training: 100%|██████████████████████████| 449/449 [03:09<00:00,  2.37it/s, loss=1.5201, acc=41.10%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.45it/s, loss=0.8012, acc=44.41%]



────────────────────────────────────────────────────────────
  Train Loss: 1.5123 | Train Acc: 41.10%
  Val Loss:   1.4355 | Val Acc:   44.41%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 44.41%

Epoch [4/40]


Training: 100%|██████████████████████████| 449/449 [03:09<00:00,  2.37it/s, loss=1.2638, acc=44.28%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.36it/s, loss=0.6875, acc=46.52%]



────────────────────────────────────────────────────────────
  Train Loss: 1.4447 | Train Acc: 44.28%
  Val Loss:   1.3709 | Val Acc:   46.52%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 46.52%

Epoch [5/40]


Training: 100%|██████████████████████████| 449/449 [03:09<00:00,  2.37it/s, loss=1.4729, acc=47.22%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.48it/s, loss=1.1686, acc=48.20%]



────────────────────────────────────────────────────────────
  Train Loss: 1.3814 | Train Acc: 47.22%
  Val Loss:   1.3493 | Val Acc:   48.20%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 48.20%

Epoch [6/40]


Training: 100%|██████████████████████████| 449/449 [03:08<00:00,  2.38it/s, loss=1.2072, acc=48.63%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.47it/s, loss=0.7190, acc=50.88%]



────────────────────────────────────────────────────────────
  Train Loss: 1.3398 | Train Acc: 48.63%
  Val Loss:   1.2871 | Val Acc:   50.88%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 50.88%

Epoch [7/40]


Training: 100%|██████████████████████████| 449/449 [03:09<00:00,  2.37it/s, loss=1.4065, acc=50.21%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.31it/s, loss=0.4938, acc=50.70%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2981 | Train Acc: 50.21%
  Val Loss:   1.2777 | Val Acc:   50.70%
────────────────────────────────────────────────────────────

Epoch [8/40]


Training: 100%|██████████████████████████| 449/449 [03:09<00:00,  2.37it/s, loss=1.2801, acc=51.45%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.31it/s, loss=0.6344, acc=52.83%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2661 | Train Acc: 51.45%
  Val Loss:   1.2403 | Val Acc:   52.83%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 52.83%

Epoch [9/40]


Training: 100%|██████████████████████████| 449/449 [03:10<00:00,  2.36it/s, loss=1.0749, acc=53.20%]
Validation: 100%|████████████████████████| 113/113 [00:14<00:00,  8.01it/s, loss=0.4442, acc=53.02%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2254 | Train Acc: 53.20%
  Val Loss:   1.2213 | Val Acc:   53.02%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 53.02%

Epoch [10/40]


Training: 100%|██████████████████████████| 449/449 [03:11<00:00,  2.34it/s, loss=1.3494, acc=54.31%]
Validation: 100%|████████████████████████| 113/113 [00:14<00:00,  7.90it/s, loss=0.6153, acc=55.27%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1970 | Train Acc: 54.31%
  Val Loss:   1.1756 | Val Acc:   55.27%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 55.27%

Epoch [11/40]


Training: 100%|██████████████████████████| 449/449 [03:13<00:00,  2.32it/s, loss=1.3345, acc=55.37%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.33it/s, loss=0.4903, acc=55.06%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1678 | Train Acc: 55.37%
  Val Loss:   1.1723 | Val Acc:   55.06%
────────────────────────────────────────────────────────────

Epoch [12/40]


Training: 100%|██████████████████████████| 449/449 [03:10<00:00,  2.36it/s, loss=1.1473, acc=56.13%]
Validation: 100%|████████████████████████| 113/113 [00:13<00:00,  8.34it/s, loss=0.3659, acc=55.46%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1460 | Train Acc: 56.13%
  Val Loss:   1.1657 | Val Acc:   55.46%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 55.46%

Epoch [13/40]


Training: 100%|██████████████████████████| 449/449 [03:11<00:00,  2.34it/s, loss=1.1769, acc=57.41%]
Validation: 100%|████████████████████████| 113/113 [00:14<00:00,  7.62it/s, loss=0.3692, acc=56.85%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1256 | Train Acc: 57.41%
  Val Loss:   1.1452 | Val Acc:   56.85%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 56.85%

Epoch [14/40]


Training: 100%|██████████████████████████| 449/449 [03:18<00:00,  2.26it/s, loss=1.1136, acc=57.64%]
Validation:   0%|                                                           | 0/113 [00:00<?, ?it/s]