In [None]:
# Imports and config
import sys
import os

# For Jupyter notebooks: put the parent of the notebook's working directory on
# sys.path (presumably the project root that holds the `src` package imported below).
notebook_dir = os.getcwd()
project_root = os.path.dirname(notebook_dir)
# Guard the insert so re-running this cell does not stack duplicate entries on sys.path.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

import torch
# Turn on cuDNN's benchmark flag (per the PyTorch docs, cuDNN then times several
# convolution algorithms and keeps the fastest one).
# NOTE(review): this mainly pays off when input shapes stay constant between batches,
# and it can make runs non-deterministic — confirm both are acceptable here.
torch.backends.cudnn.benchmark = True

from src.data_loader import get_data_loaders
from src.train import train_model
from src.models.squeezenet import SqueezeNet, SqueezeNetBypass
from src.models.mobilenet import MobileNetV1

# Configuration: every value a reader might tune lives here.
DATA_DIR = "../data/FER2013"  # resolved relative to the notebook's working directory
BATCH_SIZE = 64
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_EPOCHS = 40  # change to 5-10 for quick runs
LR = 1e-3
SEED = 42

# Seed PyTorch's default generator so anything drawing from it (e.g. weight
# initialisation) starts from the same state on every Restart & Run All.
# NOTE(review): torch.backends.cudnn.benchmark is enabled above, so GPU runs may
# still differ slightly from run to run despite the fixed seed.
torch.manual_seed(SEED)

# Report the runtime environment so the recorded output documents where it ran.
print("Device:", DEVICE)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Build the train/test loaders from DATA_DIR, then report how many samples each holds.
print("\nLoading data...")
train_loader, test_loader = get_data_loaders(DATA_DIR, batch_size=BATCH_SIZE)
print(
    f"Training samples: {len(train_loader.dataset)}",
    f"Testing samples: {len(test_loader.dataset)}",
    sep="\n",
)

Device: cuda
PyTorch version: 2.5.1+cu121
CUDA available: True

Loading data...
Training samples: 28709
Testing samples: 7178


In [5]:
# Make sure one checkpoint folder exists per architecture (no error if already present).
for save_dir in (
    "../models/mobilenet",
    "../models/squeezenet",
    "../models/shiftnet",
):
    os.makedirs(save_dir, exist_ok=True)

In [6]:
# Train SqueezeNet from scratch (7 output classes, single-channel input)
print("\nInitializing SqueezeNet model...")
squeezenet = SqueezeNet(num_classes=7, in_channels=1)

# Model size summary: tally all parameter elements and, separately, the ones
# that require gradients.
total_params = 0
trainable_params = 0
for param in squeezenet.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Run training.
# NOTE(review): test_loader is passed in the slot train_model logs as "Validation",
# and the "Best model saved" checkpoints track that accuracy — confirm that selecting
# the best checkpoint on the test split is intended (it makes the reported best
# accuracy optimistic).
print("\nStarting training...")
squeezenet_trained = train_model(
    squeezenet, train_loader, test_loader,
    device=DEVICE, num_epochs=NUM_EPOCHS, lr=LR,
)
# Model is automatically saved to models/<classname>/best_model.pth by train_model.
# NOTE(review): that path is relative, while the save folders are created under
# "../models" — confirm both refer to the same directory.

print("\nTraining complete!")


Initializing SqueezeNet model...
Total parameters: 735,559
Trainable parameters: 735,559

Starting training...

Starting Training: SqueezeNet


Epoch [1/40]


Training: 100%|██████████████████████████| 449/449 [01:24<00:00,  5.33it/s, loss=1.8270, acc=22.32%]
Validation: 100%|████████████████████████| 113/113 [00:32<00:00,  3.51it/s, loss=1.9371, acc=27.67%]



────────────────────────────────────────────────────────────
  Train Loss: 1.9782 | Train Acc: 22.32%
  Val Loss:   1.8080 | Val Acc:   27.67%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 27.67%

Epoch [2/40]


Training: 100%|██████████████████████████| 449/449 [01:16<00:00,  5.89it/s, loss=1.7140, acc=29.43%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.60it/s, loss=1.0350, acc=35.72%]



────────────────────────────────────────────────────────────
  Train Loss: 1.7676 | Train Acc: 29.43%
  Val Loss:   1.6251 | Val Acc:   35.72%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 35.72%

Epoch [3/40]


Training: 100%|██████████████████████████| 449/449 [01:34<00:00,  4.76it/s, loss=1.5731, acc=35.92%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 12.85it/s, loss=0.6481, acc=40.82%]



────────────────────────────────────────────────────────────
  Train Loss: 1.6303 | Train Acc: 35.92%
  Val Loss:   1.5254 | Val Acc:   40.82%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 40.82%

Epoch [4/40]


Training: 100%|██████████████████████████| 449/449 [01:32<00:00,  4.83it/s, loss=1.4151, acc=38.83%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.39it/s, loss=0.6830, acc=41.77%]



────────────────────────────────────────────────────────────
  Train Loss: 1.5659 | Train Acc: 38.83%
  Val Loss:   1.4965 | Val Acc:   41.77%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 41.77%

Epoch [5/40]


Training: 100%|██████████████████████████| 449/449 [01:32<00:00,  4.86it/s, loss=1.5621, acc=42.03%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.04it/s, loss=0.7818, acc=44.96%]



────────────────────────────────────────────────────────────
  Train Loss: 1.5044 | Train Acc: 42.03%
  Val Loss:   1.4297 | Val Acc:   44.96%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 44.96%

Epoch [6/40]


Training: 100%|██████████████████████████| 449/449 [01:36<00:00,  4.65it/s, loss=1.7580, acc=44.21%]
Validation: 100%|████████████████████████| 113/113 [00:06<00:00, 18.08it/s, loss=0.7435, acc=45.95%]



────────────────────────────────────────────────────────────
  Train Loss: 1.4509 | Train Acc: 44.21%
  Val Loss:   1.4008 | Val Acc:   45.95%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 45.95%

Epoch [7/40]


Training: 100%|██████████████████████████| 449/449 [01:28<00:00,  5.07it/s, loss=1.3100, acc=45.58%]
Validation: 100%|████████████████████████| 113/113 [00:05<00:00, 18.85it/s, loss=0.6250, acc=48.75%]



────────────────────────────────────────────────────────────
  Train Loss: 1.4231 | Train Acc: 45.58%
  Val Loss:   1.3244 | Val Acc:   48.75%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 48.75%

Epoch [8/40]


Training: 100%|██████████████████████████| 449/449 [01:30<00:00,  4.99it/s, loss=1.3653, acc=48.00%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.51it/s, loss=0.6752, acc=49.40%]



────────────────────────────────────────────────────────────
  Train Loss: 1.3632 | Train Acc: 48.00%
  Val Loss:   1.3077 | Val Acc:   49.40%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 49.40%

Epoch [9/40]


Training: 100%|██████████████████████████| 449/449 [01:34<00:00,  4.74it/s, loss=1.2612, acc=49.01%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.50it/s, loss=0.8590, acc=51.45%]



────────────────────────────────────────────────────────────
  Train Loss: 1.3377 | Train Acc: 49.01%
  Val Loss:   1.2775 | Val Acc:   51.45%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 51.45%

Epoch [10/40]


Training: 100%|██████████████████████████| 449/449 [01:30<00:00,  4.98it/s, loss=1.4076, acc=50.95%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.36it/s, loss=0.3808, acc=51.69%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2984 | Train Acc: 50.95%
  Val Loss:   1.2591 | Val Acc:   51.69%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 51.69%

Epoch [11/40]


Training: 100%|██████████████████████████| 449/449 [01:36<00:00,  4.63it/s, loss=1.2746, acc=52.04%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.44it/s, loss=0.1694, acc=53.05%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2676 | Train Acc: 52.04%
  Val Loss:   1.2241 | Val Acc:   53.05%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 53.05%

Epoch [12/40]


Training: 100%|██████████████████████████| 449/449 [01:32<00:00,  4.85it/s, loss=1.1672, acc=52.15%]
Validation: 100%|████████████████████████| 113/113 [00:06<00:00, 18.31it/s, loss=1.0938, acc=52.74%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2511 | Train Acc: 52.15%
  Val Loss:   1.2363 | Val Acc:   52.74%
────────────────────────────────────────────────────────────

Epoch [13/40]


Training: 100%|██████████████████████████| 449/449 [01:34<00:00,  4.73it/s, loss=1.1249, acc=54.09%]
Validation: 100%|████████████████████████| 113/113 [00:04<00:00, 24.34it/s, loss=0.5154, acc=53.09%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2161 | Train Acc: 54.09%
  Val Loss:   1.2305 | Val Acc:   53.09%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 53.09%

Epoch [14/40]


Training: 100%|██████████████████████████| 449/449 [01:36<00:00,  4.66it/s, loss=1.2505, acc=54.78%]
Validation: 100%|████████████████████████| 113/113 [00:05<00:00, 20.89it/s, loss=0.4318, acc=55.84%]



────────────────────────────────────────────────────────────
  Train Loss: 1.2056 | Train Acc: 54.78%
  Val Loss:   1.1595 | Val Acc:   55.84%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 55.84%

Epoch [15/40]


Training: 100%|██████████████████████████| 449/449 [01:36<00:00,  4.64it/s, loss=1.3138, acc=55.18%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.41it/s, loss=0.4076, acc=56.67%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1824 | Train Acc: 55.18%
  Val Loss:   1.1437 | Val Acc:   56.67%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 56.67%

Epoch [16/40]


Training: 100%|██████████████████████████| 449/449 [01:34<00:00,  4.76it/s, loss=1.1248, acc=56.23%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.39it/s, loss=0.2369, acc=55.28%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1638 | Train Acc: 56.23%
  Val Loss:   1.1669 | Val Acc:   55.28%
────────────────────────────────────────────────────────────

Epoch [17/40]


Training: 100%|██████████████████████████| 449/449 [01:37<00:00,  4.62it/s, loss=1.2170, acc=56.71%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.26it/s, loss=0.7474, acc=55.88%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1434 | Train Acc: 56.71%
  Val Loss:   1.1585 | Val Acc:   55.88%
────────────────────────────────────────────────────────────

Epoch [18/40]


Training: 100%|██████████████████████████| 449/449 [01:31<00:00,  4.89it/s, loss=0.9505, acc=57.45%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 12.84it/s, loss=0.3540, acc=57.50%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1254 | Train Acc: 57.45%
  Val Loss:   1.1102 | Val Acc:   57.50%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 57.50%

Epoch [19/40]


Training: 100%|██████████████████████████| 449/449 [01:33<00:00,  4.81it/s, loss=1.0338, acc=58.11%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.18it/s, loss=0.2795, acc=57.30%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1132 | Train Acc: 58.11%
  Val Loss:   1.1267 | Val Acc:   57.30%
────────────────────────────────────────────────────────────

Epoch [20/40]


Training: 100%|██████████████████████████| 449/449 [01:30<00:00,  4.95it/s, loss=1.1407, acc=58.55%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.46it/s, loss=0.2195, acc=57.79%]



────────────────────────────────────────────────────────────
  Train Loss: 1.1024 | Train Acc: 58.55%
  Val Loss:   1.1109 | Val Acc:   57.79%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 57.79%

Epoch [21/40]


Training: 100%|██████████████████████████| 449/449 [01:36<00:00,  4.65it/s, loss=1.2150, acc=58.85%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.43it/s, loss=0.4081, acc=57.89%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0887 | Train Acc: 58.85%
  Val Loss:   1.1145 | Val Acc:   57.89%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 57.89%

Epoch [22/40]


Training: 100%|██████████████████████████| 449/449 [01:30<00:00,  4.99it/s, loss=0.9005, acc=59.74%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.62it/s, loss=0.4859, acc=57.79%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0718 | Train Acc: 59.74%
  Val Loss:   1.1254 | Val Acc:   57.79%
────────────────────────────────────────────────────────────

Epoch [23/40]


Training: 100%|██████████████████████████| 449/449 [01:31<00:00,  4.89it/s, loss=1.1918, acc=60.26%]
Validation: 100%|████████████████████████| 113/113 [00:05<00:00, 20.09it/s, loss=0.2431, acc=57.65%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0545 | Train Acc: 60.26%
  Val Loss:   1.1140 | Val Acc:   57.65%
────────────────────────────────────────────────────────────

Epoch [24/40]


Training: 100%|██████████████████████████| 449/449 [01:35<00:00,  4.71it/s, loss=0.8790, acc=60.87%]
Validation: 100%|████████████████████████| 113/113 [00:07<00:00, 15.44it/s, loss=0.2214, acc=58.40%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0398 | Train Acc: 60.87%
  Val Loss:   1.1012 | Val Acc:   58.40%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 58.40%

Epoch [25/40]


Training: 100%|██████████████████████████| 449/449 [01:36<00:00,  4.66it/s, loss=0.8294, acc=61.21%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.74it/s, loss=0.1896, acc=59.01%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0334 | Train Acc: 61.21%
  Val Loss:   1.0835 | Val Acc:   59.01%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 59.01%

Epoch [26/40]


Training: 100%|██████████████████████████| 449/449 [01:33<00:00,  4.83it/s, loss=1.2447, acc=61.12%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.05it/s, loss=0.2364, acc=59.15%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0255 | Train Acc: 61.12%
  Val Loss:   1.0880 | Val Acc:   59.15%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 59.15%

Epoch [27/40]


Training: 100%|██████████████████████████| 449/449 [01:34<00:00,  4.76it/s, loss=1.2900, acc=61.93%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.52it/s, loss=0.2272, acc=60.66%]



────────────────────────────────────────────────────────────
  Train Loss: 1.0107 | Train Acc: 61.93%
  Val Loss:   1.0557 | Val Acc:   60.66%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 60.66%

Epoch [28/40]


Training: 100%|██████████████████████████| 449/449 [01:31<00:00,  4.88it/s, loss=0.9934, acc=62.36%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 12.98it/s, loss=0.3883, acc=60.37%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9984 | Train Acc: 62.36%
  Val Loss:   1.0730 | Val Acc:   60.37%
────────────────────────────────────────────────────────────

Epoch [29/40]


Training: 100%|██████████████████████████| 449/449 [01:34<00:00,  4.75it/s, loss=0.9891, acc=63.14%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.21it/s, loss=0.3102, acc=60.88%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9888 | Train Acc: 63.14%
  Val Loss:   1.0479 | Val Acc:   60.88%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 60.88%

Epoch [30/40]


Training: 100%|██████████████████████████| 449/449 [01:26<00:00,  5.19it/s, loss=1.1458, acc=63.37%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 12.63it/s, loss=0.4813, acc=60.56%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9774 | Train Acc: 63.37%
  Val Loss:   1.0591 | Val Acc:   60.56%
────────────────────────────────────────────────────────────

Epoch [31/40]


Training: 100%|██████████████████████████| 449/449 [01:38<00:00,  4.55it/s, loss=0.9740, acc=63.77%]
Validation: 100%|████████████████████████| 113/113 [00:09<00:00, 12.51it/s, loss=0.5550, acc=61.47%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9623 | Train Acc: 63.77%
  Val Loss:   1.0421 | Val Acc:   61.47%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 61.47%

Epoch [32/40]


Training: 100%|██████████████████████████| 449/449 [01:31<00:00,  4.90it/s, loss=0.8131, acc=64.04%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.44it/s, loss=0.1822, acc=61.23%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9544 | Train Acc: 64.04%
  Val Loss:   1.0394 | Val Acc:   61.23%
────────────────────────────────────────────────────────────

Epoch [33/40]


Training: 100%|██████████████████████████| 449/449 [01:35<00:00,  4.69it/s, loss=0.7412, acc=64.48%]
Validation: 100%|████████████████████████| 113/113 [00:06<00:00, 16.14it/s, loss=0.4135, acc=61.15%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9416 | Train Acc: 64.48%
  Val Loss:   1.0554 | Val Acc:   61.15%
────────────────────────────────────────────────────────────

Epoch [34/40]


Training: 100%|██████████████████████████| 449/449 [01:23<00:00,  5.37it/s, loss=0.8988, acc=64.99%]
Validation: 100%|████████████████████████| 113/113 [00:04<00:00, 22.91it/s, loss=0.3116, acc=60.66%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9332 | Train Acc: 64.99%
  Val Loss:   1.0590 | Val Acc:   60.66%
────────────────────────────────────────────────────────────

Epoch [35/40]


Training: 100%|██████████████████████████| 449/449 [01:25<00:00,  5.24it/s, loss=1.1823, acc=65.27%]
Validation: 100%|████████████████████████| 113/113 [00:05<00:00, 22.05it/s, loss=0.2938, acc=61.60%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9242 | Train Acc: 65.27%
  Val Loss:   1.0544 | Val Acc:   61.60%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 61.60%

Epoch [36/40]


Training: 100%|██████████████████████████| 449/449 [01:16<00:00,  5.89it/s, loss=0.6458, acc=65.68%]
Validation: 100%|████████████████████████| 113/113 [00:04<00:00, 23.62it/s, loss=0.1467, acc=61.49%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9103 | Train Acc: 65.68%
  Val Loss:   1.0397 | Val Acc:   61.49%
────────────────────────────────────────────────────────────

Epoch [37/40]


Training: 100%|██████████████████████████| 449/449 [01:23<00:00,  5.39it/s, loss=0.8062, acc=66.20%]
Validation: 100%|████████████████████████| 113/113 [00:05<00:00, 19.38it/s, loss=0.1839, acc=61.33%]



────────────────────────────────────────────────────────────
  Train Loss: 0.9050 | Train Acc: 66.20%
  Val Loss:   1.0545 | Val Acc:   61.33%
────────────────────────────────────────────────────────────

Epoch [38/40]


Training: 100%|██████████████████████████| 449/449 [01:28<00:00,  5.08it/s, loss=0.6811, acc=66.80%]
Validation: 100%|████████████████████████| 113/113 [00:05<00:00, 19.70it/s, loss=0.2514, acc=62.62%]



────────────────────────────────────────────────────────────
  Train Loss: 0.8905 | Train Acc: 66.80%
  Val Loss:   1.0342 | Val Acc:   62.62%
────────────────────────────────────────────────────────────
  ✓ Best model saved! Accuracy: 62.62%

Epoch [39/40]


Training: 100%|██████████████████████████| 449/449 [01:11<00:00,  6.27it/s, loss=0.8366, acc=67.13%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.58it/s, loss=0.2964, acc=61.28%]



────────────────────────────────────────────────────────────
  Train Loss: 0.8809 | Train Acc: 67.13%
  Val Loss:   1.0578 | Val Acc:   61.28%
────────────────────────────────────────────────────────────

Epoch [40/40]


Training: 100%|██████████████████████████| 449/449 [01:26<00:00,  5.18it/s, loss=0.9062, acc=67.40%]
Validation: 100%|████████████████████████| 113/113 [00:08<00:00, 13.69it/s, loss=0.1584, acc=61.54%]


────────────────────────────────────────────────────────────
  Train Loss: 0.8726 | Train Acc: 67.40%
  Val Loss:   1.0580 | Val Acc:   61.54%
────────────────────────────────────────────────────────────

Training Completed!
Best Validation Accuracy: 62.62%
Model saved at: models/squeezenet/best_model.pth


Training complete!



