In [1]:
import torch

# Select the compute device first so the report below can branch on it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Number of GPU: ", torch.cuda.device_count())
if device.type == 'cuda':
    # get_device_name() initialises CUDA and raises when no GPU is usable,
    # so it is only queried when CUDA is actually available.
    print("GPU Name: ", torch.cuda.get_device_name())
else:
    print("GPU Name:  none (CUDA not available)")

print('Using device:', device)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 5060
Using device: cuda


In [33]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms.v2 as v2
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from torchvision import transforms
from sklearn.metrics import accuracy_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import OneCycleLR

class FER2013Dataset(Dataset):
    """Map-style dataset pairing pre-loaded images with their labels.

    Args:
        images: Indexable collection of samples.
        labels: Indexable collection of targets, aligned with ``images``.
        transform: Optional callable applied to each sample when it is
            fetched. Labels are returned untouched.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (taken from ``images``)."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` at ``idx``, transforming the sample if configured."""
        sample, target = self.images[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

def load_images_from_folder(base_dir, image_size=(48, 48)):
    """Load a folder-per-class image dataset as normalised grayscale arrays.

    Every sub-directory of ``base_dir`` is treated as one class; its name is
    used as the label for each image file it contains. Non-directory entries
    directly under ``base_dir`` are ignored.

    Args:
        base_dir: Directory containing one sub-directory per class.
        image_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float32 array of
        shape ``(N, height, width, 1)`` scaled to ``[0, 1]`` and ``labels`` is
        an array of the ``N`` class-folder names.

    Warns:
        UserWarning: if one or more files could not be decoded as images;
            those files are skipped.
    """
    import warnings  # local import keeps this fix self-contained

    width, height = image_size
    images, labels = [], []
    unreadable = 0

    # Sorted listings make the sample order independent of the filesystem,
    # so a seeded train/validation split is reproducible across machines.
    for label in sorted(os.listdir(base_dir)):
        folder = os.path.join(base_dir, label)
        if not os.path.isdir(folder):
            continue

        for img_name in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None; passing that on to cv2.resize would raise.
                unreadable += 1
                continue
            images.append(cv2.resize(img, image_size))
            labels.append(label)

    if unreadable:
        warnings.warn(
            f"Skipped {unreadable} unreadable image file(s) under {base_dir!r}",
            stacklevel=2,
        )

    images = np.array(images, dtype="float32") / 255.0
    # cv2.resize takes (width, height) but yields arrays shaped (height, width).
    images = images.reshape(-1, height, width, 1)

    return images, np.array(labels)

# Dataset root. Defaults to the kagglehub cache location this notebook was
# written against; set the FER2013_DIR environment variable to point at a
# different copy of the dataset without editing the notebook.
fer2013_root = os.environ.get(
    "FER2013_DIR",
    r"C:\Users\usEr\.cache\kagglehub\datasets\msambare\fer2013\versions\1",
)
train_dir = os.path.join(fer2013_root, "train")
test_dir  = os.path.join(fer2013_root, "test")

X_train, y_train = load_images_from_folder(train_dir)
X_test, y_test   = load_images_from_folder(test_dir)

# Map folder names to integer class ids. The encoder is fitted on the training
# labels only and then reused for the test labels.
label_encoder = LabelEncoder()
y_train_enc = label_encoder.fit_transform(y_train)
y_test_enc  = label_encoder.transform(y_test)

# One-hot copies of the labels. The width follows the number of classes the
# encoder actually found instead of a hard-coded 7.
num_label_classes = len(label_encoder.classes_)
y_train_cat = np.eye(num_label_classes)[y_train_enc]
y_test_cat  = np.eye(num_label_classes)[y_test_enc]

# Hold out 15% of the training data for validation, stratified by class so the
# class proportions are preserved. The fixed seed makes the split repeatable.
X_train, X_val, y_train_cat, y_val_cat, y_train_enc, y_val_enc = train_test_split(
    X_train,
    y_train_cat,
    y_train_enc,
    test_size=0.15,
    stratify=y_train_enc,
    random_state=42
)

class CNNModel(nn.Module):
    """Five-stage convolutional classifier for single-channel images.

    Each convolutional stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU,
    followed by 2x2 max-pooling (stages 1-4 only) and Dropout(0.25). The
    feature map is then globally average-pooled, flattened, and passed through
    two Linear -> BatchNorm1d -> ReLU -> Dropout(0.5) layers before a final
    linear layer that produces one raw score per class.

    Submodules are registered as ``conv1``/``bn1``/``pool1``/``dropout1`` ...
    ``fc1``/``bn_fc1``/``dropout_fc1`` ... ``fc3``, in that order, so
    ``state_dict`` keys are determined by those names.

    Args:
        num_classes: Size of the output layer.
    """

    # Channels entering/leaving each conv stage: 1 -> 128 -> 256 -> 512 -> 1024 -> 1024.
    _CONV_CHANNELS = (1, 128, 256, 512, 1024, 1024)
    # Feature widths through the hidden fully connected layers: 1024 -> 512 -> 256.
    _FC_WIDTHS = (1024, 512, 256)

    def __init__(self, num_classes=7):
        super().__init__()

        stage_count = len(self._CONV_CHANNELS) - 1
        channel_pairs = zip(self._CONV_CHANNELS, self._CONV_CHANNELS[1:])
        for stage, (c_in, c_out) in enumerate(channel_pairs, start=1):
            setattr(self, f"conv{stage}", nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(c_out))
            if stage < stage_count:
                # The last conv stage is not followed by max-pooling.
                setattr(self, f"pool{stage}", nn.MaxPool2d(2, 2))
            setattr(self, f"dropout{stage}", nn.Dropout(0.25))

        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

        width_pairs = zip(self._FC_WIDTHS, self._FC_WIDTHS[1:])
        for idx, (f_in, f_out) in enumerate(width_pairs, start=1):
            setattr(self, f"fc{idx}", nn.Linear(f_in, f_out))
            setattr(self, f"bn_fc{idx}", nn.BatchNorm1d(f_out))
            setattr(self, f"dropout_fc{idx}", nn.Dropout(0.5))

        self.fc3 = nn.Linear(self._FC_WIDTHS[-1], num_classes)

    def forward(self, x):
        """Return per-class scores of shape ``(batch, num_classes)`` for input ``x``."""
        stage_count = len(self._CONV_CHANNELS) - 1
        for stage in range(1, stage_count + 1):
            conv = getattr(self, f"conv{stage}")
            norm = getattr(self, f"bn{stage}")
            x = torch.relu(norm(conv(x)))
            if stage < stage_count:
                x = getattr(self, f"pool{stage}")(x)
            x = getattr(self, f"dropout{stage}")(x)

        # Collapse each channel's spatial map to one value, then drop the 1x1 dims.
        x = torch.flatten(self.global_avg_pool(x), 1)

        for idx in range(1, len(self._FC_WIDTHS)):
            dense = getattr(self, f"fc{idx}")
            norm = getattr(self, f"bn_fc{idx}")
            x = torch.relu(norm(dense(x)))
            x = getattr(self, f"dropout_fc{idx}")(x)

        return self.fc3(x)

def _tensor_finalizers():
    """Return fresh dtype-conversion and normalisation steps shared by all splits."""
    return [
        v2.ToDtype(torch.float32, scale=True),
        # (x - 0.5) / 0.5: the loader yields pixel values in [0, 1].
        v2.Normalize(mean=[0.5], std=[0.5]),
    ]


# Random augmentations, used for the training split only.
_train_augmentations = [
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomRotation(degrees=10),
    v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    v2.RandomErasing(p=0.3, scale=(0.02, 0.25)),
]

# Training pipeline: image conversion -> augmentation -> dtype/normalise.
train_transform = v2.Compose([v2.ToImage(), *_train_augmentations, *_tensor_finalizers()])

# Validation/test pipeline: the same conversion and normalisation, no augmentation.
val_test_transform = v2.Compose([v2.ToImage(), *_tensor_finalizers()])

# Wrap each split in a Dataset. Only the training split gets augmentation.
train_dataset = FER2013Dataset(X_train, y_train_enc, transform=train_transform)
val_dataset = FER2013Dataset(X_val, y_val_enc, transform=val_test_transform)
test_dataset = FER2013Dataset(X_test, y_test_enc, transform=val_test_transform)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Single source of truth for tensor/model placement: everything below goes
# through `device` rather than hard-coded .cuda() calls.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Per-class loss weights from scikit-learn's 'balanced' heuristic, computed on
# the training labels, to counter class imbalance.
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_enc), y=y_train_enc)
class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

# Build the model once (previously it was constructed twice and the first
# instance was immediately discarded).
model = CNNModel(num_classes=7).to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1, weight=class_weights)
optimizer = optim.AdamW(model.parameters(), lr=0.0008, weight_decay=1e-4)
# Multiply the learning rate by 0.6 when the validation loss has not improved
# for 3 epochs, never going below 1e-5.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.6,
    patience=3,
    min_lr=1e-5
)

num_epochs = 100
best_val_loss = float("inf")

# Early stopping: give up after `patience` consecutive epochs without a new
# best validation loss; `counter` tracks the current streak.
patience = 10
counter = 0

# Train with per-epoch validation, LR scheduling on the validation loss,
# best-checkpoint saving and early stopping.
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        # Move the batch to the device selected earlier instead of assuming CUDA.
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the largest score (argmax is not
        # differentiable, so no `.data` detour is needed).
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

    # ---- validation pass (no gradients, eval-mode layers) ----
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_accuracy = val_correct / val_total
    print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.4f}")

    # `val_loss` is the sum of per-batch losses. The same quantity is used for
    # the scheduler and for checkpoint selection, so comparisons are consistent.
    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        # New best: checkpoint the weights and reset the early-stopping streak.
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered")
            break

# Reload the best checkpoint saved during training and measure test accuracy.
# map_location remaps the saved tensors onto `device`, so a checkpoint written
# on a GPU can also be loaded on a CPU-only machine.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        # Use the selected device rather than assuming CUDA.
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {test_correct/test_total:.4f}")

Epoch [1/100], Loss: 2.1940, Accuracy: 0.0936
Validation Loss: 2.1209, Validation Accuracy: 0.0420
Epoch [2/100], Loss: 2.1396, Accuracy: 0.0909
Validation Loss: 2.0337, Validation Accuracy: 0.1145
Epoch [3/100], Loss: 2.0504, Accuracy: 0.1879
Validation Loss: 1.9518, Validation Accuracy: 0.3176
Epoch [4/100], Loss: 1.9657, Accuracy: 0.2944
Validation Loss: 1.8739, Validation Accuracy: 0.4189
Epoch [5/100], Loss: 1.9085, Accuracy: 0.3517
Validation Loss: 1.8237, Validation Accuracy: 0.4219
Epoch [6/100], Loss: 1.8708, Accuracy: 0.3752
Validation Loss: 1.8047, Validation Accuracy: 0.4423
Epoch [7/100], Loss: 1.8439, Accuracy: 0.4026
Validation Loss: 1.7560, Validation Accuracy: 0.4664
Epoch [8/100], Loss: 1.8267, Accuracy: 0.4212
Validation Loss: 1.7445, Validation Accuracy: 0.5103
Epoch [9/100], Loss: 1.8029, Accuracy: 0.4365
Validation Loss: 1.7126, Validation Accuracy: 0.5150
Epoch [10/100], Loss: 1.7904, Accuracy: 0.4462
Validation Loss: 1.6881, Validation Accuracy: 0.4899
Epoch [11

In [None]:
import time
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_weighted_only(model, test_loader, device):
    """Evaluate ``model`` on ``test_loader`` and report weighted-average metrics.

    Puts the model in eval mode, runs it over every batch with gradients
    disabled, and prints accuracy plus weighted precision, recall and F1
    together with the wall-clock time spent in the prediction loop.

    Args:
        model: Model to evaluate; called on each image batch.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        dict with keys ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'``
        and ``'inference_time'`` (seconds).
    """
    model.eval()

    predictions, targets = [], []

    # The timer spans the whole loop: batch loading, forward pass and the
    # device-to-host copy of the predictions.
    loop_started = time.time()
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            scores = model(batch_images.to(device))
            batch_pred = scores.argmax(dim=1)

            predictions.extend(batch_pred.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())
    elapsed = time.time() - loop_started

    y_true = np.array(targets)
    y_pred = np.array(predictions)

    # zero_division=0 scores a class with no predictions/samples as 0.
    report = {'accuracy': accuracy_score(y_true, y_pred)}
    weighted_scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for key, scorer in weighted_scorers:
        report[key] = scorer(y_true, y_pred, average='weighted', zero_division=0)
    report['inference_time'] = elapsed

    print("Classification Report")

    print(f"Accuracy           : {report['accuracy']:.4f}")
    print(f"Precision : {report['precision']:.4f}")
    print(f"Recall    : {report['recall']:.4f}")
    print(f"F1-Score  : {report['f1']:.4f}")
    print(f"Inference time     : {report['inference_time']:.2f} seconds")

    return report

# Restore the best checkpoint before the final evaluation. map_location remaps
# the saved tensors onto `device`, so the load also works when the checkpoint
# was written on a GPU and this cell runs on a CPU-only machine.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.to(device)
model.eval()

results = evaluate_weighted_only(model, test_loader, device)

Classification Report
Accuracy           : 0.6718
Precision : 0.6738
Recall    : 0.6718
F1-Score  : 0.6685
Inference time     : 1.69 seconds
