In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem.
# On a first mount Colab asks you to authorize access to your Google account;
# follow the prompt shown in the cell output. If the drive is already mounted,
# the call only reports that and leaves the mount in place.
drive.mount('/content/drive')

# Both hints are emitted with a single call; the output is unchanged.
print(
    "\nGoogle Drive is now mounted at /content/drive",
    "You can access your files using paths like: /content/drive/MyDrive/YOUR_FOLDER_NAME",
    sep="\n",
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Google Drive is now mounted at /content/drive
You can access your files using paths like: /content/drive/MyDrive/YOUR_FOLDER_NAME


In [None]:
# ==============================================================================
# 1. SETUP & IMPORTS
# ==============================================================================
import os
import time
import copy
import re
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# ----------------- CONSTANTS -----------------
# Side length (in pixels) of the square images fed to the network.
# 224 is the resolution the pretrained EfficientNet-B0 weights are used at here.
IMAGE_SIZE = 224

# ImageNet per-channel means and standard deviations for normalization
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Fixed seed so weight initialisation, shuffling, dropout and the random
# augmentations start from the same state on every "Restart & Run All".
# NOTE(review): GPU kernels may still be non-deterministic; this only pins the RNG.
SEED = 42
torch.manual_seed(SEED)

# Use the first GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(f"Device: {device}")

Device: cpu


In [None]:
# ==============================================================================
# 2. METADATA AND MODEL CONFIGURATION
# ==============================================================================

# Game titles the classifier distinguishes, one entry per class.
GAME_CLASSES = [
    'Hades',
    'Undertale',
    'Hollow Knight',
    'Among Us',
    'Persona 5 Royal',
    'Stray',
    'God of War',
    'Celeste',
    'Yakuza 0',
    'Red Dead Redemption 2',
    'Grand Theft Auto V',
    'Cyberpunk 2077',
    'Persona 4 Golden',
    'Outer Wilds',
    'Stardew Valley',
    'Devil May Cry 5',
    'Resident Evil 4',
    'Inscryption',
    'Terraria',
]

# Derived from the list so the count can never drift from its contents
NUM_GAME_CLASSES = len(GAME_CLASSES)

# Genre assigned to each game; this supplies the targets for the second head
GAME_TO_GENRE_MAP = {
    'Hades': 'RPG',
    'Undertale': 'RPG',
    'Hollow Knight': 'Platform',
    'Among Us': 'Strategy',
    'Persona 5 Royal': 'RPG',
    'Stray': 'Adventure',
    'God of War': 'Adventure',
    'Celeste': 'Platform',
    'Yakuza 0': 'Adventure',
    'Red Dead Redemption 2': 'Adventure',
    'Grand Theft Auto V': 'Adventure',
    'Cyberpunk 2077': 'RPG',
    'Persona 4 Golden': 'RPG',
    'Outer Wilds': 'Adventure',
    'Stardew Valley': 'RPG',
    'Devil May Cry 5': 'Brawler',
    'Resident Evil 4': 'RPG',
    'Inscryption': 'Strategy',
    'Terraria': 'Adventure',
}

# Distinct genres in alphabetical order; a genre's position is its class index.
# The number of genres is whatever the mapping above yields.
GENRE_CLASSES = sorted(set(GAME_TO_GENRE_MAP.values()))
NUM_GENRE_CLASSES = len(GENRE_CLASSES)
GENRE_TO_IDX = dict(zip(GENRE_CLASSES, range(NUM_GENRE_CLASSES)))

print(f"Game Classes: {NUM_GAME_CLASSES}")
print(f"Genre Classes: {NUM_GENRE_CLASSES} ({GENRE_CLASSES})")

Game Classes: 19
Genre Classes: 5 (['Adventure', 'Brawler', 'Platform', 'RPG', 'Strategy'])


In [None]:
# ==============================================================================
# 3. DATA AUGMENTATION AND TRANSFORMS (OPTIMAL SET)
# ==============================================================================

# Training pipeline: squash to a fixed square, apply mild random augmentation
# (flip, small rotation, light colour jitter), then convert and normalize.
# NOTE(review): training resizes straight to IMAGE_SIZE x IMAGE_SIZE (no crop) while
# validation resizes to 256 and center-crops, so the two splits see different
# geometry - confirm this is intended.
train_transforms = transforms.Compose([
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation/test pipeline: deterministic resize + center crop, no augmentation
val_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

print("✅ Transforms defined. Using less aggressive augmentation.")

✅ Transforms defined. Using less aggressive augmentation.


In [None]:
# ==============================================================================
# 4. TWO-HEAD MODEL DEFINITION
# ==============================================================================

class TwoHeadModel(nn.Module):
    """EfficientNet-B0 backbone shared by two linear classification heads.

    The backbone's own classifier is replaced by ``nn.Identity`` so it emits
    the pooled feature vector; one head scores game classes and the other
    scores genre classes from that same vector.
    """

    def __init__(self, num_game_classes, num_genre_classes):
        super().__init__()

        # ImageNet-pretrained backbone; read the feature width off its final
        # linear layer before discarding the original classifier.
        backbone = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
        num_features = backbone.classifier[1].in_features
        backbone.classifier = nn.Identity()
        self.feature_extractor = backbone

        # Heads are created game-first, then genre (same order as before)
        self.game_head = self._make_head(num_features, num_game_classes)
        self.genre_head = self._make_head(num_features, num_genre_classes)

    @staticmethod
    def _make_head(in_features, out_features):
        """Return a Dropout(0.2) -> Linear classification head."""
        return nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(in_features, out_features),
        )

    def forward(self, x):
        """Return ``(game_logits, genre_logits)`` for a batch of images."""
        shared = self.feature_extractor(x)
        return self.game_head(shared), self.genre_head(shared)

# Build the two-head network, then move it to the selected device
model = TwoHeadModel(num_game_classes=NUM_GAME_CLASSES, num_genre_classes=NUM_GENRE_CLASSES)
model = model.to(device)

print(f"✅ Two-Head Model (EfficientNet-B0) initialized on {device}.")

✅ Two-Head Model (EfficientNet-B0) initialized on cpu.


In [None]:
# ==============================================================================
# 5. TRAINING AND EVALUATION HELPER FUNCTION
# ==============================================================================

def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25, phase='Phase 1',
                genre_map=None, genre_to_idx=None):
    """Train the two-head model and keep the weights with the best validation game accuracy.

    Every epoch runs a 'train' pass followed by a 'val' pass. The optimised
    loss is the sum of the game-head loss and the genre-head loss. Genre
    targets are derived from the game targets, so the datasets only need to
    provide game labels.

    Args:
        model: Module whose forward returns ``(game_logits, genre_logits)``.
        dataloaders: Dict with 'train' and 'val' loaders yielding
            ``(inputs, game_label_indices)``. The class-name list
            ``dataloaders['train'].dataset.classes`` is used to translate label
            indices to game names for both passes.
        criterion: Loss applied to each head separately.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train pass.
        num_epochs: Number of epochs to run.
        phase: Label that only appears in the log output.
        genre_map: Mapping game name -> genre name. Defaults to the
            module-level ``GAME_TO_GENRE_MAP``.
        genre_to_idx: Mapping genre name -> genre class index. Defaults to the
            module-level ``GENRE_TO_IDX``.

    Returns:
        ``(model, best_game_acc)``: the same model object with the best
        validation weights loaded, and the best validation game accuracy as a
        Python float.

    Raises:
        ValueError: If the 'train' or 'val' dataset is empty.
        KeyError: If a class name has no entry in ``genre_map`` or its genre
            has no entry in ``genre_to_idx``.
    """
    genre_map = GAME_TO_GENRE_MAP if genre_map is None else genre_map
    genre_to_idx = GENRE_TO_IDX if genre_to_idx is None else genre_to_idx

    # Fail early with a clear message instead of dividing by zero after a pass
    for split in ('train', 'val'):
        if len(dataloaders[split].dataset) == 0:
            raise ValueError(f"The '{split}' dataset is empty; cannot compute loss or accuracy.")

    # Run on whatever device the model already lives on
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # game label index -> genre label index, built once instead of per batch
    class_names = dataloaders['train'].dataset.classes
    game_to_genre_idx = torch.tensor(
        [genre_to_idx[genre_map[name]] for name in class_names],
        dtype=torch.long,
        device=run_device,
    )

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_game_acc = 0.0

    print(f"\nStarting {phase}: Training for {num_epochs} epochs.")

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print('-' * 20)

        for phase_name in ['train', 'val']:
            is_train = phase_name == 'train'
            if is_train:
                # NOTE(review): train() also switches BatchNorm/Dropout layers of
                # frozen submodules to training mode; requires_grad=False does not
                # stop BatchNorm running statistics from updating.
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_game_corrects = 0
            running_genre_corrects = 0

            # Denominator for the per-epoch mean loss and accuracies
            data_size = len(dataloaders[phase_name].dataset)

            for inputs, game_labels_idx in dataloaders[phase_name]:
                inputs = inputs.to(run_device)
                game_labels_idx = game_labels_idx.to(run_device)
                genre_labels_idx = game_to_genre_idx[game_labels_idx]

                if is_train:
                    optimizer.zero_grad()

                # Gradients are only tracked during the training pass
                with torch.set_grad_enabled(is_train):
                    game_outputs, genre_outputs = model(inputs)
                    game_preds = game_outputs.argmax(dim=1)
                    genre_preds = genre_outputs.argmax(dim=1)

                    # Total loss is the sum of both task losses
                    game_loss = criterion(game_outputs, game_labels_idx)
                    genre_loss = criterion(genre_outputs, genre_labels_idx)
                    total_loss = game_loss + genre_loss

                    if is_train:
                        total_loss.backward()
                        optimizer.step()

                # Statistics, accumulated as plain Python numbers.
                # criterion returns a batch mean, so weight it by the batch size.
                running_loss += total_loss.item() * inputs.size(0)
                running_game_corrects += (game_preds == game_labels_idx).sum().item()
                running_genre_corrects += (genre_preds == genre_labels_idx).sum().item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / data_size
            epoch_game_acc = running_game_corrects / data_size
            epoch_genre_acc = running_genre_corrects / data_size

            print(f'{phase_name:4} Total Loss: {epoch_loss:.4f} | Game Acc: {epoch_game_acc:.4f} | Genre Acc: {epoch_genre_acc:.4f}')

            # Snapshot the weights whenever validation game accuracy improves
            if not is_train and epoch_game_acc > best_game_acc:
                best_game_acc = epoch_game_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f'\nTraining complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best validation Game Acc: {best_game_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model, best_game_acc

In [None]:
# ==============================================================================
# 6. DATALOADER SETUP (FIXED FOR .AVIF AND .WEBP EXTENSIONS)
# ==============================================================================
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# --- Configuration ---
# Root folder on the mounted Drive; it must contain 'train' and 'val' subfolders.
DATA_DIR = '/content/drive/MyDrive/Dataset/dataset_flat'
BATCH_SIZE = 16

# Lower-case file suffixes accepted as images, including .webp and .avif.
# NOTE(review): accepting a suffix here does not guarantee the installed image
# library can decode it (notably .avif) - confirm in this environment.
IMG_EXTENSIONS = (
    '.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff',
    '.webp', '.avif',
)

def is_valid_file(path: str) -> bool:
    """Return True when ``path`` ends with a supported image suffix (case-insensitive)."""
    lowered = path.lower()
    return any(lowered.endswith(suffix) for suffix in IMG_EXTENSIONS)

# Load the datasets, accepting only files with a supported image extension
train_ds = datasets.ImageFolder(
    os.path.join(DATA_DIR, 'train'),
    train_transforms,
    is_valid_file=is_valid_file
)
val_ds = datasets.ImageFolder(
    os.path.join(DATA_DIR, 'val'),
    val_transforms,
    is_valid_file=is_valid_file
)

# --- Class Order Verification ---
# ImageFolder numbers the classes by SORTED folder name, and every label it
# yields is an index into its own `.classes` list. Replacing that list with
# GAME_CLASSES (which is not sorted) would silently attach the wrong game name,
# and therefore the wrong genre target, to every label.
if val_ds.classes != train_ds.classes:
    print("⚠️ WARNING: 'train' and 'val' contain different class folders; their label indices do not line up.")

try:
    if len(train_ds.classes) != len(GAME_CLASSES):
        print(f"⚠️ WARNING: ImageFolder found {len(train_ds.classes)} classes, expected {len(GAME_CLASSES)}.")

    folders_are_game_names = set(train_ds.classes) <= set(GAME_CLASSES)
    if not folders_are_game_names:
        # Folder names are not game titles (e.g. ids or slugs). Keep the previous
        # behaviour of relabelling by position, but say so: this is only correct
        # if the sorted folder order matches the order of GAME_CLASSES.
        print("⚠️ WARNING: class folder names do not match GAME_CLASSES; relabelling by position. "
              "Verify that the sorted folder order matches GAME_CLASSES.")
        train_ds.classes = list(GAME_CLASSES)
        val_ds.classes = list(GAME_CLASSES)
    # Otherwise the folder names ARE the game names: keep ImageFolder's order,
    # because that is the order the label indices (and model outputs) refer to.
except NameError:
    print("⚠️ WARNING: GAME_CLASSES variable not found. Ensure Cell 2 was run.")

# Create DataLoaders.
# Cap the workers at the CPUs actually available and only pin host memory when
# a GPU is present; both avoid DataLoader warnings on small CPU-only runtimes.
NUM_WORKERS = min(4, os.cpu_count() or 1)
PIN_MEMORY = torch.cuda.is_available()
dataloaders = {
    'train': DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY),
    'val': DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
}

print("✅ DataLoaders Setup Complete with custom file extensions.")
print(f"Training images loaded: {len(train_ds)}")
print(f"Validation images loaded: {len(val_ds)}")

✅ DataLoaders Setup Complete with custom file extensions.
Training images loaded: 151
Validation images loaded: 38




In [None]:
# ==============================================================================
# 7. PHASE 1: INITIAL TRAINING (FROZEN FEATURE EXTRACTOR)
# ==============================================================================
print("Starting Phase 1: Training Classification Heads...")

# Freeze the whole backbone so only the two new heads receive gradient updates
model.feature_extractor.requires_grad_(False)

# Parameters handed to the optimizer: game head first, then genre head
params_to_update = [*model.game_head.parameters(), *model.genre_head.parameters()]

# Optimizer, loss, and a step schedule that cuts the LR by 10x every 7 epochs
optimizer = optim.Adam(params_to_update, lr=0.001)
criterion = nn.CrossEntropyLoss()
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Train the heads; the returned model carries the best-validation weights
model_ft, best_acc_ph1 = train_model(
    model=model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=15,
    phase='Phase 1: Frozen Heads',
)

Starting Phase 1: Training Classification Heads...

Starting Phase 1: Frozen Heads: Training for 15 epochs.

Epoch 1/15
--------------------




train Total Loss: 4.4485 | Game Acc: 0.0662 | Genre Acc: 0.3576
val  Total Loss: 4.1633 | Game Acc: 0.1579 | Genre Acc: 0.3421

Epoch 2/15
--------------------
train Total Loss: 3.7397 | Game Acc: 0.4503 | Genre Acc: 0.4768
val  Total Loss: 3.8848 | Game Acc: 0.3684 | Genre Acc: 0.4737

Epoch 3/15
--------------------
train Total Loss: 3.2988 | Game Acc: 0.7285 | Genre Acc: 0.5828
val  Total Loss: 3.6424 | Game Acc: 0.5526 | Genre Acc: 0.4737

Epoch 4/15
--------------------
train Total Loss: 2.8725 | Game Acc: 0.8675 | Genre Acc: 0.6424
val  Total Loss: 3.4568 | Game Acc: 0.5526 | Genre Acc: 0.5789

Epoch 5/15
--------------------
train Total Loss: 2.4805 | Game Acc: 0.9205 | Genre Acc: 0.6556
val  Total Loss: 3.3232 | Game Acc: 0.5789 | Genre Acc: 0.5263

Epoch 6/15
--------------------
train Total Loss: 2.2559 | Game Acc: 0.9205 | Genre Acc: 0.6887
val  Total Loss: 3.1572 | Game Acc: 0.6053 | Genre Acc: 0.5789

Epoch 7/15
--------------------
train Total Loss: 2.0918 | Game Acc: 0.9

In [None]:
# ==============================================================================
# 8. PHASE 2: FINE-TUNING (UNFROZEN FEATURE EXTRACTOR)
# ==============================================================================
print("\nStarting Fine-Tuning Setup...")

# train_model restarts its best-accuracy tracking from zero on every call, so
# after Phase 2 the model holds the Phase 2 best weights even if Phase 1 scored
# higher. Keep the Phase 1 result so it can be restored in that case.
phase1_best_wts = copy.deepcopy(model_ft.state_dict())

# Unfreeze all layers
model_ft.requires_grad_(True)

# Use a much lower learning rate for fine-tuning the entire model
FT_LEARNING_RATE = 1e-5
FT_EPOCHS = 10
optimizer_ft = optim.Adam(model_ft.parameters(), lr=FT_LEARNING_RATE)
criterion_ft = nn.CrossEntropyLoss()
scheduler_ft = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)

print(f"Model feature extractor unfrozen. New learning rate: {FT_LEARNING_RATE}")
print(f"Starting Phase 2: Fine-Tuning for {FT_EPOCHS} epochs.")

# Train
model_ft, best_acc_ph2 = train_model(
    model_ft,
    dataloaders,
    criterion_ft,
    optimizer_ft,
    scheduler_ft,
    num_epochs=FT_EPOCHS,
    phase='Phase 2: Fine-Tuning'
)

# Make model_ft match the accuracy reported below
if best_acc_ph2 < best_acc_ph1:
    model_ft.load_state_dict(phase1_best_wts)
    print(f"Phase 2 did not improve on Phase 1 ({best_acc_ph2:.4f} < {best_acc_ph1:.4f}); Phase 1 weights restored.")

print(f"\nFinal Training Complete. Best Game Accuracy: {max(best_acc_ph1, best_acc_ph2):.4f}")


Starting Fine-Tuning Setup...
Model feature extractor unfrozen. New learning rate: 1e-05
Starting Phase 2: Fine-Tuning for 10 epochs.

Starting Phase 2: Fine-Tuning: Training for 10 epochs.

Epoch 1/10
--------------------
train Total Loss: 2.0365 | Game Acc: 0.9669 | Genre Acc: 0.7417
val  Total Loss: 3.1219 | Game Acc: 0.6053 | Genre Acc: 0.5789

Epoch 2/10
--------------------
train Total Loss: 2.0861 | Game Acc: 0.9007 | Genre Acc: 0.7086
val  Total Loss: 3.1191 | Game Acc: 0.5789 | Genre Acc: 0.5526

Epoch 3/10
--------------------
train Total Loss: 1.9765 | Game Acc: 0.9404 | Genre Acc: 0.6954
val  Total Loss: 3.0711 | Game Acc: 0.6053 | Genre Acc: 0.5789

Epoch 4/10
--------------------
train Total Loss: 1.9932 | Game Acc: 0.9338 | Genre Acc: 0.7351
val  Total Loss: 3.0622 | Game Acc: 0.6316 | Genre Acc: 0.5789

Epoch 5/10
--------------------
train Total Loss: 1.9621 | Game Acc: 0.9272 | Genre Acc: 0.7351
val  Total Loss: 3.0463 | Game Acc: 0.5526 | Genre Acc: 0.5789

Epoch 6/

In [None]:
# ==============================================================================
# 9. FINAL EVALUATION
# ==============================================================================

def evaluate_model(model, dataloader, ds_classes, genre_map, genre_to_idx):
    """Print per-sample details for the first 10 samples and the final accuracies.

    Args:
        model: Module whose forward returns ``(game_logits, genre_logits)``.
        dataloader: Loader yielding ``(inputs, game_label_indices)``.
        ds_classes: Game names indexed by game label.
        genre_map: Mapping game name -> genre name.
        genre_to_idx: Mapping genre name -> genre class index.

    Returns:
        None. Results are reported through ``print`` only.

    Raises:
        ValueError: If the dataloader's dataset is empty.
        KeyError: If a class name has no entry in ``genre_map`` or its genre
            has no entry in ``genre_to_idx``.
    """
    data_size = len(dataloader.dataset)
    if data_size == 0:
        raise ValueError("The evaluation dataset is empty; cannot compute accuracy.")

    # Run on whatever device the model already lives on
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Derive both lookups from the arguments instead of module-level globals
    idx_to_genre = {idx: genre for genre, idx in genre_to_idx.items()}
    game_to_genre_idx = torch.tensor(
        [genre_to_idx[genre_map[name]] for name in ds_classes],
        dtype=torch.long,
        device=run_device,
    )

    max_detail_rows = 10
    samples_seen = 0
    game_correct = 0
    genre_correct = 0

    model.eval()

    print("\n--- Validation Set Prediction Details ---")

    with torch.no_grad():
        for inputs, game_labels_idx in dataloader:
            inputs = inputs.to(run_device)
            game_labels_idx = game_labels_idx.to(run_device)
            genre_labels_idx = game_to_genre_idx[game_labels_idx]

            game_outputs, genre_outputs = model(inputs)
            game_preds = game_outputs.argmax(dim=1)
            genre_preds = genre_outputs.argmax(dim=1)

            game_correct += (game_preds == game_labels_idx).sum().item()
            genre_correct += (genre_preds == genre_labels_idx).sum().item()

            # Print details for the first 10 samples, numbered by a running
            # count so a partial or unknown batch size does not matter
            for j in range(inputs.size(0)):
                sample_no = samples_seen + j + 1
                if sample_no > max_detail_rows:
                    break

                true_game = ds_classes[game_labels_idx[j].item()]
                pred_game = ds_classes[game_preds[j].item()]
                true_genre = idx_to_genre[genre_labels_idx[j].item()]
                pred_genre = idx_to_genre[genre_preds[j].item()]

                game_status = "CORRECT" if true_game == pred_game else "INCORRECT"
                genre_status = "CORRECT" if true_genre == pred_genre else "INCORRECT"

                print(f"Sample {sample_no}: True={true_game:20} | Pred={pred_game:20} | GAME {game_status:9} | GENRE {genre_status}")

            samples_seen += inputs.size(0)

    final_game_acc = game_correct / data_size
    final_genre_acc = genre_correct / data_size

    print(f"\nFinal Validation Game Accuracy: {final_game_acc:.4f}")
    print(f"Final Validation Genre Accuracy: {final_genre_acc:.4f}")

print("✅ Best model weights loaded for final evaluation.")
# Score model_ft on the validation loader, decoding labels with the training
# dataset's class list.
evaluate_model(
    model=model_ft,
    dataloader=dataloaders['val'],
    ds_classes=train_ds.classes,
    genre_map=GAME_TO_GENRE_MAP,
    genre_to_idx=GENRE_TO_IDX,
)

✅ Best model weights loaded for final evaluation.

--- Validation Set Prediction Details ---
Sample 1: True=Hades                | Pred=Hades                | GAME CORRECT   | GENRE CORRECT
Sample 2: True=Hades                | Pred=Hades                | GAME CORRECT   | GENRE CORRECT
Sample 3: True=Undertale            | Pred=Undertale            | GAME CORRECT   | GENRE CORRECT
Sample 4: True=Undertale            | Pred=Undertale            | GAME CORRECT   | GENRE CORRECT
Sample 5: True=Hollow Knight        | Pred=Hollow Knight        | GAME CORRECT   | GENRE INCORRECT
Sample 6: True=Hollow Knight        | Pred=Hollow Knight        | GAME CORRECT   | GENRE CORRECT
Sample 7: True=Among Us             | Pred=Grand Theft Auto V   | GAME INCORRECT | GENRE INCORRECT
Sample 8: True=Among Us             | Pred=Among Us             | GAME CORRECT   | GENRE INCORRECT
Sample 9: True=Persona 5 Royal      | Pred=Among Us             | GAME INCORRECT | GENRE CORRECT
Sample 10: True=Persona 5 Ro

In [None]:
# ==============================================================================
# SAVE ENTIRE MODEL FOR APP DEPLOYMENT (Full Object Save)
# ==============================================================================
import torch
import os

# Google Drive must be mounted before this cell runs.
# 'model_ft' is the model returned by the last training cell.

# Destination of the checkpoint on the mounted Drive
SAVE_DIR = '/content/drive/MyDrive/Dataset/'
MODEL_NAME = 'game_classifier.pth'
SAVE_PATH = os.path.join(SAVE_DIR, MODEL_NAME)

# ⚠️ This pickles the whole model object (structure and weights), not just a
# state_dict. The TwoHeadModel class definition must therefore be available
# in the environment that loads the file.
torch.save(model_ft, SAVE_PATH)

print(
    f"✅ Full model object saved successfully to: {SAVE_PATH}",
    "This file contains the entire model structure and its current best weights.",
    sep="\n",
)

✅ Full model object saved successfully to: /content/drive/MyDrive/Dataset/game_classifier.pth
This file contains the entire model structure and its current best weights.
