## Load Dataset

In [None]:
!pip install kaggle
import os
import zipfile

def download_data_from_kaggle():
    """Fetch and unpack the ``cpe342-karena`` Kaggle competition archive.

    Steps, in order:

    1. On Google Colab, open the upload widget so ``kaggle.json`` can be
       supplied; elsewhere, print a reminder about ``~/.kaggle/``.
    2. If ``kaggle.json`` is in the current directory, move it to
       ``~/.kaggle/kaggle.json`` and restrict it to mode ``600``. A message
       is printed only when the token is in neither location.
    3. Download ``cpe342-karena.zip`` with the ``kaggle`` command-line tool
       unless the archive already exists in the current directory.
    4. Extract the archive into the current directory.

    Every problem is reported with ``print`` instead of being raised, so the
    function always runs to the end.

    Returns:
        None
    """
    # Local imports keep this definition self-contained within its cell.
    import shutil
    import subprocess

    archive_name = 'cpe342-karena.zip'
    token_name = 'kaggle.json'
    token_path = os.path.join(os.path.expanduser('~'), '.kaggle', token_name)

    try:
        from google.colab import files
        files.upload()
    except ImportError:
        print("Running outside of Colab. Please ensure your kaggle.json is in ~/.kaggle/")

    # Plain-Python equivalents of `mkdir -p`, `mv` and `chmod 600`, so the
    # function does not depend on IPython shell escapes or a POSIX shell.
    if os.path.isfile(token_name):
        os.makedirs(os.path.dirname(token_path), exist_ok=True)
        shutil.move(token_name, token_path)
        os.chmod(token_path, 0o600)
    elif not os.path.isfile(token_path):
        # Stay quiet when the token is already installed in ~/.kaggle/.
        print("kaggle.json not found. Please upload it or place it in the correct directory.")

    if not os.path.exists(archive_name):
        print("Downloading data from Kaggle competition 'cpe342-karena'...")
        command = ['kaggle', 'competitions', 'download', '-c', 'cpe342-karena']
        try:
            # Relay the tool's output line by line so progress stays visible.
            with subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            ) as proc:
                for line in proc.stdout:
                    print(line, end='')
        except OSError as e:
            # Raised, for example, when the kaggle executable is not on PATH.
            print(f"Error: could not run the kaggle command: {e}")
        else:
            # Leaving the `with` block waits for the process to finish.
            if proc.returncode != 0:
                print(f"Error: kaggle download exited with status {proc.returncode}.")
    else:
        print("Data already downloaded.")

    if os.path.exists(archive_name):
        print("Unzipping data...")
        try:
            with zipfile.ZipFile(archive_name, 'r') as zip_ref:
                zip_ref.extractall('.')
            print("Data unzipped.")
        except zipfile.BadZipFile:
            print("Error: Downloaded file is not a valid zip file.")
        except Exception as e:
            print(f"An error occurred during unzipping: {e}")
    else:
        print("Zip file not found, cannot unzip.")



In [None]:
# Install the Kaggle token if it is in the working directory, then download
# (unless already present) and unzip the competition archive here.
download_data_from_kaggle()

Saving kaggle.json to kaggle.json
Downloading data from Kaggle competition 'cpe342-karena'...
Downloading cpe342-karena.zip to /content
 95% 875M/922M [00:04<00:00, 127MB/s] 
100% 922M/922M [00:04<00:00, 220MB/s]
Unzipping data...
Data unzipped.


# Task 4: Game Title Detection

## Setup

In [None]:
!pip install timm


import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import timm
import os
from timm.data.mixup import Mixup
from timm.loss import SoftTargetCrossEntropy



## Load CSV

In [None]:
# Read the labelled training table and the test table for task 4.
train_df = pd.read_csv("public_dataset/task4/train.csv")
test_df = pd.read_csv("public_dataset/task4/test_refined.csv")

# Hold out 20% of the labelled rows for validation. The split is stratified
# on `label` and uses a fixed seed so it is repeatable.
train_df, val_df = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df["label"],
)

## Preprocessing

In [None]:
# Training pipeline: resize to 224x224, then random crop/flip/rotation/colour
# augmentation, and finally conversion to a tensor.
transform_train = T.Compose([
    T.Resize(size=(224, 224)),
    T.RandomResizedCrop(size=224, scale=(0.8, 1.0)),  # original author's note: "much better"
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=15),
    T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
    T.ToTensor(),
])

# Evaluation pipeline: resize and tensor conversion only, no augmentation.
transform_val = T.Compose([
    T.Resize(size=(224, 224)),
    T.ToTensor(),
])


class ImgDataset(Dataset):
    """Labelled image dataset backed by a DataFrame of file names and labels.

    Each item is ``(image, label)``: the RGB image after ``transform`` and the
    row's ``label`` cast to ``int``.

    Args:
        df: Table with ``file_name`` and ``label`` columns. It is stored with
            a fresh 0..n-1 index so positional lookups work after a
            train/validation split.
        transform: Callable applied to the RGB ``PIL.Image`` of every item.
        img_dir: Directory that the ``file_name`` values are relative to.
            Defaults to the task-4 training folder.
    """

    def __init__(self, df, transform, img_dir="public_dataset/task4/train"):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``.

        Returns:
            tuple: ``(transformed_image, int_label)``.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row.file_name)
        # The context manager closes the file handle as soon as the pixels
        # have been decoded into the converted copy.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)
        return img, int(row.label)


class TestDataset(Dataset):
    """Unlabelled image dataset backed by a DataFrame of file names.

    Each item is ``(image, file_name)``: the RGB image after ``transform`` and
    the row's ``file_name``, so predictions can be matched back to files.

    Args:
        df: Table with a ``file_name`` column. It is stored with a fresh
            0..n-1 index.
        transform: Callable applied to the RGB ``PIL.Image`` of every item.
        img_dir: Directory that the ``file_name`` values are relative to.
            Defaults to the task-4 test folder.
    """

    def __init__(self, df, transform, img_dir="public_dataset/task4/test"):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``.

        Returns:
            tuple: ``(transformed_image, file_name)``.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row.file_name)
        # The context manager closes the file handle as soon as the pixels
        # have been decoded into the converted copy.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)
        return img, row.file_name

## DataLoaders

In [None]:
# Wrap each split in its dataset: augmentation for training, plain
# resize + tensor conversion for validation and test.
train_ds = ImgDataset(train_df, transform_train)
val_ds = ImgDataset(val_df, transform_val)
test_ds = TestDataset(test_df, transform_val)

# drop_last=True on the training loader: the training loop passes every batch
# through timm's Mixup, which asserts an even batch size. Without it, an
# odd-sized final batch would abort training. Validation and test keep every
# sample and their original order.
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, drop_last=True)
val_loader = DataLoader(val_ds, batch_size=32)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

## Model (ViT) with Mixup/CutMix, Weight Decay, and a Learning-Rate Scheduler

In [None]:
# Run on the GPU when PyTorch can see one; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Pretrained ViT-B/16 (patch 16, 224x224 input) configured for 5 output
# classes, moved to the selected device.
model = timm.create_model(
    "vit_base_patch16_224",
    pretrained=True,
    num_classes=5,
).to(device)

# Mixup/CutMix augmentation paired with the soft-target cross-entropy loss.
mixup_fn = Mixup(mixup_alpha=0.2, cutmix_alpha=0.2, num_classes=5)
criterion = SoftTargetCrossEntropy()

# Adam with weight decay; the learning rate follows a cosine schedule with
# T_max=15 scheduler steps.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

## Training Loop

In [None]:
# Fine-tune the model for EPOCHS epochs. After each epoch the macro F1 on the
# validation loader is computed, and the weights are checkpointed whenever
# that score beats the best one seen so far.
EPOCHS = 15
best_f1 = 0.0  # best validation macro F1 so far; a checkpoint is written only when this is exceeded

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0  # sum (not mean) of the per-batch losses in this epoch

    # ---------------------------
    #  Training
    # ---------------------------
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        # Mix the batch; `labels` becomes the soft targets consumed by `criterion`.
        # NOTE(review): timm's Mixup asserts an even batch size, so an odd-sized
        # final batch from train_loader would raise here — confirm the loader
        # cannot produce one.
        imgs, labels = mixup_fn(imgs, labels)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    # ---------------------------
    #  Validation F1
    # ---------------------------
    model.eval()
    preds, trues = [], []

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            # Predicted class = index of the largest output along dim 1.
            preds += outputs.argmax(1).cpu().numpy().tolist()
            trues += labels.numpy().tolist()

    f1 = f1_score(trues, preds, average='macro')

    # ---------------------------
    #  Save Best Model
    # ---------------------------
    # Strict '>' keeps the earliest checkpoint when scores tie.
    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), "best_model.pth")

    # The reported "Loss" is the epoch total, so it scales with the number of batches.
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {total_loss:.4f} - F1: {f1:.4f}")


Epoch 1/15 - Loss: 765.3728
Epoch 2/15 - Loss: 629.0607
Epoch 3/15 - Loss: 616.8677
Epoch 4/15 - Loss: 607.5675
Epoch 5/15 - Loss: 586.6082
Epoch 6/15 - Loss: 567.1779


## Validation F1

In [None]:
# Score the model currently held in `model` on the validation loader.
model.eval()
preds, trues = [], []

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        # Predicted class = index of the largest output along dim 1.
        preds.extend(outputs.argmax(dim=1).cpu().numpy().tolist())
        trues.extend(labels.numpy().tolist())

f1 = f1_score(trues, preds, average='macro')
print("Validation Macro F1:", f1)

# Load the Best Model Checkpoint

In [None]:
import timm
import torch

# Run on the GPU when PyTorch can see one; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build a model with the same architecture that was used for training.
# pretrained=False because the weights are loaded from our own checkpoint below.
model = timm.create_model("vit_base_patch16_224", pretrained=False, num_classes=5)

# Load the weights from the saved checkpoint file.
model.load_state_dict(torch.load("best_model.pth", map_location=device))

# Move to the target device and switch to evaluation mode.
model = model.to(device).eval()

print("‚úÖ Loaded best_model.pth successfully!")

‚úÖ Loaded best_model.pth successfully!


# Predict on the Test Set

# Test-Time Augmentation (TTA)
- Normal (original)

- Horizontal Flip

- Rotation ±10°

- Center Crop + Resize

- Brightness Shift

- Contrast Shift

In [None]:
import torchvision.transforms as T
from PIL import Image
import torch
import pandas as pd

# ---------------------------------
# TTA (test-time augmentation) transforms
# ---------------------------------
# Each entry maps a PIL image to a 224x224 tensor. At inference time every
# entry is applied to the same image and the resulting predictions are
# averaged.
tta_transforms = [
    # 1) Original: resize only
    T.Compose([
        T.Resize((224, 224)),
        T.ToTensor()
    ]),

    # 2) Horizontal flip (p=1.0 makes the flip unconditional)
    T.Compose([
        T.Resize((224, 224)),
        T.RandomHorizontalFlip(p=1.0),
        T.ToTensor()
    ]),

    # 3) Rotate +10° (applied to the full-size image, before resizing)
    T.Compose([
        lambda img: T.functional.rotate(img, 10),
        T.Resize((224, 224)),
        T.ToTensor()
    ]),

    # 4) Rotate -10° (applied to the full-size image, before resizing)
    T.Compose([
        lambda img: T.functional.rotate(img, -10),
        T.Resize((224, 224)),
        T.ToTensor()
    ]),

    # 5) Center crop + resize
    # NOTE(review): the 200px crop is taken from the un-resized image, so the
    # fraction of the picture that is kept depends on the source resolution —
    # confirm this is intended.
    T.Compose([
        T.CenterCrop(200),
        T.Resize((224, 224)),
        T.ToTensor()
    ]),

    # 6) Brightness shift
    # NOTE(review): ColorJitter samples its factor randomly on every call, so
    # this view (and view 7) is not deterministic between runs.
    T.Compose([
        T.Resize((224,224)),
        T.ColorJitter(brightness=0.3),
        T.ToTensor()
    ]),

    # 7) Contrast shift (random factor, see the note on view 6)
    T.Compose([
        T.Resize((224,224)),
        T.ColorJitter(contrast=0.3),
        T.ToTensor()
    ])
]


In [None]:
# Predict every test image with test-time augmentation: all views in
# `tta_transforms` are scored and their class probabilities averaged, then
# the predictions are written to submission.csv.

# Make sure the model is in evaluation mode even if this cell is run on its own.
model.eval()

pred_list = []
filenames = []

with torch.no_grad():
    # Iterate the file names directly. The TTA views are built from the raw
    # image, so decoding each image a second time through test_loader would
    # be wasted work. Row order matches test_loader (shuffle=False).
    for name in test_df["file_name"]:
        img_path = os.path.join("public_dataset/task4/test", name)
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        # Stack all views into one batch so a single forward pass scores them.
        views = torch.stack([t(img) for t in tta_transforms]).to(device)
        probs = model(views).softmax(dim=1)

        # Ensemble: average the class probabilities over the views, then
        # take the most likely class.
        avg_probs = probs.mean(dim=0)
        pred_list.append(int(avg_probs.argmax()))
        filenames.append(name)

# Save submission
# NOTE(review): the input CSV names this column `file_name`; confirm the
# competition expects `filename` in the submission file.
submission = pd.DataFrame({
    "filename": filenames,
    "label": pred_list
})

submission.to_csv("submission.csv", index=False)
print("üéâ Saved submission.csv with FULL TTA")


üéâ Saved submission.csv with FULL TTA
