In [None]:
# 0-cell: Telepíti a fine_tune.ipynb-ben használt könyvtárakat (Ucloud környezethez)
# Futtasd a notebook elején (0. cella). A torch-installáció GPU esetén a cu118 wheel-t használja,
# ha az Ucloud máshogy konfigurált, módosítsd az index-url-t vagy a torch telepítési parancsot.
import sys, subprocess, shutil, importlib

def pip_install(cmd):
    print('\nRunning:', ' '.join(cmd))
    subprocess.check_call(cmd)

# Detektáljuk, van-e NVIDIA GPU (nvidia-smi elérhető)
has_gpu = shutil.which('nvidia-smi') is not None
print('GPU detected:', has_gpu)

# Általános Python csomagok, amiket a fine_tune notebook használ
other_pkgs = ['pandas', 'transformers', 'pillow', 'tqdm', 'scikit-learn']
pip_install([sys.executable, '-m', 'pip', 'install', '--upgrade'] + other_pkgs)

# Torch telepítése: ha GPU van, cu118 wheel-eket próbálunk, egyébként CPU wheel-eket
torch_index = 'https://download.pytorch.org/whl/cu118' if has_gpu else 'https://download.pytorch.org/whl/cpu'
print('Installing torch/torchvision from index:', torch_index)
pip_install([sys.executable, '-m', 'pip', 'install', '--upgrade', '--index-url', torch_index, 'torch', 'torchvision'])

# Rövid ellenőrzés: verziók kiíratása
print('\nInstalled package versions:')
def get_ver(mod_name):
    try:
        m = importlib.import_module(mod_name)
        return getattr(m, '__version__', str(m))
    except Exception as e:
        return f'not installed ({e})'

for pkg in ['torch', 'transformers', 'pandas', 'PIL', 'tqdm', 'sklearn']:
    print(pkg + ':', get_ver(pkg))


In [1]:
import pandas as pd

df = pd.read_csv("train_labels.csv")
df.head()


Unnamed: 0,ID,category
0,angry_Training_10118481.jpg,angry
1,angry_Training_10120469.jpg,angry
2,angry_Training_10131352.jpg,angry
3,angry_Training_10161559.jpg,angry
4,angry_Training_1021836.jpg,angry


In [6]:
# Load processor and model used for preprocessing and fine-tuning
from transformers import ViTImageProcessor, ViTForImageClassification
import torch

# Use the same pretrained processor/model you used for inference
processor = ViTImageProcessor.from_pretrained('abhilash88/face-emotion-detection')
model = ViTForImageClassification.from_pretrained('abhilash88/face-emotion-detection')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
print('Loaded processor and model. Device:', device)

Loaded processor and model. Device: cpu


In [7]:
from torch.utils.data import Dataset
from PIL import Image

class EmotionDataset(Dataset):
    def __init__(self, df, img_dir, processor):
        self.df = df
        self.img_dir = img_dir
        self.processor = processor
        
        # class-name → numeric label
        self.label2id = {
            "angry": 0,
            "disgust": 1,
            "fear": 2,
            "happy": 3,
            "sad": 4,
            "surprise": 5,
            "neutral": 6
        }

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = row["ID"]
        label_str = row["category"]

        # Full image path
        img_path = f"{self.img_dir}/{img_name}"

        # Load image
        img = Image.open(img_path).convert("RGB")

        # ViT preprocessing
        inputs = self.processor(img, return_tensors="pt")

        # Remove batch dimension added by processor
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}

        label = self.label2id[label_str]

        return inputs, label


In [8]:
from torch.utils.data import DataLoader

train_ds = EmotionDataset(df, "train/train", processor)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)


In [None]:
import torch
from torch import nn, optim

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

EPOCHS = 4

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for inputs, labels in train_loader:
        inputs = {k: v.to(device) for k, v in inputs.items()}
        labels = labels.to(device)

        outputs = model(**inputs).logits
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1} - Loss: {total_loss/len(train_loader):.4f}")


In [None]:
model.save_pretrained("emotion_finetuned")
processor.save_pretrained("emotion_finetuned")


In [None]:
import os

test_dir = "test"

model.eval()

results = []
for filename in os.listdir(test_dir):
    if filename.endswith(".jpg"):
        img = Image.open(f"{test_dir}/{filename}").convert("RGB")
        inputs = processor(img, return_tensors="pt").to(device)

        with torch.no_grad():
            logits = model(**inputs).logits
            pred = torch.argmax(logits, dim=1).item()

        results.append((filename, pred))
