# Artist Recognition — Training

In this notebook, we train a neural network, using the Kaggle GPU, to recognize the painter of a given painting. We start with the Kaggle dataset [Best Artworks of All Time](https://www.kaggle.com/datasets/ikarus777/best-artworks-of-all-time?select=resized). Given the **imbalanced data** (some painters are overrepresented), we focus for educational purposes on the **top 3 most represented painters**: Van Gogh, Picasso, and Degas.

## Model

We use **ResNet-18**, a convolutional neural network with 18 layers. We start with a **pretrained model** and **fine-tune only the last few layers**, while freezing the earlier layers. This is done because early layers learn **generic features** like edges and textures, whereas later layers learn **task-specific features** relevant to our dataset.

## Image Preprocessing

We standardize the images by:

* Resizing them to **224×224 pixels**
* Normalizing pixel values by **subtracting the mean and dividing by the standard deviation** for each RGB channel

## Data Augmentation

To improve generalization and reduce overfitting, we apply the following augmentations:

* Randomly flipping the images horizontally  
* Randomly rotating and slightly translating them  
* Randomly changing brightness, contrast, saturation, and hue

## Training Setup

* **Loss function:** Cross-entropy with class weighting to address imbalance  
* **Optimizer:** Adam  
* **Fine-tuned layers:** Last two residual stages (`layer3`, `layer4`) and the fully connected layer (`fc`)  
* **Number of epochs:** 10  
* **Batch size:** 32  

## Results

The model is trained on the top 3 painters and evaluated on a validation set. Data augmentation and fine-tuning help improve generalization despite the small dataset.


In [None]:
import os
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from collections import Counter

import re

import unicodedata


image_dir = "./top3_painters" #directory where the images are located


def normalize_artist(name):
    """Return an ASCII-only version of an artist name.

    The name goes through three steps:

    1. Mojibake repair: if the text is UTF-8 that was mis-decoded as Latin-1
       (e.g. "DÃ¼rer"), re-encode it as Latin-1 and decode it as UTF-8.
    2. NFKD normalisation, which splits accented characters into a base
       character plus combining marks (ü -> u + diaeresis).
    3. Dropping everything that is not ASCII, which removes the combining marks.

    Args:
        name: The artist name as a ``str``.

    Returns:
        The name reduced to plain ASCII characters.
    """
    # Step 1 — Fix mojibake (latin1 -> utf8)
    try:
        name = name.encode("latin1").decode("utf8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not mojibake: either the text contains characters outside Latin-1,
        # or its Latin-1 bytes are not valid UTF-8. Keep the name unchanged.
        # Only these two errors are expected here; anything else (including
        # KeyboardInterrupt) is allowed to propagate.
        pass

    # Step 2 — Normalize accented unicode (ü -> u, é -> e, etc.)
    name = unicodedata.normalize("NFKD", name)

    # Step 3 — Force everything to pure ASCII
    return name.encode("ascii", "ignore").decode("ascii")
    
import os, re

def extract_artist(filename):
    """Derive the artist label from an image file name.

    The file extension is removed, then a trailing ``_<digits>`` suffix is
    stripped, and the remainder is passed through ``normalize_artist``.

    Args:
        filename: Name of the image file.

    Returns:
        The value returned by ``normalize_artist`` for the stripped name.
    """
    stem, _extension = os.path.splitext(filename)
    # Remove the numeric index at the end of the stem, if there is one.
    artist = re.sub(r'_\d+$', '', stem)
    return normalize_artist(artist)

# os.listdir() returns entries in arbitrary order. Sorting fixes the order of
# `files`, so the seeded train/validation split performed later picks the same
# images on every machine and every run.
files = sorted(os.listdir(image_dir))

# One artist label per file, aligned index-by-index with `files`.
labels = [extract_artist(file) for file in files]

# Number of images per artist (used later to weight the loss).
class_counts = Counter(labels)

# Alphabetical class order defines the class indices below.
unique_labels = sorted(set(labels))

print(unique_labels)
print(class_counts)

# Create mappings between artist names and integer class indices
label2idx = {label: i for i, label in enumerate(unique_labels)}
idx2label = {i: label for label, i in label2idx.items()}


# Values shared by the training and validation pipelines.
# NOTE(review): the mean/std look like the ImageNet statistics commonly used
# with torchvision's pretrained models — confirm.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentations, then tensor conversion and
# per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
val_transform = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])


#To train the model with PyTorch we need to create a dataset class
class PaintingDataset(Dataset):
    """Dataset yielding ``(transformed image, class index)`` pairs.

    Image files are read from the module-level ``image_dir`` directory and
    label names are converted to integers through the module-level
    ``label2idx`` mapping.
    """

    def __init__(self, files, labels, transform):
        """Store the sample list.

        Args:
            files: File names relative to ``image_dir``.
            labels: Label names, one per entry of ``files`` (same order).
            transform: Callable applied to each RGB PIL image.
        """
        self.files = files
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load, convert and transform the sample at position ``idx``.

        Returns:
            A tuple ``(img, label)`` where ``img`` is the output of
            ``self.transform`` and ``label`` is the integer class index.
        """
        path = os.path.join(image_dir, self.files[idx])

        # Open the file in a context manager so the underlying handle is
        # released even when decoding fails. convert() returns a new image,
        # so the result stays usable after the file is closed.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)

        label = label2idx[self.labels[idx]]

        return img, label

['Edgar_Degas', 'Pablo_Picasso', 'Vincent_van_Gogh']
Counter({'Vincent_van_Gogh': 877, 'Edgar_Degas': 702, 'Pablo_Picasso': 439})


In [None]:
from sklearn.model_selection import train_test_split
from torchvision.models import resnet18, ResNet18_Weights

# Seed PyTorch's global RNG so the initialisation of the new fc layer and the
# shuffling order of the training loader are repeatable from run to run.
torch.manual_seed(42)

#Split the dataset into train and val
# The split is stratified on the artist label: the classes are imbalanced, and
# a purely random split can give the validation set a different class mix than
# the training set.
file_labels = [extract_artist(f) for f in files]
train_files, val_files, train_labels, val_labels = train_test_split(
    files, file_labels, test_size=0.2, random_state=42, stratify=file_labels
)

#Load the dataset and apply the transformations (standardisation and augmentation)
train_dataset = PaintingDataset(train_files, train_labels, train_transform)
val_dataset = PaintingDataset(val_files, val_labels, val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)


# Pretrained ResNet-18
model = resnet18(weights=ResNet18_Weights.DEFAULT)


num_classes = len(unique_labels)  # number of painters

# Replace the final fully connected layer
model.fc = nn.Linear(model.fc.in_features, num_classes)

#If the cuda gpu is available use it (can be done on Kaggle)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

#since the data is unbalanced we rescale the weights
# Each class weight is the inverse of its image count, in the same order as
# unique_labels (which is the order of the class indices).
weights_tensor = torch.tensor([1.0 / class_counts[label] for label in unique_labels], dtype=torch.float)
weights_tensor = weights_tensor.to(device)


# Freeze every parameter first; selected layers are unfrozen below.
for param in model.parameters():
    param.requires_grad = False

# NOTE(review): this puts every BatchNorm2d layer in training mode, but the
# training loop calls model.train() / model.eval() on the whole model each
# epoch, which overrides the mode set here. If the intent was to keep the
# BatchNorm statistics of the frozen layers fixed, .eval() would have to be
# re-applied after each model.train() call — confirm.
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.train()

# Unfreeze the last two residual stages (layer3, layer4) + fc layer
for name, p in model.named_parameters():
    if "layer3" in name or "layer4" in name or "fc" in name:
        p.requires_grad = True

# Optimizer only for trainable params
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=3e-4)

# Loss function (class-weighted cross-entropy)
criterion = nn.CrossEntropyLoss(weight=weights_tensor)


In [None]:
# Train for 10 epochs and keep the weights with the best validation accuracy.
# tqdm draws a progress bar over each pass through a data loader.

from tqdm import tqdm

num_epochs = 10
best_val_acc = 0.0

for epoch in range(num_epochs):
    # ---------- Training ----------
    model.train()
    running_loss = 0.0

    train_iter = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", leave=False)
    for imgs, lbls in train_iter:
        imgs = imgs.to(device)
        lbls = lbls.to(device)

        # Standard optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, lbls)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once; it feeds both the running total
        # (weighted by batch size) and the progress-bar postfix.
        batch_loss = loss.item()
        running_loss += batch_loss * imgs.shape[0]
        train_iter.set_postfix(loss=batch_loss)

    # Average loss per training sample for this epoch.
    epoch_loss = running_loss / len(train_dataset)

    # ---------- Validation ----------
    model.eval()
    val_correct = 0
    val_total = 0
    val_iter = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", leave=False)
    with torch.no_grad():
        for imgs, lbls in val_iter:
            imgs = imgs.to(device)
            lbls = lbls.to(device)
            outputs = model(imgs)
            # Predicted class = index of the largest logit in each row.
            _, preds = outputs.max(dim=1)
            val_correct += preds.eq(lbls).sum().item()
            val_total += lbls.shape[0]

    val_acc = val_correct / val_total
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Val Accuracy: {val_acc:.4f}")

    # Save a checkpoint whenever validation accuracy improves on the best so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_painter_top3_resnet18.pth")

Epoch 1/10 [Train]:   0%|          | 0/51 [00:00<?, ?it/s]