# Artist Recognition — Testing

In this notebook we evaluate the trained model on a small test set of **13 paintings per artist** (39 images in total, covering Edgar Degas, Vincent van Gogh, and Pablo Picasso).  
The images were manually collected and were **not part of the training dataset**.

We intentionally selected some **rare or relatively unknown artworks** for each artist to assess how well the model generalizes to new material rather than simply memorizing common masterpieces. This evaluation therefore provides a more realistic estimate of the model’s ability to recognize an artist’s style in unseen data.

The model appears to perform well overall. However, a larger and more systematically curated test set would be needed for a rigorous quantitative evaluation. Such an extension is beyond the scope of this work, which is intended primarily as a **pedagogical exercise** to learn how to use **PyTorch** and apply **transfer learning with ResNet-18** for image classification.

The test results are therefore meant to demonstrate the workflow and model behavior rather than to provide a rigorous or statistically meaningful evaluation of real-world performance.

In [4]:
import os
from PIL import Image

import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn

from collections import Counter
import re
import unicodedata



# Per-channel mean / std used to normalize the input tensors (the standard
# ImageNet statistics that torchvision's pretrained models are trained with).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Inference-time preprocessing pipeline: resize to 224x224, convert the PIL
# image to a tensor, then normalize each channel.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # same size as training
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# Directory whose file names are used below to derive the class labels.
image_dir = "./top3_painters"


def normalize_artist(name: str) -> str:
    """Return a pure-ASCII version of an artist name.

    The name goes through three steps:

    1. Best-effort mojibake repair: text that was UTF-8 but got decoded as
       Latin-1 is re-encoded as Latin-1 and decoded as UTF-8. If the name
       cannot make that round trip it is left untouched.
    2. NFKD normalization, which splits accented characters into a base
       character plus combining marks (``ü`` -> ``u`` + diaeresis).
    3. Encoding to ASCII with ``errors="ignore"``, which drops the combining
       marks and any other non-ASCII character.

    Args:
        name: Artist name, possibly containing accents or mojibake.

    Returns:
        The name reduced to ASCII characters only.
    """
    # Step 1 — Fix mojibake (latin1 -> utf8).
    # Only encoding/decoding failures mean "this name was not mojibake";
    # anything else (e.g. KeyboardInterrupt) must not be swallowed.
    try:
        name = name.encode("latin1").decode("utf8")
    except UnicodeError:
        pass

    # Step 2 — Normalize accented unicode (ü -> u, é -> e, etc.)
    name = unicodedata.normalize("NFKD", name)

    # Step 3 — Force everything to pure ASCII
    name = name.encode("ascii", "ignore").decode("ascii")

    return name

def extract_artist(filename):
    """Derive the normalized artist label from an image file name.

    The file extension is removed, then a trailing ``_<digits>`` suffix (if
    any) is stripped, and the remainder is passed through
    ``normalize_artist``.
    """
    stem, _extension = os.path.splitext(filename)
    # Drop the trailing numeric suffix, if present.
    without_index = re.sub(r'_\d+$', '', stem)
    return normalize_artist(without_index)

# Hidden entries (".DS_Store", ".ipynb_checkpoints", ...) are not paintings.
# If they were kept they would turn into bogus classes, changing num_classes
# and shifting the index -> name mapping used at prediction time.
files = sorted(f for f in os.listdir(image_dir) if not f.startswith("."))

# One label per image file, derived from the file name.
labels = [extract_artist(file) for file in files]

# Number of images available per artist.
class_counts = Counter(labels)

# Sorted so that the label <-> index assignment is deterministic.
unique_labels = sorted(set(labels))

# The model outputs integer class indices, so we need these maps to convert
# a predicted index back to the painter's name (and vice versa).
label2idx = {label: i for i, label in enumerate(unique_labels)}
idx2label = {i: label for label, i in label2idx.items()}


num_classes = len(unique_labels)  # same as when training
# Kept so that any other cell referring to `weights` keeps working; it is
# deliberately not passed to the constructor below.
weights = ResNet18_Weights.DEFAULT
# weights=None: load_state_dict() below is strict, so the checkpoint must
# supply every parameter and buffer. Initializing from the ImageNet weights
# first would only trigger a needless download (and require network access).
model = resnet18(weights=None)
# Replace the classification head so its output size matches our classes.
model.fc = nn.Linear(model.fc.in_features, num_classes)
# NOTE(review): torch.load unpickles the file; only load checkpoints you
# trust (consider weights_only=True if the installed torch supports it).
model.load_state_dict(torch.load("./best_painter_top3_resnet18.pth", map_location='cpu'))
model.eval()  # very important! puts the model in evaluation mode for inference

def preprocess_image(img_path):
    """Load an image file and convert it into a one-item input batch.

    Args:
        img_path: Path of the image file to open.

    Returns:
        The output of the module-level ``transform`` applied to the RGB
        version of the image, with a leading batch dimension of size 1.

    Raises:
        Whatever ``PIL.Image.open`` raises when the path is missing or is
        not a readable image.
    """
    # The context manager closes the underlying file handle as soon as the
    # pixel data has been copied by convert(), instead of leaving it to the
    # garbage collector.
    with Image.open(img_path) as img:
        rgb_img = img.convert("RGB")
    img_tensor = transform(rgb_img).unsqueeze(0)  # add batch dimension
    return img_tensor

def predict_painter(img_path):
    """Return the name of the painter the model predicts for one image.

    The image is preprocessed, run through the module-level ``model`` with
    gradient tracking disabled, and the index of the highest score is mapped
    back to a painter name via ``idx2label``.
    """
    batch = preprocess_image(img_path)
    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        scores = model(batch)
    # Index of the largest score along the class dimension.
    best_idx = torch.max(scores, 1).indices
    return idx2label[best_idx.item()]

In [5]:
# Test folders with painter labels: each key is the expected prediction for
# every image found in the corresponding directory.

test_folders = {
    "Edgar_Degas": "./test_set/Degas artworks",
    "Vincent_van_Gogh": "./test_set/van Gogh artworks",
    "Pablo_Picasso": "./test_set/Picasso artworks"
}

# Accuracy evaluation

results = {}
total_correct = 0
total_images = 0

for true_label, folder in test_folders.items():
    # Sorted so the evaluation order (and any "Skipping" message) is
    # reproducible across runs and platforms.
    img_files = sorted(os.listdir(folder))
    img_paths = [os.path.join(folder, f) for f in img_files]

    correct = 0
    evaluated = 0  # images that actually produced a prediction

    for img in img_paths:
        # Keep the try body minimal: only the prediction can legitimately
        # fail (unreadable file, hidden entry, sub-directory, ...).
        try:
            pred = predict_painter(img)
        except Exception as e:
            print(f"Skipping {img}: {e}")
            continue

        evaluated += 1
        if pred == true_label:
            correct += 1

    # Skipped files are excluded from the denominator; counting them would
    # silently treat every unreadable file as a misclassification.
    n = evaluated
    acc = correct / n if n > 0 else 0

    results[true_label] = {
        "correct": correct,
        "total": n,
        "accuracy": round(acc, 4)
    }

    total_correct += correct
    total_images += n

# Overall accuracy

overall_acc = total_correct / total_images if total_images > 0 else 0
results["OVERALL"] = {
    "correct": total_correct,
    "total": total_images,
    "accuracy": round(overall_acc, 4)
}


# Print results

from pprint import pprint
pprint(results)

{'Edgar_Degas': {'accuracy': 1.0, 'correct': 13, 'total': 13},
 'OVERALL': {'accuracy': 1.0, 'correct': 39, 'total': 39},
 'Pablo_Picasso': {'accuracy': 1.0, 'correct': 13, 'total': 13},
 'Vincent_van_Gogh': {'accuracy': 1.0, 'correct': 13, 'total': 13}}
