In [1]:
import os
import random
from sklearn.model_selection import train_test_split



In [2]:
# Label index -> character folder name. The integer keys are the class ids
# used as training targets, so every later cell must agree with this mapping.
map_characters = {0: 'abraham_grampa_simpson', 
                  1: 'apu_nahasapeemapetilon', 2: 'bart_simpson', 
                  3: 'charles_montgomery_burns', 4: 'chief_wiggum', 
                  5: 'comic_book_guy', 6: 'edna_krabappel', 7: 'homer_simpson', 
                  8: 'kent_brockman', 9: 'krusty_the_clown', 10: 'lisa_simpson', 
                  11: 'marge_simpson', 12: 'milhouse_van_houten', 
                  13: 'moe_szyslak', 14: 'ned_flanders', 15: 'nelson_muntz', 
                  16: 'principal_skinner', 17: 'sideshow_bob'}

data_folder = "/kaggle/input/the-simpsons-characters-dataset/simpsons_dataset"
# Materialise the names in label order (a real list, not a dict view) so the
# result can be indexed and iterated repeatedly.
class_names = [map_characters[class_idx] for class_idx in sorted(map_characters)]
num_classes = len(class_names)

# Create a list to hold the paths and labels of all images
all_image_paths = []
all_labels = []

# Take the label straight from the mapping's key instead of the enumeration
# position, so the label can never drift away from map_characters.
for class_idx, class_name in sorted(map_characters.items()):
    class_path = os.path.join(data_folder, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the input to
    # the seeded split below identical on every machine/filesystem.
    class_image_paths = [
        os.path.join(class_path, img_name)
        for img_name in sorted(os.listdir(class_path))
    ]
    all_image_paths.extend(class_image_paths)
    all_labels.extend([class_idx] * len(class_image_paths))

# Perform the stratified train/dev split (80/20, class proportions preserved)
train_image_paths, dev_image_paths, train_labels, dev_labels = train_test_split(
    all_image_paths, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)


In [3]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.io import read_image
from torchvision.transforms.functional import InterpolationMode
from tqdm import tqdm
from PIL import Image

In [4]:
# Define your dataset class
class SimpsonsDataset(Dataset):
    """Dataset that lazily loads images from disk and pairs them with labels.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned index-for-index with ``image_paths``.
        transform: Optional callable applied to the loaded PIL image before it
            is returned.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        # Force three channels: grayscale, palette or RGBA files would otherwise
        # produce tensors that a 3-channel Normalize / RGB backbone rejects.
        # The context manager closes the file handle as soon as the pixel data
        # has been copied by convert().
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [5]:
# Mini-batch size shared by every DataLoader, and where the best checkpoint is written
batch_size = 128
model_save_path = '/kaggle/working/model.pth'

# Preprocessing applied to every image: bicubic resize to 224x224, conversion
# to a tensor, then per-channel normalisation.
# NOTE(review): the mean/std values look like the usual ImageNet statistics
# expected by the pretrained backbone - confirm.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224), interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Wrap the train and dev splits; both go through the same preprocessing
train_dataset, dev_dataset = (
    SimpsonsDataset(split_paths, split_labels, transform=transform)
    for split_paths, split_labels in (
        (train_image_paths, train_labels),
        (dev_image_paths, dev_labels),
    )
)

# Only the training loader reshuffles; the dev loader keeps a fixed order
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
dev_dataloader = DataLoader(dataset=dev_dataset, batch_size=batch_size, shuffle=False)

In [6]:
# Load the pretrained ResNet-50 model.
# Name the weights explicitly: passing a boolean to `weights` is the deprecated
# legacy form (it resolves to this same IMAGENET1K_V1 checkpoint, but emits a
# warning and may be removed in a future torchvision release).
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer so it outputs one logit per class
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, num_classes)

# Optional warm start from a previously fine-tuned checkpoint. Leave as None
# to start from the ImageNet weights; set to a path such as
# "/kaggle/input/preds/model3.pth" to resume.
resume_checkpoint_path = None
if resume_checkpoint_path is not None:
    # map_location keeps the load working on a CPU-only session even when the
    # checkpoint was written from a GPU; the model is moved to its device later.
    saved_state_dict = torch.load(resume_checkpoint_path, map_location="cpu")
    resnet.load_state_dict(saved_state_dict)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 177MB/s]


In [7]:
# Set up training
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
resnet.to(device)

# Cross-entropy over the class logits; Adam updates every model parameter
# (the whole network is fine-tuned, not just the new head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=resnet.parameters(), lr=0.001, weight_decay=1e-5)

In [8]:
# Train the model, checkpointing the epoch with the best dev accuracy
num_epochs = 70
best_val_acc = 0
best_state = None

for epoch in range(num_epochs):
    resnet.train()
    running_loss = 0.0
    train_total = 0
    for inputs, labels in tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = resnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # The criterion yields a per-batch mean (default reduction of
        # nn.CrossEntropyLoss); weight it by the batch size so a short final
        # batch does not count as much as a full one.
        running_loss += loss.item() * labels.size(0)
        train_total += labels.size(0)

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss / train_total}")
    
    # Calculate validation loss and accuracy
    resnet.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dev_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * labels.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # Per-sample mean over the whole dev set
    val_loss /= total
    accuracy = correct / total
    print(f"Validation Loss: {val_loss:.4f} - Accuracy: {accuracy:.4f}")
    if accuracy > best_val_acc:
        torch.save(resnet.state_dict(), model_save_path)
        # state_dict() returns references to the live parameter tensors, which
        # later optimizer steps overwrite in place; clone them so best_state
        # really is a snapshot of this epoch.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in resnet.state_dict().items()
        }
        best_val_acc = accuracy

# Leave the model holding the best-scoring weights (not merely the last
# epoch's) so that any evaluation that follows uses the selected checkpoint.
if best_state is not None:
    resnet.load_state_dict(best_state)

print("Training finished!")


In [9]:
import re
import glob

def extract_name(filename):
    """Derive a character name from an image file name.

    Every run of digits is deleted first, then a trailing ``.ext`` suffix is
    dropped, and finally any leading/trailing underscores are stripped.

    Args:
        filename: File name such as ``"bart_simpson_12.jpg"``.

    Returns:
        The cleaned name, e.g. ``"bart_simpson"``.
    """
    name = filename
    # Order matters: digits go first, then the extension is cut from what is left
    for pattern in (r'\d+', r'\.[^.]+$'):
        name = re.sub(pattern, '', name)
    return name.strip('_')


test_dir = "/kaggle/input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/*.jpg"
# glob returns matches in filesystem order; sort so the test set is enumerated
# identically on every run.
image_paths = sorted(glob.glob(test_dir))

# Character name -> label index. Built before the loop so it also serves as
# the constant-time "did we train on this character?" check.
map_characters_rev = {value: key for key, value in map_characters.items()}

test_path_list = []
actual_labels = []

for path in image_paths: 
    filename = os.path.basename(path) # Getting only the file name, whatever the path separator
    char_name = extract_name(filename) # Extracting the character name from the file name 
    if char_name in map_characters_rev: # Only storing the characters on which we trained our model
        test_path_list.append(path) 
        actual_labels.append(char_name)

# Every stored name passed the membership check above, so direct indexing is
# safe and, unlike .get(), can never slip a None label into the dataset.
actual_labels_num = [map_characters_rev[name] for name in actual_labels]

In [10]:
# Wrap the filtered test images with the shared preprocessing, and iterate
# them in a fixed order with the common batch size
test_dataset = SimpsonsDataset(
    image_paths=test_path_list,
    labels=actual_labels_num,
    transform=transform,
)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
# Calculate loss and accuracy on the held-out test set

# Fail with an actionable message instead of a ZeroDivisionError further down
# when no test image matched (e.g. wrong test-set path).
if len(test_dataloader) == 0:
    raise RuntimeError("Test set is empty: no test images matched a known character name.")

resnet.eval()
val_loss = 0.0  # name kept for compatibility; holds the *test* loss in this cell
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = resnet(inputs)
        loss = criterion(outputs, labels)
        # The criterion yields a per-batch mean (default reduction of
        # nn.CrossEntropyLoss); weight it by the batch size so a short final
        # batch does not skew the dataset-level average.
        val_loss += loss.item() * labels.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

# Per-sample mean over the whole test set
val_loss /= total
accuracy = correct / total
print(f"Test Loss: {val_loss:.4f} - Accuracy: {accuracy:.4f}")