## Simpsons Image Classification.

### Initial Variables

In [12]:
from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload


# The dataset to download from Kaggle
KAGGLE_DATASET = "alexattia/the-simpsons-characters-dataset"

# The directory where the dataset is extracted
KAGGLE_UNZIP_DATASET_DIR = "/content/simpsons_data"

# The Google Drive Folder ID to read / write files
GOOGLE_DRIVE_FOLDER_ID = "1GZ0NBMKvCcNAvPdW50j6OwcSasaoK8A1"

# The Image Size (width,height)
IMG_WIDTH = 64
IMG_HEIGHT = 64

# Request permissions to access (read/write) the Google Drive Folder ID
auth.authenticate_user()
drive_service = build('drive', 'v3')

print(f"Successful initialization: Dataset: {KAGGLE_DATASET} - Google Drive Id: {GOOGLE_DRIVE_FOLDER_ID}")

Successful initialization: Dataset: alexattia/the-simpsons-characters-dataset -


### 0. Download Dataset.

In [13]:
import os
from google.colab import userdata


# Getting Kaggle credentials and setting in environment
os.environ['KAGGLE_USERNAME'] = userdata.get('KAGGLE_USERNAME')
os.environ['KAGGLE_KEY'] = userdata.get('KAGGLE_KEY')

# Download and unzip dataset from Kaggle
!kaggle datasets download -d {KAGGLE_DATASET}
!unzip -q the-simpsons-characters-dataset.zip -d {KAGGLE_UNZIP_DATASET_DIR}

print(f"Downloaded {KAGGLE_DATASET} in {KAGGLE_UNZIP_DATASET_DIR}")

Dataset URL: https://www.kaggle.com/datasets/alexattia/the-simpsons-characters-dataset
License(s): CC-BY-NC-SA-4.0
Downloading the-simpsons-characters-dataset.zip to /content
100% 1.07G/1.08G [00:12<00:00, 127MB/s]
100% 1.08G/1.08G [00:12<00:00, 93.8MB/s]
replace /content/simpsons_data/annotation.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
Downloaded alexattia/the-simpsons-characters-dataset in /content/simpsons_data


### 1. Prepare Dataset.

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Sets the source dataset directory and the image size to be resize
# Shuffle is simply "shuffling" or mixing up the order of your data.
DATA_DIR = '/content/simpsons_data/simpsons_dataset'
IMG_WIDTH = 64
IMG_HEIGHT = 64

def load_simpsons_dataset_with_labels(directory, img_width, img_height):
    images = []
    labels = []
    class_names = sorted(os.listdir(directory))
    class_map = {name: i for i, name in enumerate(class_names)}

    # Browse folders
    for class_name in class_names:
        class_dir = os.path.join(directory, class_name)
        if not os.path.isdir(class_dir):
            continue

        class_idx = class_map[class_name]

        # Read images from each folder
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            try:
                # Read image with OpenCV
                img = cv2.imread(img_path)
                if img is not None:
                    # Convert BGR (OpenCV) to RGB
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    # Resize (CNN needs fixed size)
                    img = cv2.resize(img, (img_width, img_height))

                    images.append(img)
                    labels.append(class_idx)
            except Exception as e:
                print(f"Error cargando {img_path}: {e}")

    # Convert to numpy arrays
    X = np.array(images)
    y = np.array(labels)

    # Mix data (Shuffle)
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    X = X[indices]
    y = y[indices]

    return X, y, class_names

# Load the full dataset with labels
print("Loading datasets with labels may take a while")

X_full, y_full, class_names = load_simpsons_dataset_with_labels(DATA_DIR, IMG_WIDTH, IMG_HEIGHT)

print(f"# of samples: {len(X_full)}")
print(f"# of clases: {len(class_names)}: {class_names}")



Loading datasets with labels may take a while
Detected 43 classes.
# of samples: 20933
# of clases: 43: ['abraham_grampa_simpson', 'agnes_skinner', 'apu_nahasapeemapetilon', 'barney_gumble', 'bart_simpson', 'carl_carlson', 'charles_montgomery_burns', 'chief_wiggum', 'cletus_spuckler', 'comic_book_guy', 'disco_stu', 'edna_krabappel', 'fat_tony', 'gil', 'groundskeeper_willie', 'homer_simpson', 'kent_brockman', 'krusty_the_clown', 'lenny_leonard', 'lionel_hutz', 'lisa_simpson', 'maggie_simpson', 'marge_simpson', 'martin_prince', 'mayor_quimby', 'milhouse_van_houten', 'miss_hoover', 'moe_szyslak', 'ned_flanders', 'nelson_muntz', 'otto_mann', 'patty_bouvier', 'principal_skinner', 'professor_john_frink', 'rainier_wolfcastle', 'ralph_wiggum', 'selma_bouvier', 'sideshow_bob', 'sideshow_mel', 'simpsons_dataset', 'snake_jailbird', 'troy_mcclure', 'waylon_smithers']
