In [2]:
!pip install caer canaro

In [3]:
import os 
import caer
import canaro
import numpy as np
import cv2 as cv
import gc

In [4]:
# Preprocessing configuration shared by the cells below.
IMG_SIZE = (80, 80)  # target image size
channels = 1  # one channel, i.e. grayscale
# Root folder of the dataset (listed below, one sub-entry per character)
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

In [5]:
# Count how many entries each character folder under char_path contains.
# Non-directory entries (stray files) are skipped: calling os.listdir on a
# regular file would raise NotADirectoryError.
char_dict = {}
for char in os.listdir(char_path):
    char_dir = os.path.join(char_path, char)
    if os.path.isdir(char_dir):
        char_dict[char] = len(os.listdir(char_dir))

# Sort in descending order of count
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict

In [6]:
# Keep the names from (at most) the first 10 entries of the sorted listing;
# element [0] of each entry is the character name.
characters = [entry[0] for _, entry in zip(range(10), char_dict)]
characters

In [7]:
# Create the training data.
# Go through every folder in 'simpsons_dataset' that is named in 'characters',
# grab all the images inside that folder and add them to the training set.
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)


In [8]:
# Number of samples in the training set
len(train)

In [10]:
import matplotlib.pyplot as plt

# Preview the first training image. matplotlib is used here rather than OpenCV
# because OpenCV does not seem to display properly in Jupyter notebooks.
fig, ax = plt.subplots(figsize=(30, 30))
ax.imshow(train[0][0], cmap='gray')
plt.show()

In [11]:
# Separate the training set into the feature set and the labels, and reshape it to a 4-dimensional tensor
featureset, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

In [12]:
from tensorflow.keras.utils import to_categorical
# Normalize the feature set to be in a range of 0 to 1
featureset = caer.normalize(featureset)
# One-hot encode the labels, with one class per entry in `characters`
labels = to_categorical(labels, len(characters))

In [14]:
# Split features and labels into training and validation parts
# (val_ratio=0.2: presumably 20% of the samples go to validation — confirm against caer docs)
x_train, x_val, y_train, y_val = caer.train_val_split(featureset, labels, val_ratio=0.2)

In [15]:
# Free memory: these intermediates are not used again after the split
del train, featureset, labels
gc.collect()

In [18]:
# Training hyperparameters: samples per batch and number of epochs
BATCH_SIZE, EPOCHS = 32, 10

In [19]:
# Image data generator: feeds the training split to the model in batches of BATCH_SIZE
datagen = canaro.generators.imageDataGenerator()
train_gen = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)

In [20]:
# Creating the model. The author already created a model for this dataset.
# The labels are one-hot encoded over len(characters) mutually exclusive
# classes, so the matching loss is categorical cross-entropy. Binary
# cross-entropy scores every output unit as an independent yes/no problem and
# yields misleading loss/accuracy values for single-label multi-class targets.
model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE, channels=channels, output_dim=len(characters),
                                         loss='categorical_crossentropy', decay=1e-6, learning_rate=0.001,
                                         momentum=0.9, nesterov=True)

In [21]:
# Print a summary of the model
model.summary()

In [22]:
from tensorflow.keras.callbacks import LearningRateScheduler
# Create a callbacks list: the learning rate is scheduled with canaro's lr_schedule function
callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

In [27]:
# Train on batches from the generator and validate on the held-out split.
# Both step counts use floor division by the batch size.
steps_per_epoch = len(x_train) // BATCH_SIZE
validation_steps = len(y_val) // BATCH_SIZE
train = model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
    validation_steps=validation_steps,
    callbacks=callbacks_list,
)
# Note: I had to run this cell over and over to get to an acceptable accuracy.

In [28]:
# Class names; a predicted index is looked up in this list
characters

In [42]:
test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'
# cv.imread returns None (it does not raise) when the file cannot be read,
# so fail here with a clear message instead of later inside cvtColor/imshow.
img = cv.imread(test_path)
if img is None:
    raise FileNotFoundError(f'Could not read test image: {test_path}')
# OpenCV loads images in BGR channel order while matplotlib expects RGB, and
# cmap is ignored for 3-channel images. Convert a copy for display only;
# `img` itself stays BGR because prepare() converts it with COLOR_BGR2GRAY.
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.show()

In [43]:
# Prepare our image dimensions and value range for the model
def prepare(img):
    """Turn a BGR image into an input the model can consume.

    Args:
        img: image as loaded by cv.imread (BGR channel order).

    Returns:
        The image converted to grayscale, resized to IMG_SIZE, reshaped by
        caer.reshape with 1 channel, and scaled with caer.normalize.
    """
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    img = cv.resize(img, IMG_SIZE)
    img = caer.reshape(img, IMG_SIZE, 1)
    # The training features were scaled with caer.normalize before fitting,
    # so the same scaling must be applied at inference time; otherwise the
    # model sees inputs on a different scale than it was trained on.
    img = caer.normalize(img)
    return img

In [44]:
# Run the model on the prepared test image
predictions = model.predict(prepare(img))

In [45]:
# Raw model output for the test image
predictions

In [46]:
# Look up the character name at the index of the highest value in the first prediction row
print(characters[np.argmax(predictions[0])])