In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
pip install caer

In [2]:
pip install canaro

In [13]:
import os
import caer
import canaro
import numpy as np
import cv2 as cv
import gc

In [18]:
# Image size shared by preprocessing, the model constructor and cv.resize in prepare().
IMG_SIZE = (80,80)
# Channel count passed to caer.preprocess_from_dir and to the model constructor.
channels = 1
# Root folder of the training data; each entry inside it is listed and counted per character.
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

In [19]:
# Map every entry under char_path to the number of items it contains.
char_dict = {
    name: len(os.listdir(os.path.join(char_path, name)))
    for name in os.listdir(char_path)
}

# Order the counts from largest to smallest.
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict

In [20]:
# Take the first element of each of the first ten entries of the sorted char_dict.
characters = [entry[0] for entry in list(char_dict)[:10]]
characters

In [23]:
# Build the training data from the selected character folders under char_path,
# using the shared channel count and image size.
# NOTE(review): isShuffle=True presumably shuffles the samples and no seed is set
# in this notebook, so the ordering may differ between runs — confirm.
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)

In [24]:
# Display how many items the training set contains.
len(train)

In [26]:
import matplotlib.pyplot as plt
# Preview the first element of the first training entry using a gray colormap.
fig, ax = plt.subplots(figsize=(30, 30))
ax.imshow(train[0][0], cmap='gray')
plt.show()

In [27]:
# Separate `train` into a feature array and a label array.
# NOTE(review): the exact output shapes depend on caer.sep_train — confirm.
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

In [29]:
from tensorflow.keras.utils import to_categorical
# Normalize the feature set in the range of (0,1)
# NOTE: featureSet is rebound to its own normalized version, so re-running this
# cell on its own applies the normalization a second time.
featureSet = caer.normalize(featureSet)
# One-hot encode the labels with one column per entry in `characters`.
labels = to_categorical(labels, len(characters))

In [30]:
# Split features and labels into training and validation parts with val_ratio=.2.
# NOTE(review): presumably 20% of the samples go to validation — confirm against caer.
x_train,x_val,y_train,y_val = caer.train_val_split(featureSet,labels, val_ratio=.2)

In [31]:
# Only the split arrays are used from here on; drop the originals and run a GC pass.
del train, featureSet, labels
gc.collect()

In [33]:
# Batch size for the training generator; also used to derive the step counts passed to model.fit.
BATCH_SIZE = 32
# Number of epochs passed to model.fit.
EPOCHS = 10

In [34]:
# Image data generator
datagen = canaro.generators.imageDataGenerator()
train_gen = datagen.flow(x_train, y_train, batch_size = BATCH_SIZE)

In [37]:
# Creating a model
model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE, channels=channels, output_dim=len(characters),loss = 'binary_crossentropy', 
                                         decay=1e-6, learning_rate = 0.001, momentum = 0.9, nesterov = True)

In [38]:
# Print the layer-by-layer summary of the model.
model.summary()

In [39]:
from tensorflow.keras.callbacks import LearningRateScheduler
# Adjust the learning rate each epoch using the schedule function shipped with canaro.
# NOTE(review): the actual schedule values live in canaro.lr_schedule — not visible here.
callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

In [40]:
# Fit the model on batches drawn from train_gen for EPOCHS epochs, validating on
# the held-out arrays, and keep the object returned by fit() in `training`.
# Both step counts use integer division by BATCH_SIZE, so a final partial batch
# is not counted in either.
# NOTE(review): validation_data is a pair of arrays, not a generator; check that
# validation_steps behaves as intended for array input in the installed Keras version.
training = model.fit(train_gen,
                    steps_per_epoch=len(x_train)//BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data = (x_val,y_val),
                    validation_steps = len(y_val)//BATCH_SIZE,
                    callbacks = callbacks_list)

In [41]:
# Show the class names again; the prediction cell below indexes this list by the predicted class index.
characters

In [60]:
test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'

# cv.imread does not raise on a missing or unreadable file; it returns None,
# which would only fail later with a confusing error inside cvtColor.
img = cv.imread(test_path)
if img is None:
    raise FileNotFoundError('Could not read image: ' + test_path)

# OpenCV loads colour images in BGR order while matplotlib expects RGB, so
# convert a copy for display only. `img` itself stays BGR because prepare()
# converts from BGR to grayscale. (cmap is ignored for 3-channel data, so it is
# not passed.)
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.show()

In [61]:
def prepare(img):
    """Turn a BGR image into an input array for the trained model.

    The image is converted to grayscale, resized to IMG_SIZE, reshaped with
    caer.reshape using one channel, and finally passed through caer.normalize.
    The normalization step matches what was applied to the training features
    before fitting; without it the model would receive inputs on a different
    scale than it was trained on.

    Args:
        img: Image in BGR channel order, as returned by cv.imread.

    Returns:
        The reshaped, normalized array to hand to model.predict.
    """
    img = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
    img = cv.resize(img, IMG_SIZE)
    # One channel, because the image was just converted to grayscale.
    img = caer.reshape(img, IMG_SIZE, 1)
    # Apply the same normalization the training feature set went through.
    img = caer.normalize(img)
    return img

In [62]:
# Run the preprocessed test image through the trained model.
predictions = model.predict(prepare(img))

In [63]:
# Look up the name at the index of the highest score in the first prediction row.
print(characters[np.argmax(predictions[0])])