1. Imports
2. Data preprocessing
3. EDA
4. Data Preparation
5. Model
6. Training
7. Evaluation

# 1. Imports

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 2. Data Preprocessing

In [None]:
# Constants
# Target size passed to the image generators; every image is resized to it on load.
IMG_SIZE = (160, 160)

# Kaggle dataset locations. The trailing slash matters: the EDA cell builds a
# path with plain string concatenation (DIR_TRAIN + character).
DIR_TRAIN = '/kaggle/input/the-simpsons-characters-dataset/simpsons_dataset/simpsons_dataset/'
DIR_TEST = '/kaggle/input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/'

In [None]:
# Select Train Data
# Count the entries inside every folder of the training directory, then keep
# the names of the eight folders that hold the most entries.
entry_counts = {
    folder: len(os.listdir(os.path.join(DIR_TRAIN, folder)))
    for folder in os.listdir(DIR_TRAIN)
}
# Stable descending sort: folders with equal counts keep os.listdir order.
characters_all = sorted(entry_counts, key=entry_counts.get, reverse=True)[:8]
characters_all

In [None]:
# Select Test Data
# A test file is assigned to a character when its file name starts with that
# character's name; files matching none of the selected characters are skipped.
# NOTE(review): this is a prefix match, so a file would be listed twice if one
# selected name were a prefix of another.
dataframe_test = pd.DataFrame(
    [
        [os.path.join(DIR_TEST, file), char]
        for file in os.listdir(DIR_TEST)
        for char in characters_all
        if file.startswith(char)
    ],
    columns=['name', 'label'],
)
# Number of test images found per selected character.
dataframe_test.groupby('label').count()

# 3. EDA

In [None]:
# Display Data
# Grid with one row per selected character and five random training images per row.
# np.random.choice draws with replacement, so a row may repeat an image.
# Loop variable names are left unchanged because notebook cells share one namespace.
n_rows = len(characters_all)
plt.figure(figsize=(10, 16), dpi=320)
for i, character in enumerate(characters_all):
    images = np.random.choice(os.listdir(DIR_TRAIN + character), 5)
    for j, image_name in enumerate(images):
        # Subplot positions are 1-based and filled row by row.
        k = i * 5 + j + 1
        plt.subplot(n_rows, 5, k)
        plt.tick_params(
            which='both',
            bottom=False, left=False, top=False,
            labelbottom=False, labelleft=False,
        )
        img = plt.imread(os.path.join(DIR_TRAIN, character, image_name))
        plt.xlabel(character)
        plt.imshow(img)
plt.tight_layout()

# 4. Data Preparation

In [None]:
# Create dataframe
# Two lookup tables between character names and integer ids, following the
# order of characters_all.
labels_name_int = {character: index for index, character in enumerate(characters_all)}
labels_int_name = {index: character for character, index in labels_name_int.items()}

# One record per training file. The rows are collected in a list and the frame
# is built once: DataFrame.append copied the whole frame on every call and was
# removed in pandas 2.0. The stray plt.imread call that used to sit here was
# dropped; it relied on variables leaked from the EDA cell and its result was
# never used.
records = [
    {'name': os.path.join(DIR_TRAIN, char, filename), 'label': char}
    for char in characters_all
    for filename in os.listdir(os.path.join(DIR_TRAIN, char))
]
dataframe = pd.DataFrame(records, columns=['name', 'label'])
dataframe

In [None]:
# Training generator: pixel values are multiplied by 1/255 and no augmentation
# is configured. All other generator options are left at their Keras defaults.
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Stream shuffled batches of 32 images from the file paths in `dataframe`.
# class_mode='sparse' yields integer class ids as targets.
datagen_train_flow = datagen_train.flow_from_dataframe(
    dataframe,
    x_col='name',
    y_col='label',
    target_size=IMG_SIZE,
    class_mode='sparse',
    batch_size=32,
    shuffle=True,
)


# 5. Model

In [None]:
# Model
# Feature extractor: the FaceNet network from the Kaggle input, cut at its
# 'AvgPool' layer and frozen.

from tensorflow.keras.models import load_model

# Restore the saved model file, then load the separately stored weights into it.
facenet = load_model('/kaggle/input/facenet/keras-facenet/model/facenet_keras.h5')
facenet.load_weights('/kaggle/input/facenet/keras-facenet/weights/facenet_keras_weights.h5')

# Re-wire the network so that its output is the 'AvgPool' activation.
base_model = tf.keras.Model(
    inputs=facenet.input,
    outputs=facenet.get_layer('AvgPool').output,
)
# Freeze the extractor so only layers stacked on top of it get trained.
base_model.trainable = False
base_model.summary()

In [None]:
# Classification head stacked on the frozen feature extractor.
face_model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # One softmax unit per selected character (8 with the current selection),
    # so the head follows characters_all if the selection size changes.
    tf.keras.layers.Dense(len(characters_all), activation='softmax'),
])
# The generators use class_mode='sparse' (integer targets), hence the sparse
# cross-entropy loss. `metrics` is given as a list, which is the documented
# form; newer Keras versions reject a bare string.
face_model.compile(
    optimizer='adam',
    loss='SparseCategoricalCrossentropy',
    metrics=['accuracy'],
)


# 6. Training

In [None]:
# Fit for 10 epochs on the training generator. The base network is frozen, so
# only the dense layers on top are updated. Options not listed keep their
# Keras defaults.
face_model.fit(
    datagen_train_flow,
    epochs=10,
    verbose=1,
)

# Persist the full model and, separately, its weights.
face_model.save('/kaggle/working/face_model.h5')
face_model.save_weights('/kaggle/working/face_model_weights.h5')

# 7. Evaluation

In [None]:
# Reload the model saved after training, then load the separately saved
# weights into it.
# NOTE(review): the following cells in view evaluate `face_model`, not `model_new`.
model_new = load_model(filepath='/kaggle/working/face_model.h5')
model_new.load_weights(filepath='/kaggle/working/face_model_weights.h5')

In [None]:
# Test generator: same 1/255 rescaling as for training, no augmentation.
# All other generator options are left at their Keras defaults.
datagen_test = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

datagen_test_flow = datagen_test.flow_from_dataframe(
    dataframe_test,
    x_col='name',
    y_col='label',
    target_size=IMG_SIZE,
    # Pin the label -> index mapping. With classes=None Keras infers it from
    # the labels present in the frame (alphanumeric order), so a selected
    # character missing from the test set would shift every later index away
    # from the mapping the model was trained with.
    classes=sorted(characters_all),
    class_mode='sparse',
    batch_size=32,
    # Keep the frame order so predictions line up with dataframe_test rows.
    shuffle=False,
)

# Predicted class id per test image: index of the largest softmax output.
prediction = np.argmax(face_model.predict(datagen_test_flow), axis=1)

In [None]:
# Loss and accuracy (the metric set at compile time) over the test generator.
face_model.evaluate(datagen_test_flow)

In [None]:
# Translate each test row's character name into the integer id the test
# generator assigned to it, in dataframe order.
y = np.array(dataframe_test['label'])
name_to_index = datagen_test_flow.class_indices
true_label = [name_to_index[name] for name in y]
true_label

In [None]:
# Compare true and predicted class ids: raw confusion counts first, then the
# per-class precision / recall / F1 summary.
matrix = confusion_matrix(true_label, prediction)
report = classification_report(true_label, prediction)

print('Confusion Matrix')
print(matrix)
print('Classification Report')
print(report)

In [None]:
# Save the model and its weights again; these are the same two paths the
# training cell already wrote to, so the files are overwritten.
face_model.save('/kaggle/working/face_model.h5')
face_model.save_weights('/kaggle/working/face_model_weights.h5')