In [1]:
##### IMPORTING DEPENDENCIES #####
# system tools and parse
import os 
import argparse
import warnings
warnings.filterwarnings("ignore")
# data tools
import pandas as pd
# tf tools
import tensorflow as tf
# image processing
from tensorflow.keras.preprocessing.image import (ImageDataGenerator)
# pretrained base models (InceptionV3, VGG16)
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.vgg16 import VGG16
# layers
from tensorflow.keras.layers import (Dense, Flatten, Dropout, GlobalAveragePooling2D, BatchNormalization)
# generic model object
from tensorflow.keras.models import Model
# optimizers
from tensorflow.keras.optimizers import Adam, SGD
#scikit-learn
from sklearn.metrics import classification_report
import sklearn.model_selection as sk
# for plotting
import numpy as np
import matplotlib.pyplot as plt


In [2]:
def import_and_preprocess_data():
    """Load the Pokemon label table and attach each row's image path.

    Reads ``pokemon.csv`` and the ``images`` directory from the current
    working directory. Rows are sorted by ``Name`` and each row is paired with
    the image whose file name (without extension) equals that name, compared
    case-insensitively.

    Returns:
        pandas.DataFrame: the CSV contents sorted by ``Name`` with an added
        ``image_path`` column holding the path of the matching image.

    Raises:
        ValueError: if one or more names have no matching file in ``images``.
    """
    base_dir = os.getcwd()
    image_dir = os.path.join(base_dir, "images")

    # Importing labels
    df = pd.read_csv(os.path.join(base_dir, "pokemon.csv"))
    df = df.sort_values("Name")

    # os.listdir() returns entries in arbitrary order, and even a sorted
    # listing does not line up with sorted names ("porygon-z.png" sorts before
    # "porygon.png" while "porygon" sorts before "porygon-z"). Pair rows and
    # files by name rather than by position so labels cannot drift.
    path_by_stem = {}
    for file_name in sorted(os.listdir(image_dir)):
        stem = os.path.splitext(file_name)[0].lower()
        path_by_stem[stem] = os.path.join(image_dir, file_name)

    df['image_path'] = df["Name"].astype(str).str.lower().map(path_by_stem)

    # Fail loudly instead of silently training on rows without an image.
    missing = df.loc[df['image_path'].isna(), "Name"].tolist()
    if missing:
        raise ValueError(
            "No image file found for {} name(s), e.g. {}".format(len(missing), missing[:5])
        )

    return df

In [3]:
# Build the labelled dataframe (CSV rows plus an 'image_path' column).
df = import_and_preprocess_data()

In [4]:
# Hold out 30% of the rows as the test set; the fixed random_state keeps the
# split identical between runs.
train, test = sk.train_test_split(df,
                                  test_size=0.30,
                                  random_state=42)

In [9]:
def setup_generators():
    """Create the Keras image generators for the training and test data.

    Returns:
        tuple: ``(train_generator, test_generator)``. The training generator
        is configured with a 1/255 rescale, horizontal flipping and a 0.2
        validation split; the test generator only rescales by 1/255.
    """
    pixel_scale = 1./255

    # Generator shared by the training and validation subsets.
    training_options = {
        "horizontal_flip": True,
        "rescale": pixel_scale,
        "validation_split": 0.2,
    }
    train_generator = ImageDataGenerator(**training_options)

    # Held-out data is only rescaled.
    test_generator = ImageDataGenerator(rescale=pixel_scale)

    return train_generator, test_generator

# Instantiate both generators at module level so later cells can reuse them.
train_generator, test_generator = setup_generators()

In [10]:
def setup_data(train_generator, test_generator, batch_size=32):
    """Create the training, validation and test image iterators.

    Reads the module-level ``train`` and ``test`` dataframes, which must
    provide an ``image_path`` column (file to load) and a ``Type1`` column
    (class label).

    Args:
        train_generator: generator used for both the ``training`` and the
            ``validation`` subset, so it has to be configured with a
            validation split.
        test_generator: generator used for the held-out test dataframe.
        batch_size: number of images per batch for all three iterators.
            Defaults to 32. A batch size of 1 is a poor fit for a model that
            contains ``BatchNormalization``: in training mode the statistics
            are computed over a single sample, which normalises every feature
            to zero and hides the input from the layers that follow.

    Returns:
        tuple: ``(train_ds, val_ds, test_ds)``.
    """
    # Options common to all three iterators.
    shared_options = dict(
        x_col='image_path',
        y_col='Type1',
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=batch_size,
    )

    # Training and validation are two subsets of the same dataframe.
    # NOTE(review): because they share train_generator, any augmentation
    # configured on it is applied to the validation images as well.
    train_ds = train_generator.flow_from_dataframe(
        dataframe=train,
        subset='training',
        shuffle=True,
        seed=42,
        **shared_options
    )

    val_ds = train_generator.flow_from_dataframe(
        dataframe=train,
        subset='validation',
        shuffle=True,
        seed=42,
        **shared_options
    )

    # The test iterator must not shuffle: predictions are later compared
    # position by position with test_ds.classes.
    test_ds = test_generator.flow_from_dataframe(
        dataframe=test,
        shuffle=False,
        **shared_options
    )

    return train_ds, val_ds, test_ds

# Create the three iterators used for training, validation and testing.
train_ds, val_ds, test_ds = setup_data(train_generator, test_generator)

Found 453 validated image filenames belonging to 18 classes.
Found 113 validated image filenames belonging to 18 classes.
Found 243 validated image filenames belonging to 18 classes.


In [11]:
def model_setup(num_classes=18):
    """Build and compile a VGG16-based image classifier.

    A VGG16 network with ImageNet weights and no top classifier is used as a
    frozen feature extractor. A small trainable head (batch normalisation, two
    ReLU dense layers and a softmax output) is stacked on top, and the model
    is compiled with SGD on an exponentially decaying learning rate.

    Args:
        num_classes: number of units in the softmax output layer. Defaults to
            18, the value that was previously hard-coded.

    Returns:
        The compiled Keras ``Model``. Its summary is printed as a side effect.
    """
    tf.keras.backend.clear_session()

    # load model without classifier layers
    base_model = VGG16(include_top=False,
                       pooling="avg",
                       input_shape=(224, 224, 3),
                       weights='imagenet')

    # mark loaded layers as not trainable
    for layer in base_model.layers:
        layer.trainable = False

    # With pooling="avg" the base output is already one vector per image, so
    # Flatten leaves the shape unchanged; it is kept so the layer stack stays
    # the same as before.
    x = Flatten()(base_model.layers[-1].output)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.inputs, outputs=outputs)

    # compile
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.01,
        decay_steps=10000,
        decay_rate=0.9)

    sgd = SGD(learning_rate=lr_schedule)

    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # summary() prints the table itself and returns None; wrapping it in
    # print() added a stray "None" line to the output.
    model.summary()

    return model

# Build and compile the classifier (also prints its summary).
model = model_setup()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [50]:
def train_model(model, train_ds, val_ds, epochs=10):
    """Train ``model`` on ``train_ds`` while validating on ``val_ds``.

    Args:
        model: compiled Keras model.
        train_ds: iterator yielding training batches.
        val_ds: iterator yielding validation batches.
        epochs: number of passes over the training data. Defaults to 10.

    Returns:
        Whatever ``model.fit`` returns (for a Keras model, the ``History``
        object holding the per-epoch metrics).
    """
    # fit() accepts generators directly; fit_generator() is deprecated and no
    # longer exists in recent Keras releases.
    history = model.fit(train_ds,
                        validation_data=val_ds,
                        epochs=epochs)

    return history

# Run training and keep the returned history for later inspection.
history = train_model(model, train_ds, val_ds)

Epoch 1/10

KeyboardInterrupt: 

In [None]:
##### PLOTTING FUNCTION #####
def plot_history(H, epochs):
    """Plot the loss and accuracy curves and save them to ``out/cnn_pokemon.png``.

    Args:
        H: object whose ``history`` mapping holds the per-epoch lists
            ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``.
        epochs: number of epochs to put on the x-axis; it has to equal the
            length of those lists.

    The figure is written below the current working directory; the ``out``
    directory is created when it does not exist yet.
    """
    # Matplotlib 3.6 renamed the seaborn styles to "seaborn-v0_8-*" and later
    # releases dropped the old names, so use whichever one is installed.
    for style_name in ("seaborn-v0_8-colorblind", "seaborn-colorblind"):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    epoch_axis = np.arange(0, epochs)

    plt.figure(figsize=(12,6))
    plt.subplot(1,2,1)
    plt.plot(epoch_axis, H.history["loss"], label="train_loss")
    plt.plot(epoch_axis, H.history["val_loss"], label="val_loss", linestyle=":")
    plt.title("Loss curve")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.tight_layout()
    plt.legend()

    plt.subplot(1,2,2)
    plt.plot(epoch_axis, H.history["accuracy"], label="train_acc")
    plt.plot(epoch_axis, H.history["val_accuracy"], label="val_acc", linestyle=":")
    plt.title("Accuracy curve")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.tight_layout()
    plt.legend()

    # savefig() does not create missing directories.
    out_dir = os.path.join(os.getcwd(), "out")
    os.makedirs(out_dir, exist_ok=True)
    plt.savefig(os.path.join(out_dir, "cnn_pokemon.png"))

def make_predictions(model, test_ds):
    """Predict a class index for every sample of ``test_ds``.

    Args:
        model: trained model exposing ``predict``.
        test_ds: test iterator exposing ``classes`` (the true class indices)
            and ``__len__`` (the number of batches).
            NOTE(review): the predictions only line up with ``classes`` when
            the iterator yields samples in that same order, i.e. it was
            created without shuffling -- confirm at the call site.

    Returns:
        tuple: ``(y_test, y_pred)`` with the true class indices and the
        predicted class indices (arg-max over the model's class scores).
    """
    y_test = test_ds.classes

    # predict() accepts generators directly; predict_generator() is deprecated
    # and no longer exists in recent Keras releases.
    class_scores = model.predict(test_ds, steps=len(test_ds))

    y_pred = np.argmax(class_scores, axis=1)

    return y_test, y_pred

def print_report(y_test, y_pred, test_ds):
    """Build, save and print the classification report.

    Args:
        y_test: true class indices.
        y_pred: predicted class indices.
        test_ds: iterator whose ``class_indices`` mapping (class name ->
            integer index) supplies the row names of the report.

    The report is written to ``out/classification_report.txt`` below the
    current working directory (the directory is created if needed) and is
    also printed.
    """
    # Order the class names by their integer index so that row i of the
    # report describes class i.
    # Assumes class_indices maps the names onto 0..n-1 -- TODO confirm.
    class_names = sorted(test_ds.class_indices, key=test_ds.class_indices.get)

    # Passing labels explicitly keeps the report working when a class occurs
    # in neither y_test nor y_pred; without it scikit-learn raises because the
    # number of observed labels differs from the number of target names.
    report = classification_report(y_test,
                                   y_pred,
                                   labels=list(range(len(class_names))),
                                   target_names=class_names
                                   )
    # Save the report
    out_dir = os.path.join(os.getcwd(), "out")
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, "classification_report.txt"), "w") as f:
        f.write(report)
    # Print the report
    print(report)