In [2]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image

In [4]:
# Name of the annotation file stored inside every split directory.
csv_filename = "_annotations.csv"

# Image directories of the annotated (type) dataset, one per split.
test_dir = "../dataset-v4/test"
train_dir = "../dataset-v4/train"
valid_dir = "../dataset-v4/valid"
dataset_dirs = [test_dir, train_dir, valid_dir]

# Each split keeps its annotation CSV next to its images, so derive the paths
# instead of repeating the literals.
test_csv, train_csv, valid_csv = (f"{split_dir}/{csv_filename}" for split_dir in dataset_dirs)
csv_dirs = [test_csv, train_csv, valid_csv]

# Root of the per-class image folders.
dataset_dir = "../PokemonData"
# "../csv/pokemon-v3.csv" used to be assigned here and was overwritten on the
# very next line; only the effective value is kept.
ref_csv = "../csv/FirstGenPokemon.csv"

# Class names that are skipped when building the annotated datasets.
neglected_pokemons = ["MrMime"]

In [49]:

# Shape of one network input image.
input_shape = (64, 64, 3)
# Number of classes = number of sub-directories of dataset_dir. Plain files in
# the root (e.g. .DS_Store) are not class folders and must not inflate the count.
output_shape = sum(
    os.path.isdir(os.path.join(dataset_dir, entry)) for entry in os.listdir(dataset_dir)
)
print(output_shape)
batch_size = 16
# How many times the batched dataset is repeated when the pipeline is built.
repeat_factor = 2


150


In [36]:
AUTOTUNE = tf.data.AUTOTUNE


class PokemonType(tf.data.Dataset):
    """Generator-backed dataset of ``(image, multi-hot type vector)`` pairs.

    ``PokemonType(dataset_dirs, ref_csv)`` does not create an instance of this
    class: ``__new__`` returns the ``tf.data.Dataset`` built by
    ``from_generator``. Every directory in ``dataset_dirs`` is expected to hold
    images plus a ``csv_filename`` annotation file with ``filename`` and
    ``class`` columns; ``ref_csv`` maps ``Name`` to ``Type1`` / ``Type2``.
    """

    @staticmethod
    def _as_frame(csv_or_frame):
        """Return a DataFrame for either an already-loaded frame or a CSV path."""
        if isinstance(csv_or_frame, pd.DataFrame):
            return csv_or_frame
        return pd.read_csv(csv_or_frame)

    @staticmethod
    def _has_type(value):
        """True when ``value`` is a real type name (not None, NaN or the string 'None')."""
        return not (value is None or pd.isna(value) or value == "None")

    @staticmethod
    def find_pokemon_type(pokemon_filename, pokemon_csv, pokemon_ref_csv):
        """Look up the types of the pokemon annotated for one image file.

        Args:
            pokemon_filename: value to match against the ``filename`` column.
            pokemon_csv: annotation CSV path, or an already-loaded DataFrame.
            pokemon_ref_csv: reference CSV path, or an already-loaded DataFrame.

        Returns:
            ``(type1, type2)`` where ``type2`` is ``None`` when the reference
            row has no second type (NaN or the string ``'None'``), or ``None``
            when the file has no annotation, its class is listed in
            ``neglected_pokemons``, or the class is missing from the reference
            table.
        """
        pokemon_df = PokemonType._as_frame(pokemon_csv)
        ref_df = PokemonType._as_frame(pokemon_ref_csv)

        annotation = pokemon_df[pokemon_df['filename'] == pokemon_filename]
        if annotation.empty:
            return None
        # Only the first annotation row of a file decides its class.
        pokemon_name = annotation.iloc[0]["class"]
        if pokemon_name in neglected_pokemons:
            return None

        ref_rows = ref_df[ref_df['Name'] == pokemon_name]
        if ref_rows.empty:
            # Unknown class: report "no label" instead of failing on iloc[0].
            return None
        ref_row = ref_rows.iloc[0]
        type1 = ref_row["Type1"]
        type2 = ref_row["Type2"] if PokemonType._has_type(ref_row["Type2"]) else None
        return type1, type2

    @staticmethod
    def one_hot_encode(pokemon_labels, all_labels):
        """Encode ``pokemon_labels`` as a count vector over ``all_labels``.

        Position ``i`` holds how many times ``all_labels[i]`` occurs in
        ``pokemon_labels`` (so 0/1 for distinct labels). Raises ``KeyError``
        for a label that is not in ``all_labels``.
        """
        label_index = {pokemon_type: index for index, pokemon_type in enumerate(all_labels)}
        encoded = np.zeros(len(all_labels))
        for pokemon_type in pokemon_labels:
            encoded[label_index[pokemon_type]] += 1
        return encoded  # e.g. (1, 0, 1, 0, 0, ...)

    @staticmethod
    def get_all_labels(pokemon_csv, ref_csv):
        """Return the sorted list of type names used by the annotated classes.

        Args:
            pokemon_csv: annotation CSV path, or an already-loaded DataFrame.
            ref_csv: reference CSV path, or an already-loaded DataFrame.

        Returns:
            Sorted list of distinct type names. Sorting makes the one-hot index
            of every type identical from run to run (set iteration order is
            not).
        """
        ref_df = PokemonType._as_frame(ref_csv)
        pokemon_df = PokemonType._as_frame(pokemon_csv)
        # Reference rows of the annotated classes; the first row wins per name.
        known = ref_df[ref_df["Name"].isin(pokemon_df["class"])].drop_duplicates("Name")

        # "dragon" is always included; presumably because the annotation file
        # used here contains no dragon-type sample -- TODO confirm.
        labels = {"dragon"}
        labels.update(type1 for type1 in known["Type1"] if not pd.isna(type1))
        labels.update(type2 for type2 in known["Type2"] if PokemonType._has_type(type2))
        return sorted(labels)

    @staticmethod
    def super_generator(dataset_dirs, ref_csv, input_shape=(256, 256, 3)):
        """Build the zero-argument generator consumed by ``from_generator``.

        The generator yields ``(image, labels)`` for every annotated image in
        ``dataset_dirs``: ``image`` is a float32 tensor scaled to [0, 1] and
        ``labels`` the vector produced by ``one_hot_encode``. The label
        vocabulary comes from the annotation file of ``dataset_dirs[0]``.
        """
        def generator():
            # Load the tables once instead of once per image.
            ref_df = pd.read_csv(ref_csv)
            all_labels = PokemonType.get_all_labels(f"{dataset_dirs[0]}/{csv_filename}", ref_df)
            target_size = (input_shape[0], input_shape[1])
            for dataset_dir in dataset_dirs:
                annotations = None  # read lazily, on the first image of the directory
                for pokemon_img in os.listdir(dataset_dir):
                    if not pokemon_img.lower().endswith((".jpg", ".png", ".jpeg")):
                        continue
                    if annotations is None:
                        annotations = pd.read_csv(f"{dataset_dir}/{csv_filename}")
                    # Resolve the label first so unlabeled images are never decoded.
                    pokemon_types = PokemonType.find_pokemon_type(pokemon_img, annotations, ref_df)
                    if pokemon_types is None:
                        continue
                    with Image.open(os.path.join(dataset_dir, pokemon_img)) as image:
                        # Force three channels (RGBA / grayscale / palette files would
                        # not match the declared TensorSpec) before resizing.
                        pixels = np.asarray(image.convert("RGB").resize(target_size), dtype=np.float32)
                    pokemon_tensor = tf.convert_to_tensor(pixels / 255.0)
                    # type2 is None for single-type pokemon; never encode it.
                    pokemon_labels = [pokemon_type for pokemon_type in pokemon_types
                                      if pokemon_type is not None]
                    yield pokemon_tensor, PokemonType.one_hot_encode(pokemon_labels, all_labels)

        return generator

    def __new__(cls, dataset_dirs, ref_csv):
        """Return the prefetching ``tf.data.Dataset`` over ``dataset_dirs``.

        The image shape is taken from the module-level ``input_shape``. The
        label length is derived from the label vocabulary instead of being
        hard-coded, which reads the first annotation CSV at construction time.
        """
        label_count = len(cls.get_all_labels(f"{dataset_dirs[0]}/{csv_filename}", ref_csv))
        return tf.data.Dataset.from_generator(
            cls.super_generator(dataset_dirs, ref_csv, input_shape),
            output_signature=(
                tf.TensorSpec(shape=input_shape, dtype=tf.float32),
                tf.TensorSpec(shape=(label_count,), dtype=tf.float32),
            )
        ).prefetch(AUTOTUNE)




In [37]:
AUTOTUNE = tf.data.AUTOTUNE


class PokedexDataset(tf.data.Dataset):
    """Generator-backed dataset of ``(image, class id)`` pairs.

    ``dataset_dir`` must contain one sub-directory per class. Calling
    ``PokedexDataset(...)`` does not create an instance of this class:
    ``__new__`` returns the ``tf.data.Dataset`` built by ``from_generator``.
    """

    @staticmethod
    def super_generator(dataset_dir, input_shape):
        """Build the zero-argument generator consumed by ``from_generator``.

        Yields ``(image, label)`` where ``image`` is a float32 RGB tensor scaled
        to [0, 1] and ``label`` is the index of the class folder in the sorted
        folder list.
        """
        def generator():
            # Sorted so a class keeps the same id on every run and machine
            # (os.listdir order is arbitrary); plain files in the root are not
            # classes and would make the inner listdir fail.
            pokemon_names = sorted(
                entry for entry in os.listdir(dataset_dir)
                if os.path.isdir(os.path.join(dataset_dir, entry))
            )
            target_size = (input_shape[0], input_shape[1])
            for pokemon_label, pokemon_name in enumerate(pokemon_names):
                pokemon_name_path = os.path.join(dataset_dir, pokemon_name)
                for pokemon in sorted(os.listdir(pokemon_name_path)):
                    if not pokemon.lower().endswith((".jpg", ".jpeg", ".png")):
                        continue
                    # The context manager closes the file handle once the pixels are copied.
                    with Image.open(os.path.join(pokemon_name_path, pokemon)) as pokemon_img:
                        # Convert before resizing: palette ("P") images are
                        # otherwise resized with nearest-neighbour sampling.
                        pixels = np.asarray(
                            pokemon_img.convert("RGB").resize(target_size), dtype=np.float32
                        )
                    yield tf.convert_to_tensor(pixels / 255.0), pokemon_label

        return generator

    def __new__(cls, dataset_dir, input_shape=(64, 64, 3)):
        """Return the prefetching ``tf.data.Dataset`` over ``dataset_dir``.

        Args:
            dataset_dir: root directory holding one folder per class.
            input_shape: shape declared for every yielded image; its first two
                entries are also the resize target.
        """
        return tf.data.Dataset.from_generator(
            cls.super_generator(dataset_dir=dataset_dir, input_shape=input_shape),
            output_signature=(
                tf.TensorSpec(shape=input_shape, dtype=tf.float32),
                tf.TensorSpec(shape=(), dtype=tf.int16)
            )
        ).prefetch(AUTOTUNE)

In [56]:
# PokedexDataset yields one class folder after another, so batching it directly
# produces batches dominated by a single class. Mix individual samples first.
# The fixed seed with reshuffle_each_iteration=False keeps every pass over the
# pipeline (counting, splitting, training) identical.
SAMPLE_SHUFFLE_BUFFER = 2048  # images held in memory for shuffling (~96 MiB at 64x64x3 float32)
dataset = (
    PokedexDataset(dataset_dir=dataset_dir, input_shape=input_shape)
    .shuffle(SAMPLE_SHUFFLE_BUFFER, seed=0, reshuffle_each_iteration=False)
    .batch(batch_size=batch_size, num_parallel_calls=AUTOTUNE)
    .repeat(repeat_factor)
)

In [57]:
# Walk the whole pipeline once. Each element is one batch (the dataset was
# batched and repeated when it was built), so this is a batch count.
dataset_size = sum(1 for _ in dataset.as_numpy_iterator())

In [58]:
print(f"Dataset size: {dataset_size}")
# The train/test split is carved out of this dataset with take()/skip(). With
# the default reshuffle_each_iteration=True every new iteration draws a fresh
# order, so the two splits would exchange batches between epochs. Freeze the
# order (and seed it) so the split stays the same.
dataset = dataset.shuffle(buffer_size=dataset_size, seed=0, reshuffle_each_iteration=False)

Dataset size: 854


In [59]:
train_fraction = 0.8
# dataset_size was counted on the batched dataset, so both sizes are batch
# counts; take()/skip() expect integers.
train_size = int(round(train_fraction * dataset_size))
test_size = dataset_size - train_size
train_ds = dataset.take(train_size)
# Skip exactly the training batches. Skipping test_size of them left most of
# the training data inside the test split.
test_ds = dataset.skip(train_size)


In [61]:
# Keras expects integer step counts; train_size/test_size may be floats here.
# NOTE(review): train_size/test_size appear to already be batch counts (the
# dataset was batched before it was counted), so dividing by batch_size again
# makes one "epoch" cover roughly 1/batch_size of the split -- confirm the
# intent before changing, because model.fit consumes these values.
steps_per_epoch = train_steps = int(train_size // batch_size)
test_steps = int(test_size // batch_size)
print(test_steps, train_steps)

10.0 42.0


In [21]:
class DatasetSize(tf.data.Dataset):
    """Dummy dataset that yields one placeholder element per usable annotated image.

    An image is usable when it has a row in its directory's ``csv_filename``
    annotation file and the class of its first row is not listed in
    ``neglected_pokemons``. Iterating the result (optionally after batching)
    counts the elements without decoding any image. ``DatasetSize(dirs)``
    returns the ``tf.data.Dataset`` built by ``from_generator``, not an
    instance of this class.
    """

    @staticmethod
    def super_generator(dataset_dirs):
        """Build the zero-argument generator consumed by ``from_generator``."""
        def generator():
            for dataset_dir in dataset_dirs:
                # filename -> class of its first annotation row. Loaded once per
                # directory, on the first image, instead of once per image.
                first_class = None
                for pokemon_img in os.listdir(dataset_dir):
                    if not pokemon_img.lower().endswith((".png", ".jpg", ".jpeg")):
                        continue
                    if first_class is None:
                        annotations = pd.read_csv(f"{dataset_dir}/{csv_filename}")
                        first_class = (
                            annotations.drop_duplicates("filename")
                            .set_index("filename")["class"]
                            .to_dict()
                        )
                    if pokemon_img not in first_class:
                        continue
                    if first_class[pokemon_img] in neglected_pokemons:
                        continue
                    yield tf.constant([1], tf.float32), tf.constant([1], tf.float32)

        return generator

    @staticmethod
    def __new__(cls, dataset_dirs):
        """Return the placeholder ``tf.data.Dataset`` over ``dataset_dirs``."""
        return tf.data.Dataset.from_generator(
            cls.super_generator(dataset_dirs),
            output_signature=(
                tf.TensorSpec(shape=(None,), dtype=tf.float32),
                tf.TensorSpec(shape=(None,), dtype=tf.float32)

            )
        )

In [22]:
# dataset = PokemonType(dataset_dirs, ref_csv)
# dummy_dataset = DatasetSize(dataset_dirs)
# 
# dataset = dataset.batch(batch_size, num_parallel_calls=AUTOTUNE).repeat(repeat_factor)
# dummy_dataset = dummy_dataset.batch(batch_size, num_parallel_calls=AUTOTUNE).repeat(repeat_factor)

In [23]:
# dataset_size = 0
# for _ in dummy_dataset.as_numpy_iterator():
#     dataset_size += 1


In [24]:
# o=0
# for _ in dataset.as_numpy_iterator():
#    o+=1 

In [62]:
# Print the element count gathered by iterating the batched dataset, then echo
# the per-epoch step count as the cell's displayed value.
print(f"The dataset size: {dataset_size}")
steps_per_epoch

The dataset size: 854


42.0

In [83]:
def create_type_model(input_shape, output_shape):
    """Assemble an augmentation -> ResNet50 -> dense-head Keras model.

    Args:
        input_shape: shape of one input image, passed to ``tf.keras.layers.Input``.
        output_shape: number of units of the softmax layer.

    Returns:
        An uncompiled ``tf.keras.Model``. Its last layer is a single relu unit
        stacked on top of the softmax layer.

    NOTE(review): the 1-unit relu layer after the softmax collapses the class
    probabilities into one scalar -- confirm this head is what the loss used at
    compile time expects.
    """
    # Random flips and rotations applied to the raw input.
    augmentation = tf.keras.Sequential([
        tf.keras.layers.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.RandomRotation(0.20),
    ])

    # ImageNet backbone without its classifier; global average pooling output.
    backbone = tf.keras.applications.ResNet50(include_top=False, weights='imagenet', pooling="avg")
    # Every backbone layer is left trainable (full fine-tuning).
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = True

    inputs = tf.keras.layers.Input(input_shape)
    features = backbone(augmentation(inputs))
    features = tf.keras.layers.Flatten()(features)
    hidden = tf.keras.layers.Dense(512, activation='relu')(features)
    class_scores = tf.keras.layers.Dense(output_shape, activation='softmax')(hidden)
    outputs = tf.keras.layers.Dense(1, activation="relu")(class_scores)
    return tf.keras.Model(inputs=inputs, outputs=outputs)




In [84]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as the training accuracy exceeds 0.99."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's ``accuracy`` entry and request a stop above 0.99.

        Args:
            epoch: zero-based epoch index supplied by Keras.
            logs: metric dict of the finished epoch; may be ``None`` or lack
                the ``accuracy`` key, in which case nothing happens.
        """
        # No mutable default argument, and no None > float comparison when the
        # metric is missing.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.99:
            print(
                f'Epoch {epoch + 1} completed. Training accuracy {accuracy} reached. Stopping the training')
            self.model.stop_training = True

In [85]:
# Type vocabulary derived from the annotation CSV of the first split
# (dataset_dirs[0]) and the reference CSV.
all_labels = PokemonType.get_all_labels(f"{dataset_dirs[0]}/{csv_filename}", ref_csv)
# Build the uncompiled network from the module-level input_shape / output_shape.
model = create_type_model(input_shape, output_shape)


In [86]:
# Early-stopping callback handed to model.fit.
my_callback = myCallback()
# NOTE(review): the dataset built from PokedexDataset yields scalar integer
# class ids, while this compiles a binary cross-entropy loss against the model's
# single relu output -- looks inconsistent with multi-class training; confirm
# the intended loss / output head together.
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(), metrics=["accuracy"])

In [87]:
# Training the model.
# - train_ds is a finite take(); it is repeated so it can always supply
#   steps_per_epoch * epochs batches instead of running out mid-training.
# - batch_size is not passed: the dataset already yields batches.
# - Step counts are cast because Keras expects integers.
history = model.fit(
    train_ds.repeat(),
    epochs=20,
    steps_per_epoch=int(steps_per_epoch),
    validation_data=test_ds,
    validation_steps=int(test_steps),
    callbacks=[my_callback],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

KeyboardInterrupt: 

In [43]:
class Plot:
    """Placeholder namespace for layer-plotting helpers; kept as a class to stay organised."""
