In [28]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image

In [29]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
test_dir="/content/drive/MyDrive/Pokemon/datasets/dataset-v4/test"
train_dir="/content/drive/MyDrive/Pokemon/datasets/dataset-v4/train"
valid_dir="/content/drive/MyDrive/Pokemon/datasets/dataset-v4/valid"
test_csv="/content/drive/MyDrive/Pokemon/datasets/dataset-v4/test/_annotations.csv"
train_csv="/content/drive/MyDrive/Pokemon/datasets/dataset-v4/train/_annotations.csv"
valid_csv="/content/drive/MyDrive/Pokemon/datasets/dataset-v4/_annotations.csv"
dataset_dirs=[test_dir,train_dir,valid_dir]
csv_dirs=[test_csv,train_csv,valid_csv]
csv_filename="_annotations.csv"

ref_csv="../csv/pokemon-v3.csv"
ref_csv="/content/drive/MyDrive/Pokemon/FirstGenPokemon.csv"

neglected_pokemons=["MrMime"]

In [31]:

input_shape=(256,256,3)
batch_size=4
repeat_factor=4


In [49]:
AUTOTUNE=tf.data.experimental.AUTOTUNE
class PokemonType(tf.data.Dataset):
    @staticmethod
    def find_pokemon_type(pokemon_filename,pokemon_csv,pokemon_ref_csv):
        pokemon_df = pd.read_csv(pokemon_csv)
        ref_df = pd.read_csv(pokemon_ref_csv)
        pokemon=pokemon_df[pokemon_df['filename']==pokemon_filename]
        if pokemon.empty: return None
        pokemon_name=pokemon.iloc[0]["class"]
        if pokemon_name in neglected_pokemons:
            return None
        pokemon=ref_df[ref_df['Name']==pokemon_name]
        type1=pokemon.iloc[0]["Type1"]
        type2=None
        if not pokemon.iloc[0]["Type2"]=='None':
            type2=pokemon.iloc[0]["Type2"]

        return type1,type2



    @staticmethod
    def one_hot_encode(pokemon_labels,all_labels):
        label_map={pokemon_type:one_hot_index for one_hot_index,pokemon_type in enumerate(all_labels)} #enum(all_labels)->(0,type1),(1,type2),...
        one_hot_labels=np.zeros((len(pokemon_labels),len(all_labels)))
        for index,pokemon_type in enumerate(pokemon_labels):
            one_hot_index=label_map[pokemon_type]
            one_hot_labels[index,one_hot_index]=1
        one_hot_labels=np.sum(one_hot_labels,axis=0)
        return one_hot_labels #(1,0,1,0,0,0,....)

    @staticmethod
    def get_all_labels(pokemon_csv,ref_csv):
        ref_df=pd.read_csv(ref_csv)
        pokemon_df=pd.read_csv(pokemon_csv)
        df=pd.merge(pokemon_df,ref_df,left_on="class",right_on="Name",how="inner")
        pokemon_names=df["class"].tolist()
        all_labels=["dragon"]
        for pokemon_name in pokemon_names:
            pokemon=ref_df[ref_df['Name']==pokemon_name]
            if not pokemon.empty:
                type1=pokemon.iloc[0]["Type1"]
                type2=pokemon.iloc[0]["Type2"]
                all_labels.append(type1)
                if not type2=="None":
                    all_labels.append(type2)

        all_labels=list(set(all_labels)-{np.NaN})
        return all_labels
    @staticmethod
    def super_generator(dataset_dirs,ref_csv,input_shape=(256,256,3)):
        def generator():
            all_labels=PokemonType.get_all_labels(f"{dataset_dirs[0]}/{csv_filename}",ref_csv)
            for dataset_dir in dataset_dirs:
                pokemon_imgs=os.listdir(dataset_dir)
                for pokemon_img in pokemon_imgs:
                    if not pokemon_img.lower().endswith((".jpg",".png",".jpeg")):
                        continue
                    pokemon=Image.open(os.path.join(dataset_dir,pokemon_img)).resize((input_shape[0],input_shape[1]))
                    pokemon_tensor=tf.convert_to_tensor(pokemon)
                    pokemon_tensor=tf.cast(pokemon_tensor,dtype=tf.float32)
                    pokemon_tensor=pokemon_tensor/255.0
                    pokemon_types=PokemonType.find_pokemon_type(pokemon_img,f"{dataset_dir}/{csv_filename}",ref_csv)
                    if pokemon_types is None:
                        continue
                    pokemon_labels=[pokemon_types[0]]
                    if not pokemon_types[-1] is None:
                        pokemon_labels.append(pokemon_types[-1])
                    pokemon_labels=PokemonType.one_hot_encode(pokemon_labels,all_labels)

                    yield pokemon_tensor,pokemon_labels
        return generator

    @staticmethod
    def __new__(cls,dataset_dirs,ref_csv):
        return tf.data.Dataset.from_generator(
            cls.super_generator(dataset_dirs,ref_csv,input_shape),
            output_signature=(
                tf.TensorSpec(shape=input_shape,dtype=tf.float32),
                tf.TensorSpec(shape=(15,),dtype=tf.float32),
            )
        ).prefetch(AUTOTUNE)




In [51]:
class DatasetSize(tf.data.Dataset):
    @staticmethod
    def super_generator(dataset_dirs):
        def generator():
            for dataset_dir in dataset_dirs:
                pokemon_imgs=os.listdir(dataset_dir)
                for pokemon_img in pokemon_imgs:
                    if not pokemon_img.lower().endswith((".png",".jpg",".jpeg")):
                        continue
                    pokemon_df=pd.read_csv(f"{dataset_dir}/{csv_filename}")
                    pokemon=pokemon_df[pokemon_df["filename"]==pokemon_img]
                    if pokemon.empty: continue
                    if pokemon.iloc[0]["class"] in neglected_pokemons:
                        continue
                    yield tf.constant([1],tf.float32),tf.constant([1],tf.float32)

        return generator
    @staticmethod
    def __new__(cls,dataset_dirs):
        return tf.data.Dataset.from_generator(
            cls.super_generator(dataset_dirs),
            output_signature=(
                tf.TensorSpec(shape=(None,),dtype=tf.float32),
                tf.TensorSpec(shape=(None,),dtype=tf.float32)

            )
        )

In [52]:
dataset=PokemonType(dataset_dirs,ref_csv)
dummy_dataset=DatasetSize(dataset_dirs)

dataset=dataset.batch(batch_size,num_parallel_calls=AUTOTUNE).repeat(repeat_factor)
dummy_dataset=dummy_dataset.batch(batch_size,num_parallel_calls=AUTOTUNE).repeat(repeat_factor)

In [53]:
dataset_size=0
for _ in dummy_dataset.as_numpy_iterator():
    dataset_size+=1


In [54]:
# o=0
# for _ in dataset.as_numpy_iterator():
#    o+=1

In [55]:
dataset=dataset.shuffle(buffer_size=dataset_size)

In [56]:
print(f"The dataset size: {dataset_size}")
steps_per_epoch=tf.math.ceil(dataset_size/batch_size).numpy()
steps_per_epoch

The dataset size: 908


227.0

In [57]:
def create_type_model(input_shape,output_shape):

    augmented_model=tf.keras.Sequential([
        tf.keras.layers.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.RandomRotation(0.20),
        tf.keras.layers.Rescaling(1./255),
    ])

    resnet_model=tf.keras.applications.ResNet152(include_top=False,weights='imagenet',pooling='avg')
    for layer in resnet_model.layers:
        layer.trainable=True

    inputs=tf.keras.layers.Input(input_shape)
    augment_1=augmented_model(inputs)
    resnet_layers=resnet_model(augment_1)
    flatten=tf.keras.layers.Flatten()(resnet_layers)
    dense_1=tf.keras.layers.Dense(512, activation='relu')(flatten)
    outputs=tf.keras.layers.Dense(output_shape, activation='sigmoid')(dense_1)
    model=tf.keras.Model(inputs=inputs, outputs=outputs)
    return model




In [58]:
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('accuracy') > 0.99:
            print(f'Epoch {epoch+1} completed. Training accuracy {logs.get("accuracy")} reached. Stopping the training')
            self.model.stop_training =True

In [59]:
all_labels=PokemonType.get_all_labels(f"{dataset_dirs[0]}/{csv_filename}",ref_csv)
model=create_type_model(input_shape,len(all_labels))
print(all_labels)

['poison', 'ghost', 'ice', 'bug', 'dragon', 'ground', 'flying', 'fire', 'electric', 'water', 'rock', 'psychic', 'normal', 'grass', 'fighting']


In [1]:
my_callback=myCallback()
model.compile(optimizer='adam',loss=tf.keras.losses.BinaryCrossentropy(),metrics=["accuracy"])

NameError: name 'myCallback' is not defined

In [61]:
'''Training the model'''
history=model.fit(dataset,epochs=100,callbacks=[my_callback],batch_size=batch_size,steps_per_epoch=steps_per_epoch)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100






In [None]:
class Plot():
    #use it to plot layers and writing in a class to keep it organised
    pass


In [43]:

    def find_pokemon_type(pokemon_filename,pokemon_csv,pokemon_ref_csv):
        pokemon_df = pd.read_csv(pokemon_csv)
        ref_df = pd.read_csv(pokemon_ref_csv)
        pokemon=pokemon_df[pokemon_df['filename']==pokemon_filename]
        if pokemon.empty: return None
        pokemon_name=pokemon.iloc[0]["class"]
        if pokemon_name in neglected_pokemons:
            return None
        pokemon=ref_df[ref_df['Name']==pokemon_name]
        type1=pokemon.iloc[0]["Type1"]
        type2=None
        if not pokemon.iloc[0]["Type2"]=='None':
            type2=pokemon.iloc[0]["Type2"]

        return type1,type2


    def one_hot_encode(pokemon_labels,all_labels):
        label_map={pokemon_type:one_hot_index for one_hot_index,pokemon_type in enumerate(all_labels)} #enum(all_labels)->(0,type1),(1,type2),...
        one_hot_labels=np.zeros((len(pokemon_labels),len(all_labels)))
        for index,pokemon_type in enumerate(pokemon_labels):
            one_hot_index=label_map[pokemon_type]
            one_hot_labels[index,one_hot_index]=1
        one_hot_labels=np.sum(one_hot_labels,axis=0)
        return one_hot_labels #(1,0,1,0,0,0,....)

    def get_all_labels(pokemon_csv,ref_csv):
        ref_df=pd.read_csv(ref_csv)
        pokemon_df=pd.read_csv(pokemon_csv)
        df=pd.merge(pokemon_df,ref_df,left_on="class",right_on="Name",how="inner")
        pokemon_names=df["class"].tolist()
        all_labels=["dragon"]
        for pokemon_name in pokemon_names:
            pokemon=ref_df[ref_df['Name']==pokemon_name]
            if not pokemon.empty:
                type1=pokemon.iloc[0]["Type1"]
                type2=pokemon.iloc[0]["Type2"]
                all_labels.append(type1)
                if not type2=="None":
                    all_labels.append(type2)

        all_labels=list(set(all_labels)-{np.NaN})
        return all_labels


In [44]:
def find_pokemon_type(pokemon_filename,pokemon_csv,pokemon_ref_csv):
    pokemon_df = pd.read_csv(pokemon_csv)
    ref_df = pd.read_csv(pokemon_ref_csv)
    pokemon=pokemon_df[pokemon_df['filename']==pokemon_filename]
    if pokemon.empty: return None
    pokemon_name=pokemon.iloc[0]["class"]
    if pokemon_name in neglected_pokemons:
        return None
    pokemon=ref_df[ref_df['Name']==pokemon_name]
    type1=pokemon.iloc[0]["Type1"]
    type2=None
    if not pokemon.iloc[0]["Type2"]=='None':
        type2=pokemon.iloc[0]["Type2"]

    return type1,type2


def one_hot_encode(pokemon_labels,all_labels):
    label_map={pokemon_type:one_hot_index for one_hot_index,pokemon_type in enumerate(all_labels)} #enum(all_labels)->(0,type1),(1,type2),...
    one_hot_labels=np.zeros((len(pokemon_labels),len(all_labels)))
    print(pokemon_labels)
    for index,pokemon_type in enumerate(pokemon_labels):
        one_hot_index=label_map[pokemon_type]
        one_hot_labels[index,one_hot_index]=1
    one_hot_labels=np.sum(one_hot_labels,axis=0)
    return one_hot_labels #(1,0,1,0,0,0)



def get_all_labels(pokemon_csv,ref_csv):
    ref_df=pd.read_csv(ref_csv)
    pokemon_df=pd.read_csv(pokemon_csv)
    df=pd.merge(pokemon_df,ref_df,left_on="class",right_on="Name",how="inner")
    pokemon_names=df["class"].tolist()
    all_labels=[]
    for pokemon_name in pokemon_names:
        pokemon=ref_df[ref_df['Name']==pokemon_name]
        if not pokemon.empty:
            type1=pokemon.iloc[0]["Type1"]
            type2=pokemon.iloc[0]["Type2"]
            all_labels.append(type1)
            if not type2=="None":
                all_labels.append(type2)
    return all_labels



In [48]:
all_labels=PokemonType.get_all_labels(f"{dataset_dirs[0]}/{csv_filename}",ref_csv)
for dataset_dir in dataset_dirs:
  pokemon_imgs=os.listdir(dataset_dir)
  for pokemon_img in pokemon_imgs:
    if not pokemon_img.lower().endswith((".jpg",".png",".jpeg")):
      continue
    pokemon=Image.open(os.path.join(dataset_dir,pokemon_img)).resize((input_shape[0],input_shape[1]))
    pokemon_tensor=tf.convert_to_tensor(pokemon)
    pokemon_tensor=tf.cast(pokemon_tensor,dtype=tf.float32)
    pokemon_tensor=pokemon_tensor/255.0
    pokemon_types=find_pokemon_type(pokemon_img,f"{dataset_dir}/{csv_filename}",ref_csv)
    if pokemon_types is None:
      continue
    pokemon_labels=[pokemon_types[0]]
    if not pokemon_types[-1] is None:
      pokemon_labels.append(pokemon_types[-1])
    pokemon_labels=one_hot_encode(pokemon_labels,all_labels)

['ground']
['fighting']
['electric']
['poison', 'flying']
['bug', 'poison']
['rock', 'water']
['psychic']
['water', 'ice']
['normal', 'flying']
['water']
['electric']
['ground']


KeyboardInterrupt: 

In [None]:
all_labels=get_all_labels(f"{dataset_dirs[0]}/{csv_filename}",ref_csv)
all_labels