In [1]:
import keras.callbacks
import tensorflow as tf
import numpy as np
from PIL import Image
import os
import random
import pandas as pd

## Todo:
1) Use `tf.data.Dataset` to read from the type dataset.
2) Use a dataset transformation that randomly selects m of the n available images of each pokemon.

In [100]:
class PokemonTypeDataset(tf.data.Dataset):
    """Generator-backed dataset of (image, multi-hot type vector) pairs.

    Rows come from a CSV that provides the ``image_path``, ``type1`` and
    ``type2`` columns. Calling the class does not create an instance of it:
    ``__new__`` returns the prefetched dataset built by
    ``tf.data.Dataset.from_generator``.
    """

    @staticmethod
    def one_hot_encode_labels(labels, all_labels: list):
        """Encode ``labels`` as a single multi-hot vector over ``all_labels``.

        Args:
            labels: type names belonging to one pokemon.
            all_labels: every known type name; a name's position in this
                sequence is its index in the returned vector.

        Returns:
            ``np.int8`` array of shape ``(len(all_labels),)`` holding 1 at the
            index of every name in ``labels`` and 0 elsewhere.

        Raises:
            KeyError: if a name in ``labels`` is missing from ``all_labels``.
        """
        label_map = {pokemon_type: one_hot_index for one_hot_index, pokemon_type in enumerate(all_labels)}
        result_labels = np.zeros(len(all_labels), dtype=np.int8)
        for pokemon_type in labels:
            # Assign instead of summing one-hot rows so that a repeated type
            # name cannot push an entry above 1.
            result_labels[label_map[pokemon_type]] = 1
        return result_labels

    @staticmethod
    def super_generator(all_labels):
        """Return the generator function used by ``from_generator``.

        ``all_labels`` is captured by closure; the returned generator takes
        the CSV path as its only argument.
        """
        def _generator(csv_dir):
            # The path is decoded before use: it reaches the generator as
            # bytes when forwarded through from_generator's ``args``.
            pokemon_df = pd.read_csv(csv_dir.decode())
            for _, pokemon in pokemon_df.iterrows():
                pokemon_path = pokemon["image_path"]
                # SVG entries are skipped (presumably because they cannot be
                # opened as raster images - TODO confirm).
                if pokemon_path.endswith(".svg"):
                    continue
                # The context manager closes the underlying file as soon as
                # the pixels have been copied into the array.
                with Image.open(pokemon_path) as image:
                    pokemon_tensor = np.asarray(
                        image.convert('RGB').resize((256, 256)),
                        dtype=np.float32,
                    )
                # Scale 8-bit channel values to [0, 1]; stays float32 to match
                # the declared output signature.
                pokemon_tensor = pokemon_tensor / 255.0
                if pd.isna(pokemon["type2"]):
                    pokemon_types = [pokemon["type1"]]
                else:
                    pokemon_types = [pokemon["type1"], pokemon["type2"]]

                yield pokemon_tensor, PokemonTypeDataset.one_hot_encode_labels(pokemon_types, all_labels)
        return _generator

    def __new__(cls, csv_dir: str, all_labels: list):
        """Build the prefetched dataset for ``csv_dir``.

        Args:
            csv_dir: path of the CSV file listing the images and their types.
            all_labels: every known type name, defining the label vector layout.

        Returns:
            A ``tf.data.Dataset`` yielding ``(float32[256, 256, 3],
            int8[len(all_labels)])`` tuples.
        """
        return tf.data.Dataset.from_generator(
            cls.super_generator(all_labels),
            args=(csv_dir,),
            output_signature=(
                tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float32),
                # The label width is known up front, so declare it statically.
                tf.TensorSpec(shape=(len(all_labels),), dtype=tf.int8),
            ),
        ).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [102]:
dataset_dir="../new_dataset"
csv_dir="../csv/pokemon.csv"
batch_size=32
# os.listdir returns entries in arbitrary order; sorting keeps the
# label -> index mapping identical across runs and machines.
all_labels=sorted(os.listdir(dataset_dir))
label_shapes=[1,2]

In [104]:
# Build the example stream from the CSV and group it into fixed-size batches.
dataset = PokemonTypeDataset(csv_dir, all_labels).batch(
    batch_size,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

In [108]:
# Count the examples in one pass over the batched dataset. Summing the real
# length of every batch keeps this cell idempotent (re-running it gives the
# same number) and does not over-count a partially filled final batch.
dataset_size = 0
for element in dataset.as_numpy_iterator():
    # element is an (images, labels) tuple; the leading axis is the batch axis.
    dataset_size += len(element[0])
print(f"dataset size: {dataset_size}")

dataset size: 10688


In [115]:
# Test and training data.
#
# `dataset` is already batched, so shuffle/take/skip below operate on whole
# batches; the split therefore has to be expressed in batches, not examples.
num_batches = -(-dataset_size // batch_size)  # ceiling division

# reshuffle_each_iteration=False pins a single shuffle order. Without it the
# take() and skip() pipelines are reshuffled independently every time they are
# iterated, which lets the same batches show up in both splits.
shuffled_dataset = dataset.shuffle(
    buffer_size=max(num_batches, 1),
    seed=69,
    reshuffle_each_iteration=False,
)

train_batches = int(0.8 * num_batches)
# Sizes stay expressed in examples because later cells derive
# steps_per_epoch from train_size / batch_size.
train_size = train_batches * batch_size
test_size = dataset_size - train_size
train_ds = shuffled_dataset.take(train_batches)
test_ds = shuffled_dataset.skip(train_batches)

In [116]:
# Derive the class count from all_labels, the same list the label vectors are
# built from, so the model's output width always matches the label width.
pokemon_types_len = len(all_labels)

In [117]:
# Echo the class count so it is shown as this cell's output.
pokemon_types_len

17

In [118]:
# Integer ceiling division yields a plain Python int; tf.math.ceil would
# return a float tensor, whereas Keras expects an integer step count.
steps_per_epoch = -(-train_size // batch_size)

In [119]:
class MyCallback(tf.keras.callbacks.Callback):
    """Stop training once the epoch's ``accuracy`` log entry reaches 99%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch logs and request an early stop when appropriate.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dictionary supplied by Keras; may be ``None`` or may
                lack the ``accuracy`` key, in which case nothing happens.
        """
        # A None default replaces the shared mutable `{}` default argument.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            # Nothing to compare; comparing None with a float would raise.
            return
        if accuracy >= 0.99:
            # The un-prefixed 'accuracy' key is the training metric (validation
            # metrics are logged with a 'val_' prefix), so label it as such.
            print('Training accuracy: %.2f%%' % (accuracy * 100))
            self.model.stop_training = True

            

In [120]:
#MODEL BUILDING

def get_model():
    """Build and compile the CNN used for multi-label type prediction.

    The network takes 256x256 RGB images, applies two convolution/max-pooling
    stages followed by two dense layers, and ends in a sigmoid layer with
    ``pokemon_types_len`` units (read from the notebook's global scope).

    Returns:
        A compiled ``tf.keras`` functional model using the Adam optimizer,
        binary cross-entropy loss and the ``accuracy`` metric.
    """
    layers = tf.keras.layers
    inputs = layers.Input(shape=(256, 256, 3))

    # Layers are listed in the order they are applied to the input tensor.
    stack = [
        layers.Conv2D(32, (5, 5), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(pokemon_types_len, activation="sigmoid"),
    ]

    features = inputs
    for layer in stack:
        features = layer(features)

    model = tf.keras.models.Model(inputs=inputs, outputs=features)
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    return model

In [121]:
# Create the compiled network and the early-stopping callback used for training.
model = get_model()
my_callback = MyCallback()

In [122]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 252, 252, 32)      2432      
                                                                 
 max_pooling2d (MaxPooling2  (None, 126, 126, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 124, 124, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 246016)            0     

In [None]:
# train_ds is an already-batched tf.data.Dataset, so:
#   * batch_size is not passed - batching is defined by the dataset itself;
#   * use_multiprocessing is not passed - it only applies to Python
#     generator / keras.utils.Sequence inputs;
#   * steps_per_epoch is not passed - the dataset is finite and not repeated,
#     so each epoch simply runs until the dataset is exhausted. Forcing a step
#     count here makes the input run out of data after the first epoch.
# Keeping the returned History object also avoids echoing its repr.
history = model.fit(
    x=train_ds,
    validation_data=test_ds,
    epochs=10,
    callbacks=[my_callback],
)

Epoch 1/10


