In [None]:
import datetime
import glob
import os

import cv2
import pandas as pd
import tensorflow as tf
from google.colab.patches import cv2_imshow
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras import datasets, layers, models

%load_ext tensorboard

# Classification Task

In [None]:
# Unziping data
!unzip /content/Pokemon_dataset.zip

In [None]:
# Collect the file names of all images in the dataset

folder_path = r"/content/Pokemon_dataset"
# os.listdir returns entries in arbitrary order; sorting makes the listing
# (and any "first match" lookup over it) identical on every run.
image_path_list = sorted(os.listdir(os.path.join(folder_path, 'images')))
# Show a short summary instead of dumping every file name into the output
print(len(image_path_list), 'image files, e.g.', image_path_list[:5])

In [None]:
csv_file = r"/content/Pokemon_dataset/pokemon.csv"

# Treat empty strings in the "Type2" column as missing values while parsing
na_values = {'Type2': ['']}

# Read the CSV, then discard the "Type2" column
data = pd.read_csv(csv_file, na_values=na_values).drop(columns=['Type2'])

In [None]:
# Adding image path column to dataframe 

def image_path_creator(name):
  """Return the full path of the image file that belongs to ``name``.

  The file is looked up in the notebook-level ``image_path_list`` by comparing
  ``name`` with each file name minus its extension; the match is then joined
  onto ``<folder_path>/images``.

  Args:
    name: value to look up (the notebook passes entries of the ``Name`` column).

  Returns:
    Path of the first file in ``image_path_list`` whose stem equals ``name``.

  Raises:
    FileNotFoundError: if no file in ``image_path_list`` has ``name`` as stem.
  """
  # Compare against the stem only: testing membership in the dot-separated
  # pieces would let a name equal to an extension (e.g. "png") match every
  # file, and could never match a name that itself contains a dot.
  match = next(
      (file_name for file_name in image_path_list
       if os.path.splitext(file_name)[0] == name),
      None,
  )
  if match is None:
    raise FileNotFoundError(
        f"No image found for {name!r} in {os.path.join(folder_path, 'images')}")
  return os.path.join(folder_path, 'images', match)

# Resolve every name to its image file and store the result as a new column
data['Image_path'] = data['Name'].map(image_path_creator)
data

In [None]:
# Encode the "Type1" strings as integer class ids
type_encode = preprocessing.LabelEncoder().fit(data['Type1'].tolist())
data['type1_label'] = type_encode.transform(data['Type1'].tolist())

print('Number of classes = ', len(type_encode.classes_))

In [None]:
# preprocessing images

def preprocess_data(image_path,label):
  """Load one image file and turn it into a normalized model input.

  Args:
    image_path: path of the image file to read.
    label: label belonging to the image; returned unchanged.

  Returns:
    A tuple ``(img, label)`` where ``img`` is a float32 image with 3 channels,
    scaled to [0, 1] and resized to 120x120.
  """
  # Read and decode the file as a 3-channel image
  img = tf.io.read_file(image_path)
  img = tf.io.decode_png(img,channels=3)

  # Normalize while the image is still an integer tensor: convert_image_dtype
  # only rescales when the dtype actually changes, and tf.image.resize already
  # returns float32, so converting after the resize would leave the values in
  # the [0, 255] range.
  img = tf.image.convert_image_dtype(img,tf.float32)

  # Resize to the 120x120 input size of the network
  img = tf.image.resize(img,(120,120))

  return img,label

In [None]:
# Building the train/test tf.data pipelines (batching and AUTOTUNE prefetching)

# Model inputs (image paths) and targets (encoded labels) as plain lists
image_list = data['Image_path'].tolist()
label_list = data['type1_label'].tolist()


# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_list,
    label_list,
    test_size=0.2,  # Adjust test_size as desired
    random_state=42,
)


def configure_for_performance(dataset, batch_size):
  """Batch a dataset, repeat it and prefetch upcoming batches.

  Args:
    dataset: dataset to configure.
    batch_size: number of elements per batch.

  Returns:
    The dataset after ``batch``, ``repeat`` and ``prefetch`` have been
    applied, in that order.
  """
  autotune = tf.data.experimental.AUTOTUNE
  return dataset.batch(batch_size).repeat().prefetch(buffer_size=autotune)

# Create the train dataset
TRAIN_BATCH_SIZE = 16
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
# Shuffle the (path, label) pairs before decoding. The buffer covers the whole
# training split and is reshuffled on every pass, so the batches are composed
# differently each time instead of replaying the exact same ones.
train_dataset = train_dataset.shuffle(buffer_size=len(train_paths),
                                      reshuffle_each_iteration=True)
# Decode several images in parallel; map keeps the element order by default
train_dataset = train_dataset.map(preprocess_data,
                                  num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Batch, repeat and prefetch (AUTOTUNE picks the prefetch buffer size)
train_dataset = configure_for_performance(train_dataset, TRAIN_BATCH_SIZE)

#----------------------------------------------------------------------------------------------------

# Create the test/validation dataset
TEST_BATCH_SIZE = 8
test_dataset = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_dataset = test_dataset.map(preprocess_data)

# Keep the original order (no shuffle): predictions made from this dataset are
# compared position-by-position with test_labels, and shuffling would not
# change the validation loss/accuracy anyway.
test_dataset = test_dataset.batch(TEST_BATCH_SIZE)
test_dataset = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [None]:
# Define the Sequential model

model = models.Sequential([
    # Convolutional feature extractor
    layers.Conv2D(2, (5, 5), activation='relu', input_shape=(120, 120, 3),
                  kernel_initializer='he_normal'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(4, (3, 3), strides=2, activation='relu',
                  kernel_initializer='he_normal'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(8, (3, 3), activation='relu',
                  kernel_initializer='he_normal'),

    # Classifier head: one softmax output per encoded class
    layers.Flatten(),
    layers.Dense(256, activation='relu', kernel_initializer='he_normal'),
    layers.Dropout(0.1),
    layers.Dense(len(type_encode.classes_), activation='softmax'),
])

model.summary()

In [None]:
# Clear any logs from previous runs
!rm -rf "/content/logs"

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Compile the model
model.compile(
    loss='sparse_categorical_crossentropy',  # Use 'categorical_crossentropy' if labels are one-hot encoded
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Number of full training batches in one pass over the training split
TRAIN_STEPS_PER_EPOCH = len(train_paths) // TRAIN_BATCH_SIZE

In [None]:
# Train for 10 epochs, validating on the test dataset and logging to TensorBoard
model.fit(
    train_dataset,
    epochs=10,
    steps_per_epoch=TRAIN_STEPS_PER_EPOCH,
    validation_data=test_dataset,
    callbacks=[tensorboard_callback],
)

In [None]:
# Open the TensorBoard UI on the logs stored under logs/fit
%tensorboard --logdir logs/fit

# Evaluate Model

In [None]:
# Build an explicitly ordered (unshuffled) view of the test split so that
# prediction i is guaranteed to correspond to test_labels[i] when the
# predictions are compared with the labels, independent of how test_dataset
# is configured.
eval_dataset = (
    tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
    .map(preprocess_data)
    .batch(TEST_BATCH_SIZE)
)

# Predict class probabilities for the test split
test_probabilities = model.predict(eval_dataset)
# Convert the probabilities to class labels (index of the largest probability)
test_predictions = tf.argmax(test_probabilities, axis=1).numpy()

# Evaluate the model on the same ordered data
loss, accuracy = model.evaluate(eval_dataset)

print('Loss:', loss)
print('Accuracy:', accuracy)


In [None]:
# Per-class precision / recall / F1 of the predictions against the true test labels
report = classification_report(y_true=test_labels, y_pred=test_predictions)
print(report)