In [41]:
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder



Resize and greyscale images:

In [42]:
image_directory = '../images/dataset'
starters = ["Bulbasaur", "Charmander", "Squirtle"]

image_data = []
labels = []

pokemon_to_feature_num = {
    "Bulbasaur": 0,
    "Charmander": 1,
    "Squirtle": 2,

}

for starter in starters:
    starter_folder = os.path.join(image_directory, starter)
    
    # Make sure the folder exists
    if os.path.isdir(starter_folder):
        for filename in os.listdir(starter_folder):
            if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith('.jfif'):
                image_path = os.path.join(starter_folder, filename)
                
                # Open the image and resize it to a fixed size 
                img = Image.open(image_path).resize((128, 128))
                #Greyscale
                # img = img.convert('L')
                # Convert to RGB if it's RGBA
                if img.mode == 'RGBA':
                    img = img.convert('RGB')  # Discard the alpha channel
                img_array = np.array(img)
                
                if img_array.shape != (128, 128, 3):
                    print(f"Skipping {filename}, invalid shape: {img_array.shape}")
                    continue
                else:
                    image_data.append(img_array)
                    labels.append(pokemon_to_feature_num[starter])

labels = np.array(labels)
image_data = np.array(image_data)




Skipping 00000114.png, invalid shape: (128, 128)
Skipping 00000190.png, invalid shape: (128, 128)
Skipping 00000009.png, invalid shape: (128, 128)
Skipping 00000021.png, invalid shape: (128, 128)
Skipping 00000061.png, invalid shape: (128, 128)
Skipping 00000078.png, invalid shape: (128, 128)
Skipping 00000172.png, invalid shape: (128, 128)
Skipping 59bef3942b6041b3a6e0526100264536.jpg, invalid shape: (128, 128)


Display the image in greyscale to check data

In [43]:
img = image_data[0].reshape(128, 128,3)
img_pillow = Image.fromarray(img)
img_pillow.show()

In [45]:
print(image_data.shape)
print(labels.shape)

(847, 128, 128, 3)
(847,)


Split data

In [47]:
X_train, X_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.3, random_state=420)

Model:

In [None]:
import tensorflow as tf

from tensorflow.keras import layers, models

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(layers.BatchNormalization()) # does something good, maybe i'll read more about it but i just added it and it helped a lot
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Flatten())#1d for neural network

model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.45)) #address overfitting
model.add(layers.Dense(3, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [80]:
model.summary()

In [81]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])


In [82]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint('./models/best_model1.keras', 
                             monitor='val_accuracy',    # Monitor validation accuracy
                             save_best_only=True,       # Save only the best weights
                             mode='max',                # 'max' means we want to maximize the metric
                             verbose=1)

history = model.fit(X_train, y_train, 
                    epochs=75, 
                    batch_size=32, 
                    validation_data=(X_test, y_test),
                    callbacks= [checkpoint])

Epoch 1/75
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step - accuracy: 0.4551 - loss: 2.8015
Epoch 1: val_accuracy improved from -inf to 0.94118, saving model to ./models/best_model1.keras
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 256ms/step - accuracy: 0.4621 - loss: 2.7462 - val_accuracy: 0.9412 - val_loss: 0.2272
Epoch 2/75
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step - accuracy: 0.8945 - loss: 0.3147
Epoch 2: val_accuracy did not improve from 0.94118
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 255ms/step - accuracy: 0.8948 - loss: 0.3138 - val_accuracy: 0.8706 - val_loss: 0.6548
Epoch 3/75
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step - accuracy: 0.9218 - loss: 0.2063
Epoch 3: val_accuracy did not improve from 0.94118
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 245ms/step - accuracy: 0.9221 - loss: 0.2061 - val_accuracy: 0.8118 - val_lo

test on hand drawn images

In [83]:
best_model1 = tf.keras.models.load_model('./models/best_model1.keras')

In [57]:
hand_image_directory = '../images/Hand_Drawn'

In [58]:

hand_data = []
if os.path.isdir(hand_image_directory):
    for filename in os.listdir(hand_image_directory):
        if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith('.jfif'):
            image_path = os.path.join(hand_image_directory, filename)
            
            # Open the image and resize it to a fixed size 
            img = Image.open(image_path).resize((128, 128))
            #Greyscale
            # img = img.convert('L')
            # Convert to RGB if it's RGBA
            if img.mode == 'RGBA':
                img = img.convert('RGB')  # Discard the alpha channel
            img_array = np.array(img)
            
            if img_array.shape != (128, 128, 3):
                print(f"Skipping {filename}, invalid shape: {img_array.shape}")
                continue
            else:
                hand_data.append(img_array)

hand_data = np.array(hand_data)

In [59]:
img = hand_data[0].reshape(128,128 ,3)
img_pillow = Image.fromarray(img)
img_pillow.show()

In [84]:
pred_probs = best_model1.predict(hand_data)
print(pred_probs)
predicted_class = np.argmax(pred_probs, axis=1)
print(predicted_class) # i manually compared by looking in folder

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
[[9.70129073e-01 8.59306137e-09 2.98709366e-02]
 [1.89236354e-03 2.81807110e-02 9.69926953e-01]
 [9.90366101e-01 6.27431832e-03 3.35956179e-03]
 [3.75651420e-12 9.99999762e-01 2.06540548e-07]
 [9.65103880e-03 3.24806981e-02 9.57868278e-01]
 [4.67707898e-04 1.52060045e-02 9.84326303e-01]
 [3.71285724e-10 8.13499014e-07 9.99999166e-01]]
[0 2 0 1 2 2 2]


Note: 30 epochs for all the colored ones to be guessed correctly..., non colored defaults to squirtle

Ok lets see what happens with transformations on the data.

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#datagen is interesting
datagen = ImageDataGenerator(
    rotation_range=30,              # Randomly rotate images by up to x degrees
    width_shift_range=0.2,          # Randomly shift images horizontally by x%
    height_shift_range=0.2,         # Randomly shift images vertically by x%
    shear_range=0.2,                # Apply shear transformations
    zoom_range=0.2,                 # Random zoom
    horizontal_flip=True,           # Randomly flip images horizontally
    fill_mode='nearest'             # Strategy for filling in missing pixels (due to rotation or shift)
)

datagen.fit(X_train)

In [74]:
import tensorflow as tf

from tensorflow.keras import layers, models

model2 = models.Sequential()
model2.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model2.add(layers.BatchNormalization())
model2.add(layers.MaxPooling2D((2, 2)))
model2.add(layers.Conv2D(64, (3, 3), activation='relu'))
model2.add(layers.MaxPooling2D((2, 2)))
model2.add(layers.Conv2D(64, (3, 3), activation='relu'))
model2.add(layers.MaxPooling2D((2, 2)))

model2.add(layers.Flatten())

model2.add(layers.Dense(64, activation='relu'))
model2.add(layers.Dropout(0.45)) #address overfitting
model2.add(layers.Dense(3, activation='softmax'))

model2.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [75]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint('./models/best_model.keras', 
                             monitor='val_accuracy',    # Monitor validation accuracy
                             save_best_only=True,       # Save only the best weights
                             mode='max',                # 'max' means we want to maximize the metric
                             verbose=1)

history = model2.fit(
    datagen.flow(X_train, y_train, batch_size=32),  # Augmented images in batches
    epochs=75,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint]
)

Epoch 1/75


  self._warn_if_super_not_called()


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 290ms/step - accuracy: 0.3692 - loss: 4.3132
Epoch 1: val_accuracy improved from -inf to 0.80392, saving model to ./models/best_model.keras
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 361ms/step - accuracy: 0.3752 - loss: 4.2109 - val_accuracy: 0.8039 - val_loss: 0.4500
Epoch 2/75
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296ms/step - accuracy: 0.7332 - loss: 0.5517
Epoch 2: val_accuracy did not improve from 0.80392
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 321ms/step - accuracy: 0.7363 - loss: 0.5486 - val_accuracy: 0.6588 - val_loss: 0.8222
Epoch 3/75
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283ms/step - accuracy: 0.9011 - loss: 0.3474
Epoch 3: val_accuracy did not improve from 0.80392
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 307ms/step - accuracy: 0.9014 - loss: 0.3461 - val_accuracy: 0.7294 - val_loss: 1.7229


In [76]:
best_model = tf.keras.models.load_model('./models/best_model.keras')
best_model.evaluate(X_test, y_test)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.9758 - loss: 0.2187


[0.14939461648464203, 0.9764705896377563]

In [77]:
pred_probs = best_model.predict(hand_data)
print(pred_probs)
predicted_class = np.argmax(pred_probs, axis=1)
print(predicted_class)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[[7.0893776e-01 3.7793782e-07 2.9106191e-01]
 [2.9696652e-01 2.1709745e-01 4.8593605e-01]
 [5.0650114e-01 7.4308947e-02 4.1918990e-01]
 [2.2669948e-08 9.9999666e-01 3.3658469e-06]
 [3.4908891e-01 1.5426618e-01 4.9664494e-01]
 [4.0449947e-01 1.0927055e-01 4.8622993e-01]
 [1.9994812e-02 1.1142439e-03 9.7889084e-01]]
[0 2 0 1 2 2 2]


Epoch 1/10


  self._warn_if_super_not_called()


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 275ms/step - accuracy: 0.9607 - loss: 0.0845 - val_accuracy: 0.9647 - val_loss: 0.1666
Epoch 2/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 273ms/step - accuracy: 0.9555 - loss: 0.1903 - val_accuracy: 0.9686 - val_loss: 0.1378
Epoch 3/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 278ms/step - accuracy: 0.9682 - loss: 0.1020 - val_accuracy: 0.9490 - val_loss: 0.2029
Epoch 4/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 263ms/step - accuracy: 0.9621 - loss: 0.1227 - val_accuracy: 0.9451 - val_loss: 0.1976
Epoch 5/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 265ms/step - accuracy: 0.9803 - loss: 0.0798 - val_accuracy: 0.9647 - val_loss: 0.1803
Epoch 6/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 261ms/step - accuracy: 0.9636 - loss: 0.1044 - val_accuracy: 0.9569 - val_loss: 0.2177
Epoch 7/10
[1m19/19[0m [32m━━━━━━━━━

Epoch 1/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 283ms/step - accuracy: 0.9811 - loss: 0.0734 - val_accuracy: 0.9686 - val_loss: 0.1540
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 308ms/step - accuracy: 0.9743 - loss: 0.0501 - val_accuracy: 0.9569 - val_loss: 0.2048
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 278ms/step - accuracy: 0.9817 - loss: 0.0499 - val_accuracy: 0.9647 - val_loss: 0.1880
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 267ms/step - accuracy: 0.9697 - loss: 0.0701 - val_accuracy: 0.9686 - val_loss: 0.2064
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 267ms/step - accuracy: 0.9521 - loss: 0.1426 - val_accuracy: 0.9529 - val_loss: 0.1857
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 260ms/step - accuracy: 0.9725 - loss: 0.0902 - val_accuracy: 0.9333 - val_loss: 0.2710
Epoch 7/20
[1m19/19[0m [3

In [67]:
pred_probs = model2.predict(hand_data)
print(pred_probs)
predicted_class = np.argmax(pred_probs, axis=1)
print(predicted_class)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[[7.0132680e-02 1.8187502e-06 9.2986554e-01]
 [3.3418107e-01 3.5123985e-02 6.3069493e-01]
 [6.5888751e-01 2.0125033e-01 1.3986214e-01]
 [1.8295322e-07 9.9995494e-01 4.4885808e-05]
 [3.7500429e-01 1.1292139e-01 5.1207429e-01]
 [4.0733567e-01 7.6039352e-02 5.1662499e-01]
 [6.1818344e-07 2.0156102e-10 9.9999940e-01]]
[2 2 0 1 2 2 2]
