# Derrik Pollock 

## Final Project: Who's That Pokemon

The program first creates a machine learning model and trains the model with the specified data set containing hundreds of generation 1 Pokemon images.

### Initially, the model was trained using a smaller data set of 5 popular Pokémon

In order to preserve resources and simplify explanation...


### However, I personally have about 20 gpus spread throughout my house that can provide the processing power needed to train this model quickly

In [2]:
""" Who's that Pokemon?
	Here we write a machine learning program to classify pictures of Pokemon.
	We use machine learning python libraries including TensorFlow and Keras.
	Results will typically be displayed in Plotly and Matplotlib.
	Written by Shaun Miller using the Kaggle dataset https://www.kaggle.com/thedagger/pokemon-generation-one.
	This program follows some of the tutorial from the TensorFlow website: https://www.tensorflow.org/tutorials/images/classification.
"""

" Who's that Pokemon?\n\tHere we write a machine learning program to classify pictures of Pokemon.\n\tWe use machine learning python libraries including TensorFlow and Keras.\n\tResults will typically be displayed in Plotly and Matplotlib.\n\tWritten by Shaun Miller using the Kaggle dataset https://www.kaggle.com/thedagger/pokemon-generation-one.\n\tThis program follows some of the tutorial from the TensorFlow website: https://www.tensorflow.org/tutorials/images/classification.\n"

First, we import the necessary libraries

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
import pathlib
import sys

import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #silence some of the tf warnings

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

Then, we load and explore the local dataset

In [4]:
def get_local_data(file_path, val_split=0.2, batch_size = 32, height = 180, width = 180):
	"""Build training and validation datasets from a local image directory.

	Args:
		file_path: root directory of the dataset; images are looked up one
			sub-directory below it.
		val_split: value forwarded as ``validation_split`` to both loader calls.
		batch_size: batch size forwarded to both loader calls.
		height, width: target image size forwarded as ``image_size``.

	Returns:
		A ``(train_ds, val_ds)`` tuple.
	"""
	root = pathlib.Path(file_path)

	#report how many .jpg files sit one directory level below the root
	jpg_total = len([match for match in root.glob('*/*.jpg')])
	print('Number of JPEG in dataset:', jpg_total)

	#options common to both subsets; the identical seed is passed to both calls
	shared_options = {
		'validation_split': val_split,
		'seed': 710,
		'image_size': (height, width),
		'batch_size': batch_size,
	}
	load_from_directory = tf.keras.preprocessing.image_dataset_from_directory

	train_ds = load_from_directory(root, subset="training", **shared_options)
	val_ds = load_from_directory(root, subset="validation", **shared_options)

	#show the first and last three class names found in the training subset
	found_names = train_ds.class_names
	print('Pokemon in set:', found_names[:3], ' ... ', found_names[-3:])

	return train_ds, val_ds

Configure the dataset

In [5]:
def configure_ds(train_ds, val_ds):
	"""Cache and prefetch both datasets, additionally shuffling the training one.

	Returns:
		The configured ``(train_ds, val_ds)`` tuple.
	"""
	prefetch_size = tf.data.experimental.AUTOTUNE

	#training pipeline: cache -> shuffle (buffer of 1000) -> prefetch
	cached_train = train_ds.cache()
	shuffled_train = cached_train.shuffle(1000)
	tuned_train = shuffled_train.prefetch(buffer_size=prefetch_size)

	#validation pipeline: cache -> prefetch (no shuffling)
	cached_val = val_ds.cache()
	tuned_val = cached_val.prefetch(buffer_size=prefetch_size)

	return tuned_train, tuned_val

Create the model 

In [6]:
def create_model(num_classes = 5, height = 180, width = 180):
	"""Build and compile the convolutional classifier.

	Args:
		num_classes: number of units in the final Dense layer.
		height, width: spatial size of the 3-channel input images.

	Returns:
		The compiled model. The last layer has no activation and the loss is
		configured with ``from_logits=True``.
	"""
	preprocessing = layers.experimental.preprocessing
	image_shape = (height, width, 3)

	#random flip / rotation / zoom applied in front of the network
	augmenter = keras.Sequential([
		preprocessing.RandomFlip("horizontal", input_shape=image_shape),
		preprocessing.RandomRotation(0.1),
		preprocessing.RandomZoom(0.1),
	])

	#NOTE: the original author kept a third Conv2D(64) + MaxPooling2D stage
	#disabled, marked to be enabled when training on the entire data set
	layer_stack = [
		augmenter,
		#scale pixel values by 1/255
		preprocessing.Rescaling(1./255, input_shape=image_shape),
		layers.Conv2D(16, 3, padding='same', activation='relu'),
		layers.MaxPooling2D(),
		layers.Conv2D(32, 3, padding='same', activation='relu'),
		layers.MaxPooling2D(),
		layers.Dropout(0.15),
		layers.Flatten(),
		layers.Dense(128, activation='relu'),
		layers.Dense(num_classes),
	]
	network = Sequential(layer_stack)

	#integer labels + raw logits -> sparse categorical cross-entropy
	logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
	network.compile(optimizer='adam', loss=logits_loss, metrics=['accuracy'])

	return network

Train the model

In [7]:
def train_model(model, train_ds, val_ds, epochs= 12):
	"""Fit ``model`` on ``train_ds``, validating against ``val_ds``.

	Returns:
		A ``(history, epochs)`` tuple: whatever ``model.fit`` returned, and the
		epoch count that was requested.
	"""
	fit_options = {'validation_data': val_ds, 'epochs': epochs}
	training_log = model.fit(train_ds, **fit_options)
	return training_log, epochs

Create the prediction function

In [8]:
def make_prediction(model, picture_path, height = 180, width = 180, labels = None):
	"""Classify one image file and display it with the prediction drawn on it.

	Args:
		model: trained model; the first row of ``model.predict`` is treated as
			logits and passed through softmax here.
		picture_path: path of the image file to classify.
		height, width: size the image is resized to before prediction.
		labels: sequence of class names indexed by predicted class id. When
			omitted, the module-level ``class_names`` is used (the original
			behaviour).

	Returns:
		A ``(label, confidence_percent)`` tuple for the most likely class.

	Raises:
		NameError: if ``labels`` is omitted and no module-level ``class_names``
			has been defined.
	"""
	if labels is None:
		#fall back to the global set by the script entry point; fail early,
		#before any model work, when it is missing
		labels = class_names

	img = keras.preprocessing.image.load_img(
		picture_path, target_size=(height, width)
	)
	img_array = keras.preprocessing.image.img_to_array(img)
	img_array = tf.expand_dims(img_array, 0) # Create a batch

	predictions = model.predict(img_array)
	score = tf.nn.softmax(predictions[0])

	label = labels[int(np.argmax(score))]
	confidence = 100 * np.max(score)
	result_string = "Likely {} with {:.2f}% confidence.".format(label, confidence)

	try:
		font = ImageFont.truetype("arial.ttf", 100)
	except OSError:
		#Arial is not installed on every system; use Pillow's built-in font
		#instead of aborting the prediction
		font = ImageFont.load_default()

	#the context manager closes the file handle; convert() yields an
	#independent RGB copy so the white RGB fill below applies uniformly
	with Image.open(picture_path) as source_img:
		img_to_print = source_img.convert("RGB")

	draw = ImageDraw.Draw(img_to_print)
	draw.text((0, 0), result_string, (255,255,255), font=font)
	img_to_print.show()

	return label, float(confidence)

In [None]:
if __name__ == "__main__":

	#Retrieve dataset
	train_ds, val_ds = get_local_data('dataset_popular')
	#kept as a module-level name because make_prediction() reads it as a global
	class_names = train_ds.class_names

	#Train the Model
	train_ds, val_ds = configure_ds(train_ds, val_ds)
	#size the output layer from the classes actually found, rather than
	#relying on create_model's default of 5
	model = create_model(num_classes=len(class_names))
	#summary() prints the table itself; wrapping it in print() also emitted "None"
	model.summary()
	history, epochs = train_model(model, train_ds, val_ds)

	#Test the Model on every path given on the command line
	for picture_path in sys.argv[1:]:
		make_prediction(model, picture_path = picture_path)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Number of JPEG in dataset: 951
Traceback (most recent call last):
  File "/usr/local/Caskroom/miniconda/base/envs/tensorflow1X/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/x2/_5cxp41x5gb2t9fvqts2rph00000gn/T/ipykernel_20284/1087968663.py", line 4, in <module>
    train_ds, val_ds = get_local_data('dataset_popular')
  File "/var/folders/x2/_5cxp41x5gb2t9fvqts2rph00000gn/T/ipykernel_20284/1784240429.py", line 6, in get_local_data
    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  File "/usr/local/Caskroom/miniconda/base/envs/tensorflow1X/lib/python3.7/site-packages/tensorflow_core/python/util/module_wrapper.py", line 193, in __getattr__
    attr = getattr(self._tfmw_wrapped_module, name)
AttributeError: module 'tensorflow.python.keras.api._v1.keras.preprocessing' has no attribute 'image_dataset_from_directory'

During handling of the above exception, anot