# Pokémon Classification

## Dependencies

In [1]:
import glob
import cv2
import os
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage import color, io

# Dataset

In [19]:
number_of_images = 10908
training_size = .8*number_of_images
testing_size = .2*number_of_images
IMAGE_DIRECTORY= glob.glob('../resources/model-images/')
IMAGE_SIZE = 32
IMAGE_ARR_SIZE = 32 * 32 * 4
KIND_ARRAY=np.array(['Abra', 'Aerodactyl', 'Alakazam', 'Arbok', 'Arcanine',  
                    'Articuno', 'Beedrill', 'Bellsprout', 'Blastoise', 'Bulbasaur',  
                    'Butterfree','Caterpie','Chansey','Charizard','Charmander',
                    'Charmeleon','Clefable', 'Clefairy', 'Cloyster', 'Cubone', 
                    'Dewgong', 'Diglett', 'Ditto', 'Dodrio', 'Doduo', 'Dragonair', 
                    'Dragonite', 'Dratini', 'Drowzee', 'Dugtrio', 'Eevee', 'Ekans', 
                    'Electrabuzz', 'Electrode', 'Exeggcute', 'Exeggutor', 'Farfetchd',
                    'Fearow', 'Flareon', 'Gastly', 'Gengar', 'Geodude', 'Gloom', 
                    'Golbat', 'Goldeen', 'Golduck', 'Golem', 'Graveler', 'Grimer', 
                    'Growlithe', 'Gyarados', 'Haunter', 'Hitmonchan', 'Hitmonlee', 
                    'Horsea', 'Hypno', 'Ivysaur', 'Jigglypuff', 'Jolteon', 'Jynx', 
                    'Kabuto', 'Kabutops', 'Kadabra', 'Kakuna', 'Kangaskhan', 'Kingler',
                    'Koffing', 'Krabby', 'Lapras', 'Lickitung', 'Machamp', 'Machoke', 
                    'Machop', 'Magikarp', 'Magmar', 'Magnemite', 'Magneton', 'Mankey', 
                    'Marowak', 'Meowth', 'Metapod', 'Mew', 'Mewtwo', 'Moltres', 'MrMime', 
                    'Muk', 'Nidoking', 'Nidoqueen', 'Nidorina', 'Nidorino', 'Ninetales', 
                    'Oddish', 'Omantye', 'Omastar', 'Onix', 'Paras', 'Parasect', 'Persian', 
                    'Pidgeot', 'Pidgeotto', 'Pidgey', 'Pikachu', 'Pinsir', 'Poliwag', 
                    'Poliwhirl', 'Poliwrath', 'Ponyta', 'Porygon', 'Primeape', 'Psyduck', 
                    'Raichu', 'Rapidash', 'Raticate', 'Rattata', 'Rhydon', 'Rhyhorn', 
                    'Sandshrew', 'Sandslash', 'Scyther', 'Seadra', 'Seaking', 'Seel', 
                    'Shellder', 'Slowbro', 'Slowpoke', 'Snorlax', 'Spearow', 'Squirtle', 
                    'Starmie', 'Staryu', 'Tangela', 'Tauros', 'Tentacool', 'Tentacruel', 
                    'Vaporeon', 'Venomoth', 'Venonat', 'Venusaur', 'Victreebel', 
                    'Vileplume', 'Voltorb', 'Vulpix', 'Wartortle', 'Weedle', 'Weepinbell', 
                    'Weezing', 'Wigglytuff', 'Zapdos', 'Zupbat'])\

def load_images(image_dir):
    labels = []
    names = []
    read_images = []
    image_index = 0
    
    
    # 10908 Pokemon images
    X = np.ndarray(shape=(10908, IMAGE_ARR_SIZE))
                        
    
    # Loop through all the folders of different types of pokemon.
    for kind in os.listdir(image_dir):
        if kind == '.DS_Store':
                pass  
        else: 
            type_images = os.listdir(image_dir + kind + '/')

            
        # Loop through all the images of a certain type of pokemon in the folders.
        for image in type_images:
            targeted_image = []
            image_file = os.path.join(image_dir, kind, image)
            #Pre-processing: Convert to grayscale and flatten
            targeted_image.append(cv2.imread(image_file, cv2.IMREAD_GRAYSCALE))
            targeted_image = np.asarray(targeted_image).flatten()
            read_images.append(targeted_image)
            names.append(image)
            labels.append(kind)

            if OSError: 
                pass          
            else:
                image_index += 1
    #return arrays of the pixels, labels, and names
#     return (np.asarray(read_images), np.asarray(labels), np.asarray(names))
    return (X, np.asarray(labels), np.asarray(names))

In [20]:
IMAGE_DIRECTORY_STR = ''.join(IMAGE_DIRECTORY)
type(IMAGE_DIRECTORY_STR)

str

In [21]:
dataset = load_images(IMAGE_DIRECTORY_STR)
pokedex = pd.DataFrame({'pixels':dataset[0], 'labels':dataset[1], 'file_names':dataset[2]})

In [7]:
pokedex.head()

Unnamed: 0,pixels,labels,file_names
0,"[57, 57, 58, 83, 35, 48, 64, 48, 56, 54, 48, 6...",Zapdos,c3f34230c7e24d18845c4d2eaad54801.jpg
1,"[249, 249, 249, 249, 249, 249, 249, 249, 249, ...",Zapdos,f5a52cd93f924e28a46d7def3265d6b4.jpg
2,"[88, 88, 89, 89, 89, 90, 90, 90, 91, 91, 92, 9...",Zapdos,26b19f8809ce496eae2e1b822d54492c.jpg
3,"[50, 46, 48, 48, 48, 50, 51, 48, 48, 50, 50, 5...",Zapdos,7a63f55b762a4fe7b6cd92f17ea22ebf.jpg
4,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ...",Zapdos,0b9b5e421d8949e0b2b6698c8d0bb92e.jpg


## Split the Data into Training and Testing

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(pokedex["pixels"], pokedex["labels"], 
                                                    test_size=0.2, random_state=42)

In [24]:
print("Length X_Train:", len(X_train), "Length y_train:", len(y_train))
print("Length X_test:", len(X_test), "Length y_test:", len(y_test))

Length X_Train: 8726 Length y_train: 8726
Length X_test: 2182 Length y_test: 2182


## Random Forests - No Good

### This model does not work well for images because it assumes the individual pixel values are unique.

In [None]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier()
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [None]:
X_train = X_train.reshape((len(X_train), 1))

In [None]:
clf.fit(X_train, y_train)

### Test the accuracy of the model

In [None]:
from sklearn.metrics import accuracy_score

predictions = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test,predictions))

#### The accuracy score gave about 3%.

## DNN Classifier

In [None]:
import tensorflow as tf

dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], n_classes=10908, 
                                        feature_columns=feature_columns)

dnn_clf.fit(X_train, labels_index, batch_size=10, steps=20000)

### Test the accuracy of the model

In [None]:
sample, sample_labels= X_test, y_test

#sample labels index = y test labels index
sample_labels_index = list(range(len(y_test)))

predictions = list(dnn_clf.predict(X_test))
#predictions = models thoughts on what the y is
print("Predict", predictions)
#the sample labels index is the actual y
print ("Actual ", sample_labels_index)
# plot_pokemons(sample, images_per_row=5)
print(sample_labels)

accuracy_score = dnn_clf.evaluate(sample, sample_labels_index, steps=2)["accuracy"]
print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

## Decision Tree - No Good

In [None]:
# Import Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()

In [None]:
model.fit(x_train,y_train)
y_predict = model.predict(x_test)

### Test the Accuracy

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_predict)