# 1 Imports

In [226]:
import numpy as np 
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.metrics import top_k_categorical_accuracy
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split

# 2 Fetching the data

In [227]:
#finding the amount of rows for each category
df = pd.read_csv('5_animals.csv')
df.head()

Unnamed: 0,countrycode,drawing,key_id,timestamp,word
0,US,"[[[53, 61, 89, 111, 181, 218, 224, 226, 221, 2...",5031287486152704,2017-03-26 23:18:28.376820,cat
1,CZ,"[[[61, 43, 25, 0, 30, 24, 28, 32, 45, 67, 88, ...",5161175954227200,2017-03-25 08:14:21.927480,bird
2,FI,"[[[76, 116, 144, 148, 150, 148, 113, 83, 65, 3...",6683220546420736,2017-01-28 08:04:57.073340,cat
3,BE,"[[[170, 170, 178, 192, 203, 211, 219, 239, 249...",4904568938823680,2017-03-16 20:54:56.220460,elephant
4,US,"[[[68, 62, 26, 9, 0, 3, 11, 41, 64], [36, 36, ...",6633641922789376,2017-03-13 19:10:10.022730,dog


In [228]:
df['word'].value_counts()

cat          10000
bird         10000
elephant     10000
dog          10000
butterfly    10000
Name: word, dtype: int64

There are 5 categories with 10k images of each available as you can see above.

Now i'm going to split the dataframe into a train/test/validation set like 70/15/15

# 3 Prepairing the data

In [229]:
X = df.drop(columns = ['word']).copy()
y = df['word']

In [230]:
X_train, X_rem, y_train, y_rem = train_test_split(X,y, train_size=0.7)
X_val, X_test, y_val, y_test = train_test_split(X_rem,y_rem, test_size=0.5)

In [231]:
df['word'].value_counts()

cat          10000
bird         10000
elephant     10000
dog          10000
butterfly    10000
Name: word, dtype: int64

In [232]:
%%capture
X_train.reset_index()
y_train.reset_index()
X_test.reset_index()
y_test.reset_index()
X_val.reset_index()
y_val.reset_index()

In [233]:
print(X_train.shape), print(y_train.shape)
print(X_val.shape), print(y_val.shape)
print(X_test.shape), print(y_test.shape)

(35000, 4)
(35000,)
(7500, 4)
(7500,)
(7500, 4)
(7500,)


(None, None)

# 4 Training the model

In [234]:
num_classes = 5

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32, 32, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(680, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_26 (Conv2D)          (None, 32, 32, 32)        320       
                                                                 
 max_pooling2d_26 (MaxPoolin  (None, 16, 16, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_27 (Conv2D)          (None, 16, 16, 64)        18496     
                                                                 
 max_pooling2d_27 (MaxPoolin  (None, 8, 8, 64)         0         
 g2D)                                                            
                                                                 
 dropout_26 (Dropout)        (None, 8, 8, 64)          0         
                                                                 
 flatten_13 (Flatten)        (None, 4096)            

In [235]:
def top_3_accuracy(x,y): 
    t3 = top_k_categorical_accuracy(x,y, 3)
    return t3

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, 
                                   verbose=1, mode='auto', min_delta=0.005, cooldown=5, min_lr=0.0001)
earlystop = EarlyStopping(monitor='val_top_3_accuracy', mode='max', patience=5) 
callbacks = [reduceLROnPlat, earlystop]

model.compile(loss='categorical_crossentropy',
              optimizer='RMSprop',
              metrics=['accuracy', top_3_accuracy])

model.fit(x=X_train, y=y_train,
          batch_size = 32,
          epochs = 25,
          validation_data = (X_val, y_val),
          callbacks = callbacks,
          verbose = 1)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).