In [30]:
import tensorflow

In [31]:
import cv2 
import numpy as np
import os
from random import shuffle
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [47]:
# Side length, in pixels, that process_data() resizes every image to.
IMG_SIZE = 32
# Directory whose entries process_data() lists and reads as training images.
images_dir = 'train'
# Derives the numeric class label from a training file name.
def label_img(img):
    """Return the class label encoded in a training image's file name.

    File names are expected to look like ``<class>.<id>.<ext>`` (for example
    ``cat.0.jpg``); the class is the third dot-separated field counted from
    the end of the name.

    Args:
        img: File name (not pixel data) of a training image.

    Returns:
        1 if the class field is ``'cat'``, 0 if it is ``'dog'``, and None
        when the name has fewer than three dot-separated fields or names
        any other class.
    """
    parts = img.split('.')
    # A name with fewer than three fields (e.g. '.DS_Store') has no class
    # field; indexing [-3] on it would raise IndexError.
    if len(parts) < 3:
        return None
    word_label = parts[-3]
    # Scalar class ids, not one-hot vectors: the models in this notebook are
    # compiled with sparse_categorical_crossentropy.
    if word_label == 'cat':
        return 1
    if word_label == 'dog':
        return 0
    return None
# Builds the training data from the image files in images_dir.
def process_data():
    """Load, resize and label the images found in ``images_dir``.

    Every directory entry is labelled with ``label_img``, read with
    ``cv2.imread`` and resized to ``IMG_SIZE`` x ``IMG_SIZE``. No grayscale
    conversion or flattening is applied. Entries for which ``label_img``
    returns None, or which ``cv2.imread`` cannot decode, are skipped and
    counted instead of aborting the whole pass.

    Side effects:
        Writes ``my_data.npy`` and ``my_result.npy`` to the current working
        directory and prints a one-line summary if anything was skipped.

    Returns:
        A ``(my_data, my_result)`` tuple of equally long lists: the resized
        image arrays and their labels (1 for cat, 0 for dog).
    """
    my_data = []
    my_result = []
    skipped = 0
    # os.listdir returns entries in arbitrary order; sorting fixes the sample
    # order so that a later seeded train/test split is reproducible.
    for file_name in tqdm(sorted(os.listdir(images_dir))):
        label = label_img(file_name)
        if label is None:
            skipped += 1
            continue
        path = os.path.join(images_dir, file_name)
        image = cv2.imread(path)
        # cv2.imread reports an unreadable file by returning None rather than
        # raising; passing that on to cv2.resize would crash.
        if image is None:
            skipped += 1
            continue
        resized = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        my_data.append(np.array(resized))
        my_result.append(label)
    if skipped:
        print("Skipped " + str(skipped) + " file(s) without a usable label or image")
    np.save('my_data.npy', my_data)
    np.save('my_result.npy', my_result)
    return my_data, my_result

In [48]:
# Run the load/resize/label pass over images_dir: X receives the list of
# resized image arrays and y the list of matching 0/1 labels.
X, y = process_data()

100%|██████████| 25000/25000 [01:59<00:00, 209.75it/s]


In [49]:
# Stack the per-image arrays into a single ndarray and divide every value by 255.
X = np.array(X) / 255.

In [50]:
# https://machinelearningmastery.com/how-to-manually-scale-image-pixel-data-for-deep-learning/
# Global standardisation: one mean and one standard deviation, taken over
# every value in X, shift and scale the whole dataset. The model can be tried
# both ways, with this step before or after the division by 255.
mean_X = X.mean()
std_X = X.std()
X = (X - mean_X) / std_X

In [51]:
# Shuffle and split the samples: three quarters for training, the remainder
# for testing. random_state=0 seeds the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    random_state=0,
)


In [77]:
from tensorflow.keras import layers, models


In [94]:
# https://www.youtube.com/watch?v=7HPwo4wnJeA&t=349s
# Fully connected baseline: the image is flattened and passed through four
# ReLU Dense layers of shrinking width, then a two-unit softmax output.
# The input shape follows IMG_SIZE so the model stays in step with the size
# process_data() resizes images to, instead of hard-coding 32.
ann = models.Sequential([
        layers.Flatten(input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        layers.Dense(1000, activation='relu'),
        layers.Dense(300, activation='relu'),
        layers.Dense(50, activation='relu'),
        layers.Dense(10, activation='relu'),
        layers.Dense(2, activation='softmax')
    ])

# Integer class labels (0/1) are used, hence the sparse categorical loss.
ann.compile(optimizer='SGD',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [95]:
# Sanity check: shapes of the training inputs and of the training labels,
# one per line.
print(np.array(X_train).shape, np.array(y_train).shape, sep='\n')

(18750, 32, 32, 3)
(18750,)


In [96]:
# Train the dense baseline for 10 epochs on the training split; the returned
# History object is the cell's displayed value.
ann.fit(
    x=np.array(X_train),
    y=np.array(y_train),
    epochs=10,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff6e9118dc0>

In [99]:
# Two softmax outputs per test sample, collapsed to a class id: 0 when the
# first output is strictly larger, otherwise 1 (so ties resolve to 1).
predict_ann = ann.predict(X_test)
predict_ann_one = [0 if scores[0] > scores[1] else 1 for scores in predict_ann]

In [103]:
# Confusion-matrix counts for `ann` on the test split, with class 1 (cat)
# treated as the positive class.
truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

# Walk predictions and ground truth in lockstep instead of indexing both.
for predicted, actual in zip(predict_ann_one, y_test):
    if predicted == actual:
        if predicted == 1:
            truePositives += 1
        elif predicted == 0:
            trueNegatives += 1
    elif predicted == 1:
        falsePositives += 1
    elif predicted == 0:
        falseNegatives += 1

predictedPositives = truePositives + falsePositives
actualPositives = truePositives + falseNegatives
sampleCount = len(predict_ann_one)

# Each ratio falls back to 0.0 when its denominator is zero (for example when
# the model never predicts class 1) instead of raising ZeroDivisionError.
precisionPercent = truePositives*100/predictedPositives if predictedPositives else 0.0
print(str(precisionPercent) + "%")
recallPercent = truePositives*100/actualPositives if actualPositives else 0.0
print(str(recallPercent) + "%")
if precisionPercent + recallPercent:
    fMeasure = 2*precisionPercent*recallPercent/(precisionPercent+recallPercent)
else:
    fMeasure = 0.0
print(str(fMeasure) + "%")
accuracy = (truePositives+trueNegatives)*100/sampleCount if sampleCount else 0.0
print(str(accuracy) + "%")

64.96577473553205%
67.18146718146718%
66.05504587155963%
65.664%


In [104]:
# Loss and accuracy of the dense baseline on the held-out split.
results = ann.evaluate(
    x=X_test,
    y=np.array(y_test),
)



In [105]:
# Small convolutional network: two Conv2D + MaxPooling2D stages, then a
# 64-unit ReLU Dense layer and a two-unit softmax output.
# The input shape follows IMG_SIZE so the model stays in step with the size
# process_data() resizes images to, instead of hard-coding 32.
cnn = models.Sequential([
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                  input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax')
])
# Integer class labels (0/1) are used, hence the sparse categorical loss.
cnn.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [106]:
# Train the convolutional network for 10 epochs on the training split; the
# returned History object is the cell's displayed value.
cnn.fit(
    x=X_train,
    y=np.array(y_train),
    epochs=10,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff6e955dd90>

In [107]:
# Loss and accuracy of the convolutional network on the held-out split.
cnn_results = cnn.evaluate(
    x=X_test,
    y=np.array(y_test),
)



In [108]:
# Outputs of `cnn` for every held-out sample; its final layer is a two-unit
# softmax, so each row holds two values.
predict_cnn = cnn.predict(X_test) 

In [109]:
# Collapse each pair of CNN outputs to a class id: 0 when the first output is
# strictly larger, otherwise 1 (so ties resolve to 1).
predict_cnn_one = [0 if scores[0] > scores[1] else 1 for scores in predict_cnn]

In [110]:
# Confusion-matrix counts for `cnn` on the test split, with class 1 (cat)
# treated as the positive class.
truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

# Walk predictions and ground truth in lockstep instead of indexing both.
for predicted, actual in zip(predict_cnn_one, y_test):
    if predicted == actual:
        if predicted == 1:
            truePositives += 1
        elif predicted == 0:
            trueNegatives += 1
    elif predicted == 1:
        falsePositives += 1
    elif predicted == 0:
        falseNegatives += 1

predictedPositives = truePositives + falsePositives
actualPositives = truePositives + falseNegatives
sampleCount = len(predict_cnn_one)

# Each ratio falls back to 0.0 when its denominator is zero (for example when
# the model never predicts class 1) instead of raising ZeroDivisionError.
precisionPercent = truePositives*100/predictedPositives if predictedPositives else 0.0
print(str(precisionPercent) + "%")
recallPercent = truePositives*100/actualPositives if actualPositives else 0.0
print(str(recallPercent) + "%")
if precisionPercent + recallPercent:
    fMeasure = 2*precisionPercent*recallPercent/(precisionPercent+recallPercent)
else:
    fMeasure = 0.0
print(str(fMeasure) + "%")
accuracy = (truePositives+trueNegatives)*100/sampleCount if sampleCount else 0.0
print(str(accuracy) + "%")

81.84931506849315%
76.8983268983269%
79.29661579296616%
80.032%


In [115]:
# Shallow dense variant: flatten, one 100-unit ReLU Dense layer, then a
# two-unit softmax output.
# The input shape follows IMG_SIZE so the model stays in step with the size
# process_data() resizes images to, instead of hard-coding 32.
ann_2 = models.Sequential([
        layers.Flatten(input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        layers.Dense(100, activation='relu'),
        layers.Dense(2, activation='softmax')
    ])

# Integer class labels (0/1) are used, hence the sparse categorical loss.
ann_2.compile(optimizer='SGD',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [116]:
# Train the shallow dense variant for 10 epochs on the training split; the
# returned History object is the cell's displayed value.
ann_2.fit(
    x=np.array(X_train),
    y=np.array(y_train),
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff7d3314370>

In [117]:
# Two softmax outputs per test sample, collapsed to a class id: 0 when the
# first output is strictly larger, otherwise 1 (so ties resolve to 1).
predict_ann_2 = ann_2.predict(X_test)
predict_ann_2_one = [0 if scores[0] > scores[1] else 1 for scores in predict_ann_2]

In [118]:
# Confusion-matrix counts for `ann_2` on the test split, with class 1 (cat)
# treated as the positive class.
truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

# Walk predictions and ground truth in lockstep instead of indexing both.
for predicted, actual in zip(predict_ann_2_one, y_test):
    if predicted == actual:
        if predicted == 1:
            truePositives += 1
        elif predicted == 0:
            trueNegatives += 1
    elif predicted == 1:
        falsePositives += 1
    elif predicted == 0:
        falseNegatives += 1

predictedPositives = truePositives + falsePositives
actualPositives = truePositives + falseNegatives
sampleCount = len(predict_ann_2_one)

# Each ratio falls back to 0.0 when its denominator is zero (for example when
# the model never predicts class 1) instead of raising ZeroDivisionError.
precisionPercent = truePositives*100/predictedPositives if predictedPositives else 0.0
print(str(precisionPercent) + "%")
recallPercent = truePositives*100/actualPositives if actualPositives else 0.0
print(str(recallPercent) + "%")
if precisionPercent + recallPercent:
    fMeasure = 2*precisionPercent*recallPercent/(precisionPercent+recallPercent)
else:
    fMeasure = 0.0
print(str(fMeasure) + "%")
accuracy = (truePositives+trueNegatives)*100/sampleCount if sampleCount else 0.0
print(str(accuracy) + "%")

65.01913265306122%
65.6048906048906%
65.31069827033951%
65.344%


In [119]:
# Loss and accuracy of the shallow dense variant on the held-out split.
# Note: this rebinds `results`, replacing the value stored when `ann` was
# evaluated in an earlier cell.
results = ann_2.evaluate(
    x=X_test,
    y=np.array(y_test),
)



In [122]:
# Convolutional front end (two Conv2D + MaxPooling2D stages) combined with
# the deep dense stack used by `ann`: four ReLU Dense layers of shrinking
# width and a two-unit softmax output.
# The input shape follows IMG_SIZE so the model stays in step with the size
# process_data() resizes images to, instead of hard-coding 32.
cnn_5 = models.Sequential([
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                  input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(1000, activation='relu'),
    layers.Dense(300, activation='relu'),
    layers.Dense(50, activation='relu'),
    layers.Dense(10, activation='relu'),
    layers.Dense(2, activation='softmax')
])
# Integer class labels (0/1) are used, hence the sparse categorical loss.
cnn_5.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [123]:
# Train the deeper convolutional network for 10 epochs on the training
# split; the returned History object is the cell's displayed value.
cnn_5.fit(
    x=X_train,
    y=np.array(y_train),
    epochs=10,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff60b9717f0>

In [124]:
# Loss and accuracy of the deeper convolutional network on the held-out
# split. Note: this rebinds `cnn_results`, replacing the value stored when
# `cnn` was evaluated in an earlier cell.
cnn_results = cnn_5.evaluate(
    x=X_test,
    y=np.array(y_test),
)



In [127]:
# Outputs of `cnn_5` for every held-out sample; its final layer is a two-unit
# softmax, so each row holds two values.
predict_cnn_5 = cnn_5.predict(X_test) 

In [128]:
# Collapse each pair of cnn_5 outputs to a class id: 0 when the first output
# is strictly larger, otherwise 1 (so ties resolve to 1).
predict_cnn_5_one = [0 if scores[0] > scores[1] else 1 for scores in predict_cnn_5]

In [129]:
# Confusion-matrix counts for `cnn_5` on the test split, with class 1 (cat)
# treated as the positive class.
truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

# Walk predictions and ground truth in lockstep instead of indexing both.
for predicted, actual in zip(predict_cnn_5_one, y_test):
    if predicted == actual:
        if predicted == 1:
            truePositives += 1
        elif predicted == 0:
            trueNegatives += 1
    elif predicted == 1:
        falsePositives += 1
    elif predicted == 0:
        falseNegatives += 1

predictedPositives = truePositives + falsePositives
actualPositives = truePositives + falseNegatives
sampleCount = len(predict_cnn_5_one)

# Each ratio falls back to 0.0 when its denominator is zero (for example when
# the model never predicts class 1) instead of raising ZeroDivisionError.
precisionPercent = truePositives*100/predictedPositives if predictedPositives else 0.0
print(str(precisionPercent) + "%")
recallPercent = truePositives*100/actualPositives if actualPositives else 0.0
print(str(recallPercent) + "%")
if precisionPercent + recallPercent:
    fMeasure = 2*precisionPercent*recallPercent/(precisionPercent+recallPercent)
else:
    fMeasure = 0.0
print(str(fMeasure) + "%")
accuracy = (truePositives+trueNegatives)*100/sampleCount if sampleCount else 0.0
print(str(accuracy) + "%")

75.76119402985074%
81.66023166023166%
78.60018581604211%
77.888%
