In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
import os
import random

### get all images in grayscale format

In [3]:
# Root folder of the dataset; it holds one sub-folder per entry in `categories`.
data_dir = "./PetImages/"
categories = ["Cat", "Dog"]
# Side length (in pixels) of the square every image is resized to.
# Defined once up front instead of being reassigned for every image.
IMG_SIZE = 75

# collect training data as [resized_image, class_index] pairs
training_set = []

# The position of a category in `categories` is its numeric label (Cat=0, Dog=1).
for class_num, category in enumerate(categories):
    path = os.path.join(data_dir, category)
    for img in os.listdir(path):
        img_array = cv.imread(os.path.join(path, img), cv.IMREAD_GRAYSCALE)
        # cv.imread returns None for files it cannot decode; skip those.
        if img_array is None:
            continue
        new_img_array = cv.resize(img_array, (IMG_SIZE, IMG_SIZE))
        training_set.append([new_img_array, class_num])

#### Shuffle the training data so that the neural net does not get stuck in local minima
1. This prevents mini-batches from being made up of highly correlated examples.

In [4]:
# Shuffle in place with a dedicated, seeded generator so the shuffle is
# repeatable for a given input order (the global `random` state is untouched).
random.Random(42).shuffle(training_set)

In [5]:
# Separate the [image, label] pairs into two parallel lists.
x = [pair[0] for pair in training_set]
y = [pair[1] for pair in training_set]
print(len(x), len(y))

24946 24946


In [6]:
# Side length (in pixels) of the square images; used for reshaping and resizing below.
IMG_SIZE = 75

### The trailing 1 is the channel dimension: grayscale images have a single channel

In [7]:
# Stack the images into one array with an explicit trailing channel axis,
# and convert the label list to an array as well.
image_dims = (-1, IMG_SIZE, IMG_SIZE, 1)
x = np.array(x).reshape(image_dims)
y = np.array(y)

In [8]:
# Sanity check: expect (num_images, IMG_SIZE, IMG_SIZE, 1).
print(x.shape)

(24946, 75, 75, 1)


### save your data 

In [9]:
import pickle

# Persist the prepared arrays so the image-loading step need not be repeated.
# `with` guarantees each file is closed even if pickle.dump raises.
with open("x.pickle", "wb") as pickle_out:
    pickle.dump(x, pickle_out)

with open("y.pickle", "wb") as pickle_out:
    pickle.dump(y, pickle_out)

### read saved data using pickle

In [3]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
# `with` closes the input files, which the previous version left open.
with open("x.pickle", "rb") as pickle_in:
    x1 = pickle.load(pickle_in)
print(x1.shape)

with open("y.pickle", "rb") as pickle_in:
    y1 = pickle.load(pickle_in)

(24946, 75, 75, 1)


In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

In [12]:
# Scale pixel values from [0, 255] to [0, 1].
# The integer-dtype guard keeps this cell idempotent: once x has been converted
# to floats, re-running the cell will not divide the data a second time.
# Assumes the raw pixels are stored with an integer dtype (8-bit images) - TODO confirm.
if np.issubdtype(x.dtype, np.integer):
    x = x / 255

In [13]:
# Print the scaled array (NumPy abbreviates large arrays) to eyeball that values lie in [0, 1].
print(x)

[[[[0.50588235]
   [0.4745098 ]
   [0.50588235]
   ...
   [0.42745098]
   [0.47843137]
   [0.27843137]]

  [[0.49411765]
   [0.53333333]
   [0.52941176]
   ...
   [0.40392157]
   [0.19215686]
   [0.22352941]]

  [[0.5254902 ]
   [0.51372549]
   [0.56470588]
   ...
   [0.41176471]
   [0.34901961]
   [0.21176471]]

  ...

  [[0.67058824]
   [0.60784314]
   [0.58039216]
   ...
   [0.55294118]
   [0.55294118]
   [0.5372549 ]]

  [[0.57254902]
   [0.7372549 ]
   [0.58431373]
   ...
   [0.6627451 ]
   [0.58823529]
   [0.54509804]]

  [[0.56862745]
   [0.62352941]
   [0.56470588]
   ...
   [0.78431373]
   [0.8745098 ]
   [0.60784314]]]


 [[[0.52941176]
   [0.54509804]
   [0.54901961]
   ...
   [0.54117647]
   [0.52941176]
   [0.51764706]]

  [[0.5372549 ]
   [0.54901961]
   [0.55686275]
   ...
   [0.54509804]
   [0.5372549 ]
   [0.5254902 ]]

  [[0.54117647]
   [0.55686275]
   [0.56470588]
   ...
   [0.54509804]
   [0.5372549 ]
   [0.52941176]]

  ...

  [[0.62352941]
   [0.6       ]
   [0.5

In [14]:
# Small CNN for binary (cat vs. dog) classification.
model = Sequential()

# Convolution block 1: 64 3x3 filters -> ReLU -> 2x2 max-pooling.
# The input shape is taken from the data, dropping the leading sample axis.
model.add(Conv2D(64, (3, 3), input_shape=x.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Convolution block 2: same layout as block 1.
model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(64))
# Dense layers are linear by default; without this non-linearity Dense(64)
# followed by Dense(1) collapses into a single linear map and adds nothing.
model.add(Activation("relu"))

# Single sigmoid unit: output is the predicted probability of class 1.
model.add(Dense(1))
model.add(Activation("sigmoid"))

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])



Train on 22451 samples, validate on 2495 samples


<tensorflow.python.keras.callbacks.History at 0x7f28f123f668>

In [32]:
# Train for 5 epochs in mini-batches of 32, holding out 10% of the samples for validation.
model.fit(
    x,
    y,
    epochs=5,
    batch_size=32,
    validation_split=0.1,
)

Train on 22451 samples, validate on 2495 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f2842ecb860>

In [15]:
# Maps a predicted class index to its human-readable label.
my_dict = dict(enumerate(("cat", "dog")))

In [51]:
# Classify every image found in the ad-hoc test folder and print the predicted label.
my_test_dir = "./myTest"
for image in os.listdir(my_test_dir):
    image_path = os.path.join(my_test_dir, image)
    img_data = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    # cv.imread returns None for files it cannot decode; skip them, as the
    # training loader does, instead of crashing on `None / 255`.
    if img_data is None:
        print(image, "skipped (not a readable image)")
        continue
    # Same preprocessing order as the training data: resize the raw image
    # first, then scale the pixel values to [0, 1].
    new_img_array = cv.resize(img_data, (IMG_SIZE, IMG_SIZE)) / 255
    # Add the batch axis (one sample) and the trailing channel axis.
    new_img_data = new_img_array.reshape(1, IMG_SIZE, IMG_SIZE, 1)
    # The sigmoid output is rounded to the nearest class index (0 or 1).
    prediction = model.predict(new_img_data)[0, 0]
    print(image, my_dict[int(round(prediction))])

lioness1.jpeg dog
hyena.jpeg dog
catAndDog1.jpg dog
fox1.jpg dog
cat2.jpg cat
hybrid1.jpeg dog
catAndDog2.jpeg dog
cat1.png cat
simba.jpg dog


In [52]:
# Show the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 73, 73, 64)        640       
_________________________________________________________________
activation (Activation)      (None, 73, 73, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 34, 34, 64)        36928     
_________________________________________________________________
activation_1 (Activation)    (None, 34, 34, 64)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 17, 17, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 18496)             0

### Comparison with linear regression

In [1]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

In [15]:
# Flatten every image into a single feature vector so it can be fed to the
# scikit-learn linear models (one row per sample, one column per pixel).
train_shape = x1.shape
n_samples, img_height, img_width, n_channels = train_shape
nextd = img_height * img_width * n_channels
x = x1.reshape(n_samples, nextd)
print(x.shape)

(24946, 5625)


In [20]:
# Sanity check: the label vector should have one entry per row of x.
print(y1.shape)

(24946,)


In [23]:
# Hold out 25% of the samples for testing. A fixed random_state makes the
# split, and therefore the scores computed from it, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y1, test_size=0.25, random_state=42
)

In [24]:
# Sanity check: training features and labels must have the same number of rows.
print(x_train.shape, y_train.shape)

(18709, 5625) (18709,)


In [31]:
# Fit three linear baselines on the flattened pixels; `fit` returns the
# estimator itself, so construction and fitting can be chained.
linear_reg_model = LinearRegression().fit(x_train, y_train)
ridge_model = Ridge(alpha=100).fit(x_train, y_train)

# Kept as the final expression so the notebook displays the fitted Lasso.
lasso_model = Lasso(alpha=1000, tol=0.0925)
lasso_model.fit(x_train, y_train)

Lasso(alpha=1000, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0925, warm_start=False)

In [30]:
# Report each fitted model's `score` on the held-out test set.
fitted_models = (
    ("Linear regression score", linear_reg_model),
    ("Ridge regression score", ridge_model),
    ("Lasso regression score", lasso_model),
)
for title, estimator in fitted_models:
    print(title, estimator.score(x_test, y_test))

Linear regression score -0.45679994391884415
Ridge regression score -0.45679989325125286
Lasso regression score 0.00838479844109774
