## Motivation
Practice building convolutional neural networks (CNNs) for image classification, along with basic image processing.

## Scenario
Given an image of a dog or a cat, determine which of the two animals it shows.

## Strategy

In [None]:
# Import Libraries

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import gzip
from PIL import Image
import random
import os
import glob

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [None]:
# Load the watermark extension and print its default environment summary,
# so readers can see what the notebook was run with.
%load_ext watermark
%watermark

In [None]:
# Report the versions of the modules imported in this notebook.
%watermark --iversions

In [None]:
# Read Images

# Source: https://www.kaggle.com/datasets/hassanaitnacer/dogs-vs-cats?select=dogs-vs-cats
#f = gzip.open('archive.zip','r')

In [None]:

# Preview one raw sample from the cat folder (Image.show hands the picture to an image viewer).
sample_cat_path = r"./dogs-vs-cats/cat/cat.1.jpg"
img = Image.open(sample_cat_path)
img.show()

In [None]:
# Simplify: load a dog sample and collapse it to a single greyscale channel (PIL mode 'L').
sample_dog = Image.open(r"./dogs-vs-cats/dog/dog.1.jpg")
img = sample_dog.convert('L')
img.show()

In [None]:
# Inspect the array dimensions of the image loaded in the previous cell.
grey_pixels = np.asarray(img)
grey_pixels.shape

In [None]:
# Example: append a trailing channel axis so the 2-D greyscale image becomes (height, width, 1)
np.asarray(img)[:, :, np.newaxis]

In [None]:
## Create DataFrame
# Start from an empty frame; fetch_images() appends one row per image file.
column_names = ['filename', 'type', 'file_location', 'pixels']
df = pd.DataFrame(columns = column_names)

In [None]:
def fetch_images(animal = 'dog', image_size = (400, 400)):
    """Load every JPEG of one class and append the rows to the global ``df``.

    Each file matching ``./dogs-vs-cats/<animal>/*.jpg`` is converted to
    greyscale, resized to ``image_size`` and stored as an array with a
    trailing channel axis of length 1.

    Parameters
    ----------
    animal : str
        Name of the class sub-folder; also stored in the ``type`` column.
    image_size : tuple of int
        Target ``(width, height)`` passed to ``PIL.Image.resize``. The default
        matches the 400x400 input the rest of the notebook uses.

    Returns
    -------
    None
        The function works by side effect: it rebinds the module-level ``df``
        with the new rows appended. If no files match, ``df`` is left untouched.
    """
    global df
    columns = ['filename', 'type', 'file_location', 'pixels']

    records = []
    # Sort the matches: glob order is arbitrary, and a stable row order keeps the
    # seeded train/test split downstream reproducible.
    for file in sorted(glob.glob(f"./dogs-vs-cats/{animal}/*.jpg")):
        # The context manager closes the file handle once the pixels are decoded.
        with Image.open(file) as source:
            # Convert to greyscale, then resize to the common shape.
            img = source.convert('L').resize(image_size)

        records.append({
            # basename() replaces the old fixed-width slice, which silently
            # depended on the folder name being exactly three characters long.
            'filename': os.path.basename(file),
            'type': animal,
            'file_location': file,
            # Add the single channel axis expected by Conv2D.
            'pixels': np.asarray(img)[:, :, np.newaxis],
        })

    if not records:
        return

    # Build the new rows once and concatenate once, instead of growing df row by row.
    df = pd.concat([df, pd.DataFrame(records, columns = columns)], axis = 0)

    

In [None]:
# Populate df with both classes: dogs first, then cats.
for animal_class in ('dog', 'cat'):
    fetch_images(animal_class)

In [None]:
# The concatenated rows carry repeated index labels; swap them for a clean 0..n-1 range.
df = df.reset_index(drop = True)
df.head()

There are 5000 dog and cat photos. They need to be sampled and assigned to training, validation, and test sets.

In [None]:
# Labels stay as strings here; they are encoded in the next cell.
y = df['type']
# Stack the per-image arrays along a new leading axis: one entry per image.
X = np.stack(df['pixels'].tolist())

In [None]:
# Map the string class labels to integer codes (LabelEncoder numbers classes in sorted order)
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(y)

In [None]:
# Hold out 20% for testing, then carve 20% of the remainder off for validation.
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, encoded_y, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **split_options)

In [None]:
# Sanity-check the split sizes: features first, then labels (train, test, validation).
for split in (X_train, X_test, X_val, y_train, y_test, y_val):
    print(split.shape)


In [None]:
## Build the network

# Two conv -> max-pool -> dropout stages on the 400x400 greyscale input,
# followed by a dense head ending in a single sigmoid unit.
model = Sequential([
    Conv2D(filters = 8, kernel_size = (5,5), padding = 'Same',
           activation = 'relu', input_shape = (400,400,1)),
    MaxPool2D(pool_size = (2,2)),
    Dropout(.25),
    Conv2D(filters = 16, kernel_size = (3,3), padding = 'Same', activation = 'relu'),
    MaxPool2D(pool_size = (2,2), strides = (2,2)),
    Dropout(.25),

    # Fully Connected Layer
    Flatten(),
    Dense(256, activation = 'relu'),
    Dropout(.5),
    Dense(1, activation = 'sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
          


In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()


In [None]:
# Define parameters

batch_size = 50  # images per gradient step
epochs = 5  # for better result increase the epochs

In [None]:
# data augmentation
# Only mild geometric jitter is enabled; every normalisation, whitening and flip option is off.
augmentation_options = dict(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=5,  # randomly rotate images by up to 5 degrees
    zoom_range=0.1,  # randomly zoom images by up to 10%
    width_shift_range=0.1,  # randomly shift images horizontally by up to 10%
    height_shift_range=0.1,  # randomly shift images vertically by up to 10%
    horizontal_flip=False,  # randomly flip images horizontally
    vertical_flip=False,  # randomly flip images vertically
)
datagen = ImageDataGenerator(**augmentation_options)


In [None]:

# Compute the generator's data-dependent statistics from the training images.
# NOTE(review): per the Keras docs, fit() is only required when featurewise_center,
# featurewise_std_normalization or zca_whitening is enabled; all three are False in
# the generator defined above, so this call looks redundant - confirm before removing.
datagen.fit(X_train)

In [None]:
## Train the Model

# Model.fit accepts generators directly. fit_generator is deprecated and has been
# removed from recent Keras releases, so calling it fails there.
# Augmentation is applied to the training batches only; validation uses the raw arrays.
history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
                    epochs = epochs, validation_data = (X_val, y_val))

In [None]:
# Plot the loss and accuracy curves for training and validation
# (these are validation curves, not test curves: the test set is only used after training)
# Older Keras versions record the metric as 'acc' rather than 'accuracy'.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], color='r', label="training loss")
loss_ax.plot(history.history['val_loss'], color='b', label="validation loss")
loss_ax.set(title="Training vs. Validation Loss", xlabel="Number of Epochs", ylabel="Loss")
loss_ax.legend()

acc_ax.plot(history.history[acc_key], color='r', label="training accuracy")
acc_ax.plot(history.history['val_' + acc_key], color='b', label="validation accuracy")
acc_ax.set(title="Training vs. Validation Accuracy", xlabel="Number of Epochs", ylabel="Accuracy")
acc_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
## Run Predictions

# The network ends in a single sigmoid unit, so predict() yields one probability per image.
Y_pred = model.predict(X_test)
# Threshold at 0.5 to get hard 0/1 class labels. argmax over the single output
# column would always return 0, i.e. every image would be assigned to class 0.
Y_pred_classes = (Y_pred > 0.5).astype(int).ravel()

In [None]:
## Evaluate

# confusion matrix
# Rows are true labels and columns are predicted labels. Passing `labels` pins the
# matrix to one row/column per encoded class, even if a class is never predicted.
class_codes = np.arange(len(encoder.classes_))
confusion_mtx = confusion_matrix(y_test, Y_pred_classes, labels=class_codes)

# plot the confusion matrix, with integer counts and the original class names on the axes
f,ax = plt.subplots(figsize=(8, 8))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray",
            fmt='d', xticklabels=encoder.classes_, yticklabels=encoder.classes_, ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

## Looking Ahead
- Pull the images using gunzip and archive.zip.
- Create a CNN that uses all 3 colour channels.
## Research/Resources/References
- https://www.kaggle.com/datasets/hassanaitnacer/dogs-vs-cats?select=dogs-vs-cats
- https://stackoverflow.com/questions/1109422/getting-list-of-pixel-values-from-pil
- https://stackoverflow.com/questions/12201577/how-can-i-convert-an-rgb-image-into-grayscale-in-python
  - converting each image to greyscale
- https://www.w3schools.com/python/python_variables_global.asp
  - for creating the function `fetch_images`
- https://imagekit.io/blog/image-resizing-in-python/
  - resizing images
- https://www.atmosera.com/blog/binary-classification-with-neural-networks/
  - binary classification for images
- https://machinelearningmastery.com/binary-classification-tutorial-with-the-keras-deep-learning-library/
  - binary label encoder

