In [99]:
#importing packages

import keras
from keras import backend as K
import tensorflow as tf
import keras_preprocessing
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.models import model_from_json
from keras.callbacks import LearningRateScheduler
import os, glob
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model

## 1. Data Processing: 

#### The train & test data is pretty clean in terms of image data, but we will need to do a bit of prep work to use in our model. 
a) Use the "ImageDataGenerator()" class from keras.processing.image to build out an instance called "train_datagen" with the following parameters: 

rescale = 1./255  
shear_range = 0.2  
zoom_range = 0.2  
horizontal_flip = True  


In [100]:
train_datagen = keras_preprocessing.image.ImageDataGenerator(rescale=1/255, shear_range=0.2, 
                                                             zoom_range= 0.2, horizontal_flip=True)
train_datagen

<keras_preprocessing.image.image_data_generator.ImageDataGenerator at 0x7fb2f1976f10>

#### b) Then build your training set by using the method ".flow_from_directory()"

path (where training data is stored)  
target_size = (64, 64)  
batch_size = 32  
class_mode = categorical   

In [101]:
train_generator = train_datagen.flow_from_directory(
        'dataset_train',
        target_size=(64, 64),
        batch_size=32,
        class_mode='categorical')

train_generator

Found 88 images belonging to 4 classes.


<keras_preprocessing.image.directory_iterator.DirectoryIterator at 0x7fb2f19c9190>

#### c) Take a look at your training set: 

What is the image shape of each training observation?  
How many total classes do we need to predict on? 


In [102]:
#shape of the image
n_shape = train_generator.image_shape
n_shape

(64, 64, 3)

In [103]:
#classes of the image
n_classes = np.unique(train_generator.classes)
n_classes

array([0, 1, 2, 3], dtype=int32)

## 2. Initial Classifier Build: 

Now use keras to build an initial image classifier with the following specifications.

Note: If you get lost, there is great documentation online and homework 7 included details on many of the layers used here.

- Create an instance of Sequential called "classifier"  
- Add a Conv2D layer with the following parameters:   
    - filters = 32  
    - kernel_size = (3,3)  
    - input_shape = image shape found in part 1  
    - activation = relu  
- Add a MaxPooling2D layer where pool_size = (2,2)  
- Add another Conv2D layer:   
    - filters = 64  
    - kernel_size = (3,3)  
    - activation = relu  
- Add a MaxPooling2D layer where pool_size = (2,2)  
- Add a Flatten layer  
- Add a Dense layer  
    - units = 128  
    - activation = relu  
- Add a final Dense layer (this will output our probabilities):  
    - units = # of classes  
    - activation = softmax   
- Compile with the following:   
    - optimize = adam  
    - loss = categorical cross entropy  
    - metric = accuracy  

In [104]:
def classifier() : 
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=n_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(units = 4, activation='softmax'))
    return(model)

In [105]:
# define CNN model
model1 = classifier()

# compile the model
model1.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam', metrics=['accuracy'])

In [106]:
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_27 (Conv2D)           (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 29, 29, 64)        18496     
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
flatten_14 (Flatten)         (None, 12544)             0         
_________________________________________________________________
dense_27 (Dense)             (None, 128)               1605760   
_________________________________________________________________
dense_28 (Dense)             (None, 4)                 516       
Total para

## 3. Model Runs: 
This will be run various times with different numbers of steps_per_epoch and epochs. 


#### a) Use .fit() with the training set. For the first run, use the following parameters: 
    - steps_per_epoch = 3
    - epochs = 3


In [107]:
my_model = model1.fit_generator(train_generator,steps_per_epoch=3, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


#### b) save model to a file. An example is below:
save model  
classifier.save('my_model.h5')  
print("Saved model")

In [108]:
model1.save('my_model.h5')
print("Model Saved")

Model Saved


c) Predict using the model built in step 2. An example below shows how to load a model: 

In [109]:
# returns a compiled model
# identical to the previous one
model = load_model('my_model.h5')
print("Loaded model from disk")

# test data path
img_dir = "dataset_test" # Enter Directory of test set

# iterate over each test image
data_path = os.path.join(img_dir, '*g')
files = glob.glob(data_path)

# print the files in the dataset_test folder 
for f in files:
    print(f)
    
# make a prediction and add to results 
data = []
results = []
for f1 in files:
    img = image.load_img(f1, target_size = (64, 64))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis = 0)
    data.append(img)
    result = model.predict(img)
    r = np.argmax(result, axis=1)
    results.append(r)

results

Loaded model from disk
dataset_test/C033.png
dataset_test/1022.png
dataset_test/4011.png
dataset_test/1053.png
dataset_test/6051.png
dataset_test/4053.png
dataset_test/C014.png
dataset_test/6023.png


[array([3]),
 array([0]),
 array([2]),
 array([0]),
 array([1]),
 array([2]),
 array([3]),
 array([1])]

d) Determine accuracy.
- Look into the training data(images) in the dataset_train folder, and then determine how a category was coded in keras using the following code:
    - check category labels in training_set
    - training_set.class_indices
- look in the test data(images) in the dataset_test folder, and identify what category each images belongs to using images in the training set as references(there are only 8 test observations).
- Create a list to store the category/labels for the test data as the actual values. 
    - For example, test_label= [3, 0, 2, 0, 1, 2, 3, 1]
- Compare the predicted values to the actual values for the test set and calculate accuracy score
 


In [110]:
#check category labels in training_set
train_generator.class_indices

{'category 1': 0, 'category 2': 1, 'category 3': 2, 'category 4': 3}

In [111]:
#idenitifying the labels of the test data
files

['dataset_test/C033.png',
 'dataset_test/1022.png',
 'dataset_test/4011.png',
 'dataset_test/1053.png',
 'dataset_test/6051.png',
 'dataset_test/4053.png',
 'dataset_test/C014.png',
 'dataset_test/6023.png']

Based on the above test data the test labels are  **test_label = [3,0,2,0,1,2,3,1]**

In [112]:
test_label = [3,0,2,0,1,2,3,1]

Since accuracy is the ratio of the  measure the number of correct decisions your classifier makes, and the total number of test examples, and the result is the accuracy of your classifier

In [113]:
#calculating accuracy by comparing the predicted values and test_labels
accuracy = (6/8)*100
print('accuracy:', accuracy,'%')

accuracy: 75.0 %


e) Run this process for the following combinations:

* (steps_per_epoch: 1, epochs: 1)
* (steps_per_epoch: 1, epochs: 2)
* (steps_per_epoch: 1, epochs: 3)
* (steps_per_epoch: 2, epochs: 4)
* (steps_per_epoch: 2, epochs: 5)
* (steps_per_epoch: 2, epochs: 6)
* (steps_per_epoch: 3, epochs: 7)
* (steps_per_epoch: 3, epochs: 8)
* (steps_per_epoch: 5, epochs: 9)
* (steps_per_epoch: 5, epochs: 10)

In [114]:
#iteration proccess for creating different models for various steps_per_epoch and epochs
steps_per_epoch = [1,1,1,2,2,2,3,3,5,5]
epochs = [1,2,3,4,5,6,7,8,9,10]
score=[]

for i in range(0,len(steps_per_epoch)):
    model1 = classifier()
    # compile the model
    model1.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam', metrics=['accuracy'])
    model1.fit_generator(train_generator,steps_per_epoch=steps_per_epoch[i], epochs=epochs[i])
    model_name = "model_" +str(steps_per_epoch[i])+"_"+str(epochs[i])
    model1.save(model_name)
    
    model = load_model(model_name)
# make a prediction and add to results 
    data = []
    results = []
    for f1 in files:
        img = image.load_img(f1, target_size = (64, 64))
        img = image.img_to_array(img)
        img = np.expand_dims(img, axis = 0)
        data.append(img)
        result = model.predict(img)
        r = np.argmax(result, axis=1)
        results.append(r)

    results = list(np.concatenate(results))
    
    score.append([steps_per_epoch[i], epochs[i], results])

Epoch 1/1
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


f) Create a final dataframe that combines the accuracy across each combination.



In [123]:
#labels of the test data
test_label

[3, 0, 2, 0, 1, 2, 3, 1]

In [117]:
accuracy_scores = []
for i in range(0,len(steps_per_epoch)):
    accuracy=0
    for j in range(0,len(test_label)):
        if score[i][2][j] == test_label[j]:
            accuracy += 1
        
    accuracy_scores.append(accuracy)

In [119]:
data = {'steps_per_epoch' : steps_per_epoch, 'epoch': epochs,'accuracy' : accuracy_scores}
accuracy_comp = pd.DataFrame(data)
accuracy_comp['accuracy'] = (accuracy_comp['accuracy']/8)*100
accuracy_comp

Unnamed: 0,steps_per_epoch,epoch,accuracy
0,1,1,50.0
1,1,2,37.5
2,1,3,25.0
3,2,4,75.0
4,2,5,87.5
5,2,6,75.0
6,3,7,75.0
7,3,8,87.5
8,5,9,75.0
9,5,10,75.0


## Conceptual Questions: 

#### 4. Discuss the effect of the following on accuracy and loss (train & test): 
- Increasing the steps_per_epoch
- Increasing the number of epochs





As the steps_per_epoch and number_of_epochs increases the accuracy of both the train set and test increases, however, over a point of time the network starts to overfit. We can observe the same in the above iterations.
For example, consider the iteration steps_per_epoch=5 and number_of_epochs=10. We can observe that the train accuracy is increasing and after a point the network starts to overfit. And similarly the test accuracy tends to increase and then decrease.

As for the loss we can observe that the loss decreases gradually as we keep increasing the steps_per_epoch and the corresponding epochs.

<img src = "Screen Shot 2020-11-19 at 10.31.57 PM.png" width="800">

#### 5. Name two uses of zero padding in CNN.


- Zero padding allows modification of the size of the input to be adjusted to our requirement. It is mostly used in designing the CNN layers when the dimensions of the input volume need to be preserved in the output volume
- Zero padding can also be used to control the shrinkage of dimension after applying filters larger than 1x1
- Can also be used to be avoid loosing information at the boundaries, e.g. when weights in a filter drop rapidly away from its center



#### 6. What is the use of a 1 x 1 kernel in CNN? 


- The 1×1 kernel can be used to create a linear projection of a stack of feature maps
- The projection created by a 1×1 can act like channel-wise pooling and be used for dimensionality reduction
- The projection created by a 1×1 can also be used directly or be used to increase the number of feature maps in a model
- Create deeper network through “Bottle-Neck” layer
- Create smaller CNN network which retains higher degree of accuracy.

#### 7. What are the advantages of a CNN over a fully connected DNN for this image classification problem?

A convolutional layer is much more specialized, and efficient, than a fully connected layer.

In a fully connected layer each neuron is connected to every neuron in the previous layer, and each connection has it's own weight. This is a totally general purpose connection pattern and makes no assumptions about the features in the data. It's also very expensive in terms of memory (weights) and computation.

In contrast, in a convolutional layer each neuron is only connected to a few nearby neurons in the previous layer, and the same set of weights is used for every neuron. This connection pattern only makes sense for cases where the data can be interpreted as spatial with the locally extrated features and equally likely to occur at any input position. The typical use case for convolutional layers is for image data where, as required, the features are local (e.g. a "nose" consists of a set of nearby pixels, not spread all across the image), and equally likely to occur anywhere (in general case, that nose might be anywhere in the image).

In the current problem, CNN is prefferd over the fully connected neural network due to the data being images. Images are made up of large number of pixels. The fewer number of connections and weights make convolutional layers relatively cheap vs full connect, in terms of memory and compute power needed.