# Pre-Processing & Modeling

### Pre-Processing

Referenced: https://git.generalassemb.ly/DSIR-Lancelot/8.04-lesson-cnns/blob/master/solution-code/02-cnn.ipynb

In [1]:
# Imports
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical

# For reproducibility
np.random.seed(42)

In [2]:
# One (initially empty) accumulator list per food class; the converter appends to these in place
burgers_arrays, hotdogs_arrays, pizza_arrays, tacos_arrays, sushi_arrays = (
    [] for _ in range(5)
)

# Folder names of the food classes; position i names the class stored in class_arrays[i]
food_classes = [
    'burgers',
    'hotdogs',
    'pizza',
    'tacos',
    'sushi',
]

# The accumulator lists gathered in the same order as food_classes
class_arrays = [
    burgers_arrays,
    hotdogs_arrays,
    pizza_arrays,
    tacos_arrays,
    sushi_arrays,
]

In [3]:
# Define a function that converts all images
def image_converter(food_arrays, food_class):
    """Load every image of one food class and append it to ``food_arrays``.

    Each visible file in ``../images/<food_class>/`` is loaded at 256x256,
    converted to an array, divided by 255 and appended to ``food_arrays``.
    Files that cannot be loaded are reported and skipped.

    Parameters
    ----------
    food_arrays : list
        List that receives the converted image arrays (mutated in place).
    food_class : str
        Name of the sub-folder under ``../images/`` to read.

    Returns
    -------
    None
        Results are delivered through ``food_arrays``; a summary line is printed.
    """
    food_path = f'../images/{food_class}/'
    # os.listdir returns entries in arbitrary order; sorting keeps the sample order
    # (and therefore any seeded split made from it) the same on every machine.
    for file in sorted(os.listdir(food_path)):
        # Hidden files such as macOS's .DS_Store are not images; skip them quietly.
        if file.startswith('.'):
            continue
        try:
            image = load_img(os.path.join(food_path, file), target_size=(256, 256))
            image_arr = img_to_array(image) / 255
        except Exception as err:
            # Best-effort load: report the reason and move on. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt / SystemExit propagate.
            print(f'Error for file: {file} ({err})')
            continue
        food_arrays.append(image_arr)
    print(f'{len(food_arrays)} pictures have been converted for {food_class}.')

In [4]:
# Run image_converter once per class, pairing each accumulator list with its folder name.
# zip replaces the hand-maintained index counter and gives each loop variable an accurate name.
for food_arrays, food_class in zip(class_arrays, food_classes):
    image_converter(food_arrays, food_class)

Error for file: .DS_Store
282 pictures have been converted for burgers.
Error for file: .DS_Store
253 pictures have been converted for hotdogs.
Error for file: .DS_Store
276 pictures have been converted for pizza.
Error for file: .DS_Store
260 pictures have been converted for tacos.
Error for file: .DS_Store
268 pictures have been converted for sushi.


### Features, Labels & Train/Test Split

In [5]:
# Define X
# Lay the images out class by class (burgers, hotdogs, pizza, tacos, sushi)
# and stack them into a single array
all_images = [
    *burgers_arrays,
    *hotdogs_arrays,
    *pizza_arrays,
    *tacos_arrays,
    *sushi_arrays,
]
X = np.array(all_images)
X.shape

(1339, 256, 256, 3)

In [9]:
# Define y
# 0 for burger, 1 for hotdog, 2 for pizza, 3 for tacos, 4 for sushi
# Labels follow the order of class_arrays, which is the order the class lists were
# concatenated into X. Counts are taken from the loaded lists instead of being
# hard-coded, so y stays aligned with X when the image folders change.
class_counts = [len(arrays) for arrays in class_arrays]
y_labels = np.repeat(np.arange(len(class_arrays)), class_counts)
# One-hot encode; num_classes is explicit so the width does not depend on which labels occur
y = to_categorical(y_labels, num_classes=len(class_arrays))
assert y.shape[0] == X.shape[0], 'X and y must contain the same number of samples'
y.shape

(1339, 5)

In [10]:
# Baseline accuracy of majority class
# Share of the largest class among all loaded images, computed from the class lists
# so it stays correct if the image counts change.
images_per_class = [len(arrays) for arrays in class_arrays]
max(images_per_class) / sum(images_per_class)

0.210604929051531

In [11]:
# Train/test split
# Stratified on y so both splits keep the class proportions; seeded for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [12]:
# Make sure the splits are NumPy arrays (no reshaping happens here).
# X was already converted with np.array before the split, so np.asarray returns the
# existing arrays instead of duplicating the full image tensors in memory.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [13]:
# Shape of a single training image (everything after the sample axis)
X_train.shape[1:]

(256, 256, 3)

In [14]:
# Shape of the one-hot training labels: (samples, classes)
np.shape(y_train)

(1004, 5)

### Modeling

In [15]:
# Build a CNN model: two convolution/pooling stages feeding a small dense classifier
model = Sequential()
model.add(Conv2D(64, (4, 4), activation='relu', input_shape=X_train[0].shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (4, 4), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(5, activation='softmax'))  # one output unit per food class

# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],  # 'Recall', 'Precision'
)

# Fit the model
# NOTE(review): the test split is also used as validation data here, so the reported
# validation metrics are not an untouched hold-out estimate.
results = model.fit(
    X_train,
    y_train,
    batch_size=8,
    epochs=3,
    validation_data=(X_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# Check model summary: prints the layers of the model fitted above
model.summary()

### Post-Modeling

In [None]:
# Class probabilities for each test image, then the index of the most likely class.
# Illustration: [[0.4, 0.1, 0.5], [0.7, 0.2, 0.1], [0.3, 0.4, 0.3]] -> [2, 0, 1]
probas = model.predict(X_test)
labels = probas.argmax(axis=-1)
print(labels)

In [None]:
# NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this import raises ImportError on current releases — confirm against the installed version.
from sklearn.metrics import plot_confusion_matrix

In [None]:
# Predictions: the model's output for every test image
y_preds = model.predict(X_test)
np.shape(y_preds)

In [None]:
# Plot confusion matrix
# A Keras model is not a scikit-learn estimator, and both y_test (one-hot) and y_preds
# (softmax probabilities) are 2-D, so collapse each to integer class ids first and
# build the matrix from those labels.
y_true_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_preds, axis=1)

# Fix the label set so every class gets a row/column even if it never occurs in the test split
cm = confusion_matrix(y_true_labels, y_pred_labels, labels=list(range(len(food_classes))))

# food_classes is ordered like the integer labels (0 = burgers ... 4 = sushi)
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=food_classes).plot();

In [None]:
# Next steps:
# Try 1-2 changes to the hidden layers and look for improvements.
# If accuracy improves, there was low-hanging fruit worth the time.
# If not, go collect more images (start with image augmentation in Keras).
# Keras's directory-based loading should work with the current images/<class>/ folder setup.

In [None]:
# Use Google Colab to run this

In [None]:
# Need more hidden layers/neurons

In [None]:
# Precision/recall

In [None]:
# Image Data Augmentation

In [None]:
# Analysis of misclassifications