### Data Cleaning, EDA & Data Visualization

Referenced: https://git.generalassemb.ly/DSIR-Lancelot/8.04-lesson-cnns/blob/master/solution-code/02-cnn.ipynb

In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split

# For reproducibility
np.random.seed(42)

In [2]:
# Create a list for each food class
burgers_arrays = []
hotdogs_arrays = []
pizza_arrays = []
pasta_arrays = []
sushi_arrays = []

# Create a list of all food class arrays
class_arrays = [burgers_arrays, hotdogs_arrays, pizza_arrays, pasta_arrays, sushi_arrays]

# Create a list of strings containing the food classes
food_classes = ['burgers', 'hotdogs', 'pizza', 'pasta', 'sushi']

In [3]:
# Define a function that converts all images
def image_converter(food_arrays, food_class):
    food_path = f'../images/{food_class}/'
    for file in os.listdir(food_path):
        try:
            image = load_img(food_path + file, target_size=(256, 256))
            image_arr = img_to_array(image) / 255
            food_arrays.append(image_arr)
        except:
            print(f'Error for file: {file}')
    print(f'{len(food_arrays)} pictures have been converted for {food_class}.')

In [4]:
# Iterate through the image_converter function
my_index = 0
for food_class in class_arrays:
    image_converter(food_class, food_classes[my_index])
    my_index += 1

Error for file: .DS_Store
119 pictures have been converted for burgers.
Error for file: .DS_Store
96 pictures have been converted for hotdogs.
Error for file: .DS_Store
86 pictures have been converted for pizza.
Error for file: .DS_Store
73 pictures have been converted for pasta.
Error for file: .DS_Store
100 pictures have been converted for sushi.


In [5]:
# Define X
X = burgers_arrays + hotdogs_arrays + pizza_arrays + pasta_arrays + sushi_arrays
X_arr = np.array(X)
print(f'X shape: {X_arr.shape}')

X shape: (474, 256, 256, 3)


In [7]:
# Define y
# 0 for burger, 1 for hotdog, 2 for pizza, 3 for pasta, 4 for sushi
y = [0]*119 + [1]*96 + [2]*86 + [3]*73 + [4]*100
y = np.array(y)
print(f'y shape: {y.shape}')

y shape: (474,)


In [8]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [13]:
# Reshape
X_train = np.array(X_train)
X_test = np.array(X_test)

In [14]:
# Check shape
X_train[0].shape

(256, 256, 3)

In [15]:
# Check shape
y_train.shape

(355,)

### Modeling

In [20]:
model = Sequential([
    Conv2D(64, (3,3), activation='relu', input_shape=(X_train[0].shape)),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', 
              metrics=['acc', 'Recall', 'Precision'])

results = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                   batch_size=8, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Image Data Augmentation