# Analysis I

**Source reference material for this notebook includes:
GA DSI Lecture Notebooks**

In [1]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from sklearn.model_selection import train_test_split

# For reproducibility
np.random.seed(42)

In [2]:
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow as tf
from tensorflow.keras import layers

## Preprocessing

### Importing the data

The code below will do the following:

- Create a list to store array representations of the images from a given class
- Convert each image to an array and normalize its pixel values
- Store the normalized array in the list
- Print an error message for any file that could not be converted
- Combine the lists into a single NumPy array for compatibility with Keras

In [30]:
# create list
good_arrays = []
# define filepath for Dog class
good_path = '../data/yelp_mex_food_pics/good/'

# convert each image to normalized array and store
for file in os.listdir(good_path):
    try:
        # target_size automatically resizes each img on import
        good = load_img(good_path + file, target_size=(348, 348))
        good_arr = img_to_array(good) / 347
        good_arrays.append(good_arr)
    except:
        print(f'Error for file: {file}')

print(f'{len(good_arrays)} pictures converted.')    

509 pictures converted.


In [31]:
# create list
bad_arrays = []
# define filepath for bad class
bad_path = '../data/yelp_mex_food_pics/bad/'

# convert each image to normalized array and store
for file in os.listdir(bad_path):
    try:
        bad = load_img(bad_path + file, target_size=(348, 348))
        bad_arr = img_to_array(bad) / 347
        bad_arrays.append(bad_arr)
    except:
        print(f'Error for file: {file}')
    
print(f'{len(bad_arrays)} pictures converted.')    

509 pictures converted.


In [32]:
# X holds every image: all 'good' pictures first, then all 'bad' ones
X = good_arrays + bad_arrays

# convert to array and check shape
X_arr = np.array(X)
print(f'X shape: {X_arr.shape}')

# 1 for good, 0 for bad. The counts are taken from the lists themselves so
# the labels stay aligned with X even when some files were skipped on import.
y = np.array([1] * len(good_arrays) + [0] * len(bad_arrays))
print(f'y shape: {y.shape}')

X shape: (1018, 348, 348, 3)
y shape: (1018,)


### Train-test splitting

In [33]:
# Hold out a stratified test set so both splits keep the good/bad balance;
# the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [7]:
# # X_train = X_train.astype('float32')
# # X_test = X_test.astype('float32')

# X_train = X_train / 255
# X_test = X_test / 255

In [43]:
# train_test_split was given a Python list of images, so stack the training
# images into a single ndarray before handing them to Keras.
X_train = np.array(X_train)

In [44]:
# Same conversion for the held-out images: list of arrays -> single ndarray.
X_test = np.array(X_test)

In [45]:
# sanity check: shape of a single training image (height, width, channels)
X_train[0].shape

(348, 348, 3)

In [46]:
# sanity check: number of training labels
y_train.shape

(763,)

## Convolutional Neural Networks

The model below is a CNN built from three types of layers:

- Convolutional Layer
- Pooling Layer
- Densely Connected Layer

In [50]:
# Two conv/pool feature-extraction stages followed by a small dense head
# that ends in one sigmoid unit for the binary good/bad label.
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(348, 348, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [48]:
# Train for five epochs, scoring the held-out split after each one;
# the object returned by fit() is kept in `h`.
h = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_test, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [49]:
# print each layer's output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 346, 346, 64)      1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 173, 173, 64)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 171, 171, 64)      36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 85, 85, 64)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 462400)            0         
                                                                 
 dense (Dense)               (None, 64)                2