# Objective: build an image classifier for garments sold in an e-commerce store


# In [None]:


import os
import shutil

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Dropout, Flatten, Dense, Conv2D, MaxPooling2D, BatchNormalization
from keras.models import Sequential
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
# from sklearn.model_selection import train_test_split

# Augmentation pipeline applied on the fly to every image drawn from it
datagen = ImageDataGenerator(
    rescale=1./255,            # multiply every pixel value by 1/255
    # geometric transforms
    rotation_range=10,         # rotation
    width_shift_range=0.2,     # horizontal shift
    height_shift_range=0.2,    # vertical shift
    shear_range=0.2,
    zoom_range=0.2,            # zoom
    horizontal_flip=True,      # horizontal flip
    fill_mode='nearest',
    # photometric transform
    brightness_range=[0.2, 1.2])  # brightness


# Home directory
home_path = r'./train_LbELtWX'

# Original df
df = pd.read_csv(os.path.join(home_path, 'train.csv'))

# class_mode="categorical" requires the target column to hold strings;
# the file-name column is cast to strings as well.
df['id'] = df['id'].astype('str')
df['label'] = df['label'].astype('str')

# Iterator over augmented single-image batches, resized to 200x200.
# Rescaling is configured once on `datagen`; flow_from_dataframe() has no
# `rescale` parameter, so it must not be passed here.
train_generator_df = datagen.flow_from_dataframe(
    dataframe=df,
    directory=os.path.join(home_path, 'train'),
    x_col="id",
    y_col="label",
    class_mode="categorical",
    target_size=(200, 200),
    batch_size=1,
    seed=2020,
)
# Plot four augmented samples drawn from the generator
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(15, 15))

for axis in ax:

    # The generator rescales pixels by 1/255, so the batch holds floats in
    # [0, 1]. Casting to uint8 would truncate them to 0 and plot black
    # images; imshow() renders float RGB data in [0, 1] directly.
    image = next(train_generator_df)[0]

    # changing size from (1, 200, 200, 3) to (200, 200, 3) for plotting the image
    image = np.squeeze(image)

    # guard against tiny numerical overshoot outside imshow's valid range
    image = np.clip(image, 0.0, 1.0)

    axis.imshow(image)
    axis.axis('off')
    
def process_image(raw_image):
    """Decode one JPEG-encoded image into a padded 300x300 grayscale batch.

    Args:
        raw_image: String tensor (or value convertible to one) holding the
            encoded JPEG bytes of a single image; it must contain exactly
            one element.

    Returns:
        A float32 tensor of shape (1, 300, 300, 1).
    """
    # decode_jpeg requires a scalar (0-D) string tensor, so collapse the
    # single-element input to rank 0 rather than to a 1-D vector.
    encoded = tf.reshape(raw_image, [])
    img_rgb = tf.io.decode_jpeg(encoded, channels=3)
    # Convert the decoded RGB image (previously an unbound name was read here).
    img = tf.image.convert_image_dtype(img_rgb, tf.float32)
    resized_img = tf.image.resize_with_pad(
        img,
        target_height=300,
        target_width=300
        )
    img_grayscale = tf.image.rgb_to_grayscale(resized_img)
    # Add the leading batch dimension.
    return tf.reshape(img_grayscale, [-1, 300, 300, 1])

# Image file names and their labels
X = df['id']
y = df['label']


# Stratified train/validation split (30% held out for validation)
train_x, val_x, train_y, val_y = train_test_split(
    X, y, test_size=0.3, random_state=27, stratify=y)

# Train df, re-indexed from 0
df_train = pd.DataFrame({'id': train_x, 'label': train_y}).reset_index(drop=True)

# Validation df, re-indexed from 0
df_test = pd.DataFrame({'id': val_x, 'label': val_y}).reset_index(drop=True)

# Column views used by the loading code below
train_images = df_train['id']
train_labels = df_train['label']

test_images = df_test['id']
test_labels = df_test['label']

def _load_images(image_names):
    """Read every named image from the train directory with OpenCV.

    Args:
        image_names: Iterable of file names located under
            ``home_path + '/train/'``.

    Returns:
        A list with one array per image, as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read a file. It signals
            this by returning ``None`` instead of raising, which would
            otherwise only fail later during normalisation.
    """
    images = []
    for name in image_names:
        path = home_path + '/train/' + name
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError('cv2.imread could not read image: ' + path)
        images.append(img)
    return images


# Train images
x_train = _load_images(train_images)

# Train labels (one-hot encoded)
y_train = keras.utils.to_categorical(train_labels)

# Test images
x_test = _load_images(test_images)

# Test labels (one-hot encoded)
y_test = keras.utils.to_categorical(test_labels)

# Normalize images
x_train = np.array(x_train, dtype="float") / 255.0
x_test = np.array(x_test, dtype="float") / 255.0

from keras.backend import image_data_format

# Derive the model input shape from the loaded images rather than from
# placeholder names. x_train is stacked from cv2.imread() output, so it is
# laid out as (samples, height, width, channels).
_, img_height, img_width, img_channels = x_train.shape

# Keras reports the layout as 'channels_first' or 'channels_last'.
if image_data_format() == 'channels_first':
    # NOTE(review): the arrays themselves are stored channels-last; they must
    # be transposed to (samples, channels, height, width) before being fed to
    # a channels_first model.
    input_shape = (img_channels, img_height, img_width)
else:
    input_shape = (img_height, img_width, img_channels)
    


# Model architecture
# Three convolution / max-pooling stages followed by a small dense head.
model = Sequential()

# The images are 3-channel: the dataframe generator yields (1, 200, 200, 3)
# batches and cv2.imread() loads colour images by default, so the input
# layer must accept 3 channels rather than 1.
model.add(Conv2D(32, (3, 3), input_shape=(200, 200, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.24))
# NOTE(review): 2 output units -- confirm this matches the number of label classes.
model.add(Dense(2, activation='softmax'))

model.summary()

# Alternative architecture: two convolution / max-pooling stages and a
# 128-unit dense layer feeding a 26-way softmax.
# The input layer takes 3 channels because the images in this script are
# loaded as colour images (200x200x3), not single-channel.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(200, 200, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(26, activation=tf.nn.softmax)])

# NOTE(review): sparse_categorical_crossentropy expects integer class ids,
# not one-hot vectors -- confirm the labels passed to fit() match.
model.compile(optimizer=tf.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# NOTE(review): train_datagen, validation_datagen, training_images,
# training_labels, testing_images and testing_labels are not defined in the
# visible part of this script -- confirm they exist before running this cell.
flow_batch_size = 32

# Keras expects integer step counts; true division produced floats.
# -(-a // b) is ceiling division, so the final partial batch is included.
train_steps = -(-len(training_images) // flow_batch_size)
val_steps = -(-len(testing_images) // flow_batch_size)

history = model.fit(
    train_datagen.flow(training_images, training_labels, batch_size=flow_batch_size),
    steps_per_epoch=train_steps,
    epochs=15,
    validation_data=validation_datagen.flow(testing_images, testing_labels, batch_size=flow_batch_size),
    validation_steps=val_steps,
)

model.evaluate(testing_images, testing_labels)

# Compile the model for one-hot targets and train it on the in-memory arrays
import tensorflow as tf
from keras.optimizers import RMSprop


# RMSprop optimizer with a learning rate of 0.01
optim = RMSprop(learning_rate=0.01)
model.compile(
    optimizer=optim,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 25 epochs in batches of 32, validating on the held-out split
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=25,
    verbose=1,
    validation_data=(x_test, y_test),
)

from keras.backend import image_data_format

# Derive the model input shape from the loaded images rather than from
# placeholder names. x_train is stacked from cv2.imread() output, so it is
# laid out as (samples, height, width, channels).
_, img_height, img_width, img_channels = x_train.shape

# Keras reports the layout as 'channels_first' or 'channels_last'.
if image_data_format() == 'channels_first':
    # NOTE(review): the arrays themselves are stored channels-last; they must
    # be transposed to (samples, channels, height, width) before being fed to
    # a channels_first model.
    input_shape = (img_channels, img_height, img_width)
else:
    input_shape = (img_height, img_width, img_channels)
    
# Augmenting on the fly: train from the flow_from_dataframe() iterator.

# `epochs` and `batch_size` were not defined anywhere in this script; these
# values mirror the array-based model.fit() call (epochs=25, batch_size=32).
epochs = 25
batch_size = 32

# Model.fit() accepts generators directly; fit_generator() is deprecated.
# validation_steps is omitted because it only applies when the validation
# data is itself a generator, and here it is a tuple of arrays.
# NOTE(review): train_generator_df was created with batch_size=1, so each
# step consumes a single image -- confirm steps_per_epoch is what you intend.
model.fit(
    train_generator_df,
    epochs=epochs,  # number of passes over steps_per_epoch batches
    steps_per_epoch=x_train.shape[0] // batch_size,  # batches drawn per epoch
    validation_data=(x_test, y_test),  # Or validation_data=valid_generator
)