# Clothes Image Classifier

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
import seaborn as sb

### Reading the training data
<br />The file <code>train.csv</code> consists of image ID (from <code>/images</code> folder) and its label. 

In [None]:
# Load train.csv into a DataFrame and preview its first rows.
train = pd.read_csv("train.csv")
train.head()

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array

Loading all the images whose image ID is present in the training set.
<br />Each image is adjusted to 32 x 32 pixels and is converted into an array.
<br /><code>X_train</code> consists of all the training images (array).

In [None]:
%%time
# Size every image is resized to when it is loaded.
height, width = 32, 32

# Pre-allocate one (height, width, 3) slot per row of `train`, then fill each
# slot with the image whose file name is that row's `img_id`.
X_train = np.empty((len(train), height, width, 3))
for row in range(len(train)):
    image_path = r"images/images/{}.jpg".format(train.loc[row, 'img_id'])
    X_train[row] = img_to_array(load_img(image_path, target_size=(height, width)))
X_train.shape

### Preprocessing the training data
<br /><b>Standardizing</b> all the pixels of the images in the training set.

In [None]:
%%time
# Mean and standard deviation taken over every value in X_train at once
# (no per-channel or per-image statistics).
m = X_train.mean()
s = X_train.std()
# Standardize in place of the raw array. Note that X_train is overwritten, so
# re-running this cell would standardize the already-standardized data again.
X_train = (X_train - m) / s

In [None]:
# Largest and smallest value in the standardized training array, as (max, min).
(np.max(X_train), np.min(X_train))

In [None]:
from sklearn.preprocessing import OneHotEncoder

<b>One-Hot encoding</b> of labels.
<br /><code>y_train</code> consists of labels of all the training images.

In [None]:
# The encoder is kept in `ohe` so the same fitted object can later map
# predictions back to the original labels.
ohe = OneHotEncoder()
# The encoder is fed a single-column 2-D array, so reshape the labels to (n, 1).
label_column = np.array(train['label']).reshape(-1, 1)
# .toarray() turns the encoder's output into a regular dense array.
y_train = ohe.fit_transform(label_column).toarray()
y_train

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, AveragePooling2D, Dropout, \
        BatchNormalization, Activation

### Building the model using Convolutional Networks
<br />The model consists of 4 <b>convolution layers</b> with 64, 64, 72 and 72 filters respectively, followed by 2 <b>fully connected layers</b> with 256 and 84 nodes respectively. The final output layer consists of 4 nodes with <code>softmax</code> activation. All other layers (convolution and fully connected) use <code>relu</code> activation.
<br /><b>Average Pooling, Dropout,</b> and <b>Batch Normalization</b> are also used.

In [None]:
# Two conv/conv/pool/dropout stages, then a dense classifier head ending in a
# 4-way softmax. Layers are listed in the order they are applied.
model = Sequential([
    # Convolution stage 1
    Conv2D(64, kernel_size=3, padding="valid", activation='relu'),
    Conv2D(64, kernel_size=3, padding="valid", activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),
    Dropout(0.5),
    # Convolution stage 2
    Conv2D(72, kernel_size=3, padding="valid", activation='relu'),
    Conv2D(72, kernel_size=3, padding="valid", activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),
    Dropout(0.5),
    # Classifier head
    Flatten(),
    BatchNormalization(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(84, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='softmax'),
])

The model is trained for 125 epochs. A <b>validation set</b> consisting of 25% of the training images is held out and used to monitor the model during training. The <code>Adam</code> optimizer is used with a batch size of 128 examples.

In [None]:
%%time
# Number of passes over the training data; the plotting cells reuse `epochs`
# to build their x-axis.
epochs = 125
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# validation_split=0.25 holds out a quarter of the samples for validation.
# `stats.history` is what the plotting cells read.
stats = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=epochs,
    validation_split=0.25,
)
model.summary()

### Displaying the training results - Categorical loss and accuracy

In [None]:
# Per-epoch metrics recorded by fit, plus a 1-based epoch column for the x-axis.
stats_df = pd.DataFrame(stats.history)
stats_df['epoch'] = list(range(1, epochs + 1))

plot.figure(figsize=(20, 8))
# One line per curve: (history column, line colour, legend label).
for column, colour, legend in (
    ('loss', 'deeppink', "Training loss"),
    ('val_loss', 'darkturquoise', "Validation loss"),
):
    sb.lineplot(x='epoch', y=column, data=stats_df, color=colour, linewidth=2.5, label=legend)
plot.grid()
plot.legend()
plot.title("Training and validation loss")

In [None]:
# Rebuild the frame from the raw history so the scaling below is applied
# exactly once, however often this cell is re-run.
stats_df = pd.DataFrame(stats.history)
# Express both accuracy columns as percentages.
for metric in ('accuracy', 'val_accuracy'):
    stats_df[metric] = stats_df[metric] * 100
stats_df['epoch'] = list(range(1, epochs + 1))

plot.figure(figsize=(20, 8))
# One line per curve: (history column, line colour, legend label).
for column, colour, legend in (
    ('accuracy', 'deeppink', "Training accuracy"),
    ('val_accuracy', 'darkturquoise', "Validation accuracy"),
):
    sb.lineplot(x='epoch', y=column, data=stats_df, color=colour, linewidth=2.5, label=legend)
plot.grid()
plot.legend()
plot.title("Training and validation accuracy")

### Reading the test data
<br />The file <code>test.csv</code> consists of image ID (from <code>/images</code> folder).

In [None]:
# Load test.csv into a DataFrame and preview its first rows.
test = pd.read_csv("test.csv")
test.head()

Loading all the images whose image ID is present in the testing set.
<br />Each image is adjusted to 32 x 32 pixels and is converted into an array.
<br /><code>X_test</code> consists of all the testing images (array).

In [None]:
%%time
# Pre-allocate one (height, width, 3) slot per row of `test`, then fill each
# slot with the image whose file name is that row's `img_id`. `height` and
# `width` are the values set when the training images were loaded.
X_test = np.empty((len(test), height, width, 3))
for row in range(len(test)):
    image_path = r"images/images/{}.jpg".format(test.loc[row, 'img_id'])
    X_test[row] = img_to_array(load_img(image_path, target_size=(height, width)))
X_test.shape

### Preprocessing the testing data
<br /><b>Standardizing</b> all the pixels of the images in the testing set.

In [None]:
%%time
# Standardize the test images with the TRAINING mean and standard deviation
# (`m` and `s`, computed in the training preprocessing cell) instead of
# statistics recomputed from the test set. The model was fitted on inputs
# transformed with the training statistics, so applying a different shift and
# scale at prediction time would feed it differently distributed inputs. It
# would also overwrite `m` and `s`, losing the values the model depends on.
# Note that X_test is overwritten, so this cell must be run exactly once after
# the test images are loaded.
X_test = (X_test - m) / s

In [None]:
# Largest and smallest value in the standardized test array, as (max, min).
(np.max(X_test), np.min(X_test))

### Building the final model
<br />Building the model once again, this time without holding out a validation set, so that it is trained on all the training images.

In [None]:
# Fresh, untrained network with the same layer stack as the validated model:
# two conv/conv/pool/dropout stages followed by a dense head ending in a
# 4-way softmax.
final_layers = [
    # Convolution stage 1
    Conv2D(64, kernel_size=3, padding="valid", activation='relu'),
    Conv2D(64, kernel_size=3, padding="valid", activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),
    Dropout(0.5),
    # Convolution stage 2
    Conv2D(72, kernel_size=3, padding="valid", activation='relu'),
    Conv2D(72, kernel_size=3, padding="valid", activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),
    Dropout(0.5),
    # Classifier head
    Flatten(),
    BatchNormalization(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(84, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='softmax'),
]
model = Sequential()
for layer in final_layers:
    model.add(layer)

In [None]:
%%time
# Same training setup as the validated run, but with no validation_split,
# so every training image is used for fitting.
epochs = 125
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
stats = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=epochs,
)
model.summary()

### Making predictions on the test set

In [None]:
# Output of the final softmax layer for every test image: one row per image,
# one column per class.
predictions = model.predict(X_test)
predictions

Converting predictions from one-hot encoding format to the actual label.

In [None]:
# Map each row of model outputs back to an original label using the encoder
# fitted on the training labels, and store it next to the image ID.
test['label'] = ohe.inverse_transform(
    predictions
)
test

### Writing the predictions off to a <code>.csv</code> file

In [None]:
# Save the test frame (including the predicted `label` column) without the
# DataFrame index.
test.to_csv(
    "Predictions.csv",
    index=False,
)