**Importing Libraries To Connect Google Drive**

In [1]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

**Connect Google Drive**

In [2]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

**Downloading Training Data Uploaded at Google Drive bold text**

In [3]:
download_train = drive.CreateFile({'id': '16ldCVmTIM95z1-m3FQZe1Kj-HrEKe7fJ'})

In [None]:
download_train.GetContentFile('train_LbELtWX.zip')
!unzip train_LbELtWX.zip

**Importing Required Libraries and Installing keras-tuner**

In [None]:
import keras
from tqdm import tqdm
import pandas as pd
import numpy as np
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, Convolution1D, GlobalMaxPooling2D
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras import backend as K
from keras.models import load_model
import tensorflow as tf
from tensorflow import keras
import IPython

!pip install -U keras-tuner
import kerastuner as kt


**Reading train.csv with image name and corresponding label**

**Dividing Training Set into Training and Testing Dataset**

In [6]:
train_dataset = pd.read_csv("train_LbELtWX/train.csv", dtype=str);
train_dir = ('train_LbELtWX/train');

train_dataset["id"] = train_dataset["id"].apply(lambda x : '{}{}'.format(x, '.png'));

train_df=train_dataset[0:50000]
test_df=train_dataset[50000:]

train_df

Unnamed: 0,id,label
0,1.png,9
1,2.png,0
2,3.png,0
3,4.png,3
4,5.png,0
...,...,...
49995,49996.png,6
49996,49997.png,6
49997,49998.png,5
49998,49999.png,1


**Dividing Training dataset into training and cross-validation dataset**

In [7]:
X_train, X_test, y_train, y_test = train_test_split(train_df["id"], train_df["label"], random_state=42, test_size=0.2)

df_train = pd.concat([X_train, y_train], axis=1);
df_test = pd.concat([X_test, y_test], axis=1);

df_test

Unnamed: 0,id,label
33553,33554.png,5
9427,9428.png,7
199,200.png,2
12447,12448.png,9
39489,39490.png,0
...,...,...
28567,28568.png,4
25079,25080.png,0
18707,18708.png,9
15200,15201.png,9


**Creating Training and Test Image Data generator object**

In [19]:
image_width, image_height = 30, 30;
epochs = 20;
batch_size = 20;

if K.image_data_format() == "channels_first":
    input_shape = (3, image_width, image_height);
else:
    input_shape = (image_width, image_height, 3);
    
train_datagen = ImageDataGenerator(
    rescale = 1.0/255,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True
);

test_datagen = ImageDataGenerator(rescale = 1.0/255);


**Creating training and validation data generator**

In [9]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe = df_train,
    directory = "train_LbELtWX/train/",
    x_col="id", 
    y_col="label", 
    target_size = (image_width, image_height),
    batch_size = batch_size,
    class_mode = 'categorical',
    color_mode="rgb",
    seed=42
    #classes=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]  
);

validation_generator = test_datagen.flow_from_dataframe(
    dataframe = df_test,
    directory = "train_LbELtWX/train/",
    x_col="id", 
    y_col="label", 
    target_size = (image_width, image_height),
    batch_size = batch_size,
    class_mode = 'categorical',
    color_mode="rgb",
    seed=42
    #classes=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]  
);

Found 40000 validated image filenames belonging to 10 classes.
Found 10000 validated image filenames belonging to 10 classes.


**Creating feature and target data for tuning and searching best model**

In [11]:
from tqdm import tqdm;

train_img = []
for i in tqdm(range(1,df_train.shape[0] + 1)):
    img = image.load_img('train_LbELtWX/train/'+str(i)+'.png', target_size=(28,28,1), color_mode = 'grayscale')
    img = image.img_to_array(img)
    img = img/255.0
    train_img.append(img)

train_images = np.array(train_img)

train_labels = train_df['label'].astype(int).values
#y = to_categorical(train_labels)

train_labels


100%|██████████| 40000/40000 [00:07<00:00, 5010.35it/s]


array([9, 0, 0, ..., 5, 1, 7])

**Function to Build CNN Model**

In [12]:
def build_model(hp):
  model = keras.Sequential([
              keras.layers.Conv2D(
                    filters=hp.Int('conv_1_filter', min_value=32, max_value=128, step=16),
                    kernel_size = hp.Choice('conv_1_kernel', values = [3,5]),
                    activation='relu',
                    input_shape=(28,28,1)
                    ),

              keras.layers.Conv2D(
                    filters=hp.Int('conv_2_filter', min_value=32, max_value=64, step=16),
                    kernel_size = hp.Choice('conv_2_kernel', values = [3,5]),
                    activation='relu'
                    ),
                  
              keras.layers.Conv2D(
                    filters=hp.Int('conv_3_filter', min_value=32, max_value=64, step=16),
                    kernel_size = hp.Choice('conv_3_kernel', values = [3,5]),
                    activation='relu'
                    ),
              
              keras.layers.Conv2D(
                    filters=hp.Int('conv_4_filter', min_value=32, max_value=64, step=16),
                    kernel_size = hp.Choice('conv_4_kernel', values = [3,5]),
                    activation='relu'
                    ),
                  
              keras.layers.Flatten(),
              keras.layers.Dense(
                  units=hp.Int('dense_1_units', min_value=32, max_value=128, step=16),
                  activation='relu'
                    ),
              
              keras.layers.Dense(10, activation='softmax')
              ])
  
  model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3])),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  
  return model


**Creating RandomSearch variable with function to create CNN model as argument**

In [13]:
from kerastuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

tuner_search = RandomSearch(build_model, objective='val_accuracy', max_trials=5)

**Searching Best Parameters for CNN model**

In [14]:
tuner_search.search(train_images, train_labels, epochs=3, validation_split=0.2)

Trial 5 Complete [00h 00m 15s]
val_accuracy: 0.10050000250339508

Best val_accuracy So Far: 0.8927500247955322
Total elapsed time: 00h 01m 19s
INFO:tensorflow:Oracle triggered exit


**Finding Best CNN Model**

In [15]:
model = tuner_search.get_best_models(num_models=1)[0]

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 80)        2080      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 48)        96048     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 18, 48)        20784     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 48)        57648     
_________________________________________________________________
flatten (Flatten)            (None, 9408)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                301088    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                3

**Using parameters suggested by RandomSearch and creating a CNN Model**

In [16]:
model = Sequential()
model.add(Conv2D(80, (3, 3), input_shape = input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(48, (3, 3)));
model.add(Activation('relu'));
model.add(MaxPooling2D(pool_size=(2, 2)));

model.add(Conv2D(48, (3, 3)));
model.add(Activation('relu'));

model.add(Conv2D(48, (3, 3)));
model.add(Activation('relu'));
model.add(MaxPooling2D(pool_size=(2, 2)));
model.add(Dropout(0.25));

model.add(Flatten());
model.add(Dense(32));
model.add(Activation('relu'));
model.add(Dropout(0.5));

model.add(Dense(10));
model.add(Activation('softmax'));

model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 28, 28, 80)        2240      
_________________________________________________________________
activation (Activation)      (None, 28, 28, 80)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 80)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 12, 12, 48)        34608     
_________________________________________________________________
activation_1 (Activation)    (None, 12, 12, 48)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 48)          0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 4, 4, 48)         

**Compiling our CNN Model**

In [17]:
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6);

model.compile(optimizer = opt,
              loss='categorical_crossentropy',
              #optimizer = 'rmsprop',
              metrics = ['accuracy']);

model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f8270324ef0>

**Training our CNN model and finding it's accuracy with validation dataset**

In [20]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=validation_generator.n//validation_generator.batch_size

model.fit(train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=validation_generator,
          validation_steps=STEP_SIZE_VALID,
          epochs=epochs);

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


**Save Trained Model Which can be later used for predictions**

In [21]:
model.save_weights('apparel_prediction.h5');

**Create Test Generator**

In [22]:
test_generator = test_datagen.flow_from_dataframe(
    dataframe = test_df,
    directory = "train_LbELtWX/train/",
    x_col="id", 
    target_size = (image_width, image_height),
    batch_size = batch_size,
    class_mode = None,
    color_mode = "rgb",
    seed=42,
    shuffle=False
)


Found 10000 validated image filenames.


**Predicting Test Dataset**

In [28]:
test_generator.reset()

predictions=model.predict(test_generator,verbose=1,steps=test_generator.n/batch_size);

predictions = np.argmax(predictions, axis=1);

filenames=test_generator.filenames;

results=pd.DataFrame({"id":filenames,
                      "Predictions":predictions});

results["Expected"]=test_df.reset_index().drop(columns=['index'])["label"].astype('float32')

results["Predictions"]=results["Predictions"].astype('float32')

results=results.reindex(columns=['id','Expected','Predictions'])

results




Unnamed: 0,id,Expected,Predictions
0,50001.png,9.0,9.0
1,50002.png,2.0,2.0
2,50003.png,1.0,1.0
3,50004.png,0.0,0.0
4,50005.png,2.0,2.0
...,...,...,...
9995,59996.png,5.0,5.0
9996,59997.png,1.0,1.0
9997,59998.png,3.0,3.0
9998,59999.png,0.0,6.0


**Result Accuracy**

In [35]:
Prediction_Percentage = len(results[results["Predictions"]==results["Expected"]])*100/len(results)

print("Prediction Percentage: ", '{}{}'.format(Prediction_Percentage,'%'))

Prediction Percentage:  85.11%


**Predicting Apparels for Test Dataset**

In [36]:
download_test = drive.CreateFile({'id': '1d6buKxF5ioKx5BV3m9__XVSeTLQjCkmh'})

In [None]:
download_test.GetContentFile('test_ScVgIM0.zip')
!unzip test_ScVgIM0.zip

In [38]:
test = pd.read_csv('test_ScVgIM0/test.csv')

test["id"] = test["id"].apply(lambda x : '{}{}'.format(x,".png") )


In [39]:
# making predictions

test_generator = test_datagen.flow_from_dataframe(
    dataframe = test,
    directory = "test_ScVgIM0/test/",
    x_col="id", 
    target_size = (image_width, image_height),
    batch_size = batch_size,
    class_mode = None,
    color_mode = "rgb",
    seed=42,
    shuffle=False
)


Found 10000 validated image filenames.


In [40]:
test_generator.reset()

predictions=model.predict(test_generator,verbose=1,steps=test_generator.n/batch_size);

predictions = np.argmax(predictions, axis=1);

filenames=test_generator.filenames;

results=pd.DataFrame({"id":filenames,
                      "label":predictions});

results




Unnamed: 0,id,label
0,60001.png,9
1,60002.png,2
2,60003.png,1
3,60004.png,1
4,60005.png,6
...,...,...
9995,69996.png,9
9996,69997.png,1
9997,69998.png,8
9998,69999.png,1
