# Setting up Colab and linking Google Drive 

In [0]:
!pip install pyDrive



**Importing Google Drive support libraries**

In [0]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

**Creating a drive variable to access Google Drive**



In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

**Downlaod and Unzip the file** 

In [0]:
download = drive.CreateFile({'id' : '1zoyDVBWsZhfxHJnIagvUJeXKN-9oZ2Hz'}) # unique file id of train data from google drive 

In [0]:
download.GetContentFile('train_LbELtWX.zip')
!unzip train_LbELtWX.zip

# Main

**1. Importing Libraries**

In [0]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.layers import MaxPool2D
from keras.layers import Dropout
from keras.preprocessing import image
import tensorflow as tf

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tqdm import tqdm # display progress bar

**2. Loading and Preprocessing**

 *Dataset contains 60,000 images (28 x 28) as part of train images along with 'train.csv' containing the label of the type of apparel (total classes: 10)*

 *Given below is the code description for each of the apparel class/label.*

0. *T-shirt/top*
1. 	*Trouser*
2. 	*Pullover*
3. 	*Dress*
4. 	*Coat*
5. 	*Sandal*
6. 	*Shirt*
7. 	*Sneaker*
8. 	*Bag*
9. 	*Ankle boot*



> Reading, storing and converting the images to a numpy array


In [0]:
train = pd.read_csv('train.csv')

In [131]:
train_images = []
for i in tqdm(range(train.shape[0])):
  img = image.load_img('train/'+train['id'][i].astype('str')+'.png', color_mode = 'grayscale', target_size = (28,28,1)) # load the image
  img = image.img_to_array(img) # convert image to a list of values
  img = img/255 # scale the list to range 0 - 1 (0 is 0(0x0) and 1 is 255(0xFF))
  train_images.append(img) # append the array to 'train_images'
X = np.array(train_images) # convert the list to a nupy array

100%|██████████| 60000/60000 [00:17<00:00, 3423.65it/s]


> One Hot Encoding Categorical Data

In [132]:
y = train['label'].values
y = tf.keras.utils.to_categorical(y)
y

array([[0., 0., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

**3. Creating a validation set from training data**

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

**4. Defining model structure**





> Creating 2 Convolution layers, 2 Hidden layers and 1 Output layer 

In [0]:
# initializing the neural network
classifier = Sequential()
# convolution layer 1
classifier.add(Conv2D(filters = 32, kernel_size=(3,3), activation = 'relu', input_shape = (28,28,1))) # 32 feature detectors, each with size 3x3
# max pooling
classifier.add(MaxPool2D(pool_size = (2,2), strides = 2))
# dropout layer
classifier.add(Dropout(rate = 0.25))

# convolution layer 2
classifier.add(Conv2D(filters = 64, kernel_size=(3, 3), activation = 'relu'))
# second max pooling layer
classifier.add(MaxPool2D(pool_size = (2,2), strides = 2))
# dropout layer
classifier.add(Dropout(rate = 0.25))


# flatten layer
classifier.add(Flatten())

# hidden layer
classifier.add(Dense(units = 128, activation = 'relu', ))
# dropout layer
classifier.add(Dropout(rate = 0.5))

# hidden layer 2
classifier.add(Dense(units = 64, activation = 'relu', ))
# dropout layer
classifier.add(Dropout(rate = 0.4))
# output later
classifier.add(Dense(units = 10, activation = 'softmax' ))

# compiling the model
classifier.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])


**5. Training the Model**

In [167]:
classifier.fit(X_train, y_train, batch_size = 32, epochs = 15, validation_data = (X_test, y_test))

Train on 48000 samples, validate on 12000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.callbacks.History at 0x7f8c92c630f0>

**6. Making predictions on train data**

> Downloading and unziping the training dataset

In [0]:
download_test = drive.CreateFile({'id': '152Do2m-MMkG-VHfbvs9kJrhNPKzdsf4a'}) # unique file id of test data from google drive 
download_test.GetContentFile('test_ScVgIM0.zip')
!unzip test_ScVgIM0.zip

> Import test file

In [0]:
test = pd.read_csv('test.csv')



> Load and convert test data



In [95]:
test_images = []
for i in tqdm(range(test.shape[0])):
  img = image.load_img('test/'+test['id'][i].astype('str')+'.png', target_size = (28,28,1), color_mode = 'grayscale')
  img = image.img_to_array(img)
  img = img/255
  test_images.append(img) 
test = np.array(test_images)

100%|██████████| 10000/10000 [00:02<00:00, 3450.08it/s]


> Making predictions

In [77]:
prediction = classifier.predict_classes(test)
prediction

array([9, 2, 1, ..., 8, 1, 5])

**7. Creating Output File** 

> Create an output dataframe


In [305]:
id = []
label = []
j = 0
for i in tqdm(range(test.shape[0])):
    label.append(i+60001) #starts from 60001, based on test data
    id.append(prediction[j])
    j = j+1
output_file = pd.DataFrame(np.column_stack([label, id]),columns=['id', 'label']) #merge id and label 


100%|██████████| 10000/10000 [00:00<00:00, 681900.86it/s]


In [306]:
output_file

Unnamed: 0,id,label
0,60001,9
1,60002,2
2,60003,1
3,60004,1
4,60005,6
...,...,...
9995,69996,9
9996,69997,1
9997,69998,8
9998,69999,1


> Output as .csv

In [0]:
output_file.to_csv('file.csv', header = True, index = False)