### Creating an end to end submission using fastai v3
Credits : https://www.kaggle.com/vijaykris/mnist-classification-using-fast-ai-v2

In [None]:
import numpy as np
import pandas as pd

import os

# Show what is in the input data directory.
os.listdir('../input')

In [None]:
# IPython magics: (re)load the autoreload extension, set it to mode 2,
# and render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

### Imports

In [None]:
from fastai.imports import *
from fastai.conv_learner import *

from fastai.transforms import *
from fastai.models import *

from fastai.sgdr import *
from fastai.plots import *
from fastai.dataset import *

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

### The path

In [None]:
# Directory that holds train.csv and test.csv (read in the next cell).
PATH = "../input"
os.listdir(PATH)

In [None]:
# Load the training and test tables from PATH in one pass.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (f'{PATH}/train.csv', f'{PATH}/test.csv')
)

In [None]:
# Peek at the first five rows of the training table.
train.head(5)

#We need to separate out the labels from the training set

### Separating labels from training set

from : https://www.geeksforgeeks.org/python-extracting-rows-using-pandas-iloc/
Pandas provides a unique method to retrieve rows from a DataFrame. The DataFrame.iloc[] method is used when the index label of a DataFrame is something other than the numeric series 0, 1, 2, 3, … n, or when the user doesn’t know the index label. It can also be used for extracting multiple rows (and columns) by position, which is how it is used below.

In [None]:
# Positional split: the first column becomes the labels (kept as a
# single-column DataFrame), every remaining column becomes the pixel data.
lbl = train.iloc[:, :1]
image = train.iloc[:, 1:]

### Viewing as image  and loading to fast ai using image classification

Since ResNet expects 3-channel input, we stack the single-channel image array three times along a new last axis.

In [None]:
# Convert the pixel tables to NumPy arrays and reshape every row to 28x28.
# DataFrame.as_matrix() is deprecated (and removed in pandas 1.0); .values is
# the supported equivalent and is what the label conversion below already uses.
img = image.values.reshape(-1, 28, 28)
test_img = test.values.reshape(-1, 28, 28)

# Repeat the single channel three times along a new last axis, giving
# arrays of shape (N, 28, 28, 3), and cast to float32.
img = np.stack((img,) * 3, axis=-1).astype('float32')
test_img = np.stack((test_img,) * 3, axis=-1).astype('float32')

In [None]:
# img is float32 with raw pixel values; imshow expects float RGB data in the
# [0, 1] range and clips anything above it, so rescale for display only
# (same /255 scaling that is applied when the data object is built).
plt.imshow(img[4] / 255)

### Splitting the training set into Train and Valid

In [None]:
import warnings
# Suppress all warnings from this point on in the notebook.
warnings.filterwarnings('ignore')

In [None]:
# Keep 80% of the samples for training and hold the rest out for validation;
# the fixed random_state makes the split repeatable.
split = train_test_split(img, lbl, train_size=0.8, random_state=1)
train_img, val_img, train_lbl, val_lbl = split

In [None]:
# Sanity check: print the shape and the Python type of every array/table.
grp = [train_img, val_img, train_lbl, val_lbl, test_img]
shapes, kinds = [], []
for arr in grp:
    shapes.append(arr.shape)
    kinds.append(type(arr))
print(shapes)
print(kinds)

### converting labels from dataframe into array

In [None]:
# Turn each single-column label DataFrame into a flat 1-D NumPy array.
train_lbl, val_lbl = (
    frame.values.flatten() for frame in (train_lbl, val_lbl)
)

### getting data

In [None]:
# Architecture and image size used to build the transforms and the learner.
arch = resnet34
sz = 28

# Build the fastai data object from in-memory arrays; pixels are scaled to
# [0, 1] by dividing by 255.
# `classes` must be the list of distinct class labels, not the per-sample
# label array (the original passed train_lbl, one entry per training image).
data = ImageClassifierData.from_arrays(
    path='/tmp',
    trn=(train_img / 255, train_lbl),
    val=(val_img / 255, val_lbl),
    classes=sorted(set(train_lbl)),
    test=test_img / 255,
    tfms=tfms_from_model(arch, sz, max_zoom=1.1),
)

### ConvLearner

In [None]:
# Create a learner for `arch` on `data` from pretrained weights, with
# precompute enabled; this instance is used for the learning-rate search below.
learn = ConvLearner.pretrained(arch, data, precompute=True)

### Finding best learning rate

In [None]:
# Run the learning-rate finder; its results are plotted in the next two cells.
learn.lr_find()

In [None]:
# Plot the learning-rate schedule recorded by the scheduler.
learn.sched.plot_lr()

In [None]:
# Plot the scheduler's recorded results (presumably loss against learning
# rate) — this is the plot the learning rate is chosen from.
learn.sched.plot()

Based on the plot, we will select 0.01 as the learning rate.

### training at optimum learning rate

In [None]:
# Start again from a fresh pretrained learner (precompute enabled) for the
# actual training run.
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [None]:
# Train with the learning rate chosen above (0.01); the second argument (9)
# is the number of training cycles/epochs.
learn.fit(0.01,9)

### using cycle rate and data augmentations
We will use cycle rate and data augmentations

In [None]:
# Build a learner with precompute disabled (presumably so the data
# augmentations configured in the transforms take effect during training).
# NOTE(review): this creates a brand-new learner instead of reusing the one
# trained in the previous section, so that training is not carried over —
# confirm this is intended (the alternative would be `learn.precompute = False`).
learn = ConvLearner.pretrained(arch, data, precompute=False)


In [None]:
# Train at learning rate 0.01 for 9 cycles, each one epoch long (cycle_len=1).
learn.fit(0.01, 9, cycle_len=1)

`cycle_len` enables stochastic gradient descent with restarts (SGDR).  
This helps the model jump to different parts of the weight space.

In [None]:
# Plot the learning-rate schedule used during the training run above.
learn.sched.plot_lr()

### Fine tuning with other layers.

With the final layer already trained, we now unfreeze the remaining layers so they can be fine-tuned as well.

In [None]:
# Unfreeze the learner's layers so that subsequent training also updates the
# earlier layers.
learn.unfreeze()

The earlier layers (as we've seen) have more general-purpose features. Therefore we would expect them to need less fine-tuning for new datasets. For this reason we will use different learning rates for different layers: the first few layers will be at 1e-4, the middle layers at 1e-3, and our FC layers we'll leave at 1e-2 as before. We refer to this as differential learning rates.

In [None]:
# Differential learning rates, ordered from the earliest layers to the last:
# 1e-4, 1e-3, 1e-2.
lr = np.array([1e-4, 1e-3, 1e-2])


In [None]:
# Fine-tune with the per-group learning rates: 4 cycles, the first one epoch
# long (cycle_len=1), with cycle_mult=2 passed to lengthen successive cycles.
learn.fit(lr, 4, cycle_len=1, cycle_mult=2)

`cycle_mult` multiplies the length of the cycle after each cycle. E.g. with 4 cycles, cycle_len=1 and cycle_mult=2, the cycle lengths are 1 epoch + 2 epochs + 4 epochs + 8 epochs = 15 epochs in total.

In [None]:
# Plot the learning-rate schedule of the fine-tuning run.
learn.sched.plot_lr()

In [None]:
# Save the learner's current state under the name '4_epochs'.
learn.save('4_epochs')

In [None]:
# The Learner API imported in this notebook (fastai.conv_learner) does not
# provide `export` (that method belongs to the later fastai v1 API), so the
# original call raises AttributeError. Persist the trained network's weights
# with PyTorch directly instead; the file contains the model's state_dict.
import torch

torch.save(learn.model.state_dict(), '../working/mnist.pkl')

### predicting in test data

In [None]:
%time log_pred_test, y_test=learn.TTA(is_test=True)
probs_test = np.mean(np.exp(log_pred_test),0)
probs_test.shape

### making a submission file

In [None]:
# Wrap the averaged test predictions in a DataFrame (one row per test image).
df = pd.DataFrame(probs_test)

In [None]:
# Preview the first rows of the prediction table.
df.head()

In [None]:
# Label: index of the largest value in each row.
predicted = df.values.argmax(axis=1)
df = df.assign(Label=predicted)
# ImageId: 1-based row number.
df = df.assign(ImageId=df.index.values + 1)

In [None]:
# Keep only the two columns that go into the submission file, in this order.
df1 = df.loc[:, ['ImageId', 'Label']]


In [None]:
# Preview the first rows of the submission table.
df1.head()

In [None]:
# Check the submission table's dimensions (rows, columns).
df1.shape

In [None]:
# Write the submission file without the DataFrame index column.
df1.to_csv('submission.csv', index=False)