# MNIST SAMPLE Model
### Own model code

Here are the steps that would be followed:
1. Download the MNIST Sample Data
2. Read the data from location
3. Full SGD with mini-batch - Code written from scratch (except dataloader)
4. Use Optimizer - nn.Linear + Write a class for Basic Optimzation
5. Use SGD class instead of Basic Optimizer class
6. User Dataloaders and Learner
7. Create simple NN
8. Compare with CNN Learner

In [1]:
from fastai.vision.all import *

## 1. Get MNIST Sample

In [2]:
URLs.MNIST_SAMPLE

'https://s3.amazonaws.com/fast-ai-sample/mnist_sample.tgz'

In [3]:
path = untar_data(URLs.MNIST_SAMPLE)
path

Path('/storage/data/mnist_sample')

In [4]:
path.ls()

(#3) [Path('/storage/data/mnist_sample/labels.csv'),Path('/storage/data/mnist_sample/valid'),Path('/storage/data/mnist_sample/train')]

## 2. Read data from the location

In [5]:
train_threes_loc = (path/'train'/'3').ls()
train_sevens_loc = (path/'train'/'7').ls()
valid_threes_loc = (path/'valid'/'3').ls()
valid_sevens_loc = (path/'valid'/'7').ls()

In [6]:
train_threes_tensor = torch.stack([tensor(Image.open(i)) for i in train_threes_loc]).float()/255
train_sevens_tensor = torch.stack([tensor(Image.open(i)) for i in train_sevens_loc]).float()/255
valid_threes_tensor = torch.stack([tensor(Image.open(i)) for i in valid_threes_loc]).float()/255
valid_sevens_tensor = torch.stack([tensor(Image.open(i)) for i in valid_sevens_loc]).float()/255

train_threes_tensor.shape, train_sevens_tensor.shape, valid_threes_tensor.shape, valid_sevens_tensor.shape

(torch.Size([6131, 28, 28]),
 torch.Size([6265, 28, 28]),
 torch.Size([1010, 28, 28]),
 torch.Size([1028, 28, 28]))

## 3. Full SGD

Steps for a SGD for this problem:  

- 3.i. Convert x into vector and define y
- 3.ii. Store the data into dset tuple
- 3.iii. Load data into dataloader so as to read it as mini-batches
- 3.iv. Initialize the parameters
- 3.v. Define the model function
- 3.vi. Define MNIST Loss function
- 3.vii. Define the SGD function, which would include
    - Finding out predictions
    - Calculating Loss
    - Calcuate Loss gradient
    - Updating Parameters
- 3.viii. Define function to calcuate the batch metric
- 3.ix. Define validation accuracy function
- 3.x. Run the SGD process in a loop and print accuracy on each iteration

#### 3.i. Convert x into vector and define y

In [7]:
train_x = torch.cat([train_threes_tensor, train_sevens_tensor]).view(-1, 28*28)
valid_x = torch.cat([valid_threes_tensor, valid_sevens_tensor]).view(-1, 28*28)

train_x.shape, valid_x.shape

(torch.Size([12396, 784]), torch.Size([2038, 784]))

In [8]:
train_y = tensor([1]*len(train_threes_tensor) + [0]*len(train_sevens_tensor)).unsqueeze(1)
valid_y = tensor([1]*len(valid_threes_tensor) + [0]*len(valid_sevens_tensor)).unsqueeze(1)

train_y.shape, valid_y.shape

(torch.Size([12396, 1]), torch.Size([2038, 1]))

#### 3.ii. Store the data into dset tuple

In [9]:
train_dset = list(zip(train_x, train_y))
valid_dset = list(zip(valid_x, valid_y))

#### 3.iii. Load data into dataloader so as to read it as mini-batches

In [10]:
train_dl = DataLoader(train_dset, batch_size=256, shuffle=True)
valid_dl = DataLoader(valid_dset, batch_size=256, shuffle=True)

#### 3.iv. Initialize the parameters

In [30]:
def init_params(size): return torch.randn(size).requires_grad_()

In [112]:
weights = init_params(28*28)
weights.shape

torch.Size([784])

In [113]:
bias = init_params(1)
bias.shape

torch.Size([1])

#### 3.v. Define the model function

In [14]:
def linear1(xb): return xb@weights + bias

In [15]:
def sigmoid(x): return 1/(1+torch.exp(-x))

#### 3.vi. Define MNIST Loss function

In [114]:
def mnist_loss(pred, actual):
    pred = pred.sigmoid()
    return torch.where(actual.T==1, 1-pred, pred).mean()
    
#return (sigmoid(pred)-actual.T).abs().mean()

#### 3.vi. Define the SGD function, which would include

In [116]:
def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

def train_epoch(model, lr, params):
    for xb,yb in train_dl:
        calc_grad(xb, yb, model)
        for p in params:
            p.data -= p.grad*lr
            p.grad.zero_()

# def train_epoch(dl, model, lr, params):
#     for xb, yb in dl:
#         pred = model(xb)
#         loss = mnist_loss(pred, yb)
#         loss.backward()
#         #print(loss)
        
#         # Update the parameters
#         for p in params:
#             p.data -= p.grad*lr
#             p.grad.zero_()

#### 3.viii. Define function to calcuate the batch metric

In [18]:
# def batch_accuracy(xb, yb): return (((xb.sigmoid()) > 0.5).float() == yb.T).float().mean()

In [19]:
def batch_accuracy(xb, yb):
    preds = xb.sigmoid()
    correct = (preds>0.5) == yb
    return correct.float().mean()

#### 3.ix. Define validation accuracy function


In [20]:
# def validate_epoch(dl, model): return round(torch.stack([batch_accuracy(model(xb.data), yb) for xb,yb in dl]).mean().item(),4)

In [21]:
def validate_epoch(model):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in valid_dl]
    return round(torch.stack(accs).mean().item(), 4)

#### 3.x. Run the SGD process in a loop and print accuracy on each iteration

In [117]:
lr = 1.
params = weights,bias

for i in range(20):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

0.5025 0.5041 0.5026 0.5013 0.5012 0.5018 0.5016 0.5014 0.5058 0.5005 0.5014 0.5018 0.5003 0.5014 0.5019 0.5007 0.5012 0.5028 0.5032 0.5025 

In [23]:
# print(validate_epoch(linear1), end=' ')
# for i in range(40):
#     train_epoch(train_dl, linear1, 1., params)
#     print(validate_epoch(linear1), end='  ')

## 4. Use Optimizer - nn.Linear + Write a class for Basic Optimzation

In [24]:
linear_model = nn.Linear(28*28, 1)

In [25]:
w,b = linear_model.parameters()
w.shape, b

(torch.Size([1, 784]),
 Parameter containing:
 tensor([-0.0092], requires_grad=True))

In [26]:
class BasicOptim:
    def __init__(self,params,lr): self.params,self.lr = list(params),lr

    def step(self, *args, **kwargs):
        for p in self.params: p.data -= p.grad * self.lr

    def zero_grad(self, *args, **kwargs):
        for p in self.params: p.grad = None

In [27]:
lr = 1.
opt = BasicOptim(linear_model.parameters(), lr)

In [28]:
def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

def train_epoch(dl, model):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        opt.step()
        opt.zero_grad()

In [29]:
print(validate_epoch(valid_dl, linear_model), end=' ')
for i in range(40):
    train_epoch(train_dl, linear_model)
    print(validate_epoch(valid_dl, linear_model), end='  ')

TypeError: validate_epoch() takes 1 positional argument but 2 were given

# :( The above didn't work :`(  

--- 


## 5. Use SGD class instead of Basic Optimizer class

In [None]:
linear_model = nn.Linear(28*28,1)
opt = SGD(linear_model.parameters(), lr)

In [None]:
print(valid_accuracy(valid_dl, linear_model), end=' ')
for i in range(40):
    train_epoch(train_dl, linear_model)
    print(valid_accuracy(valid_dl, linear_model), end='  ')

## 6. User Dataloaders and Learner

In [None]:
dls = DataLoaders(train_dl, valid_dl)

In [None]:
learn = Learner(dls, nn.Linear(28*28,1), opt_func=SGD,
                loss_func=mnist_loss, metrics=accuracy)

In [None]:
learn.fit(10, lr=lr)

## 7. Create simple NN

In [None]:
simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,1)
)

In [None]:
learn = Learner(dls, simple_net, opt_func=SGD,
                loss_func=mnist_loss, metrics=accuracy)

In [None]:
learn.fit(40, 0.1)

In [None]:
plt.plot(L(learn.recorder.values).itemgot(2));

## 8. Compare with CNN Learner

In [None]:
dls = ImageDataLoaders.from_folder(path)
learn = cnn_learner(dls, resnet18, pretrained=False,
                    loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(1, 0.1)