## A simple CNN in Pytorch
Following [Michael Li](https://towardsdatascience.com/build-a-fashion-mnist-cnn-pytorch-style-efb297e22582), who explains how to do all of this. 

In [1]:
import torch
print(f"""Using Torch version {torch.__version__}.  
        CUDA is {'available' if torch.cuda.is_available() else 'not available'}. 
        MPS is {'available' if torch.backends.mps.is_available() else 'not available'}""")
gpu = 'mps' if torch.backends.mps.is_available() else 'cuda'
cpu = 'cpu'
# Plotting libraries
import bokeh
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Label
print(f"Using bokeh version {bokeh.__version__}.")

# numpy and pandas
import numpy as np
import pandas as pd
print(f"Using pandas version {pd.__version__}.")

# tqdm makes progress bars
import tqdm
# we use train test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

Using Torch version 1.13.1.  
        CUDA is available. 
        MPS is not available
Using bokeh version 3.0.3.
Using pandas version 1.5.2.


In [2]:
output_notebook()
device = gpu

In [3]:
# Build the neural network, expand on top of nn.Module
class Network(torch.nn.Module):
    def __init__(self):
        super().__init__()

        # define layers
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        ## takes a 28 x 28 matrix and produces 6 x 24 x 24 matrices
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        ## takes 6 24 x 24 matrices and producs 12 x 20 x 20 matrices

        self.fc1 = torch.nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=60)
        self.out = torch.nn.Linear(in_features=60, out_features=10)

    # define forward function
    def forward(self, t):
        # conv 1
        t = self.conv1(t)
        ## 1 x 28 x 28 goes to 6 x 24 x 24
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)
        ## 6 x 24 x 24 goes to 6 x 12 x 12 through pooling

        # conv 2
        t = self.conv2(t)
        ## 6 x 12 x 12 goes to 12 x 8 x 8 
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)
        ## pooling 12 x 4 x 4

        # fc1
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = torch.nn.functional.relu(t)
        ## 120 features in to 60 out

        # fc2
        t = self.fc2(t)
        t = torch.nn.functional.relu(t)
        ## 60 in to 10 out

        # output
        t = self.out(t)
        # don't need softmax here since we'll use cross-entropy as activation.

        return t

In [4]:
class ImageDataSet(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.labels = labels
        self.data = data
        self.num = data.shape[0]
       
        
    def __len__(self):
        return self.num
    
    def __getitem__(self,idx):
        return self.data[idx,:], self.labels[idx,:]

In [5]:
train = pd.read_csv("fmnist_train.csv").values

test = pd.read_csv("fmnist_test.csv").values
train_images = train[:,1:]
train_labels = train[:,0].reshape(-1,1)
test_images = test[:,1:]
test_labels = test[:,0].reshape(-1,1)
train_labels = OneHotEncoder().fit_transform(train_labels).toarray()
test_labels = OneHotEncoder().fit_transform(test_labels).toarray()



In [6]:
train.shape

(60000, 785)

In [54]:
Xtrain= torch.tensor(train_images, dtype=torch.float32, device=device).reshape((train_images.shape[0],1,28,28))
Ytrain = torch.tensor(train_labels, dtype=torch.float32, device=device).reshape((train_labels.shape[0],train_labels.shape[1]))
Xtest= torch.tensor(test_images, dtype=torch.float32, device=device).reshape((test_images.shape[0],1,28,28))
Ytest = torch.tensor(test_labels, dtype=torch.float32, device=device).reshape((test_labels.shape[0],test_labels.shape[1]))
criterion = torch.nn.functional.cross_entropy
data = ImageDataSet(Xtrain,Ytrain)
data_source = torch.utils.data.DataLoader(data,batch_size=5000,shuffle=True)

In [38]:
def train(model, Xt, Yt):
    """One step through the training loop"""
    # reset the gradient calculations
    for batch_data, batch_target in data_source:
    
            # reset the gradient calculations
        optimizer.zero_grad()
        
        predicted = model(batch_data)
    
            # compute the loss
        loss = criterion(predicted,batch_target)
    
            # compute the gradients by backward propogation
        loss.backward()        
        
            # adjust the weights
        optimizer.step()   
    

    
    return loss.item()

In [39]:
def training_loop(model, data, target, learning_rate=.0001,threshold=1e-6,max_iter=100000):
    """Run the training loop and return the losses"""
    

    losses = []
    prior_loss=1000000
    for i in tqdm.tqdm(range(max_iter)):
        loss = train(model,data, target)
        losses.append(loss)
        if abs(loss-prior_loss) < threshold:
            break
        prior_loss = loss
        
    return losses
    

In [40]:
def plot_loss(losses):
    """Plot the losses"""
    f=figure(title=f"Loss over time",x_axis_label="Epoch",y_axis_label="Loss")
    f.line(x=list(range(len(losses))),y=losses)

    
    return f

In [61]:
#model = Network().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
losses = training_loop(model, Xtrain,Ytrain,threshold=0,max_iter=100)
show(plot_loss(losses))

100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:56<00:00,  1.78it/s]


In [62]:
(torch.argmax(torch.nn.functional.softmax(model(Xtrain),dim=1),dim=1)==torch.argmax(Ytrain,dim=1)).sum().item()/Xtrain.shape[0]

0.95635

In [63]:
(torch.argmax(torch.nn.functional.softmax(model(Xtest),dim=1),dim=1)==torch.argmax(Ytest,dim=1)).sum().item()/Xtest.shape[0]

0.8894