## A simple CNN in Pytorch
Following [Michael Li](https://towardsdatascience.com/build-a-fashion-mnist-cnn-pytorch-style-efb297e22582), who explains how to do all of this. 

In [168]:
import torch
print(f"""Using Torch version {torch.__version__}.  
        CUDA is {'available' if torch.cuda.is_available() else 'not available'}. 
        MPS is {'available' if torch.backends.mps.is_available() else 'not available'}""")
gpu = 'mps' if torch.backends.mps.is_available() else 'cuda'
cpu = 'cpu'
# Plotting libraries
import bokeh
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Label
print(f"Using bokeh version {bokeh.__version__}.")

# numpy and pandas
import numpy as np
import pandas as pd
print(f"Using pandas version {pd.__version__}.")

# tqdm makes progress bars
import tqdm
# we use train test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

Using Torch version 2.0.0.  
        CUDA is not available. 
        MPS is available
Using bokeh version 3.1.0.
Using pandas version 2.0.0.


In [169]:
output_notebook()
device = gpu

In [170]:
# Build the neural network, expand on top of nn.Module
class Network(torch.nn.Module):
    def __init__(self):
        super().__init__()

        # define layers
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        ## takes a 28 x 28 matrix and produces 6 x 24 x 24 matrices
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        ## takes 6 24 x 24 matrices and producs 12 x 20 x 20 matrices

        self.fc1 = torch.nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=60)
        self.out = torch.nn.Linear(in_features=60, out_features=10)

    # define forward function
    def forward(self, t):
        # conv 1
        t = self.conv1(t)
        ## 1 x 28 x 28 goes to 6 x 24 x 24
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)
        ## 6 x 24 x 24 goes to 6 x 12 x 12 through pooling

        # conv 2
        t = self.conv2(t)
        ## 6 x 12 x 12 goes to 12 x 8 x 8 
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)
        ## pooling 12 x 4 x 4

        # fc1
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = torch.nn.functional.relu(t)
        ## 120 features in to 60 out

        # fc2
        t = self.fc2(t)
        t = torch.nn.functional.relu(t)
        ## 60 in to 10 out

        # output
        t = self.out(t)
        # don't need softmax here since we'll use cross-entropy as activation.

        return t

In [171]:
X.shape

torch.Size([9999, 1, 28, 28])

In [172]:
images = pd.read_csv("fmnist_data.csv")
labels = pd.read_csv("fmnist_labels.csv")
labels_ohe = OneHotEncoder().fit_transform(labels.values).toarray()



In [216]:
X = torch.tensor(images.values, dtype=torch.float32, device=device).reshape((images.values.shape[0],1,28,28))
Y = torch.tensor(labels_ohe, dtype=torch.float32, device=device).reshape((labels_ohe.shape[0],labels_ohe.shape[1]))
criterion = torch.nn.functional.cross_entropy
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
model = Network().to(device)

In [217]:
def train(model, criterion, optimizer, Xt, Yt):
    """One step through the training loop"""
    # reset the gradient calculations
    optimizer.zero_grad()

    # forward pass
    predicted = model(Xt)
    
    # compute the loss
    loss = criterion(predicted,Yt)
    

    # compute the gradients by backward propogation
    loss.backward()        
        
    # adjust the weights
    optimizer.step()
    
    return loss.item()

In [218]:
def training_loop(model, data, target, learning_rate=.0001,threshold=1e-3,max_iter=100000):
    """Run the training loop and return the losses"""
    criterion = torch.nn.functional.cross_entropy
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    losses = []
    prior_loss=1000000
    for i in range(max_iter):
        if (i % 100) == 0:
            print(prior_loss)
        loss = train(model,criterion,optimizer,data, target)
        losses.append(loss)
        if abs(loss-prior_loss) < threshold:
            break
        prior_loss = loss
            
    return losses
    

In [219]:
def plot_loss(losses):
    """Run the model and collect the losses; return a figure"""
    
    f=figure(title=f"Loss over time",x_axis_label="Epoch",y_axis_label="Loss")
    f.line(x=list(range(len(losses))),y=losses)

    
    return f

In [220]:
#model = Network().to(device)
losses = training_loop(model, X,Y,threshold=1e-7,learning_rate=.00001)
show(plot_loss(losses))

1000000


KeyboardInterrupt: 

In [194]:
ct=0
for i in range(X.shape[0]):
               ct = ct + (torch.argmax(torch.nn.functional.softmax(model(X[i,:]),dim=1)).item()==torch.argmax(Y[i]).item())

In [195]:
ct/9999

0.8161816181618162

In [191]:
model(X).shape

torch.Size([9999, 10])

In [221]:
criterion(model(X),Y)

tensor(2.6898, device='mps:0', grad_fn=<DivBackward1>)

In [205]:
model(X[0,:]).shape

torch.Size([1, 10])

In [208]:
Y[0].reshape(1,10).shape

torch.Size([1, 10])

In [210]:
criterion(model(X[1,:]),Y[1].reshape(1,10))

tensor(1.2112, device='mps:0', grad_fn=<DivBackward1>)

In [222]:
model(X)

tensor([[ 2.3759,  1.2019,  1.1919,  ...,  0.2710,  0.1830,  1.0719],
        [ 1.2176,  1.2414,  0.1260,  ...,  0.5107,  0.4700, -0.1921],
        [ 1.0353,  3.0996,  0.4740,  ...,  1.9166, -0.9285, -0.7557],
        ...,
        [ 0.6525,  1.5084, -0.5573,  ...,  0.3865, -0.4294, -0.3489],
        [ 1.4063,  1.6833,  1.1898,  ..., -0.6133, -0.8799,  0.4384],
        [ 2.1146,  3.3303,  0.5251,  ...,  0.4390, -0.0489, -1.0522]],
       device='mps:0', grad_fn=<LinearBackward0>)

In [223]:
Y

tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='mps:0')

In [225]:
criterion(model(X)[0,:],Y[0])

tensor(2.1893, device='mps:0', grad_fn=<DivBackward1>)