<a href="https://colab.research.google.com/github/chainzero/pytorch_models/blob/main/ann_breadth_vs_depth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Model depth vs. breadth

In [1]:
# import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

Import and organize the data


In [2]:
# download the iris dataset (seaborn's copy) as a pandas dataframe
import pandas as pd
iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')

# features: the first four columns, converted to a float tensor
featureColumns = iris.columns[0:4]
data = torch.tensor( iris[featureColumns].values ).float()

# labels: species name -> integer class code
# (any species missing from the mapping falls back to class 0)
speciesCodes = {'setosa':0, 'versicolor':1, 'virginica':2}
labels = torch.tensor( iris.species.map(speciesCodes).fillna(0).values ).long()

Construct and sanity-check the model


In [10]:
# create a class for the model

class ANNiris(nn.Module):
  """Fully connected network with configurable breadth and depth.

  Architecture: Linear(4 -> nUnits), followed by `nLayers` hidden
  Linear(nUnits -> nUnits) layers, followed by Linear(nUnits -> 3).
  A ReLU is applied after every layer except the output layer, whose
  raw values are returned.

  Args:
    nUnits:  number of units in the input layer and in each hidden layer
    nLayers: number of hidden layers (0 means input -> output only)
  """

  def __init__(self,nUnits,nLayers):
    super().__init__()

    # forward() needs the depth to know which hidden layers to visit
    self.nLayers = nLayers

    # (name, in_features, out_features) for every layer, in network order
    layerSpecs = [ ('input',4,nUnits) ]
    layerSpecs += [ (f'hidden{layeri}',nUnits,nUnits) for layeri in range(nLayers) ]
    layerSpecs.append( ('output',nUnits,3) )

    # store the layers in a ModuleDict so their parameters are registered
    self.layers = nn.ModuleDict()
    for layerName,nIn,nOut in layerSpecs:
      self.layers[layerName] = nn.Linear(nIn,nOut)


  def forward(self,x):
    """Run x through the input layer, all hidden layers, and the output layer."""
    # every layer before the output is followed by a relu
    # (note: the code in the video omits the relu after the input layer)
    reluLayers = ['input'] + [ f'hidden{layeri}' for layeri in range(self.nLayers) ]
    for layerName in reluLayers:
      x = F.relu( self.layers[layerName](x) )

    # output layer: no nonlinearity
    return self.layers['output'](x)



In [11]:
# build an example network (12 units per layer, 4 hidden layers)
# and display its layer structure
nUnitsPerLayer = 12
nLayers = 4
net = ANNiris(nUnits=nUnitsPerLayer, nLayers=nLayers)
net

ANNiris(
  (layers): ModuleDict(
    (input): Linear(in_features=4, out_features=12, bias=True)
    (hidden0): Linear(in_features=12, out_features=12, bias=True)
    (hidden1): Linear(in_features=12, out_features=12, bias=True)
    (hidden2): Linear(in_features=12, out_features=12, bias=True)
    (hidden3): Linear(in_features=12, out_features=12, bias=True)
    (output): Linear(in_features=12, out_features=3, bias=True)
  )
)

In [12]:
# Sanity check: pass random data through the model to confirm that the
# architecture runs end to end and produces output of the expected size.

# 10 samples, each with 4 features
tmpx = torch.rand(10,4)

# forward pass through the network
y = net(tmpx)

# show the output shape, a spacer line, and then the output itself
print(y.shape)
print(" ")
print(y)

torch.Size([10, 3])
 
tensor([[ 0.2006,  0.0591, -0.0246],
        [ 0.2010,  0.0598, -0.0251],
        [ 0.2016,  0.0602, -0.0249],
        [ 0.2025,  0.0601, -0.0247],
        [ 0.2008,  0.0583, -0.0240],
        [ 0.2016,  0.0598, -0.0247],
        [ 0.2021,  0.0606, -0.0251],
        [ 0.2036,  0.0621, -0.0257],
        [ 0.2008,  0.0595, -0.0247],
        [ 0.2037,  0.0618, -0.0254]], grad_fn=<AddmmBackward0>)


Create a function that trains the model


In [13]:
def trainTheModel(theModel):
  """Train a model on the full dataset and report its final accuracy.

  Runs `numepochs` full-batch training iterations using cross-entropy loss
  and SGD (learning rate 0.01). Note that `numepochs`, `data`, and `labels`
  are read from the notebook's global scope, so they must be defined
  before this function is called.

  Args:
    theModel: the nn.Module to train; called as theModel(data).

  Returns:
    acc:     accuracy (in percent) on `data` after training, as a 0-dim tensor
    nParams: total number of trainable parameters in the model
  """

  # define the loss function and optimizer
  lossfun = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(theModel.parameters(), lr=0.01)

  # loop over the epochs
  for _ in range(numepochs):

    # forward pass
    yHat = theModel(data)

    # compute loss
    loss = lossfun(yHat, labels)

    # backprop (clear the gradients left over from the previous step first)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # final forward pass to get accuracy; no gradients are needed here
  with torch.no_grad():
    predictions = theModel(data)
  predLabels = torch.argmax(predictions, dim=1)
  acc = 100*torch.mean((predLabels == labels).float())

  # total number of trainable parameters in the model
  nParams = sum(p.numel() for p in theModel.parameters() if p.requires_grad)

  # function outputs
  return acc,nParams
