In [1]:
import torch
from torch import nn, optim
import numpy as np
import pandas as pd
import torchvision
from torchvision import *
from torch.utils.data import *
from torchsummary import summary
from PIL import *

### Backpropagation in PyTorch

In [2]:
class network(nn.Module):
    '''Small fully connected network: linear -> sigmoid -> linear.

    Args:
        input_size: number of input features fed to the hidden layer.
        hidden_size: number of units in the hidden layer.
        output_size: number of values produced by the output layer.
    '''
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden = nn.Linear(input_size, hidden_size)
        self.output = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        '''Return the output layer's result for input x (no final activation).'''
        x = self.hidden(x)
        x = torch.sigmoid(x)
        x = self.output(x)
        # the original read `returnx`, which raised NameError instead of returning
        return x

In [3]:
# you can add a whole lot of hidden layers in the network above

In [4]:
# instantiate the network: input_size=1, hidden_size=2, output_size=1
model = network(1, 2, 1)

In [5]:
# overwrite every parameter with a hand-picked value (taken from the reference diagram)
manual_values = {
    'hidden.weight': [[1], [-1]],
    'hidden.bias': [1, 2],
    'output.weight': [[1, 2]],
    'output.bias': [-1],
}
for param_name, values in manual_values.items():
    # state_dict() tensors share storage with the live parameters, so the
    # in-place slice assignment changes the model's actual weights
    model.state_dict()[param_name][:] = torch.tensor(values)

In [6]:
# a single training example: input x and its target y
x = torch.tensor([1.0])
y = torch.tensor([3.0])

In [7]:
# inspect the gradient stored on the output-layer bias; expected to be None
# at this point because no backward pass has been run yet
model.output.bias.grad

In [8]:
# define a loss function: mean squared error between prediction and target
criterion = nn.MSELoss()

In [9]:
#loss = criterion(model(x), y)

# Convolutional Neural Networks (CNN)

In [10]:
class CNN(nn.Module):
    '''Two conv -> ReLU -> max-pool stages followed by one linear output unit.'''

    def __init__(self):
        '''Assemble the layer stack and store it as ``self.main``.'''
        super().__init__()
        conv_stages = [
            # 1 input channel -> 3 feature maps; 3x3 kernel with padding=1 keeps height/width
            nn.Conv2d(in_channels=1, out_channels=3, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            # 3 feature maps -> 2 feature maps
            nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
        ]
        head = [
            nn.Flatten(),
            # 1250 flattened features = 2 channels * 625 positions, which fits e.g.
            # a 100x100 input after two 2x2 poolings (25x25) -- TODO confirm input size
            nn.Linear(1250, 1),
        ]
        self.main = nn.Sequential(*conv_stages, *head)

    def forward(self, x):
        '''Pass x through the sequential stack and return its output.'''
        return self.main(x)

In [11]:
# build the CNN; note this rebinds `model`, replacing the `network` instance created earlier
model = CNN()

#### Let's define a function to ensure our code uses the GPU if available, and defaults to the CPU if it's not

In [12]:
# check whether PyTorch can see a CUDA-capable GPU on this machine
torch.cuda.is_available()

False

In [13]:
def get_default_devices():
    '''Return the CUDA device when one is available, otherwise the CPU device.'''
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [14]:
# show which device was selected on this machine
get_default_devices()

device(type='cpu')

#### define a function to move data to chosen device

In [15]:
def to_device(data, device):
    '''Move a tensor, or every tensor in a (nested) list/tuple, to `device`.

    A list or tuple input always comes back as a list.
    '''
    if not isinstance(data, (list, tuple)):
        # leaf case: anything exposing .to(), e.g. a tensor
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

#### Define a DeviceDataLoader class that wraps our existing data loaders and moves each batch to the selected device (i.e. puts batches of data on the GPU)

In [16]:
class DeviceDataLoader:
    '''Thin wrapper around a dataloader that sends each batch through to_device.'''

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        '''Number of batches in the wrapped dataloader.'''
        return len(self.dl)

    def __iter__(self):
        '''Lazily yield each batch of the wrapped dataloader after moving it to the device.'''
        # generator: batches are moved one at a time, only when requested
        yield from (to_device(batch, self.device) for batch in self.dl)

### Gradient descent

$x_{t+1} = x_t - \eta \, \nabla f(x_t)$, where $\eta$ is the learning rate. Each training step:
- generate predictions
- calculate the loss
- compute gradients with respect to weights and biases
- adjust weights by subtracting a small quantity proportional to the gradient
- reset the gradients to zero.

#### training a model

In [17]:
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    '''Compute the loss (and optionally a metric) for one batch.

    When `opt` is given, also runs one gradient-descent step:
    backward pass, parameter update, then gradient reset.

    Args:
        model: callable producing predictions from `xb`.
        loss_func: callable taking (predictions, yb) and returning the loss.
        xb: batch of inputs.
        yb: batch of targets.
        opt: optional optimizer; training is skipped when None.
        metric: optional callable taking (predictions, yb).

    Returns:
        Tuple of (loss as a Python number, len(xb), metric result or None).
    '''
    predictions = model(xb)
    batch_loss = loss_func(predictions, yb)

    training = opt is not None
    if training:
        batch_loss.backward()   # compute gradients
        opt.step()              # update parameters
        opt.zero_grad()         # reset gradients for the next batch

    # the metric uses the predictions made before the parameter update
    metric_result = metric(predictions, yb) if metric is not None else None
    return batch_loss.item(), len(xb), metric_result

In [19]:
def fit(epochs, lr, model, loss_fn, train_dl, valid_dl, metric=None, opt_fn=None):
    '''Train `model` for a number of epochs and record validation results.

    Args:
        epochs: number of passes over `train_dl`.
        lr: learning rate passed to the optimizer.
        model: model whose parameters are optimized.
        loss_fn: loss callable forwarded to loss_batch / evaluate.
        train_dl: iterable of (xb, yb) training batches.
        valid_dl: validation data forwarded to evaluate.
        metric: optional metric callable forwarded to evaluate.
        opt_fn: optional optimizer factory called as
            opt_fn(model.parameters(), lr=lr); defaults to torch.optim.SGD.

    Returns:
        Tuple (losses, metrics): one validation loss and one validation
        metric per epoch, in epoch order.
    '''
    losses, metrics = [], []

    # instantiate the optimizer; it must be built for a custom opt_fn too,
    # not only when falling back to the SGD default
    if opt_fn is None:
        opt_fn = torch.optim.SGD
    opt = opt_fn(model.parameters(), lr=lr)

    for _ in range(epochs):
        # training
        for xb, yb in train_dl:
            loss_batch(model, loss_fn, xb, yb, opt)

        # evaluation
        # NOTE(review): `evaluate` is not defined in this part of the notebook;
        # assumed to return a (loss, total, metric) triple -- confirm
        val_loss, _total, val_metric = evaluate(model, loss_fn, valid_dl, metric)

        # record the loss and metric
        losses.append(val_loss)
        metrics.append(val_metric)

    return losses, metrics

In [20]:
def accuracy(outputs, labels):
    '''Fraction of rows whose highest-scoring column index equals the label.'''
    # index of the maximum along dim=1 is taken as the predicted class
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return n_correct / len(predicted)