## Linear Regression

### Using implementations of linear reg from scratch

In [None]:
import numpy as np
import torch

Yields of Apples and Oranges in Pokeregions

In [None]:
# Input features, one row per region: (temp, rainfall, humidity)
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

# Regression targets: two values for each of the five rows above
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [None]:
# Rebind `inputs` and `targets` to the tensors returned by torch.from_numpy,
# then print each tensor together with its shape.
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(f"Inputs: {inputs}. Shape: {inputs.shape}")
print(f"Targets: {targets}. Shape: {targets.shape}")

In [None]:
# Start from random parameters: w is created with shape (2, 3), b with shape (2,).
# requires_grad=True so that a later backward() call populates w.grad and b.grad.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(f"Weights: {w}. Shape: {w.shape}")
print(f"Bias: {b}. Shape: {b.shape}")

In [None]:
def model(x):
    """Apply the linear map defined by the notebook-level ``w`` and ``b``.

    Evaluates ``x @ w.t() + b``, where ``@`` is matrix multiplication and
    ``t()`` is the transpose.
    """
    weighted = x @ w.t()
    return weighted + b

def mse(t1, t2):
    """Return the mean of the squared element-wise differences of t1 and t2."""
    residual = t1 - t2
    squared = residual * residual
    return squared.sum() / residual.numel()

In [None]:
# Forward pass with the current parameters.
preds = model(inputs)
print(f"Predictions: {preds}")

In [None]:
# Actual targets, printed for comparison with the predictions
print(f"Targets: {targets}")

In [None]:
# diff.numel() # Number of elements in diff tensor
# other key torch functions: torch.sum(), torch.mean(), torch.exp(), torch.log(), torch.abs(), torch.max(), torch.min(), torch.sigmoid(), torch.relu(), torch.nn.functional.cross_entropy(), torch.nn.functional.mse_loss()

In [None]:
# Mean squared error between the predictions and the targets.
mse_loss = mse(preds, targets)
print(f"Loss: {mse_loss}")

Compute gradients

Note: The loss is a function of the predictions and the targets. The predictions are in turn a function of the inputs, weights and biases. Therefore the loss is a function of the weights and biases. The loss function is also quadratic, because squaring is involved.
The main objective is to find the set of weights where the loss is minimised. The gradient indicates the rate of change of the loss, i.e. the slope of the loss function w.r.t. the weights and biases.

The geometric interpretation of a derivative is that it represents the slope of a graph, i.e. the rate of change of the loss. If the derivative is positive, the loss increases as the parameter increases; if it is negative, the loss decreases as the parameter increases.

If a gradient element is positive, increasing the element's value slightly increases the loss, and decreasing it slightly decreases the loss. The situation is reversed if the gradient element is negative: increasing the element's value slightly decreases the loss, and decreasing it slightly increases the loss.

Gradient descent thus corresponds to going down the slope and reducing the loss, along any one weight element.


In [None]:
# Backpropagate from the loss; the resulting gradients are read from
# w.grad and b.grad in the following cells.
mse_loss.backward()

In [None]:
# Show the weights next to their gradient.
print(w)
print(w.grad)
# Note: gradients are stored in the .grad property of the respective tensors and are
# used to update the weights and biases. They are accessible only after calling
# .backward() on the loss tensor.

In [None]:
# Inspect the stored gradients. The zero_() calls stay commented out here:
# the update step in the next cell still reads these values and zeroes them itself.
# w.grad.zero_()
# b.grad.zero_()
print(w.grad)
print(b.grad)

Adjust weights and biases with gradient descent

In [None]:
# One gradient-descent step, performed inside torch.no_grad() so the in-place
# parameter updates are not tracked by autograd.
with torch.no_grad():
    # Move each parameter against its gradient, scaled by 1e-5.
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    # Reset the stored gradients in place ahead of the next backward() call.
    w.grad.zero_()
    b.grad.zero_()

In [None]:
# Parameters after the update step.
print(w)
print(b)

In [None]:
# With the updated weights and biases, recompute the predictions and the loss.
preds = model(inputs)
mse_loss = mse(preds, targets)
print(f"Loss: {mse_loss}")


Train for multiple epochs

In [None]:
# Training loop for 100 epochs: forward pass, loss, backward pass, then a
# 1e-5-scaled parameter update and an in-place gradient reset.
for i in range(100):
    preds = model(inputs)
    mse_loss = mse(preds, targets)
    mse_loss.backward()
    # Updates run under no_grad so they are not tracked by autograd.
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [None]:
# Loss computed in the last loop iteration, i.e. before that iteration's
# parameter update was applied.
print(f"Loss: {mse_loss}")

In [None]:
# Predictions and loss with the parameters as they stand after training.
preds = model(inputs)
print(f"Predictions: {preds}")
mse_loss = mse(preds, targets)
print(f"Loss: {mse_loss}")

In [None]:
# Print predictions and targets one after the other for comparison
print(f"Predictions: {preds}")
print(f"Targets: {targets}")

Notes:
Counter getting stuck in local minima by starting out with a higher learning rate and tapering off to a lower one over time.
Select an ideal learning rate (the value used above is 1e-5, i.e. 10^-5) using various strategies to be discussed.

### Using Pytorch implementations of linear reg

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

In [None]:
# Input (temp, rainfall, humidity), built as float32 and converted straight to a tensor
inputs = torch.from_numpy(
    np.array(
        [
            [73, 67, 43],
            [91, 88, 64],
            [87, 134, 58],
            [102, 43, 37],
            [69, 96, 70],
            [74, 66, 43],
            [91, 88, 64],
            [88, 134, 59],
            [101, 44, 37],
            [68, 96, 71],
            [73, 66, 44],
            [92, 87, 64],
            [87, 135, 57],
            [103, 43, 36],
            [68, 97, 70],
        ],
        dtype=np.float32,
    )
)

# Targets (apples, oranges), one row per input row
targets = torch.from_numpy(
    np.array(
        [
            [56, 70],
            [81, 101],
            [119, 133],
            [22, 37],
            [103, 119],
            [57, 69],
            [80, 102],
            [118, 132],
            [21, 38],
            [104, 118],
            [57, 69],
            [82, 100],
            [118, 134],
            [20, 38],
            [102, 120],
        ],
        dtype=np.float32,
    )
)

N.B: Dataset and data loaders allow batch training.

In [None]:
# Define dataset and dataloader: the TensorDataset wraps inputs and targets
# together, and the DataLoader serves it in shuffled batches of batch_size (5).
train_ds = TensorDataset(inputs, targets)
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [None]:
# Peek at the first (inputs, targets) batch only; `break` stops after one batch.
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

In [None]:
# define model: a linear layer constructed as nn.Linear(3, 2)
model = nn.Linear(3, 2)
# print(model.weight)
# print(model.bias)
# Parameters, shown as the cell's output
list(model.parameters())

In [None]:
# generate predictions for the whole dataset with the untrained layer
preds = model(inputs)
print(preds)

In [None]:
# define loss function (F.mse_loss) and evaluate it once on the full dataset
loss_fn = F.mse_loss
mse_loss = loss_fn(model(inputs), targets)
print(mse_loss)

In [None]:
# optimise with SGD over the model's parameters, learning rate 1e-5
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [None]:
# training function 
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train ``model`` over ``train_dl`` for ``num_epochs`` epochs.

    For every batch: forward pass, loss, backward pass, optimizer step, then
    gradient reset. On every 10th epoch the loss of that epoch's final batch
    is printed.
    """
    for epoch in range(num_epochs):
        for features, expected in train_dl:
            loss = loss_fn(model(features), expected)
            loss.backward()
            opt.step()
            opt.zero_grad()
        # Only report on epochs 10, 20, 30, ...
        if (epoch + 1) % 10 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

In [None]:
# Train for 400 epochs; fit prints a loss on every 10th epoch.
fit(400, model, loss_fn, opt, train_dl)

In [None]:
# Predictions of the trained layer on the full dataset.
preds = model(inputs)
print(preds)

In [None]:
# target comparison: display the targets as the cell's output
targets

N.B. Run for as many epochs as necessary to bring the MSE below the error the modeller is willing to tolerate, e.g. if the tolerated error is 10 units from the actual value, the MSE should be lower than 100 (10 squared).
In this toy dataset, going from 100 to 400 epochs took the loss from about 50 to about 1.5. The loss at the start, with random weights, was e.g. 19639.0762 or about 9000; it varies between runs.

TO-DO: Read PyTorch docs. Try with Kaggle dataset.

# Logistic regression

Using MNIST

In [None]:
import torch
import torchvision
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt
%matplotlib inline
import torchvision.transforms as transforms
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torch.nn as nn

In [None]:
# dataset = MNIST(root='data/', download=True) original raw data
# NOTE(review): the calls below do not pass download=True — presumably the data
# is already under data/ from the commented-out call above; confirm before a fresh run.
dataset = MNIST(root='data/', train=True, transform=transforms.ToTensor()) # convert to tensor for pytorch
# Random split into 50000 training and 10000 validation samples.
train_dataset, val_dataset = random_split(dataset, [50000, 10000])
test_dataset = MNIST(root='data/', train=False, transform=transforms.ToTensor())

# Batches of 128; only the training loader shuffles.
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size)
test_loader = DataLoader(test_dataset, batch_size)

In [None]:
# dataset 
# len(dataset)
# Display the test dataset object as the cell's output.
test_dataset

In [None]:
# Report the number of samples in each split.
print(f"Train dataset: {len(train_dataset)}")
print(f"Validation dataset: {len(val_dataset)}")
print(f"Test dataset: {len(test_dataset)}")

In [None]:
# raw data, pre-transform
# dataset[0]

# matplot plot of PIL image representation of data
# image, label = dataset[0]
# plt.imshow(image, cmap='gray')
# print('Label:', label)

# tensor representation
# dataset[0]  

# Data transformation flow 
# Raw Data -> DataLoader -> Model -> Loss Function -> Optimizer -> Training Loop -> Evaluation Loop

In [None]:
# First sample of the dataset as an (image tensor, label) pair.
img_tensor, label = dataset[0]
print(img_tensor.shape, label)

In [None]:
# A patch covering indices 10..14 on the last two dimensions, followed by the
# largest and smallest values in the whole tensor.
print(img_tensor[:,10:15,10:15])
print(torch.max(img_tensor), torch.min(img_tensor))

In [None]:
# Dump the full image tensor.
print(img_tensor)

In [None]:
# Render the first slice along the leading dimension as a grayscale image.
plt.imshow(img_tensor[0], cmap='gray')

In [None]:
# Render the 10..14 patch, reshaped to a 5x5 array, as a grayscale image.
plt.imshow(img_tensor[:,10:15,10:15].numpy().reshape(5,5), cmap='gray')

In [None]:


# # Logistic regression model
# model = nn.Linear(input_size, num_classes)


# print(model.weight.shape)
# print(model.bias.shape)




In [None]:
input_size = 28*28
num_classes = 10


# Logistic regression model for MNIST

class MnistModel(nn.Module):
    """Single linear layer mapping a flattened image to ``num_classes`` scores.

    The layer is sized from the notebook-level ``input_size`` and
    ``num_classes``. The step/epoch methods are the hooks called by the
    notebook's ``fit`` and ``evaluate`` helpers.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        """Flatten ``xb`` to rows of ``in_features`` values and return the scores."""
        # Use the layer's own input width instead of a literal 784 so the
        # flattening cannot drift out of sync with input_size. -1 lets the
        # number of rows be inferred.
        xb = xb.reshape(-1, self.linear.in_features)
        return self.linear(xb)

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch."""
        images, labels = batch
        out = self(images)                  # Generate predictions
        return F.cross_entropy(out, labels)

    def validation_step(self, batch):
        """Return ``{'val_loss', 'val_acc'}`` for one ``(images, labels)`` batch."""
        images, labels = batch
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)           # Calculate accuracy
        # Detach so the collected validation losses do not keep each batch's
        # autograd graph alive; they are only averaged, never backpropagated.
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch results into plain-float epoch metrics."""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the validation loss and accuracy for ``epoch``."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))
              

In [None]:

def accuracy(outputs, labels):
    """Return, as a 0-dim tensor, the fraction of rows of ``outputs`` whose
    highest-scoring column index equals the corresponding entry of ``labels``.
    """
    preds = torch.max(outputs, dim=1).indices
    hits = torch.sum(preds == labels).item()
    return torch.tensor(hits / len(preds))

def evaluate(model, val_loader):
    """Run ``model.validation_step`` on every batch and aggregate the results.

    Args:
        model: object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch results.
    """
    # Validation never backpropagates, so run the steps with autograd
    # disabled instead of building a graph for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the validation history.

    An optimizer is built as ``opt_func(model.parameters(), lr)``. Each epoch
    runs one pass over ``train_loader`` (loss, backward, step, gradient
    reset), then evaluates on ``val_loader``, reports the result through
    ``model.epoch_end`` and appends it to the returned list.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result)
        history.append(epoch_result)
    return history

In [None]:
# Instantiate the MNIST model with its default (freshly initialised) parameters.
model = MnistModel()

In [None]:
# print(model.linear.weight.shape, model.linear.bias.shape)
# print(list(model.parameters()))

In [None]:
# for images, labels in train_loader:
#     print(labels)
#     print(images.shape)
#     outputs = model(images)
#     break

# for images, labels in train_loader:
#     print('images.shape:', images.shape)
#     outputs = model(images)
#     break

# print('outputs.shape:', outputs.shape)
# print('Sample outputs:\n', outputs[:2].data)

# loss_fn = F.cross_entropy

# # Loss for current batch of data
# loss = loss_fn(outputs, labels)
# print(loss)

In [None]:
# Baseline: validation metrics of the model before any call to fit.
result0 = evaluate(model, val_loader)
print(result0)

In [None]:
# First training run: 5 epochs at lr=0.001; keeps the per-epoch validation results.
history1 = fit(5, 0.001, model, train_loader, val_loader)

In [None]:
# Second run: 5 more epochs on the same model object at lr=0.001.
history2 = fit(5, 0.001, model, train_loader, val_loader)


In [None]:
# Third run: 5 more epochs on the same model object at lr=0.001.
history3 = fit(5, 0.001, model, train_loader, val_loader)

In [None]:
# Fourth run: 5 more epochs on the same model object at lr=0.001.
history4 = fit(5, 0.001, model, train_loader, val_loader)

In [None]:

# Stitch the pre-training evaluation and the four training runs into a single
# validation history.
history = [result0] + history1 + history2 + history3 + history4

losses = [r['val_loss'] for r in history]
accuracies = [r['val_acc'] for r in history]

# Both series are drawn on the same axes, so the axis labels, legend and title
# are set once and describe both. Previously the first ylabel/title calls were
# silently overridden and the y-axis ended up labelled 'accuracy' only.
plt.plot(losses, '-x')
plt.plot(accuracies, '-x')
plt.xlabel('epoch')
plt.ylabel('loss / accuracy')
plt.legend(['loss', 'accuracy'])
plt.title('Accuracy/Loss vs. No. of epochs')


In [None]:
# plot with seaborn styling applied to the matplotlib figure
import seaborn as sns
sns.set_style('whitegrid')

losses = [x['val_loss'] for x in history]
accuracies = [x['val_acc'] for x in history]
plt.plot(losses, '-x')
plt.plot(accuracies, '-o')
plt.xlabel('epoch')
# Both series share the axes: label the y-axis for both (the earlier
# ylabel('loss') call was overridden by ylabel('accuracy')) and make the
# title name both series, matching the legend.
plt.ylabel('loss / accuracy')
plt.legend(['loss', 'accuracy'])
plt.title('Accuracy/Loss vs. No. of epochs')


Testing

In [None]:
# First test sample: show the image and print its label.
img, label = test_dataset[0]
plt.imshow(img[0], cmap='gray')
print('Label:', label)


In [None]:
# unsqueeze(0) adds a leading dimension of size 1; display the resulting shape.
img.unsqueeze(0).shape

In [None]:
def predict_image(img, model):
    """Return the index of the largest model output for a single image.

    ``img`` is given a leading dimension of size 1 with ``unsqueeze(0)``
    before being passed to ``model``; the result is a plain Python number.
    """
    batch = img.unsqueeze(0)
    scores = model(batch)
    best = torch.max(scores, dim=1).indices
    return best[0].item()

In [None]:
# Show the first test image together with its label and the model's prediction.
img, label = test_dataset[0]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))

In [None]:
# Rebuild the test loader with batches of 256 and evaluate the model on it.
test_loader = DataLoader(test_dataset, batch_size=256)
result = evaluate(model, test_loader)
result

save model

In [None]:
# Save the model's state_dict to 'mnist-logistic.pth'.
torch.save(model.state_dict(), 'mnist-logistic.pth')

In [None]:
# Display the state_dict that was saved.
model.state_dict()

In [None]:
# Build a fresh model and load the saved state_dict into it, then display it.
new_model = MnistModel()
new_model.load_state_dict(torch.load('mnist-logistic.pth'))
new_model.state_dict()

In [None]:
# test new model: evaluate the reloaded model on the test set (batches of 256)
test_loader = DataLoader(test_dataset, batch_size=256)
result_new = evaluate(new_model, test_loader)
result_new

# Classification with CNN

On MNIST

In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
# import torchvision.transforms as transforms
from torchvision.transforms import ToTensor
from torch.utils.data import random_split
from torchvision.utils import make_grid
from torch.utils.data import DataLoader

%matplotlib inline

In [None]:
# dataset = MNIST(root='data/', download=True) original raw data #necessary first step
# Use the directly imported ToTensor: the `transforms` module import in this
# section is commented out, so `transforms.ToTensor()` only resolved when an
# earlier section had already imported `transforms` into the kernel.
dataset = MNIST(root='data/', train=True, transform=ToTensor()) # convert to tensor for pytorch
# Random split into 50000 training and 10000 validation samples.
train_dataset, val_dataset = random_split(dataset, [50000, 10000])
test_dataset = MNIST(root='data/', train=False, transform=ToTensor())

# dataloaders - for batches
batch_size = 128 # explore batch size in multiples of 2.
train_loader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True)
# The validation loader uses twice the training batch size.
val_loader = DataLoader(val_dataset, batch_size * 2, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size)

In [None]:
# EDA: show the first training batch as one image grid, 16 images per row,
# then stop after that single batch.

for images, _ in train_loader:
    print('images.shape:', images.shape)
    plt.figure(figsize=(16,8))
    plt.axis('off')
    # permute((1, 2, 0)) moves the grid's first dimension last before imshow
    # (presumably channels-first -> channels-last; confirm against make_grid).
    plt.imshow(make_grid(images, nrow=16).permute((1, 2, 0)))
    break

In [None]:
def accuracy(outputs, labels):
    """Return, as a 0-dim tensor, the share of rows of ``outputs`` whose
    highest-scoring column index equals the corresponding entry of ``labels``.
    """
    predicted = torch.max(outputs, dim=1).indices
    n_correct = torch.sum(predicted == labels).item()
    return torch.tensor(n_correct / len(predicted))

class MnistModel(nn.Module):
    """Feedforward NN with one hidden layer"""

    def __init__(self, input_size, hidden_size, out_size) -> None:
        super().__init__()
        # input -> hidden
        self.linear1 = nn.Linear(input_size, hidden_size)
        # hidden -> output
        self.linear2 = nn.Linear(hidden_size, out_size)

    def forward(self, xb):
        """Flatten each sample, then apply linear1 -> ReLU -> linear2."""
        flat = xb.view(xb.shape[0], -1)
        hidden = F.relu(self.linear1(flat))
        return self.linear2(hidden)

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch."""
        images, labels = batch
        return F.cross_entropy(self(images), labels)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one batch."""
        images, labels = batch
        scores = self(images)
        return {
            'val_loss': F.cross_entropy(scores, labels).detach(),
            'val_acc': accuracy(scores, labels),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses and accuracies into plain floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the validation loss and accuracy recorded for ``epoch``."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

# 28*28 = 784; the next cell assigns input_size again with the same value.
input_size = 28*28

In [None]:
# Layer sizes passed to the model constructor
input_size = 784
hidden_size = 32
num_classes = 10

model = MnistModel(input_size, hidden_size=hidden_size, out_size=num_classes)

In [None]:
# Display the model object as the cell's output.
model

In [None]:
# Print the shape of every parameter tensor of the model.
for t in model.parameters():
    print(t.shape)

In [None]:
# initial weights and biases: loss of the untrained model on the first batch only

for images, labels in train_loader:
    outputs = model(images)
    loss = F.cross_entropy(outputs, labels)
    print('Loss:', loss.item())
    break

# Shape of that batch's outputs and its first two rows.
print('outputs.shape:', outputs.shape)
print('Sample outputs:\n', outputs[:2].data)

In [None]:
# Check whether torch reports CUDA as available.
torch.cuda.is_available()

In [None]:
def get_default_device():
    """Return ``torch.device('cuda')`` when CUDA is available, else the CPU device."""
    use_cuda = torch.cuda.is_available()
    return torch.device('cuda') if use_cuda else torch.device('cpu')
    
def to_device(data, device):
    """Move tensor(s) to chosen device.

    A list or tuple is processed element by element (recursively) and comes
    back as a list; anything else is moved with
    ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

# Pick the device once and display it as the cell's output.
device = get_default_device()
device

In [None]:
# Move the first training batch's images to `device` and print where they
# ended up; `break` stops after that one batch.
for images, labels in train_loader:
    print(images.shape)
    images = to_device(images, device)
    print(images.device)
    break

In [None]:
class DeviceDataLoader():
    """Wrap a dataloader so that every batch it yields is moved to a device"""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches in the wrapped dataloader"""
        return len(self.dl)

    def __iter__(self):
        """Yield each batch of the wrapped dataloader after moving it to the device"""
        for batch in self.dl:
            yield to_device(batch, self.device)
    
# NB  data and model parameters should be on the same device.

In [None]:
# Rebind the training and validation loaders to wrappers that move each batch
# to `device` as it is yielded.
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)

In [None]:
def evaluate(model, val_loader):
    """Run ``model.validation_step`` on every batch and aggregate the results.

    Args:
        model: object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch results.
    """
    # Validation never backpropagates, so run the steps with autograd
    # disabled instead of building a graph for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the validation history.

    An optimizer is built as ``opt_func(model.parameters(), lr)``. Each epoch
    runs one pass over ``train_loader`` (loss, backward, step, gradient
    reset), then evaluates on ``val_loader``, reports the result through
    ``model.epoch_end`` and appends it to the returned list.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result)
        history.append(epoch_result)
    return history

In [None]:
# The wrapped loaders yield batches on `device`, so the model's parameters
# must be moved there as well before the first evaluation; otherwise the
# forward pass mixes devices whenever `device` is not the CPU.
model = to_device(model, device)

# Baseline entry: validation metrics before any training.
history = [evaluate(model, val_loader)]
history

In [None]:
# Train for 5 epochs at lr=0.5 and append the per-epoch results to history.
history += fit(5, 0.5, model, train_loader, val_loader)

In [None]:
# Train for 5 more epochs at the lower lr=0.1 and append the results to history.
history += fit(5, 0.1, model, train_loader, val_loader)

In [None]:
# plot the validation loss recorded in each history entry
losses = [x['val_loss'] for x in history]
plt.plot(losses, '-x')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss vs. No. of epochs')

In [None]:
# Plot the validation accuracy recorded in each history entry. Only the
# accuracy series is drawn in this cell, so the legend and the title name
# accuracy alone; a two-entry legend would label this line as 'loss'.
accuracies = [x['val_acc'] for x in history]
plt.plot(accuracies, '-x')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['accuracy'])
plt.title('Accuracy vs. No. of epochs')

In [None]:
# Improve model performance by adding more hidden layers, increasing hidden layer size, or training for longer.

On Anime face

In [None]:
# import env: load variables via python-dotenv, then read the data directory
import os
from dotenv import load_dotenv
load_dotenv()

# NOTE(review): os.getenv returns None when ANIME_DIR is not set; later cells
# use DATA_DIR as a path prefix, so confirm the variable is defined.
DATA_DIR = os.getenv('ANIME_DIR')

In [None]:

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import os


In [None]:
# Show the first ten entries of the images folder. The slice has to be applied
# to the directory listing itself: print() returns None, so slicing its result
# raised a TypeError. os.path.join builds the path whether or not DATA_DIR
# ends with a separator.
print(os.listdir(os.path.join(DATA_DIR, 'images'))[:10])