<a href="https://colab.research.google.com/github/blooming-ai/generativeai/blob/main/pytorch_basics/linear_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Classification
## Implementation using PyTorch [[1](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html), [2](https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html), [3](https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html)]

* Dataset
* Linear Classification Model
* Loss Function
* Training
* Evaluation

In [None]:
import torch
import torch.nn as nn #has all neural network modules
from torch.nn import functional as F # has many functions required for building a neural network
import numpy as np
import matplotlib.pyplot as plt

## Generate Data

In [None]:
# Generate three 2-D point clouds, one per class, and plot them.
N = 100  # number of samples per class
ones = torch.ones((N, 1), dtype=torch.int64)  # column of integer ones used to build the labels

class1 = torch.randn((N, 2))  # one (x, y) sample per row, standard normal around the origin
label1 = ones
class2 = torch.randn((N, 2)) + torch.tensor([[0, 10]])  # the (1, 2) offset is broadcast over all rows
label2 = ones * 2
class3 = torch.randn((N, 2)) + torch.tensor([[10, 0]])  # the (1, 2) offset is broadcast over all rows
label3 = ones * 3

X = torch.cat((class1, class2, class3), dim=0)  # concatenate the samples row-wise
Y = torch.cat((label1, label2, label3), dim=0)  # concatenate the labels in the same order

plt.plot(class1[:, 0].numpy(), class1[:, 1].numpy(), 'bo', label='class1')
plt.plot(class2[:, 0].numpy(), class2[:, 1].numpy(), 'rx', label='class2')
plt.plot(class3[:, 0].numpy(), class3[:, 1].numpy(), 'g+', label='class3')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
# Pass a real boolean: the string 'True' only worked because any non-empty string is truthy.
plt.grid(True, color='y')
plt.show()


## Define a Dataset and a Dataloader

In [None]:
from torch.utils.data import Dataset, DataLoader, random_split

class Dataset2D(Dataset):
    """Map-style dataset pairing feature rows with one-hot encoded class labels.

    Args:
        X: Feature tensor with one sample per row.
        class_labels: Class labels ``1, 2, 3, ...`` holding exactly one label
            per row of ``X`` (e.g. shape ``(N, 1)`` or ``(N,)``).

    Raises:
        ValueError: If the number of labels differs from the number of samples.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, X: torch.Tensor, class_labels: torch.Tensor):
        n_samples = X.size(0)
        # Check the real inputs against each other. Comparing input with the
        # generated one-hot table can never fail, and a short label tensor
        # would otherwise silently leave some one-hot rows all zero.
        if class_labels.numel() != n_samples:
            raise ValueError(
                f"expected one label per sample: got {class_labels.numel()} "
                f"labels for {n_samples} samples"
            )

        self.input = X
        self.class_labels = class_labels
        # Assumes labels are 1, 2, 3, ...; .item() extracts the Python scalar.
        self.n_labels = int(torch.max(class_labels).item())

        # Build the one-hot targets: row i gets a 1 in column (label_i - 1).
        self.output = torch.zeros((n_samples, self.n_labels))
        # scatter_ needs an int64 index with the same number of dims as the target.
        label_idx = (class_labels.reshape(-1, 1) - 1).to(torch.int64)
        self.output.scatter_(1, label_idx, 1.0)

    def __len__(self):
        """Return the number of samples."""
        return self.input.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, one_hot_label)`` pair at ``idx``."""
        return self.input[idx], self.output[idx]

dataset = Dataset2D(X, Y)
# random_split partitions a dataset into non-overlapping random subsets.
# Fractions summing to 1 may be given instead of absolute lengths; each subset
# then gets floor(frac * len(dataset)) samples.
training, testing = random_split(dataset, [0.8, 0.2])


# Sanity check: show one sample and the sizes of the two splits.
# The names avoid `input`, which would shadow the builtin input() for the
# rest of the notebook session.
sample_input, sample_output = dataset[0]
print("Sample input: ", sample_input, ", output: ", sample_output)
print("len of training: ", len(training), "len of testing: ", len(testing))



## Define model
### Linear classifier
$y = \mathbf{W} x + \mathbf{b} \qquad
x \in \mathbb{R}^2 \;\; y, \mathbf{b} \in \mathbb{R}^3 \;\; \mathbf{W} \in \mathbb{R}^{3 \times 2}$

$\text{Softmax: } z_i = \sigma(y)_i = e^{y_i} / \sum_j e^{y_j}$

In [None]:
import pdb
# nn.Module is Base class for all neural network modules. Your models should also subclass this class.
# Modules can also contain other Modules, allowing to nest them in a tree structure.
class Linear_Classifier(nn.Module):
    """Linear layer followed by a softmax over the class scores.

    Args:
        feature_dim: Number of input features per sample.
        n_labels: Number of classes, i.e. the size of the output vector.
    """

    def __init__(self, feature_dim: int = 2, n_labels: int = 3):
        super().__init__()
        self.feature_dim = feature_dim
        self.n_labels = n_labels
        self.linear = nn.Linear(feature_dim, n_labels)  # (input dim, output dim)

        # Xavier-uniform initialisation of the weight matrix only; the bias
        # keeps whatever nn.Linear set it to.
        # https://www.geeksforgeeks.org/initialize-weights-in-pytorch/
        torch.nn.init.xavier_uniform_(self.linear.weight)

    def forward(self, x):
        """Return the softmax class probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has ``feature_dim`` entries, either
                a batch ``(N, feature_dim)`` or one sample ``(feature_dim,)``.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``n_labels``; entries along that dimension sum to 1.
        """
        scores = self.linear(x)
        # Normalise over the class (last) dimension. For batched input this is
        # the same as dim=1, and it also works for a single unbatched sample.
        return F.softmax(scores, dim=-1)

    @torch.no_grad()  # disables gradient tracking; useful for inference
    def evaluate(self, input):
        """Run a forward pass without gradient tracking.

        Switches the module to evaluation mode via ``self.eval()`` and leaves
        it there; call ``train()`` again before resuming training.
        """
        self.eval()  # only matters for mode-dependent layers such as Dropout or BatchNorm
        return self(input)



### Loss Function
$l(Z, \hat{Z}) = \frac{1}{N}\sum_i (z_i - \hat{z_i})^2$

In [None]:
# Loss function: mean squared error, averaged over all elements
# (the default "mean" reduction, spelled out here).
loss_fun = torch.nn.MSELoss(reduction="mean")

### Training

In [None]:
def train(model, loss_fun, optimizer, data_loader, epochs:int = 10):
    '''
    Perform training using the given optimizer to reduce the loss_fun.

    Args:
        model: Module to train; it is switched to training mode.
        loss_fun: Callable ``loss_fun(prediction, target)`` returning a scalar tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        data_loader: Iterable yielding ``(input, target)`` batches; it is
            iterated once per epoch.
        epochs: Number of passes over ``data_loader``.

    Returns:
        List with the loss value (Python float) of every processed batch, in
        processing order.
    '''
    model.train()  # set to train mode
    # Start from clean gradients: anything left over from an earlier backward
    # pass would otherwise be added to the first batch's gradient.
    optimizer.zero_grad()

    history = []
    for _ in range(epochs):
        for inputs, targets in data_loader:
            predictions = model(inputs)  # calls the forward function
            loss = loss_fun(predictions, targets)
            history.append(loss.item())

            # Compute the gradients
            loss.backward()
            # Update the parameters using those gradients
            optimizer.step()
            # Reset explicitly, otherwise gradients accumulate across batches
            optimizer.zero_grad()

    return history


In [None]:
# Hyper-parameters
batch_size = 20       # number of samples used for each gradient computation
learning_rate = 1e-3  # optimizer step size
epochs = 200          # number of passes over the training set

# Instantiate the classifier with its default dimensions
model = Linear_Classifier()

# Serve the training split in mini-batches; shuffle=True reshuffles it at every epoch
train_dataloader = DataLoader(dataset=training, shuffle=True, batch_size=batch_size)

# RMSprop optimizer acting on all parameters of the model
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=learning_rate)

# Train the classifier and plot the loss recorded for every batch
losses = train(model, loss_fun, optimizer, data_loader=train_dataloader, epochs=epochs)
plt.plot(losses)
plt.show()

# Save the whole model object to disk
torch.save(model, "./linear_classifier.pt")

### Evaluation

In [None]:
# Reload the model written by torch.save(model, ...). The file holds the whole
# pickled module rather than a state_dict, so weights_only=False must be passed
# explicitly: from PyTorch 2.6 on, torch.load defaults to weights_only=True
# and refuses to unpickle arbitrary objects.
# NOTE: unpickling can execute arbitrary code; only load files you created or trust.
model = torch.load("./linear_classifier.pt", weights_only=False)

misclassified = 0
for test_data, test_output in testing:
    # Add a leading batch dimension for the forward pass and drop it again afterwards.
    output = model.evaluate(test_data.unsqueeze(dim=0)).squeeze()
    idx_out = torch.argmax(output).item()            # predicted class index
    idx_expected = torch.argmax(test_output).item()  # position of the 1 in the one-hot target
    if idx_out != idx_expected:
        misclassified += 1

print("Accuracy: ", round(1 - misclassified / len(testing), 2))

### H.W Draw - Classifier Lines

In [None]:
# Print the name and current values of every trainable parameter of the model.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # skip frozen parameters
    print(name, param.data)

### References for other important topics:
* [Setting different learning rates for different layers](https://stackoverflow.com/questions/73629330/what-exactly-is-meant-by-param-groups-in-pytorch)
* Managing tensor on CPU or GPU [[1](https://pytorch.org/docs/stable/notes/cuda.html), [2](https://pytorch.org/docs/stable/generated/torch.Tensor.to.html#torch.Tensor.to)]
* [Automatic computation of gradient](https://pytorch.org/blog/overview-of-pytorch-autograd-engine/#:~:text=PyTorch%20computes%20the%20gradient%20of,ways%3B%20forward%20and%20reverse%20mode.)
* [Loss function](https://pytorch.org/docs/stable/nn.functional.html#loss-functions)
* [Non-linear activation functions](https://pytorch.org/docs/stable/nn.functional.html#non-linear-activation-functions)
* [Normalization techniques](https://towardsdatascience.com/different-normalization-layers-in-deep-learning-1a7214ff71d6)