# Barebones Implementation

In [1]:
import pandas as pd
import torch
from torch import Tensor

In [2]:
# Read the Iris CSV and discard its "Id" column so that only the measurement
# columns and the Species label are carried into the following cells.
data = pd.read_csv("./data/IRIS.csv").drop(columns="Id")

In [3]:
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Prepare Data

In [4]:
# Give every distinct species name an integer class index, numbered in the
# order in which the names first appear in the table.
target_map = {
    species: class_index
    for class_index, species in enumerate(data["Species"].unique())
}

# Features are every column except the label; labels are the species names
# translated to their class indices.
X_numpy = data.drop(columns="Species").to_numpy()
y_numpy = data["Species"].map(target_map).to_numpy()

# Features are stored as float32; the label tensor keeps the dtype pandas
# produced for the mapped column.
X = torch.tensor(X_numpy, dtype=torch.float32)
y = torch.tensor(y_numpy)

### One-Hot-Encode Class Labels

In [5]:
def one_hot_encode(vector, n_classes):
    """Build a one-hot matrix with one row per entry of ``vector``.

    Args:
        vector: one-dimensional tensor of class indices; values are cast to
            int64 before use and are assumed to lie in ``[0, n_classes)``.
        n_classes: number of columns of the returned matrix.

    Returns:
        An int64 tensor of shape ``(vector.shape[0], n_classes)`` that holds
        a 1 in column ``vector[i]`` of row ``i`` and 0 everywhere else.
    """
    n_rows = vector.shape[0]
    # scatter needs an index with the same number of dimensions as the
    # destination, so the class indices are turned into a single column.
    column_index = vector.type(torch.LongTensor)[:, None]
    encoded = torch.zeros((n_rows, n_classes), dtype=torch.int64)
    return encoded.scatter(1, column_index, 1)

# The number of classes is taken from the label mapping instead of a literal,
# so the encoding width always matches the labels actually present in the data.
y_one_hot = one_hot_encode(y, len(target_map))

### Initialize Model Parameters

In [6]:
# Seed PyTorch's global random number generator so the randomly initialised
# parameters created in the following cells are reproducible.
torch.manual_seed(-999)

<torch._C.Generator at 0x12554d930>

In [7]:
# Parameter shapes are derived from the data rather than hard-coded: one
# weight row per feature column of X, and one weight column (and one bias
# entry) per class column of y_one_hot.
w = torch.rand((X.shape[1], y_one_hot.shape[1]))
b = torch.rand(y_one_hot.shape[1])

### Define Softmax Activation and Cross Entropy Loss Functions

In [8]:
def softmax_activation(z: Tensor) -> Tensor:
    """Apply a numerically stable softmax along dimension 1 of ``z``.

    Args:
        z: tensor of logits; the softmax is taken across dimension 1
            (each row of a 2-D input).

    Returns:
        A tensor of the same shape as ``z`` whose entries along dimension 1
        are non-negative and sum to 1.
    """
    # Softmax is unchanged by adding a constant to every entry of a row, so
    # the row maximum is subtracted first. The largest exponent is then 0,
    # which keeps exp() from overflowing to inf and the result from becoming
    # NaN for large logits. The shift is detached because it acts only as a
    # constant offset and does not need to be tracked by autograd.
    row_max: Tensor = torch.amax(z, dim=1, keepdim=True).detach()
    exponentials: Tensor = torch.exp(z - row_max)
    exponentials_row_sums: Tensor = torch.sum(exponentials, dim=1, keepdim=True)
    return exponentials / exponentials_row_sums

def cross_entropy_loss(targets: Tensor, activations: Tensor) -> Tensor:
    """Mean negative log of the target-weighted activations.

    Args:
        targets: per-sample weights over classes, one row per sample
            (the notebook passes one-hot encoded labels).
        activations: per-sample class scores with the same shape as
            ``targets`` (the notebook passes softmax outputs).

    Returns:
        A scalar tensor: the average over rows of
        ``-log(sum_j targets[i, j] * activations[i, j])``.
    """
    # With one-hot targets this sum picks out the activation of the true class.
    selected: Tensor = (targets * activations).sum(dim=1)
    per_sample_loss: Tensor = -selected.log()
    return per_sample_loss.mean()

### Simple Training Loop

In [9]:
n_iterations = 100
learning_rate = 0.1

# Full-batch gradient descent using gradients written out by hand.
for i in range(1, n_iterations + 1):
    # Forward pass: logits -> class probabilities -> scalar loss.
    Z = X @ w + b
    predictions = softmax_activation(Z)
    loss = cross_entropy_loss(y_one_hot, predictions)

    # For softmax followed by mean cross-entropy, the gradient with respect
    # to the logits is (predictions - targets) averaged over the samples.
    errors = predictions - y_one_hot
    w_gradients = (X.t() @ errors) / X.shape[0]
    b_gradients = errors.mean(dim=0)

    # Step against the gradient; w and b are updated in place.
    w -= learning_rate * w_gradients
    b -= learning_rate * b_gradients

    # The reported loss was computed before this iteration's update.
    if i % 10 == 0:
        print("Loss at iteration {}: {}".format(i, loss))
    

Loss at iteration 10: 0.6981450319290161
Loss at iteration 20: 0.6961764693260193
Loss at iteration 30: 0.6425224542617798
Loss at iteration 40: 0.602511465549469
Loss at iteration 50: 0.5691211223602295
Loss at iteration 60: 0.5393685698509216
Loss at iteration 70: 0.5117704272270203
Loss at iteration 80: 0.48551255464553833
Loss at iteration 90: 0.4601267874240875
Loss at iteration 100: 0.4353489577770233


# Free Differentiation with PyTorch's Autograd

In [10]:
# Same parameter shapes as the hand-written model, derived from the data
# rather than hard-coded. requires_grad=True asks autograd to record the
# operations on these tensors so that backward() can fill in their .grad.
w_autograd = torch.rand((X.shape[1], y_one_hot.shape[1]), requires_grad=True)
b_autograd = torch.rand(y_one_hot.shape[1], requires_grad=True)

In [11]:
# backward() adds to any gradient already stored in .grad, so clear both
# gradients first. This keeps the cell idempotent: re-running it still gives
# the gradient of a single pass, which is what the following cells compare
# against the hand-derived formula.
w_autograd.grad = None
b_autograd.grad = None

# One forward pass followed by backpropagation of the scalar loss.
Z = torch.mm(X, w_autograd) + b_autograd
predictions = softmax_activation(Z)
loss = cross_entropy_loss(y_one_hot, predictions)
loss.backward()

In [12]:
# Gradient of the loss with respect to the weights, as filled in by backward().
print(w_autograd.grad)

tensor([[-0.5716,  1.4152, -0.8436],
        [-0.5902,  0.8152, -0.2250],
        [ 0.2740,  0.8346, -1.1086],
        [ 0.1701,  0.2812, -0.4513]])


In [13]:
# The same weight gradient computed with the hand-derived formula used in the
# barebones training loop, for comparison with the autograd result above.
# detach() takes the predictions out of the autograd graph for this calculation.
print(-torch.mm(X.transpose(0, 1), y_one_hot - predictions.detach()) / X.shape[0])

tensor([[-0.5716,  1.4152, -0.8436],
        [-0.5902,  0.8152, -0.2250],
        [ 0.2740,  0.8346, -1.1086],
        [ 0.1701,  0.2812, -0.4513]])


In [14]:
n_iterations = 100
learning_rate = 0.1

# Full-batch gradient descent where autograd supplies the gradients.
for i in range(1, n_iterations + 1):
    # backward() accumulates into .grad, so clear whatever the previous
    # iteration (or an earlier cell) left behind before the new pass.
    for parameter in (w_autograd, b_autograd):
        if parameter.grad is not None:
            parameter.grad.zero_()

    # Forward pass and backpropagation.
    Z = X @ w_autograd + b_autograd
    predictions = softmax_activation(Z)
    loss = cross_entropy_loss(y_one_hot, predictions)
    loss.backward()

    # Update the parameters in place without recording the update itself in
    # the autograd graph.
    with torch.no_grad():
        w_autograd.sub_(learning_rate * w_autograd.grad)
        b_autograd.sub_(learning_rate * b_autograd.grad)

    # The reported loss was computed before this iteration's update.
    if i % 10 == 0:
        print("Loss at iteration {}: {}".format(i, loss))

Loss at iteration 10: 0.954537034034729
Loss at iteration 20: 0.7833609580993652
Loss at iteration 30: 0.7053467035293579
Loss at iteration 40: 0.6537990570068359
Loss at iteration 50: 0.6124182343482971
Loss at iteration 60: 0.5758307576179504
Loss at iteration 70: 0.5418170690536499
Loss at iteration 80: 0.5093513131141663
Loss at iteration 90: 0.47795018553733826
Loss at iteration 100: 0.4474251866340637


# PyTorch Neural Network Module

In [15]:
# A single fully connected layer mapping 4 input features to 3 output scores,
# wrapped in a Sequential container.
model = torch.nn.Sequential(torch.nn.Linear(in_features=4, out_features=3))

In [16]:
# Plain stochastic gradient descent over every parameter of the model.
sgd_optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [17]:
# NOTE(review): this rebinds the name `cross_entropy_loss`, which earlier in
# this notebook refers to the hand-written function taking
# (one-hot targets, probabilities). From this cell on the name refers to a
# torch.nn.CrossEntropyLoss object, which the next cell calls as
# (model outputs, integer labels). Re-running the earlier training cells after
# this one, without first re-running the cell that defines the function, would
# call this object instead. Consider a distinct name.
cross_entropy_loss = torch.nn.CrossEntropyLoss()

In [18]:
n_iterations = 100

# Training loop using the nn module, the built-in loss and the SGD optimizer.
for i in range(1, n_iterations + 1):
    # Clear the gradients accumulated by the previous iteration.
    sgd_optimizer.zero_grad()

    # The model output is passed to the loss directly, together with the
    # integer class labels y (not the one-hot matrix).
    Z = model(X)
    loss = cross_entropy_loss(Z, y)

    # Backpropagate, then let the optimizer update the model parameters.
    loss.backward()
    sgd_optimizer.step()

    # The reported loss was computed before this iteration's update.
    if i % 10 == 0:
        print("Loss at iteration {}: {}".format(i, loss))

Loss at iteration 10: 0.8743402361869812
Loss at iteration 20: 0.7898827791213989
Loss at iteration 30: 0.7347971200942993
Loss at iteration 40: 0.6914253830909729
Loss at iteration 50: 0.6535581946372986
Loss at iteration 60: 0.6185600161552429
Loss at iteration 70: 0.5851718783378601
Loss at iteration 80: 0.5527639985084534
Loss at iteration 90: 0.5210314989089966
Loss at iteration 100: 0.48985880613327026


In [19]:
# Turn the model's output scores into per-row class probabilities.
torch.softmax(model(X), dim=1)

tensor([[9.0356e-01, 9.5569e-02, 8.7547e-04],
        [8.4530e-01, 1.5261e-01, 2.0888e-03],
        [8.8109e-01, 1.1729e-01, 1.6244e-03],
        [8.3592e-01, 1.6093e-01, 3.1513e-03],
        [9.1098e-01, 8.8168e-02, 8.5557e-04],
        [8.9638e-01, 1.0269e-01, 9.2407e-04],
        [8.7987e-01, 1.1816e-01, 1.9702e-03],
        [8.8006e-01, 1.1855e-01, 1.3846e-03],
        [8.2086e-01, 1.7505e-01, 4.0849e-03],
        [8.5133e-01, 1.4667e-01, 2.0017e-03],
        [9.1331e-01, 8.6087e-02, 5.9965e-04],
        [8.6247e-01, 1.3540e-01, 2.1356e-03],
        [8.5283e-01, 1.4512e-01, 2.0530e-03],
        [8.8803e-01, 1.1010e-01, 1.8690e-03],
        [9.5678e-01, 4.3091e-02, 1.3126e-04],
        [9.5093e-01, 4.8858e-02, 2.1680e-04],
        [9.3519e-01, 6.4429e-02, 3.8330e-04],
        [8.9702e-01, 1.0198e-01, 9.9740e-04],
        [8.9747e-01, 1.0187e-01, 6.6552e-04],
        [9.1237e-01, 8.6801e-02, 8.3276e-04],
        [8.5724e-01, 1.4139e-01, 1.3694e-03],
        [8.9733e-01, 1.0159e-01, 1