# Bare-Bones Implementation

In [1]:
import pandas as pd
import torch
from torch import Tensor

In [2]:
data = pd.read_csv("./data/Iris.csv").drop("Id", axis=1)

In [3]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Prepare Data

In [4]:
X_numpy = data.drop("Species", axis=1).values

target_map = {val: index for index, val in enumerate(data.Species.unique())}
y_numpy = data.Species.map(target_map).values

X = torch.tensor(X_numpy, dtype=torch.float32)
y = torch.tensor(y_numpy)

In [5]:
target_map

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

### One-Hot-Encode Class Labels

In [6]:
def one_hot_encode(vector):
    n_classes = len(vector.unique())
    one_hot = torch.zeros((vector.shape[0], n_classes))\
        .type(torch.LongTensor)  # 1
    return one_hot.scatter(
        1, vector.type(torch.LongTensor).unsqueeze(1), 1
    )


y_one_hot = one_hot_encode(y)

## Create Train and Test Sets

In [7]:
torch.manual_seed(-999)

<torch._C.Generator at 0x1287e6a70>

In [8]:
random_indices = torch.randperm(X.shape[0])

n_train = int(0.8 * X.shape[0])
X_train = X[random_indices[:n_train]]
y_train = y[random_indices[:n_train]]
y_train_one_hot = y_one_hot[random_indices[:n_train]]

X_test = X[random_indices[n_train:]]
y_test = y[random_indices[n_train:]]
y_test_one_hot = y_one_hot[random_indices[n_train:]]

### Initialize Model Parameters

In [9]:
w = torch.rand((4, 3))
b = torch.rand(3)

In [10]:
w

tensor([[0.2796, 0.1976, 0.3208],
        [0.7487, 0.3949, 0.4665],
        [0.2918, 0.6943, 0.9894],
        [0.5497, 0.1376, 0.7568]])

In [11]:
b

tensor([0.6119, 0.7690, 0.1274])

### Define Softmax Activation and Cross Entropy Loss Functions

In [12]:
def softmax_activation(z):
    exponentials = torch.exp(z)
    exponentials_row_sums = torch.sum(exponentials, axis=1).unsqueeze(1)
    return exponentials / exponentials_row_sums

def cross_entropy_loss(y_one_hot, activations):
    return torch.mean(-torch.log(torch.sum(y_one_hot * activations, axis=1)))

### Simple Training Loop

In [13]:
n_iterations = 250
learning_rate = 0.1
lambda_param = 0.01
for i in range(1, n_iterations + 1):

    Z = torch.mm(X_train, w) + b
    A = softmax_activation(Z)
    l2_regularization = torch.sum(w ** 2)
    loss = cross_entropy_loss(y_train_one_hot, A) \
           + lambda_param * l2_regularization
    w_gradients = -torch.mm(X_train.transpose(0, 1), y_train_one_hot - A) / n_train \
                  + (2 * lambda_param * w)
    b_gradients = -torch.mean(y_train_one_hot - A, axis=0)

    w -= learning_rate * w_gradients
    b -= learning_rate * b_gradients

    if i == 1 or i % 25 == 0:
        print("Loss at iteration {}: {}".format(i, loss))

test_predictions = torch.argmax(
    softmax_activation(torch.mm(X_test, w) + b), axis=1
)
test_accuracy = float(sum(test_predictions == y_test)) / y_test.shape[0]
print("\nFinal Test Accuracy: {}".format(test_accuracy))

Loss at iteration 1: 1.1546878814697266
Loss at iteration 25: 0.7025735378265381
Loss at iteration 50: 0.5796783566474915
Loss at iteration 75: 0.5690209269523621
Loss at iteration 100: 0.49405404925346375
Loss at iteration 125: 0.47681987285614014
Loss at iteration 150: 0.43222129344940186
Loss at iteration 175: 0.41156941652297974
Loss at iteration 200: 0.3894343376159668
Loss at iteration 225: 0.37786394357681274
Loss at iteration 250: 0.3720541298389435

Final Test Accuracy: 0.9666666666666667


# Free Differentiation with PyTorch's Autograd

In [14]:
w_autograd = torch.rand((4, 3), requires_grad=True)
b_autograd = torch.rand(3, requires_grad=True)

In [15]:
Z = torch.mm(X, w_autograd) + b_autograd
A = softmax_activation(Z)
lambda_param = 0.01
l2_regularization = torch.sum(w_autograd ** 2)
loss = cross_entropy_loss(y_one_hot, A) + lambda_param * l2_regularization
loss.backward()

In [16]:
w_autograd.grad

tensor([[-0.9056, -1.9042,  2.8366],
        [-0.7511, -0.8797,  1.6608],
        [ 0.0596, -1.3847,  1.3582],
        [ 0.1005, -0.4149,  0.3504]])

In [17]:
-torch.mm(X.transpose(0, 1), y_one_hot - A) / X.shape[0] \
    + 2 * lambda_param * w_autograd

tensor([[-0.9056, -1.9042,  2.8366],
        [-0.7511, -0.8797,  1.6608],
        [ 0.0596, -1.3847,  1.3582],
        [ 0.1005, -0.4149,  0.3504]], grad_fn=<AddBackward0>)

In [18]:
b_autograd.grad

tensor([-0.2048, -0.3209,  0.5257])

In [19]:
-torch.mean(y_one_hot - A, axis=0)

tensor([-0.2048, -0.3209,  0.5257], grad_fn=<NegBackward>)

In [20]:
n_iterations = 250
learning_rate = 0.1
for i in range(1, n_iterations + 1):
    
    Z = torch.mm(X, w_autograd) + b_autograd
    A = softmax_activation(Z)
    l2_regularization = torch.sum(w_autograd ** 2)
    loss = cross_entropy_loss(y_one_hot, A) \
           + lambda_param * l2_regularization
    
    if w_autograd.grad is not None:
        w_autograd.grad.zero_()
    if b_autograd.grad is not None:
        b_autograd.grad.zero_()
        
    loss.backward()
    
    with torch.no_grad():
        w_autograd -= learning_rate * w_autograd.grad
        b_autograd -= learning_rate * b_autograd.grad
    
    if i == 1 or i % 25 == 0:
        print("Loss at iteration {}: {}".format(i, loss))

test_predictions = torch.argmax(
    softmax_activation(torch.mm(X_test, w_autograd) + b_autograd), axis=1
)
test_accuracy = float(sum(test_predictions == y_test)) / y_test.shape[0]
print("\nFinal Test Accuracy: {}".format(test_accuracy))

Loss at iteration 1: 2.50476336479187
Loss at iteration 25: 0.8208405375480652
Loss at iteration 50: 0.6555004119873047
Loss at iteration 75: 0.6429731845855713
Loss at iteration 100: 0.5476508140563965
Loss at iteration 125: 0.5351016521453857
Loss at iteration 150: 0.4730842113494873
Loss at iteration 175: 0.4522818326950073
Loss at iteration 200: 0.41606056690216064
Loss at iteration 225: 0.3964562714099884
Loss at iteration 250: 0.38062769174575806

Final Test Accuracy: 0.9666666666666667


# PyTorch Neural Network Module

In [21]:
model = torch.nn.Sequential(
    torch.nn.Linear(4, 3)
)

In [22]:
learning_rate = 0.1
lambda_param = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=lambda_param)

In [23]:
loss_function = torch.nn.CrossEntropyLoss()

In [24]:
n_iterations = 250
for i in range(1, n_iterations + 1):
    Z = model(X_train)
    loss = loss_function(Z, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if i == 1 or i % 25 == 0:
        print("Loss at iteration {}: {}".format(i, loss))
        
test_predictions = torch.argmax(
    torch.softmax(model(X_test), 1), axis=1
)
test_accuracy = float(sum(test_predictions == y_test)) / y_test.shape[0]
print("\nFinal Test Accuracy: {}".format(test_accuracy))

Loss at iteration 1: 0.9752714037895203
Loss at iteration 25: 0.5917924046516418
Loss at iteration 50: 0.5611095428466797
Loss at iteration 75: 0.4597735106945038
Loss at iteration 100: 0.4310804307460785
Loss at iteration 125: 0.3661951720714569
Loss at iteration 150: 0.3313466012477875
Loss at iteration 175: 0.29825714230537415
Loss at iteration 200: 0.2813898026943207
Loss at iteration 225: 0.27167195081710815
Loss at iteration 250: 0.26376885175704956

Final Test Accuracy: 0.9666666666666667
