In [1]:
import torch
import numpy as np
import random
import torch.nn.functional as F

# for easier reading np
np.set_printoptions(precision=3,suppress=True)

In [2]:
# Prepare the data
from sklearn import datasets
data = datasets.load_iris()
X = data.data
y = data.target
def to1hot(labels):
    """Converts an array of class labels into their 1hot encodings.
    Assumes that there are at most three classes."""
    return torch.eye(3)[labels]

print('X', X.shape, X[0])
print('y', y.shape, y[0])
print('y_1hot', to1hot(y).shape, to1hot(y)[0])

X (150, 4) [5.1 3.5 1.4 0.2]
y (150,) 0
y_1hot torch.Size([150, 3]) tensor([1., 0., 0.])


In [3]:
# Partition the data into Training and Testing (80:20 split)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

X_train, X_test = torch.from_numpy(X_train).float(), torch.from_numpy(X_test).float()
y_train, y_test = torch.from_numpy(y_train).int(), torch.from_numpy(y_test).int()

print('X_train', X_train.shape)
print('X_test', X_test.shape)

X_train torch.Size([120, 4])
X_test torch.Size([30, 4])


In [4]:
# Reading the dataset
def data_iter(batch_size, features, labels):
    num_examples = len(features)

    # The examples are read at random, in no particular order
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = indices[i:i + batch_size]
        yield features[j], labels[j]

# Check data reader
for X_batch, y_batch in data_iter(batch_size=10, features=X_train, labels=y_train):
    print('X_batch', X_batch.shape, X_batch[0])
    print('y_batch', y_batch.shape, y_batch[0])
    break

X_batch torch.Size([10, 4]) tensor([5.4000, 3.9000, 1.7000, 0.4000])
y_batch torch.Size([10]) tensor(0, dtype=torch.int32)


In [5]:
# Optimization algorithm: Stochastic Gradient Descent
def sgd(params, grads, lr):
    """Minibatch stochastic gradient descent."""
    for p, g in zip(params, grads):
        p.data -= lr * g

In [6]:
# Loss Function
def cross_entropy(y_hat, y):
    """Cross Entropy Loss."""
    loss_per_sample = F.cross_entropy(y_hat, y)
    return loss_per_sample.mean()

# Check loss
loss = cross_entropy(torch.tensor([[0.2, 0.8, 0.1],[0.6, 0.3, 0.1]]), torch.tensor([0, 1]))
print('loss =', loss)

loss = tensor(1.2344)


In [7]:
# Initializing Model Parameters
w = torch.randn(4, 3, requires_grad=True)
b = torch.randn(1, 3, requires_grad=True)
print('w', w)
print('b', b)

w tensor([[ 1.4490,  0.5984,  0.2998],
        [ 0.4253, -0.7106, -0.5121],
        [-1.0223,  0.3731,  1.1503],
        [-0.5052, -0.4552,  0.6248]], requires_grad=True)
b tensor([[ 0.8639, -0.7394,  0.6217]], requires_grad=True)


In [8]:
# Implement the model
def model(X, w, b):
    # Single Neuron Model with Softmax Activation
    Z = torch.softmax(X @ w + b, dim = 1)
    return Z

# Check model
for X_batch, y_batch in data_iter(batch_size=10, features=X_train, labels=y_train):
    out_batch = model(X_batch, w, b)
    print('X_batch', X_batch.shape, X_batch[0])
    print('out_batch', out_batch.shape, out_batch[0])
    break

X_batch torch.Size([10, 4]) tensor([6.7000, 3.3000, 5.7000, 2.1000])
out_batch torch.Size([10, 3]) tensor([0.0236, 0.0012, 0.9752], grad_fn=<SelectBackward0>)


**Training**

In [9]:
# Hyperparameters
lr = 0.01
batch_size = 10
num_epochs = 50

In [10]:
# Learning
# Initialize the parameters of the model
torch.nn.init.normal_(w, mean=0, std=0.01)
torch.nn.init.zeros_(b)

y_train = y_train.long()

for epoch in range(num_epochs):
    with torch.no_grad():
        train_loss = cross_entropy(model(X_train, w, b), y_train)
        print(f'training epoch {epoch + 1}, loss {float(train_loss):f}')

    # Train for one epoch
    for X_batch, y_batch in data_iter(batch_size=10, features=X_train, labels=y_train):
        # Use model to compute predictions
        yhat = model(X_batch, w, b)
        loss = cross_entropy(yhat, y_batch)

        # Compute gradients by back propagation
        loss.backward()

        # Update parameters using their gradient
        sgd([w, b], [w.grad, b.grad], lr)

        # Reset gradients
        w.grad = b.grad = None

training epoch 1, loss 1.098999
training epoch 2, loss 1.083689
training epoch 3, loss 1.074124
training epoch 4, loss 1.066308
training epoch 5, loss 1.058918
training epoch 6, loss 1.051566
training epoch 7, loss 1.044101
training epoch 8, loss 1.036564
training epoch 9, loss 1.028840
training epoch 10, loss 1.021043
training epoch 11, loss 1.013317
training epoch 12, loss 1.005754
training epoch 13, loss 0.998234
training epoch 14, loss 0.991069
training epoch 15, loss 0.984047
training epoch 16, loss 0.977386
training epoch 17, loss 0.971036
training epoch 18, loss 0.965052
training epoch 19, loss 0.959222
training epoch 20, loss 0.953754
training epoch 21, loss 0.948671
training epoch 22, loss 0.943633
training epoch 23, loss 0.938953
training epoch 24, loss 0.934496
training epoch 25, loss 0.930282
training epoch 26, loss 0.926220
training epoch 27, loss 0.922386
training epoch 28, loss 0.918724
training epoch 29, loss 0.915227
training epoch 30, loss 0.911887
training epoch 31, 

In [11]:
from sklearn.metrics import confusion_matrix

yhat = model(X_test, w, b).argmax(dim=1)
conf_matrix = confusion_matrix(y_test, yhat)

print('Confusion Matrix:\n', conf_matrix)

Confusion Matrix:
 [[11  0  0]
 [ 0  0  8]
 [ 0  0 11]]
