# Basics of PyTorch

Reference: https://medium.com/analytics-vidhya/a-simple-neural-network-classifier-using-pytorch-from-scratch-7ebb477422d2

## Preparing the data sets

In [None]:
from sklearn import datasets
# Load the bundled iris data set and unpack the feature matrix and label vector
iris = datasets.load_iris()
data, target = iris.data, iris.target

# Peek at the first five rows of each array
print(data[:5], target[:5], sep="\n")

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[0 0 0 0 0]


## Splitting the data into training and test sets

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
  data,
  target,
  test_size=0.33,
  random_state=42,
)

# Show the first training sample together with its label
print(X_train[:1], Y_train[:1], sep="\n")

[[5.7 2.9 4.2 1.3]]
[1]


In [None]:
# another way of splitting data
# The whole snippet below sits inside a string literal, so none of it is
# executed; it is kept only as a sketch of a torch-based split.
# NOTE(review): before enabling it, `torch` would have to be imported in an
# earlier cell, and the two random_split calls draw from the same generator one
# after the other, so features and labels would presumably be shuffled
# differently and no longer line up - confirm and split them together instead.
'''
gen_seed = torch.Generator().manual_seed(42)

train_size = int(0.7 * len(data))
test_size = len(data) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, test_size], generator=gen_seed )
train_target, test_target = torch.utils.data.random_split(target, [train_size, test_size], generator=gen_seed )

print(train_dataset[:1])
print(train_target[:1])

print(test_dataset[:1])
print(test_target[:1])
'''

## A simple data structure

In [None]:
import torch
from torch.utils.data import Dataset
import numpy as np

class Data(Dataset):
  """Tensor-backed dataset that pairs each feature row with its class label.

  Args:
    X_train: array-like whose first axis indexes samples; stored as a
      float32 tensor in ``self.X``.
    y_train: array-like of integer class labels, one per sample; stored as
      an int64 (Long) tensor in ``self.y``.

  Raises:
    ValueError: if ``X_train`` and ``y_train`` hold a different number of
      samples.
  """

  def __init__(self, X_train, y_train):
    # np.array copies and casts to float32 in one step, and (unlike .astype)
    # also accepts plain Python lists
    features = np.array(X_train, dtype=np.float32)
    labels = np.asarray(y_train)

    # fail early instead of raising an IndexError deep inside a DataLoader
    if features.shape[0] != labels.shape[0]:
      raise ValueError(
        f'X_train has {features.shape[0]} samples but y_train has {labels.shape[0]}')

    self.X = torch.from_numpy(features)

    # labels are cast to Long (int64) whatever integer width they arrive in
    self.y = torch.as_tensor(labels, dtype=torch.long)

    self.len = self.X.shape[0]

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair(s) selected by ``index``."""
    return self.X[index], self.y[index]

  def __len__(self):
    """Return the number of samples."""
    return self.len

In [None]:
# Wrap the training split in the Dataset class so a DataLoader can consume it
traindata = Data(X_train, Y_train)

# Indexing with a slice returns a (features, labels) tuple of tensors
sample_slice = traindata[25:34]
print(sample_slice)

(tensor([[5.4000, 3.0000, 4.5000, 1.5000],
        [6.2000, 3.4000, 5.4000, 2.3000],
        [5.5000, 2.3000, 4.0000, 1.3000],
        [5.4000, 3.9000, 1.7000, 0.4000],
        [5.0000, 2.3000, 3.3000, 1.0000],
        [6.4000, 2.7000, 5.3000, 1.9000],
        [5.0000, 3.3000, 1.4000, 0.2000],
        [5.0000, 3.2000, 1.2000, 0.2000],
        [5.5000, 2.4000, 3.8000, 1.1000]]), tensor([1, 2, 1, 0, 1, 2, 0, 0, 1]))


## DataLoader tool

In [None]:
from torch.utils.data import DataLoader

# Mini-batch size; the test loader further down reuses this value
batch_size = 4

# Reshuffle the training samples every epoch and load them with two worker processes
trainloader = DataLoader(
  traindata,
  batch_size=batch_size,
  shuffle=True,
  num_workers=2,
)

## Building a simple custom neural network

In [None]:
# building neural network

import torch.nn as nn

# Layer widths read by SimpleNeuralNetwork when it builds its two linear layers.
# number of input nodes = number of features (length of one row of X_train)
noin = len(X_train[0])   # 4
# number of hidden layer nodes
nohn = 20
# number of output nodes = number of classes (distinct values in y); hard-coded
noon = 3

class SimpleNeuralNetwork(nn.Module):
  """Fully connected classifier with a single sigmoid hidden layer.

  Args:
    n_in: number of input features; falls back to the notebook-level ``noin``.
    n_hidden: number of hidden units; falls back to the notebook-level ``nohn``.
    n_out: number of output classes; falls back to the notebook-level ``noon``.

  Calling ``SimpleNeuralNetwork()`` with no arguments builds the same network
  as before. The attribute names ``linear1`` / ``linear2`` are kept so state
  dicts saved earlier still load.
  """

  def __init__(self, n_in=None, n_hidden=None, n_out=None):
    super().__init__()
    # resolve the defaults at call time so the globals are only needed when
    # a size is not passed explicitly
    n_in = noin if n_in is None else n_in
    n_hidden = nohn if n_hidden is None else n_hidden
    n_out = noon if n_out is None else n_out
    self.linear1 = nn.Linear(n_in, n_hidden)
    self.linear2 = nn.Linear(n_hidden, n_out)

  def forward(self, x):
    """Return raw class scores (logits) for a batch ``x``; no softmax is applied."""
    hidden = torch.sigmoid(self.linear1(x))
    return self.linear2(hidden)

In [None]:
# Instantiate the classifier that the following cells train, save and evaluate
clf = SimpleNeuralNetwork()

In [None]:
# check the parameters
# `clf.parameters` without parentheses only prints the bound-method repr, so
# print the module itself for the layer layout and then list every learnable
# tensor together with its shape.
print(clf)
for name, param in clf.named_parameters():
  print(f'{name}: {tuple(param.shape)}')

<bound method Module.parameters of SimpleNeuralNetwork(
  (linear1): Linear(in_features=4, out_features=20, bias=True)
  (linear2): Linear(in_features=20, out_features=3, bias=True)
)>


## Configuring the loss function and the optimizer

In [None]:
# Cross-entropy loss, applied to the network's raw output scores and the integer labels
criterion = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over all of the classifier's parameters
optimizer = torch.optim.SGD(params=clf.parameters(), lr=0.1)

## Training the neural network

In [None]:
epochs = 10
for epoch in range(epochs):
  running_loss = 0.0
  num_batches = 0
  # unpack each batch directly; a loop variable called `data` would overwrite
  # the iris feature matrix defined in the first cell
  for inputs, labels in trainloader:
    # clear the gradients left over from the previous batch
    optimizer.zero_grad()
    # forward propagation
    outputs = clf(inputs)
    loss = criterion(outputs, labels)
    # backward propagation
    loss.backward()
    # optimize
    optimizer.step()
    running_loss += loss.item()
    num_batches += 1
  # display statistics: mean loss per batch for this epoch (dividing by the
  # batch count rather than a fixed 1000 keeps the number comparable across
  # batch sizes); max() guards against an empty loader
  mean_loss = running_loss / max(num_batches, 1)
  print(f'[{epoch + 1}, {num_batches:5d}] loss: {mean_loss:.5f}')

[1,    25] loss: 0.00982
[2,    25] loss: 0.00964
[3,    25] loss: 0.00912
[4,    25] loss: 0.00889
[5,    25] loss: 0.00874
[6,    25] loss: 0.00815
[7,    25] loss: 0.00812
[8,    25] loss: 0.00763
[9,    25] loss: 0.00766
[10,    25] loss: 0.00716


## Saving the weights 

In [None]:
import os

from google.colab import drive # loads a library to mount your google drive
drive.mount('/content/drive')

save_path = "/content/drive/My Drive/Colab Notebooks/models/pytorch_iris.pth"
# torch.save does not create missing folders, so make sure the target
# directory exists before writing (a no-op when it is already there)
os.makedirs(os.path.dirname(save_path), exist_ok=True)
# store only the learned parameters (the state dict), not the module object
torch.save(clf.state_dict(), save_path)

Mounted at /content/drive


In [None]:
# load: start from a freshly constructed network, then overwrite its
# parameters with the ones read back from save_path
clf = SimpleNeuralNetwork()
saved_weights = torch.load(save_path)
clf.load_state_dict(saved_weights)


## Testing the neural network on a few test samples

In [None]:
# Wrap the held-out split in the same Dataset class as the training split
testdata = Data(X_test, Y_test)

# Same batch size as the training loader; batches are drawn in shuffled order
testloader = DataLoader(
  testdata,
  batch_size=batch_size,
  shuffle=True,
  num_workers=2,
)

In [None]:
# Pull a single batch from the test loader and show its features and labels
dataiter = iter(testloader)
first_batch = next(dataiter)
inputs, labels = first_batch

print(inputs, labels, sep="\n")

tensor([[5.8000, 2.7000, 3.9000, 1.2000],
        [6.5000, 3.2000, 5.1000, 2.0000],
        [5.6000, 2.5000, 3.9000, 1.1000],
        [7.7000, 2.6000, 6.9000, 2.3000]])
tensor([1, 2, 1, 2])


In [None]:
# Score the batch, then take the index of the largest score in each row as the predicted class
outputs = clf(inputs)

__, predicted = outputs.max(dim=1)
print(predicted)

tensor([1, 2, 1, 2])


## Evaluating on the whole test set and calculating the accuracy

In [None]:
correct, total = 0, 0
# put the model in evaluation mode before measuring accuracy (this network has
# only linear layers, so its outputs are unaffected, but it is the usual habit)
clf.eval()
# no need to calculate gradients during inference
with torch.no_grad():
  # unpack each batch directly; a loop variable called `data` would overwrite
  # the iris feature matrix defined in the first cell
  for inputs, labels in testloader:
    # calculate output by running through the network
    outputs = clf(inputs)
    # get the predictions: index of the largest score in each row
    # (no `.data` needed, gradients are already disabled here)
    predicted = outputs.argmax(dim=1)
    # update results
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the {len(testdata)} test data: {100 * correct // total} %')

Accuracy of the network on the 50 test data: 94 %
