# 0.Import Libraries

In [5]:
import torch

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

plt.style.use('ggplot')
from pylab import rcParams
rcParams['figure.figsize']=24,10

# 1.First Neural Network

In [6]:
import torch.nn as nn

In [7]:
input_tensor=torch.tensor([0.34, 0.45, -0.23])

In [8]:
linear_layer = nn.Linear(in_features=3, out_features=2)

In [9]:
output = linear_layer(input_tensor)
output

tensor([-0.2865,  0.3412], grad_fn=<ViewBackward0>)

In [10]:
linear_layer.weight

Parameter containing:
tensor([[ 0.4043, -0.1267,  0.5539],
        [ 0.3641,  0.5579, -0.4660]], requires_grad=True)

In [11]:
linear_layer.bias

Parameter containing:
tensor([-0.2396, -0.1408], requires_grad=True)

# 2.Stacking Layers with nn.Sequentials

In [12]:
model_one=nn.Sequential(nn.Linear(10,18),
                        nn.Linear(18,20),
                        nn.Linear(20,25))

# 3.Activation Functions
## 3.1 Sigmoid

In [13]:
input_tensor =  torch.tensor([[6.0]])
sigmoid = nn.Sigmoid()
output = sigmoid(input_tensor)

In [14]:
output

tensor([[0.9975]])

## 3.2 Softmax 

In [15]:
input_tensor = torch.tensor([[4.3,6.1,2.3]])

probabilities = nn.Softmax(dim=-1)
output_tensor = probabilities(input_tensor)

output_tensor

tensor([[0.1392, 0.8420, 0.0188]])

# 4.Forward Pass
## 4.1 Binary Classification

In [16]:
input_tensor = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Implement a small neural network for binary classification
model = nn.Sequential(
  nn.Linear(8,1),
  nn.Sigmoid()
)

output = model(input_tensor)
print(output)

tensor([[0.1803]], grad_fn=<SigmoidBackward0>)


## 4.2 Multi Class Classification

In [17]:
input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Update network below to perform a multi-class classification with four labels
model = nn.Sequential(
  nn.Linear(11, 20),
  nn.Linear(20, 12),
  nn.Linear(12, 6),
  nn.Linear(6, 4), 
  nn.Softmax(dim=-1)
)

output = model(input_tensor)
print(output)

tensor([[0.2444, 0.1882, 0.3356, 0.2318]], grad_fn=<SoftmaxBackward0>)


## 4.3 Regression

In [18]:
input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Implement a neural network with exactly four linear layers
model = nn.Sequential(nn.Linear(11,8),
                      nn.Linear(8,5),
                      nn.Linear(5,2),
                      nn.Linear(2,1))

output = model(input_tensor)
print(output)

tensor([[-0.7540]], grad_fn=<AddmmBackward0>)


# 5.Loss Function

In [19]:
import torch.nn.functional as F

In [20]:
F.one_hot(torch.tensor(0), num_classes=3)

tensor([1, 0, 0])

In [21]:
F.one_hot(torch.tensor(1), num_classes=3)

tensor([0, 1, 0])

In [22]:
F.one_hot(torch.tensor(2), num_classes=3)

tensor([0, 0, 1])

## 5.1 Cross Entrophy Loss

In [23]:
from torch.nn import CrossEntropyLoss

scores= torch.tensor([[-0.1211, 0.1059]])
one_hot_target = torch.tensor([[1,0]])

criterion = CrossEntropyLoss()
criterion(scores.double(), one_hot_target.double())

tensor(0.8131, dtype=torch.float64)

## 5.2 MSELoss

In [24]:
y_hat = np.array(10)
y = np.array(1)

mse_numpy = np.mean((y_hat - y) ** 2)

criterion = nn.MSELoss()

mse_torch = criterion(torch.tensor(y_hat).float(), torch.tensor(y).float())
print(mse_torch)

tensor(81.)


# 6.Using derivates to update model parameters

In [25]:
input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3]])
target = torch.Tensor([[3, 3.2]])

model = nn.Sequential(nn.Linear(8,6),
                     nn.Linear(6,4),
                     nn.Linear(4,2))

prediction = model(input_tensor)

In [26]:
prediction

tensor([[ 1.1152, -0.3552]], grad_fn=<AddmmBackward0>)

In [27]:
criterion = CrossEntropyLoss()
loss = criterion(prediction, target)
loss.backward()

In [28]:
model[0].weight.grad, model[0].bias.grad

(tensor([[-0.4898, -0.6531, -0.9796, -1.1429, -1.6327, -1.9592, -0.3265, -0.4898],
         [-2.2622, -3.0163, -4.5245, -5.2786, -7.5408, -9.0490, -1.5082, -2.2622],
         [-0.1802, -0.2402, -0.3604, -0.4204, -0.6006, -0.7207, -0.1201, -0.1802],
         [-2.0375, -2.7167, -4.0750, -4.7542, -6.7917, -8.1501, -1.3583, -2.0375],
         [ 0.3392,  0.4522,  0.6783,  0.7914,  1.1306,  1.3567,  0.2261,  0.3392],
         [ 0.0829,  0.1106,  0.1658,  0.1935,  0.2764,  0.3317,  0.0553,  0.0829]]),
 tensor([-0.1633, -0.7541, -0.0601, -0.6792,  0.1131,  0.0276]))

In [29]:
model[1].weight.grad, model[1].bias.grad

(tensor([[-2.8263,  0.2984, -2.4086, -1.7226,  0.1301,  1.8478],
         [-0.2772,  0.0293, -0.2363, -0.1690,  0.0128,  0.1812],
         [ 7.6821, -0.8110,  6.5468,  4.6822, -0.3535, -5.0225],
         [ 4.2697, -0.4508,  3.6387,  2.6024, -0.1965, -2.7915]]),
 tensor([ 0.5447,  0.0534, -1.4804, -0.8228]))

In [30]:
model[2].weight.grad, model[2].bias.grad

(tensor([[ 4.6433, -2.3961, -6.1969,  7.3562],
         [-4.6433,  2.3961,  6.1969, -7.3562]]),
 tensor([ 2.0413, -2.0413]))

In [31]:
lr = 0.001

weight = model[0].weight
weight_grad = model[0].weight.grad
weight = weight - lr * weight_grad

bias = model[0].bias
bias_grad = model[0].bias.grad
bias = bias - lr * bias_grad

In [32]:
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=0.001)

In [33]:
optimizer.step()

# 7.Training loop

In [34]:
# Loop over the number of epochs and the dataloader
for i in range(num_epochs):
    for data in dataloader:
        # Set the gradients to zero
        optimizer.zero_grad()
        # Run a forward pass
        feature, target = data
        prediction = model(feature)    
        # Calculate the loss
        loss = criterion(prediction, target)    
        # Compute the gradients
        loss.backward()
        # Update the model's parameters
        optimizer.step()
show_results(model, dataloader)

NameError: name 'num_epochs' is not defined

# 8.Learning rate and momentum
*** Implement these parameters using optimizer

# 9.Layer initialization

In [75]:
layer = nn.Linear(64,128)
print(layer.weight.min(), layer.weight.max())

tensor(-0.1250, grad_fn=<MinBackward1>) tensor(0.1249, grad_fn=<MaxBackward1>)


In [76]:
layer = nn.Linear(64,128)
nn.init.uniform_(layer.weight)
print(layer.weight.min(), layer.weight.max())

tensor(0.0005, grad_fn=<MinBackward1>) tensor(0.9999, grad_fn=<MaxBackward1>)


# 10.Transfer Learning and fine-tuning

In [79]:
model = nn.Sequential(nn.Linear(5,4),
                     nn.Linear(4,2))

for name, param in model.named_parameters():
    print(name)
    print(param)

0.weight
Parameter containing:
tensor([[ 0.1024, -0.3381, -0.0277,  0.2188, -0.0333],
        [ 0.2185, -0.0533,  0.0214, -0.4002, -0.0949],
        [ 0.2506, -0.1270,  0.0484,  0.4328, -0.2036],
        [-0.4252,  0.3722, -0.3024,  0.0449, -0.0666]], requires_grad=True)
0.bias
Parameter containing:
tensor([-0.3604,  0.0904,  0.1973,  0.0812], requires_grad=True)
1.weight
Parameter containing:
tensor([[-0.2610,  0.4530, -0.2303,  0.2022],
        [-0.4732, -0.2995, -0.2672, -0.0394]], requires_grad=True)
1.bias
Parameter containing:
tensor([ 0.4268, -0.2198], requires_grad=True)


In [80]:
model = nn.Sequential(nn.Linear(64,128),
                     nn.Linear(128,256))

for name, param in model.named_parameters():
    if name=='0.weight':
        param.requires_grad = False

# 11.A deeper dive into Loading Data

In [83]:
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [99]:
from sklearn.datasets import load_iris

iris = load_iris()
x = iris.data
y = iris.target

## 11.1 TensorDataset()

In [100]:
dataset = TensorDataset(torch.tensor(x).float(), torch.tensor(y).float())

In [102]:
dataset.tensors

(tensor([[5.1000, 3.5000, 1.4000, 0.2000],
         [4.9000, 3.0000, 1.4000, 0.2000],
         [4.7000, 3.2000, 1.3000, 0.2000],
         [4.6000, 3.1000, 1.5000, 0.2000],
         [5.0000, 3.6000, 1.4000, 0.2000],
         [5.4000, 3.9000, 1.7000, 0.4000],
         [4.6000, 3.4000, 1.4000, 0.3000],
         [5.0000, 3.4000, 1.5000, 0.2000],
         [4.4000, 2.9000, 1.4000, 0.2000],
         [4.9000, 3.1000, 1.5000, 0.1000],
         [5.4000, 3.7000, 1.5000, 0.2000],
         [4.8000, 3.4000, 1.6000, 0.2000],
         [4.8000, 3.0000, 1.4000, 0.1000],
         [4.3000, 3.0000, 1.1000, 0.1000],
         [5.8000, 4.0000, 1.2000, 0.2000],
         [5.7000, 4.4000, 1.5000, 0.4000],
         [5.4000, 3.9000, 1.3000, 0.4000],
         [5.1000, 3.5000, 1.4000, 0.3000],
         [5.7000, 3.8000, 1.7000, 0.3000],
         [5.1000, 3.8000, 1.5000, 0.3000],
         [5.4000, 3.4000, 1.7000, 0.2000],
         [5.1000, 3.7000, 1.5000, 0.4000],
         [4.6000, 3.6000, 1.0000, 0.2000],
         [5

In [104]:
sample = dataset[0]
input_sample, label_sample = sample
print('input_sample:', input_sample)
print('label_sample:', label_sample)

input_sample: tensor([5.1000, 3.5000, 1.4000, 0.2000])
label_sample: tensor(0.)


## 11.2 DataLoader()

In [107]:
batch_size=2
shuffle = True

dataloader = DataLoader(dataset=dataset,
                       batch_size=batch_size,
                       shuffle=shuffle) 

In [120]:
dataloader.batch_size

2

In [None]:
# Get a batch of features and labels
features, labels = next(iter(dataloader))
print(features, labels)

In [121]:
for batch_inputs, batch_labels in dataloader:
    print('batch_inputs:', batch_inputs)
    print('batch_labels:', batch_labels)

batch_inputs: tensor([[6.4000, 3.1000, 5.5000, 1.8000],
        [6.7000, 3.3000, 5.7000, 2.5000]])
batch_labels: tensor([2., 2.])
batch_inputs: tensor([[5.4000, 3.9000, 1.3000, 0.4000],
        [5.7000, 3.0000, 4.2000, 1.2000]])
batch_labels: tensor([0., 1.])
batch_inputs: tensor([[6.2000, 2.9000, 4.3000, 1.3000],
        [5.5000, 3.5000, 1.3000, 0.2000]])
batch_labels: tensor([1., 0.])
batch_inputs: tensor([[5.0000, 3.2000, 1.2000, 0.2000],
        [5.7000, 2.9000, 4.2000, 1.3000]])
batch_labels: tensor([0., 1.])
batch_inputs: tensor([[4.7000, 3.2000, 1.6000, 0.2000],
        [6.1000, 2.8000, 4.7000, 1.2000]])
batch_labels: tensor([0., 1.])
batch_inputs: tensor([[7.6000, 3.0000, 6.6000, 2.1000],
        [5.2000, 3.4000, 1.4000, 0.2000]])
batch_labels: tensor([2., 0.])
batch_inputs: tensor([[5.0000, 3.6000, 1.4000, 0.2000],
        [5.1000, 3.5000, 1.4000, 0.3000]])
batch_labels: tensor([0., 0.])
batch_inputs: tensor([[4.9000, 2.4000, 3.3000, 1.0000],
        [6.1000, 3.0000, 4.9000, 1

In [123]:
iris = load_iris()

df = pd.DataFrame(
    iris.data, 
    columns=iris.feature_names
    )

df['target'] = iris.target

# Map targets to target names
target_names = {
    0:'setosa',
    1:'versicolor', 
    2:'virginica'
}

df['target_names'] = df['target'].map(target_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,target_names
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [126]:
df[['target','target_names','sepal length (cm)']].to_numpy

<bound method DataFrame.to_numpy of      target target_names  sepal length (cm)
0         0       setosa                5.1
1         0       setosa                4.9
2         0       setosa                4.7
3         0       setosa                4.6
4         0       setosa                5.0
..      ...          ...                ...
145       2    virginica                6.7
146       2    virginica                6.3
147       2    virginica                6.5
148       2    virginica                6.2
149       2    virginica                5.9

[150 rows x 3 columns]>

# 12.Evaluation with torchmetrics
## 12.1 Writing the evaluation loop

In [None]:
# Set the model to evaluation mode
model.eval()
validation_loss = 0.0

with torch.no_grad():
    for data in validationloader:
        outputs = model(data[0])
        loss = criterion(outputs, data[1])
      
        # Sum the current loss to the validation_loss variable
        validation_loss += loss.item()
        
# Calculate the mean loss value
validation_loss_epoch = validation_loss / len(validationloader)
print(validation_loss_epoch)

# Set the model back to training mode
model.train()

## 12.2 Calculating accuracy using torchmetrics

In [129]:
import torchmetrics

In [None]:
# Create accuracy metric using torch metrics
metric = torchmetrics.Accuracy(task="multiclass", num_classes=3)
for data in dataloader:
    features, labels = data
    outputs = model(features)
    
    # Calculate accuracy over the batch
    acc = metric(outputs.softmax(dim=-1), labels.argmax(dim=-1))

# Calculate accuracy over the whole epoch
acc = metric.compute()

# Reset the metric for the next epoch 
metric.reset()

# 13.Dropout layers

In [None]:
# Using the same model, set the dropout probability to 0.8
model = nn.Sequential(nn.Linear(3072,16),
                        nn.ReLU(0.1),
                        nn.Dropout(p=0.8))
model(input_tensor)

# 14.Hyperparameter Tuning

In [35]:
values = []
for idx in range(10):
    # Randomly sample a learning rate factor between 2 and 4
    factor = np.random.uniform(2,4)
    lr = 10 ** -factor
    
    # Randomly select a momentum between 0.85 and 0.99
    momentum = np.random.uniform(0.85, 0.99)
    
    values.append((lr, momentum))