# Introduction to Deep Learning with PyTorch

## 1. Introduction to PyTorch, a Deep Learning library

Answer:

    Traditional Machine Learning:
        Typically relies on hand-crafted feature engineering
        Requires relatively less data
        Does not require a GPU

    Deep Learning:
        Can extract patterns from audio signals
        Often requires a GPU
        Models have a lot of parameters
        Can learn features from large, unstructured data

In [1]:
# Import PyTorch
import torch

# Plain Python list used as the source data
list_a = [1, 2, 3, 4]

# Create a tensor from list_a (a 1-D tensor holding the same four values)
tensor_a = torch.tensor(list_a)

# Show the tensor's contents
print(tensor_a)

tensor([1, 2, 3, 4])


In [2]:
# Display the device on which tensor_a is stored
print(tensor_a.device)

# Display the data type of tensor_a's elements
print(tensor_a.dtype)

cpu
torch.int64


In [4]:
import numpy as np

# Two 3x3 integer matrices used as source data for tensor arithmetic
array_a = np.array([[1, 1, 1], [2, 3, 4], [4, 5, 6]])
array_b = np.array([[7, 5, 4], [2, 2, 8], [6, 3, 8]])

In [5]:
# Create two tensors from the NumPy arrays array_a and array_b
tensor_a = torch.from_numpy(array_a)
tensor_b = torch.from_numpy(array_b)

# Subtract tensor_b from tensor_a (element-wise)
tensor_c = tensor_a - tensor_b

# Multiply tensor_a and tensor_b element-wise (this is not a matrix product)
tensor_d = tensor_a * tensor_b

# Add tensor_c and tensor_d element-wise, then display the result
tensor_e = tensor_c + tensor_d
print(tensor_e)

tensor([[ 1,  1,  1],
        [ 4,  7, 28],
        [22, 17, 46]], dtype=torch.int32)


In [6]:
import torch
import torch.nn as nn

# One sample with 8 features. torch.tensor is the documented factory for building
# a tensor from existing data; the dtype is spelled out so the input is float32,
# which is what the linear layers below operate on by default.
input_tensor = torch.tensor([[2, 3, 6, 7, 9, 3, 2, 1]], dtype=torch.float32)

# Implement a small neural network with exactly two linear layers:
# 8 input features -> 1 intermediate value -> 1 output value
model = nn.Sequential(
    nn.Linear(8, 1),
    nn.Linear(1, 1),
)

# Forward pass; no seed is set, so the printed value depends on the random
# initialisation of the layers
output = model(input_tensor)
print(output)

tensor([[0.6033]], grad_fn=<AddmmBackward0>)


Answer:
    
    nn.Sequential(
        nn.Linear(12, 20),
        nn.Linear(20, 14),
        nn.Linear(14, 3),
        nn.Linear(3, 2)
       )

Answer:
    
    A neural network with a single linear layer followed by a sigmoid activation is similar to a logistic regression model.
    The input dimension of a linear layer must be equal to the output dimension of the previous layer.

In [7]:
# A single raw score, stored as a 1x1 tensor
score = torch.tensor([[0.8]])

# Create a sigmoid function and apply it on the score tensor
sigmoid = nn.Sigmoid()
probability = sigmoid(score)
print(probability)

tensor([[0.6900]])


In [8]:
# Raw scores for six classes, stored as a 1x6 tensor
scores = torch.tensor([[1.0, -6.0, 2.5, -0.3, 1.2, 0.8]])

# Create a softmax function and apply it on the score tensor
# dim=-1 applies the softmax along the last dimension, i.e. across the six scores of the row
softmax = nn.Softmax(dim=-1)
probabilities = softmax(scores)
print(probabilities)

tensor([[1.2828e-01, 1.1698e-04, 5.7492e-01, 3.4961e-02, 1.5669e-01, 1.0503e-01]])


## 2. Training Our First Neural Network with PyTorch

In [9]:
import torch
import torch.nn as nn

# One sample with 8 features, built with torch.tensor (the documented factory for
# existing data) and an explicit float32 dtype
input_tensor = torch.tensor([[3, 4, 6, 2, 3, 6, 8, 9]], dtype=torch.float32)

# Implement a small neural network for binary classification
# NOTE(review): the last linear layer has two outputs, so the sigmoid yields two
# independent values in (0, 1) rather than a single class probability -- confirm
# this is intended.
model = nn.Sequential(
    nn.Linear(8, 1),
    nn.Linear(1, 2),
    nn.Sigmoid(),
)

# Forward pass; no seed is set, so the printed values vary with the random initialisation
output = model(input_tensor)
print(output)

tensor([[0.1197, 0.3246]], grad_fn=<SigmoidBackward0>)


Answer: It can take any float value.

In [10]:
import torch
import torch.nn as nn

# One sample with 11 features, built with torch.tensor (the documented factory for
# existing data) and an explicit float32 dtype
input_tensor = torch.tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]], dtype=torch.float32)

# Implement a neural network with exactly four linear layers
# Each layer's input size equals the previous layer's output size: 11 -> 7 -> 3 -> 6 -> 1
model = nn.Sequential(
    nn.Linear(11, 7),
    nn.Linear(7, 3),
    nn.Linear(3, 6),
    nn.Linear(6, 1),
)

# Forward pass; no seed is set, so the printed value varies with the random initialisation
output = model(input_tensor)
print(output)

tensor([[0.0723]], grad_fn=<AddmmBackward0>)


In [11]:
import torch
import torch.nn as nn

# One sample with 11 features, built with torch.tensor (the documented factory for
# existing data) and an explicit float32 dtype
input_tensor = torch.tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]], dtype=torch.float32)

# Update network below to perform a multi-class classification with four labels
# The last linear layer emits four scores; the softmax over the last dimension
# turns them into four values that sum to 1
model = nn.Sequential(
    nn.Linear(11, 20),
    nn.Linear(20, 12),
    nn.Linear(12, 6),
    nn.Linear(6, 4),
    nn.Softmax(dim=-1),
)

# Forward pass; no seed is set, so the printed values vary with the random initialisation
output = model(input_tensor)
print(output)

tensor([[0.1988, 0.4097, 0.2425, 0.1490]], grad_fn=<SoftmaxBackward0>)


In [13]:
import torch.nn.functional as F

# Class index to encode and the total number of classes
y = 1
num_classes = 3

# Create the one-hot encoded vector using NumPy
# Row y of the num_classes x num_classes identity matrix is the one-hot vector for
# class y, so the encoding follows y and num_classes instead of being typed by hand
one_hot_numpy = np.eye(num_classes, dtype=int)[y]

# Create the one-hot encoded vector using PyTorch
one_hot_pytorch = F.one_hot(torch.tensor(y), num_classes)

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Ground-truth class index, wrapped in a list so the label tensor is 1-D
y = [2]
# Raw scores for one sample over four classes (1x4 tensor)
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# Create a one-hot encoded vector of the label y
# scores.size(1) is the number of score columns (4), so the encoding has one entry per class
one_hot_label = F.one_hot(torch.tensor(y), num_classes=scores.size(1))

In [25]:
# Create the cross entropy loss function
criterion = nn.CrossEntropyLoss()

# Calculate the cross entropy loss
# The raw scores are passed as the prediction and the one-hot label as the target;
# both are cast to float64 before the call
loss = criterion(scores.double(), one_hot_label.double())
print(loss)

tensor(8.0619, dtype=torch.float64)


In [None]:
# Raw scores for one sample over two classes, created directly from literal values
preds = torch.tensor([[0.5949, 1.7531]], requires_grad=True)
# Index of the correct class
target = torch.tensor([1], dtype=torch.long)
# 2x9 weight matrix and 2-element bias, both flagged to track gradients
weight = torch.tensor([[5.0948e-01, 3.5392e-01, 1.5075e+00, -1.1572e+00, 1.9524e+00, -1.0543e-03, 2.5741e-01, 8.4456e-01, 
                        2.5092e-01], [-2.9786e-01, -1.8137e-01, 1.7039e+00, -3.0650e-01, 1.8729e-01, 9.8279e-01, 3.1741e-01, 
                                      -1.1760e+00, 8.2644e-01]], requires_grad=True)
bias = torch.tensor([-2.0824, 0.8604], requires_grad=True)

# Create the cross entropy loss function
criterion = nn.CrossEntropyLoss()

# Calculate the loss
loss = criterion(preds, target)

# Compute the gradients of the loss
loss.backward()

# Display gradients of the weight and bias tensors in order
# NOTE(review): in this cell the loss is computed from preds and target only;
# weight and bias never take part in producing preds, so backward() does not
# populate their .grad here. Presumably the original exercise derived preds
# from weight and bias -- confirm.
print(weight.grad)
print(bias.grad)

In [32]:
# Two linear layers (16 -> 8 and 8 -> 2) with a sigmoid in between
model = nn.Sequential(nn.Linear(16, 8),
                      nn.Sigmoid(),
                      nn.Linear(8, 2))

# Access the weight of the first linear layer
weight_0 = model[0].weight

# Access the bias of the second linear layer
# (it sits at index 2 because the sigmoid occupies index 1)
bias_1 = model[2].bias

In [None]:
# Fetch the weight tensors of the modules at positions 0, 1 and 2 of `model`
# NOTE(review): this requires all three positions to hold modules with a `weight`
# attribute. The Sequential built in the preceding cell has nn.Sigmoid at
# position 1 -- presumably a model with three linear layers is expected here; confirm.
weight0 = model[0].weight
weight1 = model[1].weight
weight2 = model[2].weight

# Access the gradients of the weight of each linear layer
grads0 = weight0.grad
grads1 = weight1.grad
grads2 = weight2.grad

In [None]:
# Fetch the weight tensors of the modules at positions 0, 1 and 2 of `model`
# NOTE(review): presumably `model` holds three linear layers here -- confirm.
weight0 = model[0].weight
weight1 = model[1].weight
weight2 = model[2].weight

# Access the gradients of the weight of each linear layer
grads0 = model[0].weight.grad
grads1 = model[1].weight.grad
grads2 = model[2].weight.grad

# Update the weights using the learning rate and the gradients
# `lr` is not defined in this cell and must already be in scope.
# Each line builds a new tensor and rebinds the local name; the parameters
# stored inside `model` are not modified by these assignments.
weight0 = weight0 - lr * grads0
weight1 = weight1 - lr * grads1
weight2 = weight2 - lr * grads2

In [35]:
import torch.optim as optim

# Create the optimizer
# SGD over all parameters of `model`, with a learning rate of 0.01
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [37]:
# 1x2 tensor of raw scores built from literal values, and a 1x2 float target
pred = torch.tensor([[-0.3243, -0.2986]], requires_grad=True)
target = torch.tensor([[1., 0.]])

# Create the optimizer
# SGD over all parameters of `model`, with a learning rate of 0.001
optimizer = optim.SGD(model.parameters(), lr=0.001)

# `criterion` is not defined in this cell; it is whichever loss object is already in scope
loss = criterion(pred, target)
loss.backward()

# Update the model's parameters using the optimizer
# NOTE(review): pred is created from literals rather than from model(...), so within
# this cell the backward pass reaches pred only -- presumably the exercise
# environment links pred to the model; confirm.
optimizer.step()

In [38]:
# Scalar prediction and scalar ground truth (0-dimensional arrays)
y_hat = np.array(10)
y = np.array(1)

# Calculate the MSELoss using NumPy
mse_numpy = np.mean((y_hat - y)**2)

# Create the MSELoss function
# The constructor takes configuration only; the prediction and target are supplied
# when the loss object is called. Positional constructor arguments would be bound
# to the deprecated size_average / reduce options, so none are passed.
criterion = nn.MSELoss()

# Calculate the MSELoss using the created loss function
# Both values are converted to float32 tensors before the call
mse_pytorch = criterion(torch.tensor(y_hat, dtype=torch.float32), torch.tensor(y, dtype=torch.float32))
print(mse_pytorch)

tensor(81.)




In [None]:
# Train for num_epochs passes over the dataloader, one optimizer step per batch
for epoch in range(num_epochs):
    for feature, target in dataloader:
        # Reset the gradients before processing this batch
        optimizer.zero_grad()
        # Forward pass on the batch features
        prediction = model(feature)
        # Loss between the prediction and the batch target
        loss = criterion(prediction, target)
        # Backward pass, then parameter update
        loss.backward()
        optimizer.step()
show_results(model, dataloader)

## 3. Neural Network Architecture and Hyperparameters

In [41]:
# Create a ReLU function with PyTorch
relu_pytorch = nn.ReLU()

# Apply your ReLU function on x, and calculate gradients
# x is a negative scalar that tracks gradients
x = torch.tensor(-1.0, requires_grad=True)
y = relu_pytorch(x)
# Backpropagate from y so that x.grad gets populated
y.backward()

# Print the gradient of the ReLU function for x
gradient = x.grad
print(gradient)

tensor(0.)


In [42]:
# Create a leaky relu function in PyTorch
# negative_slope is the factor applied to negative inputs
leaky_relu_pytorch = nn.LeakyReLU(negative_slope=0.05)

x = torch.tensor(-2.0)
# Call the above function on the tensor x (-2.0 * 0.05 = -0.1)
output = leaky_relu_pytorch(x)
print(output)

tensor(-0.1000)


In [43]:
# Create a leaky relu function in PyTorch
# negative_slope is the factor applied to negative inputs
leaky_relu_pytorch = nn.LeakyReLU(negative_slope=0.05)

x = torch.tensor(-3.0)
# Call the above function on the tensor x (-3.0 * 0.05 = -0.15)
output = leaky_relu_pytorch(x)
print(output)

tensor(-0.1500)


Answer: -0.15

Answer:
    
    The ReLU activation function outputs zero for all negative inputs.
    ReLU(x) = 3 for x = 3.

In [47]:
# Three stacked linear layers: 16 -> 4 -> 2 -> 1
model = nn.Sequential(
    nn.Linear(16, 4),
    nn.Linear(4, 2),
    nn.Linear(2, 1),
)

# Calculate the number of parameters in the model by adding up the element
# count of every tensor returned by model.parameters()
total = sum(parameter.numel() for parameter in model.parameters())

print(total)

81


Answer: 81

In [50]:
def calculate_capacity(model):
    """Return the total number of scalar values across the model's parameters.

    Adds up ``numel()`` over every tensor yielded by ``model.parameters()``;
    a model without parameters gives 0.
    """
    return sum(tensor.numel() for tensor in model.parameters())

In [51]:
n_features = 8
n_classes = 2

# One sample with n_features values, built with torch.tensor (the documented
# factory for existing data) and an explicit float32 dtype
input_tensor = torch.tensor([[3, 4, 6, 2, 3, 6, 8, 9]], dtype=torch.float32)

# Create a neural network with less than 120 parameters
# Weights + biases per layer: 8*6+6 = 54, 6*4+4 = 28, 4*2+2 = 10 -> 92 in total
model = nn.Sequential(
    nn.Linear(n_features, 6),
    nn.Linear(6, 4),
    nn.Linear(4, n_classes),
)
# Forward pass, confirming the layer sizes chain correctly for this input
output = model(input_tensor)

print(calculate_capacity(model))

92


In [52]:
n_features = 8
n_classes = 2

# One sample with n_features values, built with torch.tensor (the documented
# factory for existing data) and an explicit float32 dtype
input_tensor = torch.tensor([[3, 4, 6, 2, 3, 6, 8, 9]], dtype=torch.float32)

# Create a neural network with more than 120 parameters
# Weights + biases per layer: 8*8+8 = 72, 8*5+5 = 45, 5*4+4 = 24, 4*2+2 = 10 -> 151 in total
model = nn.Sequential(
    nn.Linear(n_features, 8),
    nn.Linear(8, 5),
    nn.Linear(5, 4),
    nn.Linear(4, n_classes),
)

# Forward pass, confirming the layer sizes chain correctly for this input
output = model(input_tensor)

print(calculate_capacity(model))

151


In [None]:
# Try a first learning rate value
# optimize_and_plot is defined outside the visible source; presumably it runs an
# optimisation with the given learning rate and plots the outcome -- confirm
lr0 = 0.001
optimize_and_plot(lr=lr0)

In [None]:
# Try a second learning rate value (100 times larger than 0.001)
# optimize_and_plot is defined outside the visible source
lr1 = 0.1
optimize_and_plot(lr=lr1)

In [None]:
# Try a third learning rate value (slightly below 0.1)
# optimize_and_plot is defined outside the visible source
lr2 = 0.085
optimize_and_plot(lr=lr2)

In [None]:
# Try a first value for momentum
# optimize_and_plot is defined outside the visible source; here only its
# momentum argument is supplied
mom0 = 0.85
optimize_and_plot(momentum=mom0)

In [None]:
# Try a second value for momentum (larger than 0.85)
# optimize_and_plot is defined outside the visible source
mom1 = 0.95
optimize_and_plot(momentum=mom1)

Answer:
    
    Find a model trained on a similar task
    Load pre-trained weights
    Freeze (or not) some of the layers in the model
    Train with a smaller learning rate
    Look at the loss values and see if the learning rate needs to be adjusted

In [54]:
# Parameter names, as reported by named_parameters(), of the weight and bias
# belonging to the first (index 0) and second (index 1) layers
frozen_names = ('0.weight', '0.bias', '1.weight', '1.bias')

for name, param in model.named_parameters():
    # Freeze the parameters of those two layers
    if name in frozen_names:
        param.requires_grad = False

In [55]:
# Two linear layers: 16 -> 32 and 32 -> 64
layer0 = nn.Linear(16, 32)
layer1 = nn.Linear(32, 64)

# Use uniform initialization for layer0 and layer1 weights:
# each weight matrix is overwritten in place with samples drawn between 0 and 1
for layer in (layer0, layer1):
    nn.init.uniform_(layer.weight, a=0, b=1)

# Chain the two initialised layers into one model
model = nn.Sequential(layer0, layer1)

## 4. Evaluating and Improving Models

In [56]:
import numpy as np
import torch
from torch.utils.data import TensorDataset

# Random sample data: 12 rows of 8 feature values and 12 rows of one target value
np_features = np.random.rand(12, 8)
np_target = np.random.rand(12, 1)

# Convert arrays to PyTorch tensors
torch_features, torch_target = torch.tensor(np_features), torch.tensor(np_target)

# Create a TensorDataset from two tensors
dataset = TensorDataset(torch_features, torch_target)

# Return the last element of this dataset (a features/target pair)
print(dataset[-1])

(tensor([0.3216, 0.6335, 0.6974, 0.4446, 0.8613, 0.2529, 0.8226, 0.1453],
       dtype=torch.float64), tensor([0.4125], dtype=torch.float64))


In [58]:
import pandas as pd

# Load the water potability data from a CSV file given by a relative path
dataframe = pd.read_csv('water_potability.csv')
# Preview the first rows of the table
dataframe.head()

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,0.587349,0.577747,0.386298,0.568199,0.647347,0.292985,0.654522,0.795029,0.630115,0
1,0.643654,0.4413,0.314381,0.439304,0.514545,0.356685,0.377248,0.202914,0.520358,0
2,0.388934,0.470876,0.506122,0.524364,0.561537,0.142913,0.249922,0.401487,0.219973,0
3,0.72582,0.715942,0.506141,0.521683,0.751819,0.148683,0.4672,0.658678,0.242428,0
4,0.610517,0.532588,0.237701,0.270288,0.495155,0.494792,0.409721,0.469762,0.585049,0


In [64]:
from torch.utils.data import DataLoader, TensorDataset

# Load the different columns into two PyTorch tensors
# Four named columns become the features; the 'Potability' column becomes the target.
# Both are converted to float tensors.
features = torch.tensor(np.array(dataframe[['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']])).float()
target = torch.tensor(np.array(dataframe['Potability'])).float()

# Create a dataset from the two generated tensors
dataset = TensorDataset(features, target)

# Create a dataloader using the above dataset
# Batches of two samples, drawn in shuffled order
dataloader = DataLoader(dataset, shuffle=True, batch_size=2)
# Pull one batch; x and y are not used further in this cell
x, y = next(iter(dataloader))

# Create a model using the nn.Sequential API
# len(features[0]) is the number of values in one feature row, i.e. the four selected columns
model = nn.Sequential(
  nn.Linear(len(features[0]), 8),
  nn.Linear(8, 1)
)
# The model is applied to the full features tensor, not to the sampled batch
output = model(features)
print(output)

tensor([[-0.3362],
        [-0.2760],
        [-0.2499],
        ...,
        [-0.2864],
        [-0.3019],
        [-0.3390]], grad_fn=<AddmmBackward0>)


In [None]:
# Set the model to evaluation mode
model.eval()
# Running sum of the per-batch loss values
validation_loss = 0.0

# No gradients are tracked while evaluating
with torch.no_grad():

    # validationloader and criterion are not defined in this cell and must already be in scope
    for data in validationloader:

        # data[0] is passed to the model as input; data[1] is used as the target
        outputs = model(data[0])
        loss = criterion(outputs, data[1])

        # Sum the current loss to the validation_loss variable
        validation_loss += loss.item()

# Calculate the mean loss value
# (the summed per-batch losses divided by the number of batches)
validation_loss_epoch = validation_loss / len(validationloader)
print(validation_loss_epoch)

# Set the model back to training mode
model.train()

In [None]:
import torchmetrics

# Create accuracy metric using torch metrics
# Configured for a multiclass task with three classes
metric = torchmetrics.Accuracy(task="multiclass", num_classes=3)
for data in dataloader:
    features, labels = data
    outputs = model(features)

    # Calculate accuracy over the batch
    # The outputs are passed through a softmax; labels.argmax(dim=-1) reduces the
    # labels along their last dimension -- presumably they are one-hot encoded; confirm.
    # This per-batch value is overwritten on every iteration.
    acc = metric(outputs.softmax(dim=-1), labels.argmax(dim=-1))

# Calculate accuracy over the whole epoch
acc = metric.compute()

# Reset the metric for the next epoch
metric.reset()
# plot_errors is defined outside the visible source
plot_errors(model, dataloader)

In [None]:
# Create a small neural network
# A linear layer (3072 -> 16), a ReLU, and a dropout layer left at its default probability
model = nn.Sequential(
    nn.Linear(3072, 16),
    nn.ReLU(),
    nn.Dropout()
)
# input_tensor is not defined in this cell; the first layer takes 3072 input
# features, so presumably the tensor's last dimension is 3072 -- confirm
model(input_tensor)

In [None]:
# Using the same model, set the dropout probability to 0.8
model = nn.Sequential(
    nn.Linear(3072, 16),
    nn.ReLU(),
    nn.Dropout(p=0.8)
)
# input_tensor is not defined in this cell; the first layer takes 3072 input
# features, so presumably the tensor's last dimension is 3072 -- confirm
model(input_tensor)

Answer:
    
    Overfitting happens when the model is performing worse on the validation set than on the training set.
    Data augmentation can reduce overfitting by artificially increasing the size of the training set.
    A dropout layer with a probability strictly greater than zero will reduce overfitting.

In [70]:
# Collect ten randomly drawn (learning rate, momentum) pairs
values = []
for idx in range(10):
    # Randomly sample a learning rate between 0.01 and 0.0001:
    # draw an exponent between 2 and 4 and raise ten to its negation
    factor = np.random.uniform(low=2, high=4)
    lr = 10 ** (-factor)

    # Randomly select a momentum between 0.85 and 0.99
    momentum = np.random.uniform(low=0.85, high=0.99)

    values += [(lr, momentum)]