# Introduction to Deep Learning with PyTorch

In [None]:
# https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html

In [4]:
import torch.nn as nn
import torch

# A 1x3 float tensor used as a sample input
in_tensor = torch.tensor([[0.7, 0.8, 0.8]])

In [8]:
# Pick the best available backend: CUDA first, then Apple MPS, else CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [10]:
# Ex 1

In [6]:
import torch
import torch.nn as nn

# One row of eight float32 values (nn.Linear needs a floating-point input)
input_tensor = torch.tensor([[2.0, 3.0, 6.0, 7.0, 9.0, 3.0, 2.0, 1.0]])

# A small network with exactly two linear layers: 8 -> 8 -> 1
model = nn.Sequential(
    nn.Linear(8, 8),
    nn.Linear(8, 1),
)

output = model(input_tensor)
print(output)

tensor([[-2.8321]], grad_fn=<AddmmBackward0>)


Sigmoid functions are used for binary classification problems, whereas softmax functions are often used for multiclass classification problems.

In [16]:
# A single raw score, shaped 1x1
input_tensor = torch.tensor([[0.8]])

# Build the sigmoid module, then call it on the score to map it into (0, 1)
sigmoid = nn.Sigmoid()
probability = sigmoid(input_tensor)
print(probability)

tensor([[0.6900]])


In [18]:
import torch
import torch.nn as nn

# One row of six raw scores
input_tensor = torch.tensor([[1.0, -6.0, 2.5, -0.3, 1.2, 0.8]])

# Softmax over the last dimension turns the row into values that sum to 1
softmax = nn.Softmax(dim=-1)
probabilities = softmax(input_tensor)
print(probabilities)

tensor([[1.2828e-01, 1.1698e-04, 5.7492e-01, 3.4961e-02, 1.5669e-01, 1.0503e-01]])


In [60]:
import torch
import torch.nn as nn

# One row of eleven float32 values
input_tensor = torch.tensor(
    [[3.0, 4.0, 6.0, 7.0, 10.0, 12.0, 2.0, 3.0, 6.0, 8.0, 9.0]]
)

# Multi-class classifier with four labels: 11 -> 20 -> 12 -> 6 -> 4,
# followed by a softmax over the last dimension
model = nn.Sequential(
    nn.Linear(11, 20),
    nn.Linear(20, 12),
    nn.Linear(12, 6),
    nn.Linear(6, 4),
    nn.Softmax(dim=-1),
)

output = model(input_tensor)
print(output)

tensor([[0.2522, 0.3094, 0.1491, 0.2893]], grad_fn=<SoftmaxBackward0>)


# One-Hot Encoding

In [21]:
import torch.nn.functional as F

# One-hot encoding with three classes (class indices 0, 1 and 2); this cell encodes index 0

F.one_hot(torch.tensor(0), num_classes=3)

tensor([1, 0, 0])

In [23]:
# One-hot encode class index 1 out of three classes
F.one_hot(torch.tensor(1), num_classes=3)

tensor([0, 1, 0])

In [25]:
# One-hot encode class index 2 out of three classes
F.one_hot(torch.tensor(2), num_classes=3)

tensor([0, 0, 1])

In [57]:
import numpy as np
y = 1
num_classes = 3

# Create the one-hot encoded vector using NumPy.
# Row `y` of the identity matrix is the one-hot vector for class `y`, so the
# result follows `y` and `num_classes` instead of being hard-coded.
one_hot_numpy = np.eye(num_classes, dtype=int)[y]

# Create the one-hot encoded vector using PyTorch (same `num_classes` as above)
one_hot_pytorch = F.one_hot(torch.tensor(y), num_classes=num_classes)

# Cross Entropy Loss

In [30]:
from torch.nn import CrossEntropyLoss

In [40]:
# Raw model scores for one sample over two classes
scores = torch.tensor([[-0.1211, 0.1059]])
# One-hot target marking class index 0 as the true class
one_hot_target = torch.tensor([[1, 0]])

In [46]:
# Instantiate the loss, then evaluate it on float64 copies of scores and target
criterion = CrossEntropyLoss()
criterion(scores.to(torch.float64), one_hot_target.to(torch.float64))

tensor(0.8131, dtype=torch.float64)

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

# True class index (wrapped in a list so the encoded label has a batch dimension)
y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# Create a one-hot encoded vector of the label y, one slot per score column
one_hot_label = F.one_hot(torch.tensor(y), num_classes=scores.size(1))

# Create the cross entropy loss function
criterion = CrossEntropyLoss()

# Calculate the cross entropy loss on float64 copies of scores and label
loss = criterion(scores.to(torch.float64), one_hot_label.to(torch.float64))
print(loss)

# Backpropagation

In [62]:
# Indexing the Sequential container returns the layer at that position
model[0] # layer 0

Linear(in_features=11, out_features=20, bias=True)

In [64]:
# Gradients of the first layer's weight and bias; the recorded output is
# (None, None) because no backward pass has populated them yet
model[0].weight.grad , model[0].bias.grad

(None, None)

In [None]:
# Learning rate for the manual gradient-descent step
lr = 0.01

# Manual update rule for the first layer: new_param = param - lr * grad.
# NOTE: `.grad` is None until a backward pass has run, so call
# loss.backward() before executing this cell.
# The results are bound to local names only; the model itself is not modified.
weight = model[0].weight
weight_grad = model[0].weight.grad
weight = weight - lr * weight_grad

bias = model[0].bias
bias_grad = model[0].bias.grad
# Fixed: the bias update must start from the bias, not from the weight
bias = bias - lr * bias_grad

In [68]:
import torch.optim as optim

# Stochastic gradient descent over all model parameters
optimizer = optim.SGD(model.parameters(), lr=0.0001)
# Fixed: the call used the misspelled name `optmizer`, which raises NameError
optimizer.step()

In [70]:
# Two stacked linear layers: 16 -> 8 -> 2
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 2),
)

# Weight matrix of the first linear layer
weight_0 = model[0].weight

# Bias vector of the second linear layer
bias_1 = model[1].bias

In [None]:
# Leaky ReLU: negative inputs are multiplied by `negative_slope` instead of zeroed.
# Fixed: the class is `nn.LeakyReLU`; `nn.LeakyRelu` does not exist.
leaky_relu = nn.LeakyReLU(negative_slope=0.05)

# TRAIN A NN

In [79]:
def mean_square_loss(prediction, target):
    """Return the mean of the squared element-wise differences.

    Args:
        prediction: predicted values.
        target: expected values; must be subtractable from ``prediction``.

    Returns:
        ``np.mean`` of ``(prediction - target) ** 2``.
    """
    residual = prediction - target
    return np.mean(residual ** 2)

In [None]:
# PyTorch's built-in mean-squared-error loss module
MSE = nn.MSELoss()
# NOTE(review): `prediction` and `target` are not defined in this cell — confirm they exist before running
loss = MSE(prediction,target) # float tensor

In [None]:
# Fixed: DataLoader was used without being imported
from torch.utils.data import DataLoader, TensorDataset

# NOTE(review): `features` and `target` are not defined in this cell — confirm they exist before running
# Pair features with targets as float tensors, then serve them in shuffled mini-batches of 4
dataset = TensorDataset(torch.tensor(features).float(), torch.tensor(target).float())
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Each iteration yields one mini-batch as an (inputs, labels) pair.
# Fixed: the loop had no body, which is an IndentationError.
for batch_inputs, batch_labels in dataloader:
    pass  # per-batch work goes here

# Two stacked linear layers: 16 -> 8 -> 2
model = nn.Sequential(nn.Linear(16, 8),
                      nn.Linear(8, 2)
                     )
MSE = nn.MSELoss()
# SGD with momentum over all model parameters
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.95)

In [None]:
# Training loop: one optimizer step per mini-batch, repeated for every epoch.
# NOTE(review): `num_epoch` is not defined in any visible cell — set it before running
for epoch in range(num_epoch):
    # Fixed: the loader is created as `dataloader`; `dataLoader` was a NameError
    for feature, target in dataloader:
        # Reset gradients accumulated by the previous step
        optimizer.zero_grad()
        # Forward pass
        pred = model(feature)
        # Loss between predictions and targets
        loss = MSE(pred, target)
        # Backward pass fills .grad on the parameters
        loss.backward()
        # Update the parameters from their gradients
        optimizer.step()

In [None]:
# ReLU as a PyTorch module
relu_pytorch = nn.ReLU()

# Scalar input that tracks gradients, passed through ReLU
x = torch.tensor(-1.0, requires_grad=True)
y = relu_pytorch(x)

# Backpropagate from the scalar output to fill x.grad
y.backward()

# Show the gradient of ReLU with respect to x
gradient = x.grad
print(gradient)

`.numel()` returns the number of elements in a tensor.

model.parameters

In [None]:
# Three stacked linear layers: 16 -> 4 -> 2 -> 1
model = nn.Sequential(nn.Linear(16, 4),
                      nn.Linear(4, 2),
                      nn.Linear(2, 1))

# Calculate the number of parameters in the model:
# sum the element count of every parameter tensor
total = sum(param.numel() for param in model.parameters())

# Freeze the first layer's weight.
# Fixed: parameters() yields bare tensors; named_parameters() yields the
# (name, parameter) pairs needed to match on the name.
for name, param in model.named_parameters():
    if name == '0.weight':
        param.requires_grad = False

In [None]:
# Freeze every parameter of the first and second layers; named_parameters()
# labels each tensor as "<layer index>.<weight|bias>"
for name, param in model.named_parameters():
    if name in ('0.weight', '0.bias', '1.weight', '1.bias'):
        param.requires_grad = False

In [None]:
# Inspect the value range of a freshly created layer's default initialization.
# Fixed: a bare expression in the middle of a cell is discarded, so print it.
layer = nn.Linear(65, 128)
print(layer.weight.min().item(), layer.weight.max().item())

# Fixed: `nn.layer0 = ...` set an attribute on the nn module and left `layer0` undefined
layer0 = nn.Linear(16, 32)
layer1 = nn.Linear(32, 64)

# Use uniform initialization for layer0 and layer1 weights
# (nn.init.uniform_ fills in place; its default range is [0, 1])
nn.init.uniform_(layer0.weight)
nn.init.uniform_(layer1.weight)

# Fixed: the container was immediately called on `layer.weight` (128x65),
# which cannot be fed to a layer expecting 16 input features
model = nn.Sequential(layer0, layer1)

In [None]:
# Fixed: DataLoader and TensorDataset are used here but were not imported
# in any cell that can run before this one
from torch.utils.data import DataLoader, TensorDataset

# NOTE(review): `dataframe` is not created in any visible cell — presumably a pandas DataFrame; confirm
# Load the different columns into two PyTorch tensors
features = torch.tensor(dataframe[['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']].to_numpy()).float()
target = torch.tensor(dataframe['Potability'].to_numpy()).float()

# Create a dataset from the two generated tensors
dataset = TensorDataset(features, target)

# Create a dataloader using the above dataset, then pull one shuffled batch of 2
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
x, y = next(iter(dataloader))

# Overfitting
Common ways to reduce overfitting:

- Add dropout
- Weight decay
- Data augmentation

In [125]:
# Linear -> ReLU -> Dropout with drop probability 0.5
model = nn.Sequential(
    nn.Linear(4, 4),
    nn.ReLU(),
    nn.Dropout(p=0.5),
)
# Dropout behaves differently during training and inference

In [141]:
# One random row of four standard-normal values
features = torch.randn(1, 4)

In [143]:
# train() returns the module itself, which is why this cell displays the Sequential repr
model.train()  # Set the model to training mode

Sequential(
  (0): Linear(in_features=4, out_features=4, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
)

In [145]:
# Run only the Linear layer and the ReLU, stopping before Dropout
output_before_dropout = model[1](model[0](features))
print("Output before Dropout:", output_before_dropout)

Output before Dropout: tensor([[0.4013, 0.0382, 0.0000, 0.6071]], grad_fn=<ReluBackward0>)


In [147]:

# In training mode, values that are kept are scaled by 1 / (1 - p); with p=0.5
# the recorded output shows each value doubled (e.g. 0.4013 -> 0.8026)
output_after_dropout = model[2](output_before_dropout)  # Apply Dropout
print("Output after Dropout:", output_after_dropout)

Output after Dropout: tensor([[0.8026, 0.0765, 0.0000, 1.2142]], grad_fn=<MulBackward0>)


# WEIGHT DECAY

In [161]:
# SGD with weight decay applied through the optimizer.
# Fixed: the variable was misspelled `optmizer`; the other cells use `optimizer`.
optimizer = optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-4)
# weight decay between 0 and 1

# Fine-Tuning
Use Grid Search

In [168]:
# Grid of learning rates: 10^-2, 10^-3, 10^-4, 10^-5
for factor in range(2, 6):
    lr = 10 ** (-factor)

In [166]:
# Random search: draw the exponent uniformly between 2 and 6, then use 10^-exponent
factor = np.random.uniform(low=2, high=6)
lr = 10 ** (-factor)