<a href="https://colab.research.google.com/github/bechirzammouri/translation-task-hf/blob/main/TP_2_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch

In [None]:
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn.init import xavier_uniform_,kaiming_uniform_

In [None]:
import torchvision
import torchvision.transforms as transforms

# Seed torch's global random number generator.
torch.manual_seed(1110)

# Training split of MNIST: stored under ./data (downloaded when missing),
# with every image converted to a tensor.
train_dataset = torchvision.datasets.MNIST(
    './data', train=True, transform=transforms.ToTensor(), download=True
)

# Test split of MNIST with the same tensor conversion. No download flag is
# passed here, so the files are expected to already be present in ./data.
test_dataset = torchvision.datasets.MNIST(
    './data', train=False, transform=transforms.ToTensor()
)

100%|██████████| 9.91M/9.91M [00:01<00:00, 4.98MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 131kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.23MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.18MB/s]


Architecture of the model

In [None]:
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn.init import xavier_uniform_,kaiming_uniform_

class mnist_model(nn.Module):
    """Fully connected classifier: 784 -> 240 -> 150 -> 50 -> 28 -> 10.

    Every hidden linear layer is followed by a sigmoid. When ``bn`` is truthy
    a ``BatchNorm1d`` layer is applied between the linear layer and the
    sigmoid. The last layer returns raw logits, which are fed to
    ``CrossEntropyLoss`` during training.

    Args:
        lr: Learning rate used by the SGD optimizer built in ``init_optim``.
        bn: Whether ``forward`` applies the batch-normalisation layers.
    """

    def __init__(self, lr = 0.01, bn = False):
        super().__init__()

        self.bn = bn  # batch-normalisation activation status

        # The BatchNorm1d layers are always created (and therefore always part
        # of parameters()/state_dict()); ``bn`` only decides whether forward()
        # uses them.
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(784, 240)
        self.bn1 = nn.BatchNorm1d(240)

        self.layer2 = nn.Linear(240, 150)
        self.bn2 = nn.BatchNorm1d(150)

        self.layer3 = nn.Linear(150, 50)
        self.bn3 = nn.BatchNorm1d(50)

        self.layer4 = nn.Linear(50, 28)
        self.bn4 = nn.BatchNorm1d(28)

        self.layer5 = nn.Linear(28, 10)

        self.criterion = nn.CrossEntropyLoss()
        self.lr = lr  # stored so init_optim() can be called later

    def init_random_weights(self):
        """Re-draw every Linear weight with ``nn.init.normal_`` and rebuild the optimizer.

        Biases and batch-norm parameters are left untouched.
        """
        def init_weights(m):
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight)
        self.apply(init_weights)
        self.init_optim()  # initialize the optimizer after the weights

    def init_optim(self):
        """Create a plain SGD optimizer over all parameters using ``self.lr``."""
        self.optimizer = optim.SGD(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return the 10 class logits for a batch of inputs.

        The input is flattened per sample and must provide 784 features
        (e.g. 1x28x28 images).
        """
        x = self.flatten(x)

        hidden_blocks = (
            (self.layer1, self.bn1),
            (self.layer2, self.bn2),
            (self.layer3, self.bn3),
            (self.layer4, self.bn4),
        )
        for linear, norm in hidden_blocks:
            x = linear(x)
            if self.bn:
                x = norm(x)
            x = torch.sigmoid(x)
        return self.layer5(x)

    def train_model(self, num_epochs, train_loader, device, clip_gradient=False):
        """Train the model and return the mean training loss of every epoch.

        Args:
            num_epochs: Number of passes over ``train_loader``.
            train_loader: Iterable yielding ``(images, labels)`` batches.
            device: Device the model and every batch are moved to.
            clip_gradient: When true, clip the global gradient norm to 1.0
                before each optimizer step.

        Returns:
            A list with one float per epoch: the sample-weighted mean loss.
        """
        self.to(device)  # ensure model parameters are on the target device

        # Build the optimizer on demand so that forgetting init_optim() does
        # not end in an AttributeError halfway into the first batch.
        if getattr(self, "optimizer", None) is None:
            self.init_optim()

        epoch_losses = []
        for epoch in range(num_epochs):
            self.train()
            running_loss = 0.0
            samples_seen = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                batch_size = images.size(0)

                self.optimizer.zero_grad()
                outputs = self(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                if clip_gradient:
                    torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
                self.optimizer.step()

                # criterion returns the batch mean; weight it by the batch size
                running_loss += loss.item() * batch_size
                samples_seen += batch_size

            # Normalise by the samples actually processed: len(loader.dataset)
            # is wrong with drop_last / samplers and missing on plain iterables.
            epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
            epoch_losses.append(epoch_loss)
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        return epoch_losses

In [None]:
# Shuffled mini-batches of 32 samples drawn from the training split.
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=32,
)

# Batch-norm variant of the network; create its SGD optimizer, which
# train_model() uses as self.optimizer.
my_model = mnist_model(bn=True)
my_model.init_optim()

In [None]:
# Use CUDA when torch reports it as available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 20 epochs with gradient-norm clipping enabled; the result is the list of
# per-epoch training losses.
# NOTE(review): despite the name, no Xavier initialisation is applied to
# my_model in the visible cells - confirm whether one was intended.
xavier_clip = my_model.train_model(
    num_epochs=20,
    train_loader=train_loader,
    device=device,
    clip_gradient=True,
)

Epoch [1/20], Loss: 1.5377
Epoch [2/20], Loss: 0.9487
Epoch [3/20], Loss: 0.6899
Epoch [4/20], Loss: 0.5495
Epoch [5/20], Loss: 0.4588
Epoch [6/20], Loss: 0.3983
Epoch [7/20], Loss: 0.3513
Epoch [8/20], Loss: 0.3147
Epoch [9/20], Loss: 0.2865
Epoch [10/20], Loss: 0.2639
Epoch [11/20], Loss: 0.2429
Epoch [12/20], Loss: 0.2278
Epoch [13/20], Loss: 0.2127
Epoch [14/20], Loss: 0.2016
Epoch [15/20], Loss: 0.1877
Epoch [16/20], Loss: 0.1796
Epoch [17/20], Loss: 0.1699
Epoch [18/20], Loss: 0.1622
Epoch [19/20], Loss: 0.1533
Epoch [20/20], Loss: 0.1478


In [None]:
# A bare `my_model.parameters()` only displays a generator object.
# Show every parameter's name and shape instead of dumping raw tensors.
{name: tuple(param.shape) for name, param in my_model.named_parameters()}

# With random weight initialization

In [None]:
# Same batch-norm architecture, but with the Linear weights re-drawn by
# init_random_weights() (nn.init.normal_), which is what this section compares.
# init_random_weights() also creates the optimizer, so no separate
# init_optim() call is needed.
random_w_model = mnist_model(bn=True)
random_w_model.init_random_weights()

In [None]:
# "gpu" is not a valid torch device string (the accelerator is called "cuda"),
# so reuse the `device` object selected for the first training run.
random_list = random_w_model.train_model(
    num_epochs=20,
    train_loader=train_loader,
    device=device,
)

In [None]:
# List the direct sub-modules (layers and loss) registered on the model.
for child in random_w_model.children():
    print(child)

In [None]:
import matplotlib.pyplot as plt

# Compare the per-epoch training losses of the two runs above.
# The x-axis is derived from the recorded histories instead of a hard-coded
# 20 epochs, so the cell keeps working when num_epochs changes or the two
# runs have different lengths.
epochs = range(1, max(len(xavier_clip), len(random_list)) + 1)

plt.figure(figsize=(8,5))
plt.plot(range(1, len(xavier_clip) + 1), xavier_clip, label="clipping gradient ", marker='o')
plt.plot(range(1, len(random_list) + 1), random_list, label="randomized weights", marker='s')

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss Comparison")
plt.xticks(epochs)
plt.grid(True)
plt.legend()
plt.show()

# Adding dropout to the model

In [None]:
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn.init import xavier_uniform_,kaiming_uniform_

class ModelWithDropOut(nn.Module):
    """Fully connected classifier (784 -> 240 -> 150 -> 50 -> 28 -> 10) with dropout.

    Every hidden linear layer is followed by a sigmoid and then by dropout
    with p=0.2. When ``bn`` is truthy a ``BatchNorm1d`` layer is applied
    between the linear layer and the sigmoid. The last layer returns raw
    logits, which are fed to ``CrossEntropyLoss`` during training.

    Args:
        lr: Learning rate used by the SGD optimizer built in ``init_optim``.
        bn: Whether ``forward`` applies the batch-normalisation layers.
    """

    def __init__(self, lr = 0.01, bn = False):
        super().__init__()

        self.bn = bn  # batch-normalisation activation status

        # Dropout must be stored on self: forward() reads self.drop, and
        # registering it as a sub-module lets train()/eval() switch it on/off.
        self.drop = nn.Dropout(p=0.2)

        # The BatchNorm1d layers are always created; ``bn`` only decides
        # whether forward() uses them.
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(784, 240)
        self.bn1 = nn.BatchNorm1d(240)

        self.layer2 = nn.Linear(240, 150)
        self.bn2 = nn.BatchNorm1d(150)

        self.layer3 = nn.Linear(150, 50)
        self.bn3 = nn.BatchNorm1d(50)

        self.layer4 = nn.Linear(50, 28)
        self.bn4 = nn.BatchNorm1d(28)

        self.layer5 = nn.Linear(28, 10)

        self.criterion = nn.CrossEntropyLoss()
        self.lr = lr  # stored so init_optim() can be called later

    def init_random_weights(self):
        """Re-draw every Linear weight with ``nn.init.normal_`` and rebuild the optimizer.

        Biases and batch-norm parameters are left untouched.
        """
        def init_weights(m):
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight)
        self.apply(init_weights)
        self.init_optim()  # initialize the optimizer after the weights

    def init_optim(self):
        """Create a plain SGD optimizer over all parameters using ``self.lr``."""
        self.optimizer = optim.SGD(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return the 10 class logits for a batch of inputs.

        The input is flattened per sample and must provide 784 features
        (e.g. 1x28x28 images). Dropout is applied after every hidden sigmoid.
        """
        x = self.flatten(x)

        hidden_blocks = (
            (self.layer1, self.bn1),
            (self.layer2, self.bn2),
            (self.layer3, self.bn3),
            (self.layer4, self.bn4),
        )
        for linear, norm in hidden_blocks:
            x = linear(x)
            if self.bn:
                x = norm(x)
            x = self.drop(torch.sigmoid(x))
        return self.layer5(x)

    def train_model(self, num_epochs, train_loader, device, clip_gradient=False):
        """Train the model and return the mean training loss of every epoch.

        Args:
            num_epochs: Number of passes over ``train_loader``.
            train_loader: Iterable yielding ``(images, labels)`` batches.
            device: Device the model and every batch are moved to.
            clip_gradient: When true, clip the global gradient norm to 1.0
                before each optimizer step.

        Returns:
            A list with one float per epoch: the sample-weighted mean loss.
        """
        self.to(device)  # ensure model parameters are on the target device

        # Build the optimizer on demand so that forgetting init_optim() does
        # not end in an AttributeError halfway into the first batch.
        if getattr(self, "optimizer", None) is None:
            self.init_optim()

        epoch_losses = []
        for epoch in range(num_epochs):
            self.train()
            running_loss = 0.0
            samples_seen = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                batch_size = images.size(0)

                self.optimizer.zero_grad()
                outputs = self(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                if clip_gradient:
                    torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
                self.optimizer.step()

                # criterion returns the batch mean; weight it by the batch size
                running_loss += loss.item() * batch_size
                samples_seen += batch_size

            # Normalise by the samples actually processed: len(loader.dataset)
            # is wrong with drop_last / samplers and missing on plain iterables.
            epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
            epoch_losses.append(epoch_loss)
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        return epoch_losses

# Overfitting

In [None]:
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn.init import xavier_uniform_,kaiming_uniform_

class ModelWithBN(nn.Module):
    """Fully connected classifier: 784 -> 240 -> 150 -> 50 -> 28 -> 10.

    Every hidden linear layer is followed by a sigmoid. When ``bn`` is truthy
    a ``BatchNorm1d`` layer is applied between the linear layer and the
    sigmoid. The last layer returns raw logits, which are fed to
    ``CrossEntropyLoss`` during training.

    Args:
        lr: Learning rate used by the SGD optimizer built in ``init_optim``.
        bn: Whether ``forward`` applies the batch-normalisation layers
            (off by default, despite the class name).
    """

    def __init__(self, lr = 0.01, bn = False):
        super().__init__()

        self.bn = bn  # batch-normalisation activation status

        # The BatchNorm1d layers are always created (and therefore always part
        # of parameters()/state_dict()); ``bn`` only decides whether forward()
        # uses them.
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(784, 240)
        self.bn1 = nn.BatchNorm1d(240)

        self.layer2 = nn.Linear(240, 150)
        self.bn2 = nn.BatchNorm1d(150)

        self.layer3 = nn.Linear(150, 50)
        self.bn3 = nn.BatchNorm1d(50)

        self.layer4 = nn.Linear(50, 28)
        self.bn4 = nn.BatchNorm1d(28)

        self.layer5 = nn.Linear(28, 10)

        self.criterion = nn.CrossEntropyLoss()
        self.lr = lr  # stored so init_optim() can be called later

    def init_random_weights(self):
        """Re-draw every Linear weight with ``nn.init.normal_`` and rebuild the optimizer.

        Biases and batch-norm parameters are left untouched.
        """
        def init_weights(m):
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight)
        self.apply(init_weights)
        self.init_optim()  # initialize the optimizer after the weights

    def init_optim(self):
        """Create a plain SGD optimizer over all parameters using ``self.lr``."""
        self.optimizer = optim.SGD(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return the 10 class logits for a batch of inputs.

        The input is flattened per sample and must provide 784 features
        (e.g. 1x28x28 images).
        """
        x = self.flatten(x)

        hidden_blocks = (
            (self.layer1, self.bn1),
            (self.layer2, self.bn2),
            (self.layer3, self.bn3),
            (self.layer4, self.bn4),
        )
        for linear, norm in hidden_blocks:
            x = linear(x)
            if self.bn:
                x = norm(x)
            x = torch.sigmoid(x)
        return self.layer5(x)

    def train_model(self, num_epochs, train_loader, device, clip_gradient=False):
        """Train the model and return the mean training loss of every epoch.

        Args:
            num_epochs: Number of passes over ``train_loader``.
            train_loader: Iterable yielding ``(images, labels)`` batches.
            device: Device the model and every batch are moved to.
            clip_gradient: When true, clip the global gradient norm to 1.0
                before each optimizer step.

        Returns:
            A list with one float per epoch: the sample-weighted mean loss.
        """
        self.to(device)  # ensure model parameters are on the target device

        # Build the optimizer on demand so that forgetting init_optim() does
        # not end in an AttributeError halfway into the first batch.
        if getattr(self, "optimizer", None) is None:
            self.init_optim()

        epoch_losses = []
        for epoch in range(num_epochs):
            self.train()
            running_loss = 0.0
            samples_seen = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                batch_size = images.size(0)

                self.optimizer.zero_grad()
                outputs = self(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                if clip_gradient:
                    torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
                self.optimizer.step()

                # criterion returns the batch mean; weight it by the batch size
                running_loss += loss.item() * batch_size
                samples_seen += batch_size

            # Normalise by the samples actually processed: len(loader.dataset)
            # is wrong with drop_last / samplers and missing on plain iterables.
            epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
            epoch_losses.append(epoch_loss)
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        return epoch_losses