<a href="https://colab.research.google.com/github/champsleague/DeepLearning/blob/main/DL_Lab06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only helper: mount Google Drive at the path below.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


# Import Dependencies

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import torchvision.datasets as datasets
import torchvision.transforms as transforms


# Device Definition 

In [None]:
# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
use_cuda = torch.cuda.is_available()
device_name = "cuda" if use_cuda else "cpu"
device = torch.device(device_name)
print(device)

cuda


# Define Hyperparameters

In [None]:
# Mini-batch size used by both the training and the test DataLoader.
batch_size = 128
# Print the training loss about every 16,000 samples (125 batches at batch_size=128).
# Clamped to at least 1 so `batch_idx % print_train_step` in train() can never
# divide by zero when batch_size exceeds 16,000.
print_train_step = max(1, int(1000*16/batch_size))
# Initial learning rate handed to the optimizer (0.01 was tried previously).
# The misspelled name is kept because other cells reference `learnning_rate`.
learnning_rate = 0.02
# Number of training epochs.
epochs = 10

# Extra DataLoader options: one worker process and pinned host memory, only when CUDA is used.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Downloading & Loading MNIST data

In [None]:
# One preprocessing pipeline shared by both splits so they can never drift apart:
# resize to 112x112, convert to a tensor, then normalise with a fixed mean/std
# (presumably the MNIST pixel statistics -- TODO confirm).
mnist_transform = transforms.Compose([
    transforms.Resize([112, 112]),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: shuffled every epoch.
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)

# Test split: download=True makes this statement self-sufficient (it is a no-op
# when the files already exist), and shuffle=False keeps evaluation order
# deterministic -- the aggregated loss/accuracy do not depend on sample order.
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

print('number of training data : ',len(train_dataset))
print('number of test data : ',len(test_dataset))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 145916551.71it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 38664441.05it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 34001432.63it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 18695317.73it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

number of training data :  60000
number of test data :  10000


# Load pretrained model (Transfer Learning)

In [None]:
# Build torchvision's ResNet-18 and request its pretrained weights (pretrained=True).
model = models.resnet18(pretrained=True)
# Print the architecture so the layers replaced in the fine-tuning cell (conv1, fc) can be inspected.
print(model)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 273MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# Fine-tuning


In [None]:
# Adapt the pretrained ResNet-18 to the 1-channel, 10-class task:
#  - conv1: new 3x3 / stride-1 stem taking 1 input channel (the pretrained stem takes 3),
#  - fc:    new 512 -> 10 classifier head.
# Both replacements are newly constructed modules, so they carry no pretrained weights.
model.conv1 = nn.Conv2d(1, 64, kernel_size=(3, 3), stride = (1, 1), padding = (1, 1), bias=False)
model.fc = nn.Linear(512, 10, bias = True)

# Move the model, including the two new layers, to the selected device.
model.to(device)

# Train only parameters whose name contains one of these substrings; freeze the rest.
# NOTE(review): 'conv1' is a substring match, so it selects the stem `conv1.weight`
# AND the first conv of every residual block (`layerN.M.conv1.weight`). All BatchNorm
# parameters, including the stem's `bn1`, stay frozen and are not given to the
# optimizer. Match on exact names instead if only the stem and head should be trained.
trainable_keywords = ('conv1', 'fc')

params_to_update = []
for name, param in model.named_parameters():
  is_trainable = any(keyword in name for keyword in trainable_keywords)
  param.requires_grad = is_trainable
  if is_trainable:
    print(name)
    params_to_update.append(param)

print(model)

conv1.weight
layer1.0.conv1.weight
layer1.1.conv1.weight
layer2.0.conv1.weight
layer2.1.conv1.weight
layer3.0.conv1.weight
layer3.1.conv1.weight
layer4.0.conv1.weight
layer4.1.conv1.weight
fc.weight
fc.bias
ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1):

# Define Loss Function & Optimizer

In [None]:
# Classification loss applied to the raw outputs of the model.
loss_function = nn.CrossEntropyLoss()

# Adadelta only receives the parameters collected in params_to_update.
optimizer = optim.Adadelta(params=params_to_update, lr=learnning_rate)

# Multiply the learning rate by 0.1 on every scheduler.step() call.
# NOTE(review): the training loop steps the scheduler once per epoch, so the
# learning rate shrinks tenfold each epoch -- confirm this decay is intended.
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=0.1)

# Define Model Training

In [None]:
def train(model, device, train_loader, optimizer, loss_function, epoch, log_interval=None):
  """Run one training epoch over `train_loader`, printing the loss periodically.

  Args:
    model: network to optimise; it is switched to training mode.
    device: torch.device each batch is moved to before the forward pass.
    train_loader: iterable of (data, target) batches that also supports
      `len()` and exposes `.dataset` (both are used in the progress message).
    optimizer: optimizer whose gradients are zeroed and stepped once per batch.
    loss_function: callable `(output, target)` returning a scalar loss tensor.
    epoch: epoch index, used only in the progress message.
    log_interval: print the loss every this many batches. When None, the
      notebook-level `print_train_step` is used. Values below 1 are clamped
      to 1 so the modulo below can never divide by zero.
  """
  if log_interval is None:
    # Fall back to the notebook-wide setting from the hyperparameter cell.
    log_interval = print_train_step
  log_interval = max(1, int(log_interval))

  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = loss_function(output, target)
    loss.backward()
    optimizer.step()

    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))


# Define Model Evaluation

In [None]:
def test(model, device, test_loader, loss_function):
  model.eval()
  test_loss = 0
  correct = 0
  
  with torch.no_grad():
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      test_loss += loss_function(output, target) # sum up batch loss
      pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
      correct += pred.eq(target.view_as(pred)).sum().item()
      
  test_loss /= len(test_loader.dataset)
  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
      test_loss, correct, len(test_loader.dataset),
      100. * correct / len(test_loader.dataset)))


# Train the Model


In [None]:
# Each epoch: one training pass, one evaluation pass, then one learning-rate scheduler step.
for epoch in range(epochs):
  train(model=model, device=device, train_loader=train_loader,
        optimizer=optimizer, loss_function=loss_function, epoch=epoch)
  test(model=model, device=device, test_loader=test_loader,
       loss_function=loss_function)
  scheduler.step()


Test set: Average loss: 0.0006, Accuracy: 9873/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9893/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9891/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9891/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9886/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9891/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9891/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9890/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9886/10000 (99%)


Test set: Average loss: 0.0005, Accuracy: 9888/10000 (99%)



# Hint --> 98% Model (Fine-tuning)

In [None]:
# Rebuild the list of trainable parameters: any parameter whose name contains
# 'conv1' or 'fc' is unfrozen and collected; every other parameter is frozen.
# The test is a substring match, so block-level names such as
# `layer1.0.conv1.weight` are selected together with the stem `conv1.weight`.
params_to_update = []
for name, param in model.named_parameters():
  trainable = ('conv1' in name) or ('fc' in name)
  param.requires_grad = trainable
  if not trainable:
    continue
  print(name)
  params_to_update.append(param)

print(model)

conv1.weight
layer1.0.conv1.weight
layer1.1.conv1.weight
layer2.0.conv1.weight
layer2.1.conv1.weight
layer3.0.conv1.weight
layer3.1.conv1.weight
layer4.0.conv1.weight
layer4.1.conv1.weight
fc.weight
fc.bias
ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1):