## HW2

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import time
from torch.optim.lr_scheduler import ReduceLROnPlateau

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [2]:
# see more data augmentation https://pytorch.org/vision/stable/transforms.html
mean = (0.5071, 0.4867, 0.4408)
std = (0.2675, 0.2565, 0.2761)

#參考 https://github.com/weiaicunzai/pytorch-cifar100/blob/master/utils.py
train_transform = transforms.Compose(
    [
    #  transforms.RandomCrop(32, padding=4),
    #  transforms.RandomHorizontalFlip(),
    #  transforms.RandomRotation(15),
     transforms.RandAugment(),
     transforms.ToTensor(),
     transforms.Normalize(mean, std)]) # calculte yourself

test_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(mean, std)]) # calculte yourself 

batch_size = 128
num_classes = 100    # check

trainset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                        download=True, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=8)

testset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                       download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=8)


Files already downloaded and verified
Files already downloaded and verified


In [3]:
def model_module(model, trainloader, testloader):

  start = time.time()
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
  scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-8, verbose=True)

  total_epoch = 50
  print_per_iteration = 100

  train_acc=[]
  train_loss=[]
  test_acc=[]
  test_loss=[]

  min_acc=0

  for epoch in range(total_epoch):  # loop over the dataset multiple times
      for i, data in enumerate(trainloader, 0):
          # get the inputs; data is a list of [inputs, labels]
          inputs, labels = data
          inputs = inputs.to(device)
          labels = labels.to(device)
          # zero the parameter gradients
          optimizer.zero_grad()
          # forward + backward + optimize
          outputs = model(inputs)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()
      

          # print statistics
          if (i+1) % print_per_iteration == 0:  
              print(f'[ep {epoch + 1}][{i + 1:5d}/{len(trainloader):5d}] loss: {loss.item():.3f}')

      # Test acc,loss in epoch
      # fixed testing process
      correct = 0
      total = 0
      # since we're not training, we don't need to calculate the gradients for our outputs
      with torch.no_grad():
          for data in testloader:
              images, labels = data
              images = images.to(device)
              labels = labels.to(device)
              # calculate outputs by running images through the network
              outputs = model(images)
              loss = criterion(outputs, labels)
              # the class with the highest energy is what we choose as prediction
              _, predicted = torch.max(outputs.data, 1)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

      test_acc.append(100 * correct / total)
      test_loss.append(loss.cpu().numpy())
      print('test loss', test_loss[-1])
      print(f'test accuracy {test_acc[-1]:.2f} %')

      # Train acc,loss in epoch
      correct = 0
      total = 0
      with torch.no_grad():
          for data in trainloader:
              images, labels = data
              images = images.to(device)
              labels = labels.to(device)
              # calculate outputs by running images through the network
              outputs = model(images)
              loss = criterion(outputs, labels)
              # the class with the highest energy is what we choose as prediction
              _, predicted = torch.max(outputs.data, 1)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

      train_acc.append(100 * correct / total)
      train_loss.append(loss.cpu().numpy())
      print('train loss', train_loss[-1])
      print(f'train accuracy {train_acc[-1]:.2f} %')

      scheduler.step(test_loss[-1])
      
      if min_acc<test_acc[-1]:
        min_acc = test_acc[-1]
        torch.save(model, save_path)
        print("model saved !")
      
      print("--------------------------------------------------")

  end = time.time()
  cost_time = end-start
  return [cost_time, train_acc, train_loss, test_acc, test_loss]


In [3]:
class Identity(nn.Module):
  def __init__(self):
    super(Identity, self).__init__()

  def forward(self, x):
    return x

In [5]:
model = models.resnet18(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(512, num_classes)
# print(model)
model.to(device)
save_path = './resnet18_model.pth'
torch.save(model, save_path)
stats_resnet18 = model_module(model, trainloader, testloader)
np.save("stats_resnet18",stats_resnet18) 

[ep 1][  100/  391] loss: 3.637
[ep 1][  200/  391] loss: 3.202
[ep 1][  300/  391] loss: 2.637
test loss 2.4003334
test accuracy 48.15 %
train loss 2.4624274
train accuracy 45.12 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.847
[ep 2][  200/  391] loss: 1.938
[ep 2][  300/  391] loss: 1.682
test loss 1.8726538
test accuracy 59.26 %
train loss 1.4657743
train accuracy 58.99 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 1.579
[ep 3][  200/  391] loss: 1.274
[ep 3][  300/  391] loss: 1.568
test loss 1.5819621
test accuracy 64.01 %
train loss 1.0777464
train accuracy 67.38 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 1.093
[ep 4][  200/  391] loss: 1.167
[ep 4][  300/  391] loss: 1.086
test loss 1.4878446
test accuracy 66.81 %
train loss 1.0992831
train accuracy 72.88 %
model saved !
--------------------------------------------------
[ep 5][  100

  return array(a, dtype, copy=False, order=order, subok=True)


In [10]:
model = models.resnet34(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(512, num_classes)
# print(model)
model.to(device)
save_path = './resnet34_model.pth'
torch.save(model, save_path)
stats_resnet34 = model_module(model, trainloader, testloader)
np.save("stats_resnet34",stats_resnet34) 

[ep 1][  100/  391] loss: 3.382
[ep 1][  200/  391] loss: 2.720
[ep 1][  300/  391] loss: 2.150
test loss 1.8308368
test accuracy 58.09 %
train loss 1.7909819
train accuracy 55.55 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.444
[ep 2][  200/  391] loss: 1.736
[ep 2][  300/  391] loss: 1.400
test loss 1.2262957
test accuracy 68.29 %
train loss 0.9529275
train accuracy 69.36 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 1.053
[ep 3][  200/  391] loss: 0.873
[ep 3][  300/  391] loss: 1.173
test loss 1.2449962
test accuracy 72.42 %
train loss 0.70791566
train accuracy 76.79 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 0.871
[ep 4][  200/  391] loss: 0.836
[ep 4][  300/  391] loss: 0.820
test loss 0.6412384
test accuracy 73.56 %
train loss 0.57736623
train accuracy 81.31 %
model saved !
--------------------------------------------------
[ep 5][  1

In [11]:
model = models.resnet50(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(2048, num_classes)
#print(model)
model.to(device)
save_path = './resnet50_model.pth'
torch.save(model, save_path)
stats_resnet50 = model_module(model, trainloader, testloader)
np.save("stats_resnet50",stats_resnet50) 

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Eclab/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:08<00:00, 11.8MB/s]


[ep 1][  100/  391] loss: 3.331
[ep 1][  200/  391] loss: 2.414
[ep 1][  300/  391] loss: 1.899
test loss 2.0989385
test accuracy 56.70 %
train loss 2.0392523
train accuracy 54.23 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.701
[ep 2][  200/  391] loss: 1.273
[ep 2][  300/  391] loss: 1.384
test loss 1.467102
test accuracy 66.29 %
train loss 1.3760376
train accuracy 68.28 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 1.190
[ep 3][  200/  391] loss: 0.855
[ep 3][  300/  391] loss: 0.971
test loss 1.2384456
test accuracy 69.95 %
train loss 0.9702525
train accuracy 74.28 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 0.684
[ep 4][  200/  391] loss: 0.810
[ep 4][  300/  391] loss: 0.737
test loss 1.262459
test accuracy 71.08 %
train loss 0.7840253
train accuracy 78.98 %
model saved !
--------------------------------------------------
[ep 5][  100/ 

In [4]:
from typing import Iterable

import torch
from torch.optim._multi_tensor import SGD

__all__ = ["SAMSGD"]


class SAMSGD(SGD):
    """ SGD wrapped with Sharp-Aware Minimization
    Args:
        params: tensors to be optimized
        lr: learning rate
        momentum: momentum factor
        dampening: damping factor
        weight_decay: weight decay factor
        nesterov: enables Nesterov momentum
        rho: neighborhood size
    """

    def __init__(self,
                 params: Iterable[torch.Tensor],
                 lr: float,
                 momentum: float = 0,
                 dampening: float = 0,
                 weight_decay: float = 0,
                 nesterov: bool = False,
                 rho: float = 0.05,
                 ):
        if rho <= 0:
            raise ValueError(f"Invalid neighborhood size: {rho}")
        super().__init__(params, lr, momentum, dampening, weight_decay, nesterov)
        # todo: generalize this
        if len(self.param_groups) > 1:
            raise ValueError("Not supported")
        self.param_groups[0]["rho"] = rho

    @torch.no_grad()
    def step(self,
             closure
             ) -> torch.Tensor:
        """
        Args:
            closure: A closure that reevaluates the model and returns the loss.
        Returns: the loss value evaluated on the original point
        """
        closure = torch.enable_grad()(closure)
        loss = closure().detach()

        for group in self.param_groups:
            grads = []
            params_with_grads = []

            rho = group['rho']
            # update internal_optim's learning rate

            for p in group['params']:
                if p.grad is not None:
                    # without clone().detach(), p.grad will be zeroed by closure()
                    grads.append(p.grad.clone().detach())
                    params_with_grads.append(p)
            device = grads[0].device

            # compute \hat{\epsilon}=\rho/\norm{g}\|g\|
            grad_norm = torch.stack([g.detach().norm(2).to(device) for g in grads]).norm(2)
            epsilon = grads  # alias for readability
            torch._foreach_mul_(epsilon, rho / grad_norm)

            # virtual step toward \epsilon
            torch._foreach_add_(params_with_grads, epsilon)
            # compute g=\nabla_w L_B(w)|_{w+\hat{\epsilon}}
            closure()
            # virtual step back to the original point
            torch._foreach_sub_(params_with_grads, epsilon)

        super().step()
        return loss

In [5]:
def model_module_SAM(model, trainloader, testloader):

  start = time.time()
  criterion = nn.CrossEntropyLoss()
  # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
  optimizer = SAMSGD(model.parameters(), lr=1e-1, rho=0.05)
  scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-8, verbose=True)
  
  total_epoch = 200
  print_per_iteration = 100

  train_acc=[]
  train_loss=[]
  test_acc=[]
  test_loss=[]

  min_acc=0

  for epoch in range(total_epoch):  # loop over the dataset multiple times
      for i, data in enumerate(trainloader, 0):
          # get the inputs; data is a list of [inputs, labels]
          inputs, labels = data
          inputs = inputs.to(device)
          labels = labels.to(device)
          # zero the parameter gradients
          def closure():
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            return loss
          
          loss = optimizer.step(closure)
          
          # print statistics
          if (i+1) % print_per_iteration == 0:  
              print(f'[ep {epoch + 1}][{i + 1:5d}/{len(trainloader):5d}] loss: {loss.item():.3f}')

      # Test acc,loss in epoch
      # fixed testing process
      correct = 0
      total = 0
      # since we're not training, we don't need to calculate the gradients for our outputs
      with torch.no_grad():
          for data in testloader:
              images, labels = data
              images = images.to(device)
              labels = labels.to(device)
              # calculate outputs by running images through the network
              outputs = model(images)
              loss = criterion(outputs, labels)
              # the class with the highest energy is what we choose as prediction
              _, predicted = torch.max(outputs.data, 1)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

      test_acc.append(100 * correct / total)
      test_loss.append(loss.cpu().numpy())
      print('test loss', test_loss[-1])
      print(f'test accuracy {test_acc[-1]:.2f} %')

      # Train acc,loss in epoch
      correct = 0
      total = 0
      with torch.no_grad():
          for data in trainloader:
              images, labels = data
              images = images.to(device)
              labels = labels.to(device)
              # calculate outputs by running images through the network
              outputs = model(images)
              loss = criterion(outputs, labels)
              # the class with the highest energy is what we choose as prediction
              _, predicted = torch.max(outputs.data, 1)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

      train_acc.append(100 * correct / total)
      train_loss.append(loss.cpu().numpy())
      print('train loss', train_loss[-1])
      print(f'train accuracy {train_acc[-1]:.2f} %')

      scheduler.step(test_loss[-1])
      
      if min_acc<test_acc[-1]:
        min_acc = test_acc[-1]
        torch.save(model, save_path)
        print("model saved !")
      
      print("--------------------------------------------------")

  end = time.time()
  cost_time = end-start
  return [cost_time, train_acc, train_loss, test_acc, test_loss]


In [16]:
model = models.resnet18(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(512, num_classes)
#print(model)
model.to(device)
save_path = './resnet18_SAM_model.pth'
torch.save(model, save_path)
stats_resnet18_SAM = model_module_SAM(model, trainloader, testloader)
np.save("stats_resnet18_SAM",stats_resnet18_SAM)

[ep 1][  100/  391] loss: 2.677
[ep 1][  200/  391] loss: 2.298
[ep 1][  300/  391] loss: 2.013
test loss 2.0676746
test accuracy 54.40 %
train loss 1.5310919
train accuracy 52.14 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.568
[ep 2][  200/  391] loss: 1.316
[ep 2][  300/  391] loss: 1.320
test loss 1.7512313
test accuracy 63.13 %
train loss 1.2681013
train accuracy 63.86 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 1.256
[ep 3][  200/  391] loss: 1.179
[ep 3][  300/  391] loss: 1.257
test loss 1.1630785
test accuracy 67.15 %
train loss 1.0565994
train accuracy 69.44 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 0.946
[ep 4][  200/  391] loss: 0.901
[ep 4][  300/  391] loss: 1.121
test loss 0.87678754
test accuracy 70.13 %
train loss 0.91306365
train accuracy 76.32 %
model saved !
--------------------------------------------------
[ep 5][  1

In [17]:
model = models.resnet34(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(512, num_classes)
#print(model)
model.to(device)
save_path = './resnet34_SAM_model.pth'
torch.save(model, save_path)
stats_resnet34_SAM = model_module_SAM(model, trainloader, testloader)
np.save("stats_resnet34_SAM", stats_resnet34_SAM)

[ep 1][  100/  391] loss: 2.998
[ep 1][  200/  391] loss: 2.279
[ep 1][  300/  391] loss: 1.832
test loss 1.2909801
test accuracy 59.66 %
train loss 1.4650767
train accuracy 57.03 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.217
[ep 2][  200/  391] loss: 1.581
[ep 2][  300/  391] loss: 1.199
test loss 1.4393035
test accuracy 68.02 %
train loss 1.0051525
train accuracy 69.10 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 1.034
[ep 3][  200/  391] loss: 0.906
[ep 3][  300/  391] loss: 0.934
test loss 0.978389
test accuracy 72.57 %
train loss 0.8418201
train accuracy 76.26 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 0.783
[ep 4][  200/  391] loss: 0.739
[ep 4][  300/  391] loss: 0.929
test loss 0.64115876
test accuracy 75.30 %
train loss 0.7467891
train accuracy 81.41 %
model saved !
--------------------------------------------------
[ep 5][  100

  return array(a, dtype, copy=False, order=order, subok=True)


In [18]:
model = models.resnet50(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(2048, num_classes)
#print(model)
model.to(device)
save_path = './resnet50_SAM_model.pth'
torch.save(model, save_path)
stats_resnet50_SAM = model_module_SAM(model, trainloader, testloader)
np.save("stats_resnet50_SAM", stats_resnet50_SAM)

[ep 1][  100/  391] loss: 2.821
[ep 1][  200/  391] loss: 1.968
[ep 1][  300/  391] loss: 1.444
test loss 1.3780203
test accuracy 63.36 %
train loss 1.5123036
train accuracy 61.03 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.468
[ep 2][  200/  391] loss: 1.168
[ep 2][  300/  391] loss: 1.183
test loss 1.2391137
test accuracy 69.66 %
train loss 1.0584048
train accuracy 70.13 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 0.815
[ep 3][  200/  391] loss: 0.906
[ep 3][  300/  391] loss: 1.004
test loss 0.7996159
test accuracy 73.43 %
train loss 0.6597194
train accuracy 77.89 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 0.684
[ep 4][  200/  391] loss: 0.668
[ep 4][  300/  391] loss: 0.855
test loss 0.7603228
test accuracy 75.48 %
train loss 0.46425086
train accuracy 81.34 %
model saved !
--------------------------------------------------
[ep 5][  10

  return array(a, dtype, copy=False, order=order, subok=True)


In [6]:
model = models.resnet50(pretrained=True) 
model.conv1 = nn.Conv2d(3,64,3,1,1)
model.maxpool = Identity()
model.fc = torch.nn.Linear(2048, num_classes)
#print(model)
model.to(device)
save_path = './resnet50_SAM_model_ep200.pth'
torch.save(model, save_path)
stats_resnet50_SAM_ep200 = model_module_SAM(model, trainloader, testloader)
np.save("stats_resnet50_SAM_ep200",stats_resnet50_SAM_ep200)

[ep 1][  100/  391] loss: 2.287
[ep 1][  200/  391] loss: 1.503
[ep 1][  300/  391] loss: 1.430
test loss 1.3000727
test accuracy 65.64 %
train loss 1.2258074
train accuracy 63.59 %
model saved !
--------------------------------------------------
[ep 2][  100/  391] loss: 1.191
[ep 2][  200/  391] loss: 1.017
[ep 2][  300/  391] loss: 0.987
test loss 0.9708352
test accuracy 73.81 %
train loss 0.98314476
train accuracy 76.08 %
model saved !
--------------------------------------------------
[ep 3][  100/  391] loss: 1.016
[ep 3][  200/  391] loss: 0.683
[ep 3][  300/  391] loss: 0.776
test loss 0.85316694
test accuracy 75.24 %
train loss 0.5151474
train accuracy 79.60 %
model saved !
--------------------------------------------------
[ep 4][  100/  391] loss: 0.534
[ep 4][  200/  391] loss: 0.654
[ep 4][  300/  391] loss: 0.495
test loss 0.8264072
test accuracy 77.10 %
train loss 0.55772245
train accuracy 83.31 %
model saved !
--------------------------------------------------
[ep 5][  

  return array(a, dtype, copy=False, order=order, subok=True)


In [None]:
# load trained model
model = torch.load("./model.pth")
model.to(device)

# fixed testing process
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f} %')

Accuracy of the network on the 10000 test images: 13.33 %


Accuracy of the network on the 10000 test images: 60.19 %

In [None]:
# model = models.mobilenet_v3_large()
# torch.save(model, "./model.pth")

# see size of saved model
! du -h model.pth

98M	model.pth
