# Nguyễn Ngọc Đạt - 11200745 

In [48]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models

# Data augmentation

In [49]:
transform = transforms.Compose([
    transforms.Resize((28,28)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load dataset

In [51]:
train_data = torchvision.datasets.FashionMNIST(
    root="./data", train=True, download=True, transform=transform
)
test_data = torchvision.datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=transform
)

# Create dataloaders
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=64, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=64, shuffle=False
)

In [52]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [53]:
#cifar_10_pre_model = models.vgg11(pretrained = True)
#cifar_10_pre_model.load_state_dict(torch.load("cifar10_mini_vgg.pth"))

# Model and Fine-tuning

In [54]:
class MiniVGG(nn.Module):
    def __init__(self,):
        super(MiniVGG, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(1,64,kernel_size=3,padding=1),
                                      nn.ReLU(inplace=True),
                                      nn.Conv2d(64,64,kernel_size=3,padding=1),
                                      nn.ReLU(inplace=True),
                                      nn.MaxPool2d(kernel_size=2,stride=2),
                                      
                                      nn.Conv2d(64,128,kernel_size=3,padding=1),
                                      nn.ReLU(inplace=True),
                                      nn.Conv2d(128,128,kernel_size=3,padding=1),
                                      nn.ReLU(inplace=True),
                                      nn.MaxPool2d(kernel_size=2,stride=2),
                                      
                                      nn.Conv2d(128,256,kernel_size=3,padding=1),
                                      nn.ReLU(inplace=True),
                                      nn.Conv2d(256,256,kernel_size=3,padding=1),
                                      nn.ReLU(inplace=True),
                                      nn.MaxPool2d(kernel_size=2, stride=2),
                                     
                                      
        )
        self.classifier = nn.Linear(256*3*3,10)
        nn.init.normal_(self.classifier.weight,0, 0.01)
        nn.init.constant_(self.classifier.bias,0)
    
    
    def forward(self,x,):
        x = self.features(x)
        x = torch.flatten(x,1)
        x = self.classifier(x)
        return x

In [55]:
model = MiniVGG().to(device)
model.load_state_dict(torch.load("cifar10_mini_vgg.pth", map_location=torch.device('cpu')))
for param in model.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True

In [56]:
model_2 = MiniVGG().to(device)
model_2.load_state_dict(torch.load('mnist_mini_vgg.pth',map_location=torch.device('cpu')))
for param in model_2.parameters():
    param.requires_grad = False
for param in model_2.classifier.parameters():
    param.requires_grad = True

In [57]:
model_3 = MiniVGG().to(device)

In [58]:
model

MiniVGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [59]:
model_2

MiniVGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [60]:
model_3

MiniVGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [61]:
# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Set the batch size of the model
#model.batch_size = 3

for epoch in range(5):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        #loss.requires_grad = True
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        #loss.requires_grad = True
        optimizer.step()

        if (i + 1) % 100 == 0:
            print("Epoch: {}/5, Step: {}/{}, Loss: {:.4f}".format(
                epoch + 1, i + 1, len(train_loader), loss.item()
            ))
model.eval()
with torch.no_grad():
    total_correct = 0
    total_samples = 0

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        batch_size = images.size(0)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    accuracy = total_correct / total_samples

    print(f' Accuracy: {accuracy}')

Epoch: 1/5, Step: 100/938, Loss: 0.9675
Epoch: 1/5, Step: 200/938, Loss: 1.6599
Epoch: 1/5, Step: 300/938, Loss: 0.4458
Epoch: 1/5, Step: 400/938, Loss: 0.8714
Epoch: 1/5, Step: 500/938, Loss: 0.3587
Epoch: 1/5, Step: 600/938, Loss: 0.4434
Epoch: 1/5, Step: 700/938, Loss: 0.5062
Epoch: 1/5, Step: 800/938, Loss: 0.7080
Epoch: 1/5, Step: 900/938, Loss: 0.4727
Epoch: 2/5, Step: 100/938, Loss: 0.4045
Epoch: 2/5, Step: 200/938, Loss: 0.3888
Epoch: 2/5, Step: 300/938, Loss: 0.3312
Epoch: 2/5, Step: 400/938, Loss: 0.2357
Epoch: 2/5, Step: 500/938, Loss: 0.4038
Epoch: 2/5, Step: 600/938, Loss: 0.4527
Epoch: 2/5, Step: 700/938, Loss: 0.3560
Epoch: 2/5, Step: 800/938, Loss: 0.3118
Epoch: 2/5, Step: 900/938, Loss: 0.4891
Epoch: 3/5, Step: 100/938, Loss: 0.4354
Epoch: 3/5, Step: 200/938, Loss: 0.2841
Epoch: 3/5, Step: 300/938, Loss: 0.3805
Epoch: 3/5, Step: 400/938, Loss: 0.1917
Epoch: 3/5, Step: 500/938, Loss: 0.4397
Epoch: 3/5, Step: 600/938, Loss: 0.3439
Epoch: 3/5, Step: 700/938, Loss: 0.3759


In [62]:
# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_2.parameters(), lr=0.001, momentum=0.9)

# Set the batch size of the model
model_2.batch_size = 3

for epoch in range(5):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model_2(images)
        #loss.requires_grad = True
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        #loss.requires_grad = True
        optimizer.step()

        if (i + 1) % 100 == 0:
            print("Epoch: {}/5, Step: {}/{}, Loss: {:.4f}".format(
                epoch + 1, i + 1, len(train_loader), loss.item()
            ))
            
        
        
        
model_2.eval()
with torch.no_grad():
    total_correct = 0
    total_samples = 0

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        batch_size = images.size(0)
        outputs = model_2(images)
        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    accuracy = total_correct / total_samples

    print(f'Accuracy: {accuracy}')


Epoch: 1/5, Step: 100/938, Loss: 1.1490
Epoch: 1/5, Step: 200/938, Loss: 0.4822
Epoch: 1/5, Step: 300/938, Loss: 1.0779
Epoch: 1/5, Step: 400/938, Loss: 0.4449
Epoch: 1/5, Step: 500/938, Loss: 0.5311
Epoch: 1/5, Step: 600/938, Loss: 0.4766
Epoch: 1/5, Step: 700/938, Loss: 0.5198
Epoch: 1/5, Step: 800/938, Loss: 0.4444
Epoch: 1/5, Step: 900/938, Loss: 0.4142
Epoch: 2/5, Step: 100/938, Loss: 0.3893
Epoch: 2/5, Step: 200/938, Loss: 0.3088
Epoch: 2/5, Step: 300/938, Loss: 0.4858
Epoch: 2/5, Step: 400/938, Loss: 0.7726
Epoch: 2/5, Step: 500/938, Loss: 0.3890
Epoch: 2/5, Step: 600/938, Loss: 0.4285
Epoch: 2/5, Step: 700/938, Loss: 0.3552
Epoch: 2/5, Step: 800/938, Loss: 0.4504
Epoch: 2/5, Step: 900/938, Loss: 0.3976
Epoch: 3/5, Step: 100/938, Loss: 0.3937
Epoch: 3/5, Step: 200/938, Loss: 0.4769
Epoch: 3/5, Step: 300/938, Loss: 0.4749
Epoch: 3/5, Step: 400/938, Loss: 0.2886
Epoch: 3/5, Step: 500/938, Loss: 0.5102
Epoch: 3/5, Step: 600/938, Loss: 0.3737
Epoch: 3/5, Step: 700/938, Loss: 0.3354


In [63]:
# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_3.parameters(), lr=0.001, momentum=0.9)


for epoch in range(5):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model_3(images)
        #loss.requires_grad = True
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        #loss.requires_grad = True
        optimizer.step()

        if (i + 1) % 100 == 0:
            print("Epoch: {}/5, Step: {}/{}, Loss: {:.4f}".format(
                epoch + 1, i + 1, len(train_loader), loss.item()
            ))

Epoch: 1/5, Step: 100/938, Loss: 2.3002
Epoch: 1/5, Step: 200/938, Loss: 2.3010
Epoch: 1/5, Step: 300/938, Loss: 2.2979
Epoch: 1/5, Step: 400/938, Loss: 2.2763
Epoch: 1/5, Step: 500/938, Loss: 2.1363
Epoch: 1/5, Step: 600/938, Loss: 1.0979
Epoch: 1/5, Step: 700/938, Loss: 0.7458
Epoch: 1/5, Step: 800/938, Loss: 0.8440
Epoch: 1/5, Step: 900/938, Loss: 0.6088
Epoch: 2/5, Step: 100/938, Loss: 0.7156
Epoch: 2/5, Step: 200/938, Loss: 0.6481
Epoch: 2/5, Step: 300/938, Loss: 0.7465
Epoch: 2/5, Step: 400/938, Loss: 0.7266
Epoch: 2/5, Step: 500/938, Loss: 0.3460
Epoch: 2/5, Step: 600/938, Loss: 0.6642
Epoch: 2/5, Step: 700/938, Loss: 0.3827
Epoch: 2/5, Step: 800/938, Loss: 0.4673
Epoch: 2/5, Step: 900/938, Loss: 0.6610
Epoch: 3/5, Step: 100/938, Loss: 0.5888
Epoch: 3/5, Step: 200/938, Loss: 0.4494
Epoch: 3/5, Step: 300/938, Loss: 0.5883
Epoch: 3/5, Step: 400/938, Loss: 0.4256
Epoch: 3/5, Step: 500/938, Loss: 0.4793
Epoch: 3/5, Step: 600/938, Loss: 0.5066
Epoch: 3/5, Step: 700/938, Loss: 0.4024


In [64]:
model_3.eval()
with torch.no_grad():
    total_correct = 0
    total_samples = 0

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        batch_size = images.size(0)
        outputs = model_3(images)
        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    accuracy = total_correct / total_samples

    print(f' Accuracy: {accuracy}')

 Accuracy: 0.8522


### Mô hình được pretrain với tập dữ liệu ci_far10 cho ra kết quả accuracy cao nhất so với 2 mô hình còn lại. Lí do em nghĩ vì mô hình này đã được pretrain trước nên n sẽ học được đa dạng các thông tin hơn. Nếu train nhiều epochs hơn thì em nghĩ accuracy của 2 mô hình được pretrain vẫn sẽ nhỉnh hơn so với mô hình gốc 

# Feature extractor

In [66]:
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor

train_nodes, eval_nodes = get_graph_node_names(model_3)

In [67]:
create_feature_extractor(model_3, train_return_nodes= train_nodes, eval_return_nodes= eval_nodes)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [68]:
model_3.features[0].weight

Parameter containing:
tensor([[[[ 0.3133,  0.3257,  0.0892],
          [ 0.2212, -0.2326, -0.2578],
          [ 0.1199, -0.2532, -0.1962]]],


        [[[-0.1134,  0.2938,  0.1979],
          [-0.0522,  0.2250, -0.1292],
          [ 0.1637, -0.2342, -0.0569]]],


        [[[-0.2860, -0.2889, -0.2345],
          [ 0.2677, -0.0758, -0.1925],
          [-0.0365,  0.1645, -0.1211]]],


        [[[-0.1775,  0.2954,  0.2730],
          [-0.0598, -0.1778,  0.0235],
          [ 0.1670,  0.2258, -0.1984]]],


        [[[ 0.1088,  0.2465,  0.3005],
          [ 0.2318, -0.0497,  0.2998],
          [ 0.2250, -0.2388, -0.1088]]],


        [[[-0.1128, -0.0531, -0.1690],
          [-0.3497, -0.0665, -0.3758],
          [-0.3149, -0.2658, -0.0942]]],


        [[[ 0.0211,  0.3315,  0.0070],
          [ 0.2136, -0.2346, -0.2234],
          [-0.1390, -0.1503,  0.2971]]],


        [[[ 0.1855, -0.2177,  0.0836],
          [ 0.0762,  0.1932, -0.2714],
          [ 0.1657, -0.1377, -0.2827]]],


        [[