# Transfer learning

In [49]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data  import DataLoader,TensorDataset
import copy 
from sklearn.model_selection import train_test_split
import torchvision
import torchvision.transforms as T

import matplotlib.pyplot as plt
import pandas as pd

# Mnist dataset

In [41]:
df_train = pd.read_csv('../data/mnist_train.csv')
df_train.head()

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,774,775,776,777,778,779,780,781,782,783
0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
data = df_train.values[:30000,1:]
labels = df_train.values[:30000,0]
data.shape , labels.shape

((30000, 784), (30000,))

In [43]:
dataNorm = data / np.max(data)
dataNorm = dataNorm.reshape(dataNorm.shape[0],1,28,28)
dataNorm.shape

(30000, 1, 28, 28)

In [44]:
dataT = torch.tensor(dataNorm,dtype=torch.float32)
labelsT = torch.tensor(labels,dtype=torch.long)

In [45]:
train_data , test_data , train_labels , test_labels = train_test_split(dataT,labelsT,test_size=.4,random_state=42)

In [46]:
train_data = TensorDataset(train_data, train_labels)
test_data = TensorDataset(test_data, test_labels)

In [47]:
batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=test_data.tensors[0].shape[0])

In [48]:
train_loader.dataset.tensors[0].shape

torch.Size([18000, 1, 28, 28])

# FMNIST

In [50]:
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(.5,.5)
])

In [51]:
fmnist_train = torchvision.datasets.FashionMNIST(root='../data',train=True,download=True,transform=transform)
fmnist_test = torchvision.datasets.FashionMNIST(root='../data',train=False,download=True,transform=transform)

100%|██████████| 26.4M/26.4M [01:40<00:00, 264kB/s] 
100%|██████████| 29.5k/29.5k [00:00<00:00, 280kB/s]
100%|██████████| 4.42M/4.42M [00:36<00:00, 120kB/s] 
100%|██████████| 5.15k/5.15k [00:00<00:00, 967kB/s]


In [54]:
len(fmnist_train)

60000

In [55]:
fmnist_train_loader = DataLoader(fmnist_train,batch_size=batch_size,shuffle=True,drop_last=True)
fmnist_test_loader =  DataLoader(fmnist_test,batch_size=len(fmnist_test))

# Models

In [56]:
def createMnisNet(printtogle=False):
    class MNISnet(nn.Module):
        def __init__(self,printtogle):
            super().__init__()
             
             # convolutional layers

            self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1, padding=1)
            # out_channels represents the number of feature maps
            # the output will be np.floor((28-5+2*1)/1) + 1 = 26 
            # then we apply max pooling by spatial extent 2*2 so the output will be 26/2 = 13 
            self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5, stride=1, padding=1) 
            # output will be 13-5+2*1 = 11
            # then we apply max pooling by spatial extent 2*2 so the output will be 11/2 = 5
            # in this case we take the floor so the ceiling mode in the max pooling will be False since it give 5 not 6
            
            # Computer number of units in FC layer (number of output of conv2)
            expectSize = np.floor((5+2*0-1)/1) + 1
            expectSize = 20*int(expectSize**2)

            ### fully connected layer
            self.fc1 = nn.Linear(expectSize, 50)
            self.out = nn.Linear(50, 10)
            # togle for printing out our tensorsizes during forward pass
            self.print = printtogle

        def forward(self, x,doBN=False):
            print(f'Input: {x.shape} ') if self.print else None

            # convol -> maxpool -> relu
            x = F.relu(F.max_pool2d(self.conv1(x),2))
            print(f'Conv1/pool1: {x.shape} ') if self.print else None

            x = F.relu(F.max_pool2d(self.conv2(x),2))
            print(f'Conv2/pool2: {x.shape} ') if self.print else None

            nUnits  = x.shape.numel() / x.shape[0] # number of units 
            #we get it by dividing the total number of units by the batch size to get the number of units per image
            x = x.view(-1,int(nUnits))
            # -1 tells pytorch to infer the first dimension
            # Reshape x to (batch_size, nUnits)
            # we can do that x = x.view(x.shape[0], int(nUnits)) by -1 is automatically and well


            print(f'Vectorized: {x.shape} ') if self.print else None

            # Linear layers
            x = F.relu(self.fc1(x))
            print(f'FC1: {x.shape} ') if self.print else None

            x = self.out(x)
            print(f'Output: {x.shape} ') if self.print else None

            return x

    net = MNISnet(printtogle)
    lossfun = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=.005)
    
    
    return net, lossfun, optimizer    

In [67]:
import sys
sys.path.append('../utils')
sys.path.append('../models')
import importlib 
import training
importlib.reload(training)
from training import trainTheM0del

In [77]:
num_epochs = 20
model, lossfun, optimizer = createMnisNet(printtogle=False)

In [78]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device

device(type='mps')

In [79]:
losses, train_accuracy, test_accuracy = trainTheM0del(
        isClassification=True,
        optimizer = optimizer,
        doBN=False,
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=num_epochs, 
        loss_function= lossfun,
        device = device
        )

Epoch 1/20, Train Loss: 0.2443, Train Acc: 92.17, Test Acc: 96.17
Epoch 2/20, Train Loss: 0.0814, Train Acc: 97.47, Test Acc: 97.14
Epoch 3/20, Train Loss: 0.0625, Train Acc: 98.03, Test Acc: 97.74
Epoch 4/20, Train Loss: 0.0460, Train Acc: 98.61, Test Acc: 97.54
Epoch 5/20, Train Loss: 0.0382, Train Acc: 98.67, Test Acc: 97.98
Epoch 6/20, Train Loss: 0.0411, Train Acc: 98.70, Test Acc: 98.05
Epoch 7/20, Train Loss: 0.0300, Train Acc: 99.01, Test Acc: 97.90
Epoch 8/20, Train Loss: 0.0321, Train Acc: 99.01, Test Acc: 97.87
Epoch 9/20, Train Loss: 0.0326, Train Acc: 99.00, Test Acc: 97.60
Epoch 10/20, Train Loss: 0.0233, Train Acc: 99.29, Test Acc: 98.18
Epoch 11/20, Train Loss: 0.0262, Train Acc: 99.30, Test Acc: 97.91
Epoch 12/20, Train Loss: 0.0255, Train Acc: 99.27, Test Acc: 97.96
Epoch 13/20, Train Loss: 0.0292, Train Acc: 99.15, Test Acc: 97.73
Epoch 14/20, Train Loss: 0.0253, Train Acc: 99.31, Test Acc: 97.79
Epoch 15/20, Train Loss: 0.0209, Train Acc: 99.41, Test Acc: 98.37
Epoc

# Fine-tune the MNIST model on FMNIST

In [None]:
for name, param in model.named_parameters():
    print(name)   # e.g., 'fc1.weight'
    print(param)  # the actual tensor

conv1.weight
Parameter containing:
tensor([[[[-2.4280e-01,  7.0474e-02,  1.0293e-01, -3.0825e-03, -1.9726e-01],
          [-6.8802e-01, -4.0762e-01, -1.0186e-02, -3.1067e-01,  1.9792e-01],
          [-8.4532e-01, -1.4128e-01, -2.8844e-01,  2.6224e-01,  3.5622e-01],
          [-6.6717e-01, -1.6544e-01,  1.2890e-01,  4.6189e-01,  2.4482e-01],
          [-2.5795e-01, -2.0679e-02, -1.5538e-01,  4.6556e-01,  2.7924e-01]]],


        [[[-2.4005e-02,  3.1989e-01,  1.8976e-01,  4.8423e-02,  1.7424e-01],
          [ 2.1086e-03, -1.0800e-01, -3.2814e-01, -2.8427e-01, -1.1542e-01],
          [-2.2748e-01, -8.4293e-01, -5.6044e-01,  3.0844e-01,  2.8362e-01],
          [ 3.5503e-02, -5.2392e-01,  2.2155e-01,  6.5860e-01,  1.5974e-01],
          [-5.6431e-02,  3.6196e-01,  4.1099e-01,  3.6693e-01, -4.3148e-01]]],


        [[[ 1.1497e-01,  4.1667e-01,  3.7679e-01,  4.7430e-01, -7.8030e-02],
          [-1.9697e-01,  1.2160e-01,  1.6314e-01, -1.0936e-02, -2.9331e-01],
          [-2.0306e-01, -2.7580e-

In [81]:
# create the target model
fmnist_model , f_loss_fun , f_optimizer = createMnisNet()

# replace all weights in a Target model from source model
for target,source in zip(fmnist_model.named_parameters(),model.named_parameters()):
    target[1].data = copy.deepcopy(source[1].data)

In [85]:
losses, train_accuracy, test_accuracy = trainTheM0del(
        isClassification=True,
        optimizer = optimizer,
        doBN=False,
        model=model,
        train_loader=fmnist_train_loader,   
        test_loader=fmnist_test_loader,
        num_epochs=5, 
        loss_function= lossfun,
        device = device
        )

Epoch 1/5, Train Loss: 0.6434, Train Acc: 77.13, Test Acc: 78.44
Epoch 2/5, Train Loss: 0.4815, Train Acc: 82.81, Test Acc: 83.64
Epoch 3/5, Train Loss: 0.4385, Train Acc: 84.19, Test Acc: 85.22
Epoch 4/5, Train Loss: 0.4071, Train Acc: 85.41, Test Acc: 84.72
Epoch 5/5, Train Loss: 0.3880, Train Acc: 86.01, Test Acc: 84.77
