Training Parameters to be worked on for accuracy

1. the mean and the std values for the dataloader source and target could be tried varying.


In [2]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import torch.utils.data as utils
import time
import os
import torch.nn as nn
from torch.autograd import Function
import torchvision
import torch.utils.data as data
from PIL import Image
import os
import matplotlib.pyplot as plt

### Gradient Reversal Layer

In [3]:
from torch.autograd import Function

In [4]:
class GradientReverseLayer(Function):
    
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)*alpha
    
    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg()*ctx.alpha
        return output, None

### The three models: Feature Extractor, Class Classifier, Domain CLassifier

In [5]:
import torch.nn as nn
from collections import OrderedDict
from torchsummary import summary

In [11]:
class DANN(nn.Module):
    def __init__(self):
        super(DANN, self).__init__()
        self.feature_extractor = nn.Sequential()
        self.feature_extractor.add_module('conv1', nn.Conv2d(3, 64, kernel_size=5)),
        self.feature_extractor.add_module('batchnorm1', nn.BatchNorm2d(64)),
        self.feature_extractor.add_module('maxpool1', nn.MaxPool2d(2)),
        self.feature_extractor.add_module('relu1', nn.ReLU(True)),
        self.feature_extractor.add_module('conv2', nn.Conv2d(64, 50, kernel_size=5)),
        self.feature_extractor.add_module('batchnorm2', nn.BatchNorm2d(50)),
        self.feature_extractor.add_module('drop1', nn.Dropout2d())
        self.feature_extractor.add_module('maxpool2', nn.MaxPool2d(2)),
        self.feature_extractor.add_module('relu2', nn.ReLU(True))
        
        self.class_classifier = nn.Sequential()
        self.class_classifier.add_module('fc1', nn.Linear(50*4*4, 100)),
        self.class_classifier.add_module('batchnorm1', nn.BatchNorm1d(100)),
        self.class_classifier.add_module('relu1', nn.ReLU(True)),
        self.class_classifier.add_module('drop1', nn.Dropout2d()),
        self.class_classifier.add_module('fc2', nn.Linear(100, 100)),
        self.class_classifier.add_module('batchnorm2', nn.BatchNorm1d(100)),
        self.class_classifier.add_module('relu2', nn.ReLU(True)),
        self.class_classifier.add_module('fc3', nn.Linear(100, 10)),
        self.class_classifier.add_module('softmax', nn.LogSoftmax()),
        
        self.domain_classifier = nn.Sequential()
        self.domain_classifier.add_module('fc1', nn.Linear(50*4*4, 100)),
        self.domain_classifier.add_module('batchnorm1', nn.BatchNorm1d(100)),
        self.domain_classifier.add_module('relu1', nn.ReLU(True)),
        self.domain_classifier.add_module('fc2', nn.Linear(100, 2)),
        self.domain_classifier.add_module('softmax', nn.LogSoftmax(dim=1))
        
    def forward(self, x):
        alpha=0.5
        x = x.expand(x.data.shape[0], 3, 28, 28)
        feature = self.feature_extractor(x)
        feature = feature.view(-1, 50*4*4)
        reverse = GradientReverseLayer(feature, alpha)
        class_output = self.class_classifier(feature)
        domain_ouput = self.domain_classifier(reverse)
        return class_output, domain_output

In [13]:
model = DANN()
summary(model,(3,28,28))

  input = module(input)


TypeError: linear(): argument 'input' (position 1) must be Tensor, not GradientReverseLayer

#### Dataset Loading

In [7]:
import functools

In [8]:
def repeat_image(x):
    return x.repeat(3, 1, 1)

In [9]:
from torchvision.datasets import MNIST        
import torchvision.transforms as transforms 

trainset_bla = torchvision.datasets.MNIST(root='./data', train=True, download=True)
print('Min Pixel Value: {} \nMax Pixel Value: {}'.format(trainset_bla.data.min(), trainset_bla.data.max()))
print('Mean Pixel Value {} \nPixel Values Std: {}'.format(trainset_bla.data.float().mean(), trainset_bla.data.float().std()))
print('Scaled Mean Pixel Value {} \nScaled Pixel Values Std: {}'.format(trainset_bla.data.float().mean() / 255, trainset_bla.data.float().std() / 255))

Min Pixel Value: 0 
Max Pixel Value: 255
Mean Pixel Value 33.31842041015625 
Pixel Values Std: 78.56748962402344
Scaled Mean Pixel Value 0.13066047430038452 
Scaled Pixel Values Std: 0.30810779333114624


In [10]:
cuda = True
lr = 1e-3
image_size = 28


img_transform_source = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,))
])
img_transform_target = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.5,0.5,0.5), std = (0.5,0.5,0.5))
])

In [11]:
source_train = torch.utils.data.DataLoader(
        datasets.MNIST('./', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,)),
                       ])),
        batch_size=128, shuffle=True,num_workers=1)
source_test = torch.utils.data.DataLoader(
        datasets.MNIST('./', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,)),
                       ])),
        batch_size=128, shuffle=False,num_workers=1)

In [12]:
class GetLoader(data.Dataset):
    def __init__(self, data_root, data_list, transform=None):
        self.root = data_root
        self.transform = transform

        f = open(data_list, 'r')
        data_list = f.readlines()
        f.close()

        self.n_data = len(data_list)

        self.img_paths = []
        self.img_labels = []

        for data in data_list:
            self.img_paths.append(data[:-3])
            self.img_labels.append(data[-2])

    def __getitem__(self, item):
        img_paths, labels = self.img_paths[item], self.img_labels[item]
        imgs = Image.open(os.path.join(self.root, img_paths)).convert('RGB')

        if self.transform is not None:
            imgs = self.transform(imgs)
            labels = int(labels)

        return imgs, labels

    def __len__(self):
        return self.n_data


In [13]:
image_size=28
img_transform = transforms.Compose([
    transforms.Resize((image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
])
train_list = 'C:\\Users\\Dell\\Downloads\\mnist_m\\mnist_m\\mnist_m_train_labels.txt'
dataset_train_target = GetLoader(
    data_root='C:\\Users\\Dell\\Downloads\\mnist_m\\mnist_m\\mnist_m_train',
    data_list=train_list,
    transform=img_transform
)
test_list = 'C:\\Users\\Dell\\Downloads\\mnist_m\\mnist_m\\mnist_m_test_labels.txt'
dataset_test_target = GetLoader(
    data_root='C:\\Users\\Dell\\Downloads\\mnist_m\\mnist_m\\mnist_m_test',
    data_list=test_list,
    transform=img_transform
)
target_train = torch.utils.data.DataLoader(dataset_train_target,batch_size=128, shuffle=True,num_workers=1)
target_test = torch.utils.data.DataLoader(dataset_test_target,batch_size=128, shuffle=True,num_workers=1)

### Model Training

In [14]:
import os
import matplotlib.pyplot as plt
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torchvision import transforms
from torchvision import datasets
from PIL import Image

In [15]:
model = DANN()
optimizer = optim.SGD(model.parameters(), lr= lr, momentum= 0.9)
criterion = nn.CrossEntropyLoss()

def optimizer_scheduler(optimizer, p):
    for param_group in optimizer.param_groups:
        param_group['lr'] = 0.01 / (1. + 10 * p) ** 0.75
    return optimizer

loss_class = torch.nn.NLLLoss()
loss_domain = torch.nn.NLLLoss()

In [16]:
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in source_test:
            data, target = data.to(device), target.to(device)
            output, _ = model(data,0.5)
            test_loss += float(criterion(output, target))  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += float(pred.eq(target.view_as(pred)).sum())

    test_loss /= len(source_test.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(source_test.dataset),
        100. * correct / len(source_test.dataset)))

    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in target_train:
            data, target = data.to(device), target.to(device)
            output, _ = model(data,0.5)
            test_loss += float(criterion(output, target))  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += float(pred.eq(target.view_as(pred)).sum())

    test_loss /= len(target_train.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(target_train.dataset),
        100. * correct / len(target_train.dataset)))
    
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in target_test:
            data, target = data.to(device), target.to(device)
            output, _ = model(data,0.5)
            test_loss += float(criterion(output, target))  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += float(pred.eq(target.view_as(pred)).sum())

    test_loss /= len(target_test.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(target_test.dataset),
        100. * correct / len(target_test.dataset)))

In [17]:
print(len(target_train),len(source_train))

461 469


#### Training

In [None]:
allepoch=100

for epoch in range(allepoch):
    len_dataloader = min(len(source_train), len(target_train))
    total_steps = allepoch * len(source_train)
    i = 0
    for batch_idx, (data_source, data_target) in enumerate(zip(source_train, target_train)):
        print(batch_idx, epoch)
        start_time = time.time()
        s_img, s_label = data_source
        print("***", s_label)

        start_steps = epoch * len(source_train)

        p = float(i + start_steps) / total_steps
        alpha = 2. / (1. + np.exp(-10 * p)) - 1

        optimizer = optimizer_scheduler(optimizer, p)
        optimizer.zero_grad()

        batch_size = len(s_label)

        domain_label = torch.zeros(batch_size)
        domain_label = domain_label.long()

        
        a,b = model(s_img,alpha)
        err_s_label = criterion(a, s_label)
        err_s_domain = criterion(b, domain_label)

        # training model using target data
        t_img, _ = data_target

        batch_size = len(t_img)

        domain_label = torch.ones(batch_size)
        domain_label = domain_label.long()



        _, b = model(t_img,alpha)
        err_t_domain = criterion(b, domain_label)
        err = err_s_label + err_s_domain + err_t_domain
        err.backward()
        optimizer.step()


        if(i % 1000 == 0):
            print('epoch:{},[{}/{}],s_label:{:.3f},s_domain:{:.3f},t_domain:{:.3f},time{}'.
                      format(epoch, i, len_dataloader, float(err_s_label), float(err_s_domain),
                             float(err_t_domain), time.time() - start_time))

        i += 1

    test(epoch)

### alternate

In [18]:
my_net = DANN()

# setup optimizer
n_epoch = 100
batch_size = 128
lr = 1e-3
image_size = 28
cuda = True
cudnn.benchmark = True
optimizer = optim.Adam(my_net.parameters(), lr=lr)

loss_class = torch.nn.NLLLoss()
loss_domain = torch.nn.NLLLoss()

for p in my_net.parameters():
    p.requires_grad = True


In [None]:
for epoch in range(n_epoch):

    len_dataloader = min(len(source_train), len(target_train))
    data_source_iter = iter(source_train)
    data_target_iter = iter(target_train)
    
    print("Hi", epoch)
    
    i = 0
    while i < len_dataloader:
        p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
        alpha = 2. / (1. + np.exp(-10 * p)) - 1
        
        # training model using source data
        data_source = data_source_iter.next()
        s_img, s_label = data_source
        
        print(s_label, "**")
        my_net.zero_grad()
        batch_size = len(s_label)

        input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
        class_label = torch.LongTensor(batch_size)
        domain_label = torch.zeros(batch_size)
        domain_label = domain_label.long()
        
        input_img.resize_as_(s_img).copy_(s_img)
        class_label.resize_as_(s_label).copy_(s_label)
        
        class_output, domain_output = my_net(input_data=input_img, alpha=alpha)
        err_s_label = loss_class(class_output, class_label)
        err_s_domain = loss_domain(domain_output, domain_label)
        
        # training model using target data
        data_target = data_target_iter.next()
        t_img, _ = data_target

        batch_size = len(t_img)
        
        input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
        domain_label = torch.ones(batch_size)
        domain_label = domain_label.long()
        
        input_img.resize_as_(t_img).copy_(t_img)

        _, domain_output = my_net(input_data=input_img, alpha=alpha)
        err_t_domain = loss_domain(domain_output, domain_label)
        err = err_t_domain + err_s_domain + err_s_label
        err.backward()
        optimizer.step()
        
        i += 1

        print ('epoch: %d, [iter: %d / all %d], err_s_label: %f, err_s_domain: %f, err_t_domain: %f' \
              % (epoch, i, len_dataloader, err_s_label.cpu().data.numpy(),
                 err_s_domain.cpu().data.numpy(), err_t_domain.cpu().data.numpy()))

    test(epoch)