In [2]:

from torch.autograd import Function
import load_mnist_data

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import numpy as np
import torch.backends.cudnn as cudnn
import random
import os
import sys


In [3]:
class ReverseLayerF(Function):
    """Gradient reversal: identity going forward, sign-flipped and scaled going backward."""

    @staticmethod
    def forward(ctx, x, alpha):
        """Remember ``alpha`` on the context and hand back ``x`` as a view of itself."""
        # alpha is stored as a plain attribute on ctx so backward() can read it.
        ctx.alpha = alpha
        passthrough = x.view_as(x)
        return passthrough

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``-grad_output * alpha`` for ``x`` and ``None`` for ``alpha``."""
        reversed_grad = grad_output.neg() * ctx.alpha
        # One entry per forward() argument: gradient for x, nothing for alpha.
        return reversed_grad, None

class DANN(nn.Module):
    """Domain-adversarial network: one conv feature extractor shared by two heads.

    ``forward`` yields a 10-way class output and a 2-way domain output; the
    domain head receives the features through ``ReverseLayerF``.
    """

    def __init__(self):
        super().__init__()

        # Two conv stages; for a 28x28 input they produce 50 channels of 4x4.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=5),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),
            nn.ReLU(True),
            nn.Conv2d(64, 50, kernel_size=5),
            nn.BatchNorm2d(50),
            nn.Dropout2d(),
            nn.MaxPool2d(2),
            nn.ReLU(True),
        ]
        self.feature = nn.Sequential(*conv_stack)

        # Registered on the module but never called by forward().
        self.avgpool = nn.AdaptiveAvgPool2d((5, 5))

        flat_dim = 50 * 4 * 4

        # Label predictor head.
        label_head = [
            nn.Linear(flat_dim, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(100, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Linear(100, 10),
        ]
        self.classifier = nn.Sequential(*label_head)

        # Domain discriminator head.
        domain_head = [
            nn.Linear(flat_dim, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Linear(100, 2),
        ]
        self.domain_classifier = nn.Sequential(*domain_head)

    def forward(self, input_data, alpha):
        """Return ``(class_output, domain_output)`` for a batch of 28x28 images.

        ``input_data`` is expanded to 3 channels first; ``alpha`` is forwarded
        to ``ReverseLayerF`` on the path into the domain head.
        """
        n_samples = input_data.size(0)
        images = input_data.expand(n_samples, 3, 28, 28)
        flat = self.feature(images).view(-1, 50 * 4 * 4)

        class_output = self.classifier(flat)
        # Only the domain branch goes through the gradient reversal layer.
        domain_output = self.domain_classifier(ReverseLayerF.apply(flat, alpha))

        return class_output, domain_output


In [4]:

class ReverseLayerF(Function):
    """Passes activations through untouched and multiplies gradients by ``-alpha``.

    Behaves the same as the ``ReverseLayerF`` defined in the earlier cell;
    running this cell simply rebinds the name.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        """Identity on ``x``; ``alpha`` is kept on ``ctx`` for the backward pass."""
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Flip the sign of the incoming gradient, scale it by ``alpha``; ``alpha`` gets no gradient."""
        flipped = grad_output.neg()
        return flipped * ctx.alpha, None


class DANN_with_avg(nn.Module):
    """DANN variant that adaptively average-pools the conv features to 5x5.

    The pooled features (48 channels of 5x5) feed a 10-way class head directly
    and a 2-way domain head through ``ReverseLayerF``.
    """

    def __init__(self):
        # Fixed: this used to be super(DANN, self).__init__(), which names the
        # wrong class and fails because DANN_with_avg does not derive from DANN.
        super().__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),
            nn.ReLU(True),
            nn.Conv2d(32, 48, kernel_size=5),
            nn.BatchNorm2d(48),
            nn.Dropout2d(),
            nn.MaxPool2d(2),
            nn.ReLU(True),
        )

        # Brings the feature map to a fixed 5x5 grid before flattening.
        self.avgpool = nn.AdaptiveAvgPool2d((5, 5))

        self.classifier = nn.Sequential(
            nn.Linear(48 * 5 * 5, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(100, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Linear(100, 10),
        )

        self.domain_classifier = nn.Sequential(
            nn.Linear(48 * 5 * 5, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(True),
            nn.Linear(100, 2),
        )

    def forward(self, x, alpha):
        """Return ``(task_predict, domain_predict)`` for a batch of 28x28 images.

        ``x`` is expanded to 3 channels first; ``alpha`` is forwarded to
        ``ReverseLayerF`` on the path into the domain head.
        """
        x = x.expand(x.size(0), 3, 28, 28)
        x = self.feature(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        task_predict = self.classifier(x)
        # Only the domain branch goes through the gradient reversal layer.
        x = ReverseLayerF.apply(x, alpha)
        domain_predict = self.domain_classifier(x)
        return task_predict, domain_predict

In [5]:
def _count_correct(output, labels):
    """Count rows of ``output`` whose arg-max along dim 1 equals the matching label."""
    _, predicted = torch.max(output.detach(), 1)
    return predicted.eq(labels).sum().item()


def train(source, target, net, criterion, optimizer, epoch, use_cuda=True, total_epochs=200):
    """Run one epoch of domain-adversarial training over paired source/target batches.

    Source and target batches are consumed in lockstep, so the epoch ends as
    soon as the shorter of the two iterables is exhausted.

    Args:
        source: sized iterable of ``(inputs, labels)`` source-domain batches.
        target: sized iterable of ``(inputs, labels)`` target-domain batches.
            Target labels are only used to count correct predictions; they
            never enter the loss.
        net: module called as ``net(inputs, alpha)`` that returns
            ``(class_output, domain_output)``.
        criterion: loss applied to both the class and the domain outputs.
        optimizer: optimizer, stepped once per batch pair.
        epoch: index of the current epoch, used for the ``alpha`` schedule.
        use_cuda: move every batch to the GPU. Treated as False when CUDA is
            not available, so the default also works on CPU-only machines.
        total_epochs: number of epochs over which ``alpha`` ramps up.

    Returns:
        ``(correct_source_label, correct_source_domain, correct_target_label,
        correct_target_domain, total)`` where ``total`` is the number of
        source samples processed.
    """
    net.train()  # Sets the module in training mode.
    use_cuda = use_cuda and torch.cuda.is_available()

    correct_source_label = 0
    correct_source_domain = 0
    correct_target_label = 0
    correct_target_domain = 0
    total = 0

    len_dataloader = min(len(source), len(target))

    # zip() stops at the shorter loader, matching len_dataloader above, and uses
    # the standard iterator protocol instead of the removed ``.next()`` method.
    for batch_idx, (source_batch, target_batch) in enumerate(zip(source, target)):
        inputs, source_label = source_batch
        target_inputs, target_label = target_batch

        # p is the fraction of the schedule completed so far; alpha follows
        # 2 / (1 + exp(-10 p)) - 1, which is 0 at p = 0 and approaches 1.
        p = float(batch_idx + epoch * len_dataloader) / (total_epochs * len_dataloader)
        alpha = float(2. / (1. + np.exp(-10 * p)) - 1)

        n_source = inputs.size(0)
        n_target = target_inputs.size(0)
        total += n_source

        source_label = source_label.long()
        # Domain 0 = source, domain 1 = target. Each label vector is sized from
        # its own batch so unequal batch sizes do not break the loss.
        source_domain = torch.zeros(n_source, dtype=torch.long)
        target_domain = torch.ones(n_target, dtype=torch.long)

        if use_cuda:
            inputs, source_label, source_domain = inputs.cuda(), source_label.cuda(), source_domain.cuda()
            target_inputs, target_label, target_domain = (
                target_inputs.cuda(), target_label.cuda(), target_domain.cuda())

        optimizer.zero_grad()

        # Feed source image to the network
        class_output, domain_output = net(inputs, alpha)
        correct_source_label += _count_correct(class_output, source_label)
        correct_source_domain += _count_correct(domain_output, source_domain)
        loss_s_label = criterion(class_output, source_label)
        loss_s_domain = criterion(domain_output, source_domain)

        # Feed target image to the network
        class_output, domain_output = net(target_inputs, alpha)
        loss_t_domain = criterion(domain_output, target_domain)
        correct_target_label += _count_correct(class_output, target_label)
        correct_target_domain += _count_correct(domain_output, target_domain)

        loss = loss_s_label + loss_s_domain + loss_t_domain
        loss.backward()
        optimizer.step()

    return correct_source_label, correct_source_domain, correct_target_label, correct_target_domain, total


In [6]:
# Build the two loaders that are later passed to train() as `source` and `target`.
# NOTE(review): the meaning of the 1.0 argument is defined inside load_mnist_data,
# which is not visible here — confirm before changing it.
loader_source, loader_target = load_mnist_data.get_data_loader(1.0)

In [9]:
# Seed before the model is built so weight initialisation is reproducible too;
# previously only the CUDA generators were seeded, and only after construction.
# torch.manual_seed seeds the generators on all devices.
torch.manual_seed(42)

use_cuda = torch.cuda.is_available()

criterion = nn.CrossEntropyLoss()

net = DANN()
if use_cuda:
    torch.cuda.manual_seed_all(42)
    # Lets cuDNN pick the fastest kernels; this can make runs non-deterministic.
    cudnn.benchmark = True
    net.cuda()
    criterion = criterion.cuda()

optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(0, 201):
    # Pass the device choice explicitly: train() defaults to use_cuda=True, which
    # would move batches to the GPU even when the model stayed on the CPU.
    sl, sd, tl, td, total = train(loader_source, loader_target, net, criterion, optimizer, epoch,
                                  use_cuda=use_cuda)

    # Report every fifth epoch: source/target label and domain accuracies.
    if (epoch % 5 == 0):
        print("e: %d, sl: %f, sd: %f, tl: %f, td: %f" % (epoch, sl/total, sd/total, tl/total, td/total))

e: 0, sl: 0.821167, sd: 0.787333, tl: 0.373383, td: 0.757300
e: 5, sl: 0.936450, sd: 0.788267, tl: 0.523683, td: 0.780600
e: 10, sl: 0.937300, sd: 0.735517, tl: 0.572850, td: 0.726467
e: 15, sl: 0.937650, sd: 0.705933, tl: 0.599617, td: 0.695617
e: 20, sl: 0.936783, sd: 0.690167, tl: 0.622283, td: 0.680483
e: 25, sl: 0.936917, sd: 0.677417, tl: 0.656167, td: 0.668767
e: 30, sl: 0.938467, sd: 0.667750, tl: 0.667667, td: 0.661617
e: 35, sl: 0.937900, sd: 0.658850, tl: 0.680700, td: 0.652017
e: 40, sl: 0.937000, sd: 0.654783, tl: 0.687700, td: 0.646700
e: 45, sl: 0.937267, sd: 0.654967, tl: 0.700917, td: 0.644800
e: 50, sl: 0.938650, sd: 0.646283, tl: 0.707400, td: 0.635217
e: 55, sl: 0.940733, sd: 0.645667, tl: 0.713717, td: 0.638583
e: 60, sl: 0.940400, sd: 0.642867, tl: 0.717233, td: 0.633417
e: 65, sl: 0.941683, sd: 0.639550, tl: 0.727867, td: 0.633250
e: 70, sl: 0.940667, sd: 0.635067, tl: 0.732617, td: 0.625783
e: 75, sl: 0.943467, sd: 0.629417, tl: 0.736100, td: 0.619667
e: 80, sl: