<a href="https://colab.research.google.com/github/jeffreyfeng99/SYDE_522_A3/blob/master/SYDE_522_Assignment_3_joeydev_MCD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.utils.data as data
from torchvision import datasets, transforms, models
from torch.autograd import Variable
from torch.autograd import Function

from PIL import Image
import random
import pandas as pd
import numpy as np
import os

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
cuda = True
cudnn.benchmark = True

model_name = "resnet18"
optim_name = "momentum"

LR = 1e-3
MOMENTUM = 0.9
BATCH_SIZE = 32
IMAGE_SIZE = 224 #227
# FT_OUT_SIZE = 512
N_EPOCH = 50

AUGMENT = False
N_K = 4 # how many steps to repeat the generator update?
N_LAYER = 2 # how many layers for classifier
LOG_INTERVAL = 50


In [4]:
dataset_root = '/content/drive/MyDrive/4B/SYDE-522/data'
output_root = '/content/drive/MyDrive/4B/SYDE-522/submission/03302022_'+model_name
source_dataset_name = 'train_set'
target_dataset_name = 'test_set'

source_image_root = os.path.join(dataset_root, source_dataset_name)
target_image_root = os.path.join(dataset_root, target_dataset_name)

train_path = os.path.join(source_image_root, 'train_set')
val_path = os.path.join(target_image_root, 'test_set')

train_label_list = os.path.join(dataset_root, 'train_labels.csv')

os.makedirs(output_root, exist_ok=True)

In [5]:
class GradReverse(Function):
    def __init__(self, lambd):
        self.lambd = lambd
    def forward(self, x):
        return x.view_as(x)
    def backward(self, grad_output):
        return (grad_output*-self.lambd)

def grad_reverse(x,lambd=1.0):
    return GradReverse(lambd)(x)

class ResBase(nn.Module):
    def __init__(self,option='resnet18',pret=True):
        super(ResBase, self).__init__()
        self.dim = 2048
        if option == 'resnet18':
            model_ft = models.resnet18(pretrained=pret)
            self.dim = 512
        if option == 'resnet50':
            model_ft = models.resnet50(pretrained=pret)
        if option == 'resnet101':
            model_ft = models.resnet101(pretrained=pret)
        if option == 'resnet152':
            model_ft = models.resnet152(pretrained=pret)
        mod = list(model_ft.children())
        mod.pop()
        #self.model_ft =model_ft
        self.features = nn.Sequential(*mod)
        
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), self.dim)
        return x

class ResClassifier(nn.Module):
    def __init__(self, num_classes=7, num_layer=2, num_unit=2048, prob=0.5, middle=1000):
        super(ResClassifier, self).__init__()
        layers = []
        # currently 10000 units
        layers.append(nn.Dropout(p=prob))
        layers.append(nn.Linear(num_unit,middle))
        layers.append(nn.BatchNorm1d(middle,affine=True))
        layers.append(nn.ReLU(inplace=True))

        for i in range(num_layer-1):
            layers.append(nn.Dropout(p=prob))
            layers.append(nn.Linear(middle,middle))
            layers.append(nn.BatchNorm1d(middle,affine=True))
            layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Linear(middle,num_classes))
        self.classifier = nn.Sequential(*layers)

        #self.classifier = nn.Sequential(
        #    nn.Dropout(),
        #    nn.Linear(2048, 1000),
        #    nn.BatchNorm1d(1000,affine=True),
        #    nn.ReLU(inplace=True),
        #    nn.Dropout(),
        #    nn.Linear(1000, 1000),
        #    nn.BatchNorm1d(1000,affine=True),
        #    nn.ReLU(inplace=True),
        #    nn.Linear(1000, num_classes),

    def set_lambda(self, lambd):
        self.lambd = lambd
    def forward(self, x,reverse=False):
        if reverse:
            x = grad_reverse(x, self.lambd)
        x = self.classifier(x)
        return x

In [6]:
class GetLoader(data.Dataset):
    def __init__(self, data_root, data_list=None, transform=None):
        self.root = data_root
        self.transform = transform

        # we only pass data_list if it's training set
        if data_list is not None:
            df = pd.read_csv(data_list)
            self.img_paths = df['dir'].to_list()

            if 'label2' in df.columns:
                self.img_labels = df['label2'].to_list()
            else: 
                self.img_labels = ['0' for i in range(len(self.img_paths))]

            if 'label1' in df.columns:
                self.domain_labels = df['label1'].to_list()
            else: 
                self.domain_labels = ['0' for i in range(len(self.img_paths))]
        else:
            # Walk through test folder - we don't need labels
            self.img_paths = [f for root,dirs,files in os.walk(data_root) for f in files if f.endswith('.png')]
            self.img_labels = ['0' for i in range(len(self.img_paths))]
            self.domain_labels = ['0' for i in range(len(self.img_paths))]

        self.n_data = len(self.img_paths)

    def __getitem__(self, item):
        img_paths, labels, domain_labels = self.img_paths[item%self.n_data], self.img_labels[item%self.n_data], self.domain_labels[item%self.n_data]
        imgs = Image.open(os.path.join(self.root, img_paths)).convert('RGB')

        if self.transform is not None:

            if isinstance(self.transform, list):
                tform = self.transform[int(domain_labels)]
            else:
                tform = self.transform

            imgs = tform(imgs)
            labels = int(labels)
            domain_labels = int(domain_labels)

        return imgs, labels #, domain_labels, img_paths

    def __len__(self):
        return self.n_data

class PairedData(object):
    def __init__(self, data_loader_A, data_loader_B, max_dataset_size, flip):
        self.data_loader_A = data_loader_A
        self.data_loader_B = data_loader_B
        self.stop_A = False
        self.stop_B = False
        self.max_dataset_size = max_dataset_size
        self.flip = flip

    def __iter__(self):
        self.stop_A = False
        self.stop_B = False
        self.data_loader_A_iter = iter(self.data_loader_A)
        self.data_loader_B_iter = iter(self.data_loader_B)
        self.iter = 0
        return self

    def __next__(self):
        A, A_paths = None, None
        B, B_paths = None, None
        try:
            A, A_paths = next(self.data_loader_A_iter)
        except StopIteration:
            if A is None or A_paths is None:
                self.stop_A = True
                self.data_loader_A_iter = iter(self.data_loader_A)
                A, A_paths = next(self.data_loader_A_iter)

        try:
            B, B_paths = next(self.data_loader_B_iter)
        except StopIteration:
            if B is None or B_paths is None:
                self.stop_B = True
                self.data_loader_B_iter = iter(self.data_loader_B)
                B, B_paths = next(self.data_loader_B_iter)

        if (self.stop_A and self.stop_B) or self.iter > self.max_dataset_size:
            self.stop_A = False
            self.stop_B = False
            raise StopIteration()
        else:
            self.iter += 1
            if self.flip and random.random() < 0.5:
                idx = [i for i in range(A.size(3) - 1, -1, -1)]
                idx = torch.LongTensor(idx)
                A = A.index_select(3, idx)
                B = B.index_select(3, idx)
            return {'S': A, 'S_label': A_paths, # TODO: prob not exactly what we want
                    'T': B, 'T_label': B_paths}

class CVDataLoader(object):
    def initialize(self, dataset_A,dataset_B,batch_size,shuffle=True):
        #normalize = transforms.Normalize(mean=mean_im,std=std_im)
        self.max_dataset_size = float("inf")
        data_loader_A = torch.utils.data.DataLoader(
            dataset_A,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=4,
            drop_last=True)
        data_loader_B = torch.utils.data.DataLoader(
            dataset_B,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=4,
            drop_last=True)
        self.dataset_A = dataset_A
        self.dataset_B = dataset_B
        flip = False
        self.paired_data = PairedData(data_loader_A, data_loader_B, self.max_dataset_size, flip)

    def name(self):
        return 'UnalignedDataLoader'

    def load_data(self):
        return self.paired_data

    def __len__(self):
        return max([len(self.dataset_A), len(self.dataset_B)])


In [7]:
def preprocess_fn(mu=(0.6399, 0.6076, 0.5603), std=(0.3065, 0.3082, 0.3353), augment=False):
    if augment:
        # I guess we will also try augmentation
        img_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(IMAGE_SIZE),
            transforms.ToTensor(),
            transforms.Normalize(mean=mu, std=std)
        ])
    else:
        img_transform = transforms.Compose([
          transforms.Resize(IMAGE_SIZE),
          transforms.ToTensor(),
          transforms.Normalize(mean=mu, std=std) 
        ])

    return img_transform

def prep_dataloader(trds, vlds, shuffle=False):
    dataloader = CVDataLoader()
    dataloader.initialize(trds, vlds, BATCH_SIZE, shuffle=shuffle)
    dataset = dataloader.load_data()
    
    return dataloader, dataset

In [8]:
data_transforms = {
    train_path: preprocess_fn(augment=True),
    val_path: preprocess_fn(augment=True)
}


datasets = {
        train_path: GetLoader(
                data_root=train_path,
                data_list=train_label_list,
                transform=data_transforms[train_path]),
        val_path: GetLoader(
            data_root=val_path,
            transform=data_transforms[val_path])
        }

train_loader, dataset = prep_dataloader(datasets[train_path], datasets[val_path])
test_loader, dataset_test = prep_dataloader(datasets[train_path], datasets[val_path], shuffle=True)


  cpuset_checked))


In [9]:
# data_loader_A = torch.utils.data.DataLoader(
#             datasets[train_path],
#             batch_size=BATCH_SIZE,
#             shuffle=False,
#             num_workers=4)
# data_loader_A_iter = iter(data_loader_A)

In [10]:
use_gpu = torch.cuda.is_available()
manual_seed = random.randint(1, 10000)
random.seed(manual_seed)
if cuda:
    torch.cuda.manual_seed(manual_seed)

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.01)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.01)
        m.bias.data.fill_(0)
    elif classname.find('Linear') != -1:
        m.weight.data.normal_(0.0, 0.01)
        m.bias.data.normal_(0.0, 0.01)
        

G = ResBase(model_name)
F1 = ResClassifier(num_layer=N_LAYER, num_unit=512)
F2 = ResClassifier(num_layer=N_LAYER, num_unit=512)
F1.apply(weights_init)
F2.apply(weights_init)

if cuda:
    G.cuda()
    F1.cuda()
    F2.cuda()
if optim_name == 'momentum':
    optimizer_g = optim.SGD(list(G.features.parameters()), lr=LR, weight_decay=0.0005)
    optimizer_f = optim.SGD(list(F1.parameters())+list(F2.parameters()), 
                            momentum=MOMENTUM, lr=LR, weight_decay=0.0005)
elif optim_name == 'adam':
    optimizer_g = optim.Adam(G.features.parameters(), lr=LR,weight_decay=0.0005)
    optimizer_f = optim.Adam(list(F1.parameters())+list(F2.parameters()), 
                             lr=LR, weight_decay=0.0005)
else:
    optimizer_g = optim.Adadelta(G.features.parameters(), lr=LR, weight_decay=0.0005)
    optimizer_f = optim.Adadelta(list(F1.parameters())+list(F2.parameters()), 
                                 lr=LR, weight_decay=0.0005)   

In [11]:
def train(num_epoch):
    criterion = nn.CrossEntropyLoss().cuda()
    for ep in range(num_epoch):
        G.train()
        F1.train()
        F2.train()
        len_dataloader = max(len(train_loader), len(test_loader))

        # i = 0
        # while i < len_dataloader:
        for batch_idx, data in enumerate(dataset):
            # if batch_idx * BATCH_SIZE > len_dataloader:
            #     break
            # data = next(dataset)
            if cuda:
                data1 = data['S']
                target1 = data['S_label']
                data2  = data['T']
                target2 = data['T_label']
                data1, target1 = data1.cuda(), target1.cuda()
                data2, target2 = data2.cuda(), target2.cuda()
            # when pretraining network source only
            eta = 1.0
            data = Variable(torch.cat((data1,data2),0))
            target1 = Variable(target1)
            # Step A train all networks to minimize loss on source
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()
            output = G(data)
            output1 = F1(output)
            output2 = F2(output)

            output_s1 = output1[:BATCH_SIZE,:]
            output_s2 = output2[:BATCH_SIZE,:]
            output_t1 = output1[BATCH_SIZE:,:]
            output_t2 = output2[BATCH_SIZE:,:]
            output_t1 = F.softmax(output_t1)
            output_t2 = F.softmax(output_t2)

            entropy_loss = - torch.mean(torch.log(torch.mean(output_t1,0)+1e-6))
            entropy_loss -= torch.mean(torch.log(torch.mean(output_t2,0)+1e-6))

            loss1 = criterion(output_s1, target1)
            loss2 = criterion(output_s2, target1)
            all_loss = loss1 + loss2 + 0.01 * entropy_loss
            all_loss.backward()
            optimizer_g.step()
            optimizer_f.step()

            #Step B train classifier to maximize discrepancy
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()

            output = G(data)
            output1 = F1(output)
            output2 = F2(output)
            output_s1 = output1[:BATCH_SIZE,:]
            output_s2 = output2[:BATCH_SIZE,:]
            output_t1 = output1[BATCH_SIZE:,:]
            output_t2 = output2[BATCH_SIZE:,:]
            output_t1 = F.softmax(output_t1)
            output_t2 = F.softmax(output_t2)
            loss1 = criterion(output_s1, target1)
            loss2 = criterion(output_s2, target1)
            entropy_loss = - torch.mean(torch.log(torch.mean(output_t1,0)+1e-6))
            entropy_loss -= torch.mean(torch.log(torch.mean(output_t2,0)+1e-6))
            loss_dis = torch.mean(torch.abs(output_t1-output_t2))
            F_loss = loss1 + loss2 - eta*loss_dis  + 0.01 * entropy_loss
            F_loss.backward()
            optimizer_f.step()
            # Step C train genrator to minimize discrepancy
            for i in range(N_K):
                optimizer_g.zero_grad()
                output = G(data)
                output1 = F1(output)
                output2 = F2(output)

                output_s1 = output1[:BATCH_SIZE,:]
                output_s2 = output2[:BATCH_SIZE,:]
                output_t1 = output1[BATCH_SIZE:,:]
                output_t2 = output2[BATCH_SIZE:,:]

                loss1 = criterion(output_s1, target1)
                loss2 = criterion(output_s2, target1)
                output_t1 = F.softmax(output_t1)
                output_t2 = F.softmax(output_t2)
                loss_dis = torch.mean(torch.abs(output_t1-output_t2))
                entropy_loss = -torch.mean(torch.log(torch.mean(output_t1,0)+1e-6))
                entropy_loss -= torch.mean(torch.log(torch.mean(output_t2,0)+1e-6))

                loss_dis.backward()
                optimizer_g.step()
            if batch_idx % LOG_INTERVAL == 0:
                # print(ep)
                # print(loss1.data)
                # print(loss2.data)
                # print(loss_dis.data)
                # print(entropy_loss.data)
                print('Train Ep: {} [{}/{}]\tLoss1: {:.6f}\tLoss2: {:.6f}\t Dis: {:.6f} Entropy: {:.6f}'.format(
                    ep, batch_idx*BATCH_SIZE, len_dataloader,
                    loss1.cpu().data.numpy(),loss2.cpu().data.numpy(),
                    loss_dis.cpu().data.numpy(),entropy_loss.cpu().data.numpy()))
            if batch_idx == 1 and ep > 1:
                test(ep)
                G.train()
                F1.train()
                F2.train()

In [12]:
def test(epoch):
    G.eval()
    F1.eval()
    F2.eval()
    test_loss = 0
    correct = 0
    correct2 = 0
    size = 0

    for batch_idx, data in enumerate(dataset_test):
        # if batch_idx*BATCH_SIZE > 5000:
        #     break
        if cuda:
            data2  = data['T']
            target2 = data['T_label']
            # if val:
            #     data2  = data['S']
            #     target2 = data['S_label']
            data2, target2 = data2.cuda(), target2.cuda()
        data1, target1 = Variable(data2, volatile=True), Variable(target2)
        output = G(data1)
        output1 = F1(output)
        output2 = F2(output)
        test_loss += F.nll_loss(output1, target1).cpu().data.numpy()
        pred = output1.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target1.data).cpu().sum()
        pred = output2.data.max(1)[1] # get the index of the max log-probability
        k = target1.data.size()[0]
        correct2 += pred.eq(target1.data).cpu().sum()

        size += k
    test_loss = test_loss
    test_loss /= len(test_loader) # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%) ({:.0f}%)\n'.format(
        test_loss, correct, size,
        100. * correct / size,100.*correct2/size))
    #if 100. * correct / size > 67 or 100. * correct2 / size > 67:
    value = max(100. * correct / size,100. * correct2 / size)
    if value > 60: # TODO need to save the csv
        save_path_pref = os.path.join(output_root, model_name+'_'+str(value)+'_')
        torch.save(F1.state_dict(), save_path_pref+'F1.pth')
        torch.save(F2.state_dict(), save_path_pref+'F2.pth')
        torch.save(G.state_dict(), save_path_pref+'G.pth')

In [None]:
train(N_EPOCH+1)

  cpuset_checked))


Train Ep: 0 [0/6062]	Loss1: 1.924909	Loss2: 1.889425	 Dis: 0.030638 Entropy: 3.904876
Train Ep: 0 [1600/6062]	Loss1: 0.945768	Loss2: 1.001526	 Dis: 0.062011 Entropy: 4.038435
Train Ep: 0 [3200/6062]	Loss1: 0.647412	Loss2: 0.677999	 Dis: 0.085513 Entropy: 4.067057
Train Ep: 0 [4800/6062]	Loss1: 0.536464	Loss2: 0.524799	 Dis: 0.131250 Entropy: 4.130460
Train Ep: 1 [0/6062]	Loss1: 0.273794	Loss2: 0.175599	 Dis: 0.150846 Entropy: 4.258055
Train Ep: 1 [1600/6062]	Loss1: 0.282109	Loss2: 0.337609	 Dis: 0.157715 Entropy: 4.336813
Train Ep: 1 [3200/6062]	Loss1: 0.395625	Loss2: 0.462725	 Dis: 0.138113 Entropy: 4.386701
Train Ep: 1 [4800/6062]	Loss1: 0.465653	Loss2: 0.456765	 Dis: 0.102982 Entropy: 4.151078
Train Ep: 2 [0/6062]	Loss1: 0.330407	Loss2: 0.220862	 Dis: 0.124779 Entropy: 4.430922





Test set: Average loss: -0.0208, Accuracy: 1372/6048 (23%) (4%)

Train Ep: 2 [1600/6062]	Loss1: 0.377532	Loss2: 0.382385	 Dis: 0.141290 Entropy: 4.594434
Train Ep: 2 [3200/6062]	Loss1: 0.171740	Loss2: 0.161757	 Dis: 0.106647 Entropy: 4.832178
Train Ep: 2 [4800/6062]	Loss1: 0.451751	Loss2: 0.469987	 Dis: 0.112154 Entropy: 4.530136
Train Ep: 3 [0/6062]	Loss1: 0.118521	Loss2: 0.079501	 Dis: 0.140467 Entropy: 4.496674

Test set: Average loss: -0.0058, Accuracy: 854/6048 (14%) (5%)

Train Ep: 3 [1600/6062]	Loss1: 0.148845	Loss2: 0.216151	 Dis: 0.132089 Entropy: 4.570918
Train Ep: 3 [3200/6062]	Loss1: 0.088099	Loss2: 0.100654	 Dis: 0.085487 Entropy: 5.252058
Train Ep: 3 [4800/6062]	Loss1: 0.296297	Loss2: 0.263926	 Dis: 0.079546 Entropy: 4.424348
Train Ep: 4 [0/6062]	Loss1: 0.041563	Loss2: 0.122768	 Dis: 0.127576 Entropy: 4.667576

Test set: Average loss: 0.0103, Accuracy: 648/6048 (11%) (5%)

Train Ep: 4 [1600/6062]	Loss1: 0.168379	Loss2: 0.128866	 Dis: 0.108352 Entropy: 4.667634
Train Ep: 