# MLP-based AAN (AAN-A) models.

In [1]:
### All tests were run with torch-1.2.0 and torchtext-0.6.0

import torch
import torch.optim as optim
import torch.nn as nn
import time
from torchtext import data
import random
import numpy as np
import os
from torch.nn import functional as F

# Expose only the GPU with index 3 to CUDA for this process.
os.environ.update({"CUDA_VISIBLE_DEVICES": "3"})

# One seed shared by every random number generator the notebook touches.
SEED = 1234

# Seed Python's `random`, NumPy and PyTorch in turn so runs are repeatable.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(SEED)

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

### Prepare the data loaders with torchtext.

In [2]:
def get_iterator_feature(source_file, target_file, BATCH_SIZE=128):
    '''
    Build the torchtext iterators for one source -> target adaptation task.

    source_file: file name of the source-domain dataset in datasets/amazon_review/
    target_file: file name of the target-domain dataset in datasets/amazon_review/
    BATCH_SIZE: batch size used by all three iterators.

    Returns (source_iterator, target_iterator, valid_iterator). The first is
    built from the whole source file; the other two come from a 90% / 10%
    split of the target file. The split is seeded with the notebook-level
    SEED, and batches are placed on CUDA when it is available, else on CPU.
    '''
    # Both fields are read as-is from the JSON records: no tokenisation
    # (sequential=False on TEXT) and no vocabulary lookup (use_vocab=False).
    TEXT = data.Field(dtype = torch.float,sequential=False, batch_first = True,use_vocab=False)
    LABEL = data.LabelField(dtype = torch.long,use_vocab=False)

    fields = {'text': ('text', TEXT), 'label': ('label', LABEL)}
    data_dir = os.path.join('datasets', 'amazon_review')

    # TabularDataset.splits returns a tuple of datasets; only `train` is given,
    # so the dataset we want is the first element.
    train_data = data.TabularDataset.splits(
                            path = data_dir,
                            train = source_file,
                            format = 'json',
                            fields = fields
    )[0]
    test_data = data.TabularDataset.splits(
                            path = data_dir,
                            train = target_file,
                            format = 'json',
                            fields = fields
    )[0]

    ## A small labeled slice of the target data (10%) is held out to validate
    ## the model; the remaining 90% is used as the target/test set.
    # Dataset.split expects `random_state` to be a value returned by
    # random.getstate(). random.seed() itself returns None, so seed first and
    # pass the resulting state explicitly instead of relying on a side effect.
    random.seed(SEED)
    test_data, valid_data = test_data.split(split_ratio=0.90, random_state=random.getstate())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    source_iterator, target_iterator, valid_iterator = data.BucketIterator.splits(
        (train_data, test_data, valid_data),
        batch_size = BATCH_SIZE,
        sort=False,
        shuffle = True,
        # repeat=True,
        device = device)

    return source_iterator, target_iterator, valid_iterator

### Initialize AAN model.

In [4]:
from model.models import  AANMLP
from model.criterion import MMD_loss

# Model variant. 'AAN-A' gets separate optimizers for the task and kernel
# parameters; 'AAN' trains every parameter with a single optimizer.
aan_version = 'AAN-A'  ## or 'AAN'

# Available domain files under datasets/amazon_review/.
dataset = ['books_400.mat.json','dvd_400.mat.json','elec_400.mat.json','kitchen_400.mat.json']

# Adaptation direction for this run: electronics (source) -> dvd (target).
source_file = dataset[2]
target_file = dataset[1]

source_iterator, target_iterator, valid_iterator = get_iterator_feature(source_file, target_file, BATCH_SIZE=128)

INPUT_DIM = 400
LATENT_DIM = 100
OUTPUT_DIM = 2
DROPOUT = 0.25
MU = 0.1  # only passed to train_normal, i.e. used by the 'AAN' variant

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to its device *before* building the optimizers, as the
# torch.optim documentation recommends, so the optimizers are guaranteed to
# hold the parameters that are actually trained.
model = AANMLP(INPUT_DIM, LATENT_DIM, OUTPUT_DIM, DROPOUT, aan_version).to(device)

if aan_version == 'AAN':
    optimizer_task = optim.Adam(model.parameters())
    # Defined explicitly so a stale kernel optimizer from an earlier 'AAN-A'
    # run of this cell cannot linger in the notebook namespace.
    optimizer_kernel = None
else:
    optimizer_task = optim.Adam([{'params':model.extractor.parameters()},{'params':model.predictor.parameters()}])
    optimizer_kernel = optim.Adam([{'params':model.mmd_linear.parameters()},{'params':model.cmmd_linear.parameters()}])

criterion = nn.CrossEntropyLoss().to(device)

# NOTE(review): `eplison` (sic) is the keyword this call has always used; it
# presumably matches the spelling in MMD_loss's signature, so do not "fix" it here.
mmd_loss = MMD_loss(kernel_type='mmd', kernel_mul=2.0, kernel_num=5)
cmmd_loss = MMD_loss(kernel_type='cmmd', kernel_mul=2.0, kernel_num=5,eplison=0.00001)


### Training AAN (AAN-A) models.

In [5]:
from model.tools import train_adverisal,  train_normal, evaluate, epoch_time

N_EPOCHS = 30
# Start from +inf so the first epoch always counts as an improvement and a
# checkpoint is always written, whatever the scale of the validation loss.
best_loss = float('inf')
best_epoch = 0

for epoch in range(N_EPOCHS):

    start_time = time.time()
    # 'AAN-A' uses the two-optimizer training routine; anything else falls
    # back to the single-optimizer routine, which also takes MU.
    if aan_version == 'AAN-A':
        train_loss = train_adverisal(model,source_iterator,target_iterator,optimizer_task,optimizer_kernel,criterion,mmd_loss,cmmd_loss)
    else:
        train_loss = train_normal(model,source_iterator,target_iterator,optimizer_task,criterion,mmd_loss,cmmd_loss,MU)

    # Model selection is driven by the validation *loss*, not by the accuracy
    # printed below, so "Best Epoch" can advance while accuracy goes down.
    eval_acc,eval_loss = evaluate(model, valid_iterator, criterion)
    if eval_loss < best_loss:
        best_loss = eval_loss
        best_epoch = epoch
        torch.save(model.state_dict(),'mmd-task-model.pt')

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s | Best Epoch:{best_epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f}|Valid Acc: {eval_acc:.3f}')



Epoch: 01 | Epoch Time: 0m 2s | Best Epoch:01
	Train Loss: 0.986|Valid Acc: 0.540
Epoch: 02 | Epoch Time: 0m 0s | Best Epoch:01
	Train Loss: 0.968|Valid Acc: 0.540
Epoch: 03 | Epoch Time: 0m 1s | Best Epoch:03
	Train Loss: 0.967|Valid Acc: 0.580
Epoch: 04 | Epoch Time: 0m 1s | Best Epoch:04
	Train Loss: 0.986|Valid Acc: 0.710
Epoch: 05 | Epoch Time: 0m 1s | Best Epoch:04
	Train Loss: 0.956|Valid Acc: 0.485
Epoch: 06 | Epoch Time: 0m 0s | Best Epoch:06
	Train Loss: 1.002|Valid Acc: 0.575
Epoch: 07 | Epoch Time: 0m 1s | Best Epoch:07
	Train Loss: 0.947|Valid Acc: 0.655
Epoch: 08 | Epoch Time: 0m 1s | Best Epoch:08
	Train Loss: 0.963|Valid Acc: 0.630
Epoch: 09 | Epoch Time: 0m 0s | Best Epoch:09
	Train Loss: 0.968|Valid Acc: 0.710
Epoch: 10 | Epoch Time: 0m 1s | Best Epoch:10
	Train Loss: 0.910|Valid Acc: 0.730
Epoch: 11 | Epoch Time: 0m 0s | Best Epoch:11
	Train Loss: 0.908|Valid Acc: 0.785
Epoch: 12 | Epoch Time: 0m 0s | Best Epoch:12
	Train Loss: 0.805|Valid Acc: 0.780
Epoch: 13 | Epoc

### Test AAN models.

In [7]:
from model.tools import evaluate

### test the model.
# Reload the checkpoint saved at the epoch with the lowest validation loss.
# map_location keeps the load working when the checkpoint was written from a
# CUDA run but the current session only has a CPU (or a different GPU).
model.load_state_dict(torch.load('mmd-task-model.pt', map_location=device))
eval_acc,eval_loss  = evaluate(model,target_iterator,criterion)
print('from %s to %s, acc is %f'%(source_file,target_file, eval_acc))

from elec_400.mat.json to dvd_400.mat.json, acc is 0.787104
