In [None]:
import os
import ot
import json
import pprint
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Instance Based Transfer Learning
from domain_adaptation import KMM
from domain_adaptation import KLIEP
from domain_adaptation import uLSIF
from domain_adaptation import WeightedClassifier

# Feature Based Transfer Learning
from domain_adaptation import TCAClassifier
from domain_adaptation import PCAClassifier
from domain_adaptation import GFKClassifier

# Optimal Transport Transfer Learning
from domain_adaptation import OTClassifier
from domain_adaptation import JDOTClassifier

# Metrics for JDOT
from domain_adaptation.metrics import MSE
from domain_adaptation.metrics import HingeLoss
from domain_adaptation.metrics import CategoricalCrossEntropy

from utils import cval_performance

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Notebook-wide settings read by the cells below.
home_dir = "."    # base directory for result files: the current working directory
FEATURES = "Raw"  # feature set selector; the loading cell recognises "Raw" and "ACF"

In [None]:
# Load the feature table selected by FEATURES and split it into inputs, labels
# and domain ids.
FEATURE_FILES = {
    'Raw': ('raw', './data/cstr_rawdata.csv'),
    'ACF': ('ACF', './data/cstr_acfmeandata.csv'),
}
if FEATURES not in FEATURE_FILES:
    # An unknown value used to fall through both branches and only fail later
    # with a NameError on `df`; fail here with an explicit message instead.
    raise ValueError("Unknown FEATURES value {!r}; expected one of {}".format(
        FEATURES, sorted(FEATURE_FILES)))

feature_label, csv_path = FEATURE_FILES[FEATURES]
print('Loading {} features'.format(feature_label))
df = pd.read_csv(csv_path)

dataset = df.values
# The last four columns are not features: column -4 is used as the label and
# column -3 as the domain id. The remaining two are not read by this cell.
X, y, d = dataset[:, :-4], dataset[:, -4], dataset[:, -3]

## Table of Contents

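* <a href="#Baseline">Baseline Evaluation</a>
* <a href="#OT-Based">Optimal Transport-Based Transfer Learning</a>
    * <a href="#OTDA">Optimal Transport Domain Adaptation</a>
        * <a href="#EMD">Exact Optimal Transport</a>
        * <a href="#sinkhorn">Sinkhorn Algorithm</a>
        * <a href="#sinkhornl1l2">Sinkhorn \(\ell_{1}-\ell_{2}\)</a>
        * <a href="#sinkhornlpl1">Sinkhorn \(\ell_{p}-\ell_{1}\)</a>
    * <a href="#MP">Mapping Estimation (Monge Problem)</a>
    * <a href="#JDOT">Joint Distribution Optimal Transport</a>
* <a href="#Feat-Based">Feature-Based Transfer Learning</a>
    * <a href="#PCA">Principal Component Analysis</a>
    * <a href="#TCA">Transfer Component Analysis</a>
    * <a href="#GFK">Geodesic Flow Kernel</a>
* <a href="#Instance-Based">Instance-Based Transfer Learning</a>
    * <a href="#KLIEP">Kullback-Leibler Importance Estimation Procedure</a>
    * <a href="#uLSIF">unconstrained Least Squares Importance Fitting</a>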

<h2 id="Baseline">Baseline Evaluation</h2>

In [None]:
# Baseline: a linear SVM scored on every target domain through
# cval_performance with baseline=True.
# max_iter is passed as an int: scikit-learn documents it as an integer and
# recent releases reject a float such as 1e+6.
clf = SVC(kernel='linear', max_iter=int(1e6))
per_domain_acc = []
# Every loop in this notebook skips the first domain id.
# NOTE(review): presumably that is the source domain — confirm.
target_domains = np.unique(d).astype(int)[1:]
rule = '-' * 79
row = "|{:^25}|{:^25}|{:^25}|"

print(rule)
print('|{:^77}|'.format('Baseline'))
print(rule)
for domain in target_domains:
    in_domain = d == domain
    # Not consumed by this cell; still assigned because a later cell reads
    # the leftover Xt.
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(clf, X, y, d, domain, baseline=True, normalization='normalization')
    print(row.format(domain,
                     np.round(100 * np.mean(accs), 3),
                     np.round(100 * np.std(accs), 3)))
    per_domain_acc.append(np.mean(accs))
# Summary row: mean and spread of the per-domain mean accuracies.
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
print(rule)

<h2 id="OT-Based">Optimal Transport-Based Transfer Learning</h2>

<h3 id="OTDA">Optimal Transport Domain Adaptation</h3>

<h4 id="EMD">Exact Optimal Transport</h4>

In [None]:
# Exact optimal transport (EMD): grid search over cost normalisation and ground
# metric, followed by a per-domain report for one fixed configuration.
N_grid = [None, 'max', 'median', 'log', 'loglog']
M_grid = ['sqeuclidean', 'cityblock']
emd_cval_result = {}
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(rule)
print(row.format('Normalization', 'Metric', 'Score'))
print(rule)

for N in N_grid:
    for M in M_grid:
        # Same integer iteration cap as the final evaluation below, so the
        # search and the report use the same base classifier settings.
        clf = SVC(kernel='linear', max_iter=int(1e6))
        ot_clf = OTClassifier(clf=clf, ot_solver=ot.da.EMDTransport, norm=N, metric=M)
        per_domain_acc = []
        for domain in target_domains:
            in_domain = d == domain
            # Not consumed here; kept because a later cell reads the leftover Xt.
            Xt, yt = X[in_domain], y[in_domain]
            accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
            per_domain_acc.append(np.mean(accs))
        # Key is "<norm>,<metric>"; value is (mean, std) over target domains.
        emd_cval_result['{},{}'.format(N, M)] = (np.mean(per_domain_acc),
                                                 np.std(per_domain_acc))
        print(row.format(str(N), str(M), np.mean(per_domain_acc)))
print(rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(emd_cval_result, width=1)
with open(os.path.join(home_dir, './results/{}/EMD_cval.json'.format(FEATURES)), 'w') as f:
    f.write(json.dumps(emd_cval_result))

# ------------------------------------- Evaluates for Best Parameters ---------------------------------
print(rule)
print('|{:^77}|'.format('EMD'))
print(rule)
print(row.format('Domain', 'Accuracy', 'Std Dev'))

# The configuration below is hard-coded; it is not read from emd_cval_result.
str_to_latex = ""
clf = SVC(kernel='linear', max_iter=int(1e6))
ot_clf = OTClassifier(clf=clf, ot_solver=ot.da.EMDTransport, norm='loglog', metric='cityblock')

per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    mean_pct = np.round(100 * np.mean(accs), 3)
    std_pct = np.round(100 * np.std(accs), 3)
    print(row.format(domain, mean_pct, std_pct))
    str_to_latex += r"& {} $\pm$ {}".format(mean_pct, std_pct)
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
# The template has a single placeholder; the std that used to be passed as a
# second argument was silently ignored, so it is no longer passed.
str_to_latex += r"& {}".format(np.round(100 * np.mean(per_domain_acc), 3))
print(rule)

<h4 id="sinkhorn">Sinkhorn Algorithm</h4>

In [None]:
# Entropic OT (Sinkhorn): grid search over ground metric and entropic
# regularisation, followed by a per-domain report for one fixed configuration.
M_grid = ['sqeuclidean', 'cityblock']
R_grid = [1e-1, 1e-2, 1e-3]
sinkhorn_cval_result = {}
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(rule)
# The first column holds the ground metric (norm is fixed to 'max' below), so
# it is labelled 'Metric' rather than 'Normalization'.
print(row.format('Metric', 'Entropic Reg', 'Score'))
print(rule)
for M in M_grid:
    for R in R_grid:
        clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
        ot_clf = OTClassifier(clf=clf, ot_solver=ot.da.SinkhornTransport, norm='max', reg_e=R, metric=M)
        per_domain_acc = []
        for domain in target_domains:
            in_domain = d == domain
            # Not consumed here; kept because a later cell reads the leftover Xt.
            Xt, yt = X[in_domain], y[in_domain]
            accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
            per_domain_acc.append(np.mean(accs))
        # Key is "<metric>,<reg_e>"; value is (mean, std) over target domains.
        sinkhorn_cval_result['{},{}'.format(M, R)] = (np.mean(per_domain_acc),
                                                      np.std(per_domain_acc))
        print(row.format(M, R, np.mean(per_domain_acc)))
print(rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(sinkhorn_cval_result, width=1)
with open(os.path.join(home_dir, './results/{}/Sinkhorn_cval.json'.format(FEATURES)), 'w') as f:
    f.write(json.dumps(sinkhorn_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
# The grids and the result dict are intentionally NOT re-initialised here:
# resetting sinkhorn_cval_result at this point discarded the search results
# that were just computed and saved.
print(rule)
print(row.format('Domain', 'Mean Acc', 'Std'))
print(rule)

# Hard-coded configuration; it is not read from sinkhorn_cval_result.
clf = SVC(kernel='linear', max_iter=int(1e6))
ot_clf = OTClassifier(clf=clf, ot_solver=ot.da.SinkhornTransport, norm='max', reg_e=1e-3, metric='cityblock')
per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    print(row.format(domain, np.mean(accs), np.std(accs)))
print(row.format('Score', np.mean(per_domain_acc), np.std(per_domain_acc)))
print(rule)

<h4 id="sinkhornl1l2">Sinkhorn \(\ell_{1}-\ell_{2}\)</h4>

In [None]:
# Sinkhorn with l1-l2 class regularisation: grid search over entropic and class
# regularisation, followed by a per-domain report for one fixed configuration.
sinkhorn_l1l2_cval_result = {}
R_grid = [1e-1, 1e-2, 1e-3]
E_grid = [1e-1, 1e-2, 1e-3]
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(rule)
print(row.format('Entropic Reg', 'Class Reg', 'Score'))
print(rule)

for R in R_grid:
    for E in E_grid:
        clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
        ot_clf = OTClassifier(clf=clf,
                              ot_solver=ot.da.SinkhornL1l2Transport,
                              norm='max',
                              reg_e=R,
                              reg_cl=E,
                              metric='cityblock')
        per_domain_acc = []
        for domain in target_domains:
            in_domain = d == domain
            # Not consumed here; kept because a later cell reads the leftover Xt.
            Xt, yt = X[in_domain], y[in_domain]
            try:
                accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
            except Exception:
                # Best-effort search: a failing configuration is scored -1 so
                # the sweep continues. Only Exception is caught, so Ctrl-C
                # (KeyboardInterrupt) still stops the cell.
                accs = [-1]
            per_domain_acc.append(np.mean(accs))
        # Key is "<reg_e>,<reg_cl>"; value is (mean, std) over target domains.
        sinkhorn_l1l2_cval_result['{},{}'.format(R, E)] = (np.mean(per_domain_acc),
                                                           np.std(per_domain_acc))
        print(row.format(R, E, np.mean(per_domain_acc)))
print(rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(sinkhorn_l1l2_cval_result, width=1)
with open(os.path.join(home_dir, './results/{}/sinkhorn_l1l2_cval.json'.format(FEATURES)), 'w') as f:
    f.write(json.dumps(sinkhorn_l1l2_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
# The copy-pasted re-initialisation of M_grid, R_grid and sinkhorn_cval_result
# was removed: none of them is used below, and it wiped the plain Sinkhorn
# results held in memory.
print(rule)
print(row.format('Domain', 'Mean Acc', 'Std'))
print(rule)

# Hard-coded configuration; it is not read from sinkhorn_l1l2_cval_result.
clf = SVC(kernel='linear', max_iter=int(1e6))
ot_clf = OTClassifier(clf=clf,
                      ot_solver=ot.da.SinkhornL1l2Transport,
                      norm='max',
                      reg_e=1e-3,
                      reg_cl=1e-3,
                      metric='cityblock')
per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    print(row.format(domain, np.mean(accs), np.std(accs)))
print(row.format('Score', np.mean(per_domain_acc), np.std(per_domain_acc)))
print(rule)

<h4 id="sinkhornlpl1">Sinkhorn \(\ell_{p}-\ell_{1}\)</h4>

In [None]:
# Sinkhorn with lp-l1 class regularisation: grid search over entropic and class
# regularisation, followed by a per-domain report for one fixed configuration.
sinkhorn_lpl1_cval_result = {}
R_grid = [1e-1, 1e-2, 1e-3]
E_grid = [1e-1, 1e-2, 1e-3]
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
# The table has three 25-character columns, so the rule is 79 characters wide
# (the search part previously used a four-column width).
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(rule)
print(row.format('Entropic Reg', 'Class Reg', 'Score'))
print(rule)

for R in R_grid:
    for E in E_grid:
        clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
        # NOTE(review): no metric is passed during the search, while the final
        # evaluation below uses metric='cityblock' — confirm this is intended.
        ot_clf = OTClassifier(clf=clf,
                              ot_solver=ot.da.SinkhornLpl1Transport,
                              norm='max',
                              reg_e=R,
                              reg_cl=E)
        per_domain_acc = []
        for domain in target_domains:
            in_domain = d == domain
            # Not consumed here; kept because a later cell reads the leftover Xt.
            Xt, yt = X[in_domain], y[in_domain]
            accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
            per_domain_acc.append(np.mean(accs))
        # Key is "<reg_e>,<reg_cl>"; value is (mean, std) over target domains.
        sinkhorn_lpl1_cval_result['{},{}'.format(R, E)] = (np.mean(per_domain_acc),
                                                           np.std(per_domain_acc))
        print(row.format(R, E, np.mean(per_domain_acc)))
print(rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(sinkhorn_lpl1_cval_result, width=1)
with open(os.path.join(home_dir, './results/{}/sinkhorn_lpl1_cval.json'.format(FEATURES)), 'w') as f:
    f.write(json.dumps(sinkhorn_lpl1_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
print(rule)
print('|{:^77}|'.format('Sinkhorn LpL1'))
print(rule)
print(row.format('Domain', 'Accuracy', 'Std Dev'))

# Hard-coded configuration; it is not read from sinkhorn_lpl1_cval_result.
clf = SVC(kernel='linear', max_iter=int(1e6))
ot_clf = OTClassifier(clf=clf, ot_solver=ot.da.SinkhornLpl1Transport,
                      norm='max',
                      metric='cityblock',
                      reg_e=1e-3,
                      reg_cl=1e-2)

per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    # Called like every other cell: the former index_crossval=crossval_index
    # argument referenced a name that is never defined in this notebook.
    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    print(row.format(domain,
                     np.round(100 * np.mean(accs), 3),
                     np.round(100 * np.std(accs), 3)))
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
print(rule)

<h3 id="MP">Mapping Estimation (Monge Problem)</h3>

In [None]:
# Mapping estimation (ot.da.MappingTransport): grid search over kernel, loss
# weight and map regularisation, followed by a per-domain report for one fixed
# configuration.
MP_cval_result = {}
K_grid = ['linear', 'gaussian']
M_grid = [1e+0, 1e-1, 1e-2, 1e-3]
E_grid = [1e-3, 1e-2, 1e-1, 1e+0]
B_grid = [True]
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
search_rule = '-' * (18 * 5) + '-'
search_row = '|{:^17}|{:^17}|{:^17}|{:^17}|{:^17}|'
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(search_rule)
print(search_row.format('Kernel', 'Loss Weight', 'Linear Map Reg', 'Fit bias', 'Score'))
print(search_rule)

for K in K_grid:
    for mu in M_grid:
        for eta in E_grid:
            for bias in B_grid:
                # SVC's max_iter must be an int; the max_iter=3 below is a
                # separate keyword handed to OTClassifier.
                clf = SVC(kernel='linear', max_iter=int(1e6))
                ot_clf = OTClassifier(clf=clf,
                                      ot_solver=ot.da.MappingTransport,
                                      norm='max',
                                      max_iter=3,
                                      mu=mu,
                                      eta=eta,
                                      bias=bias,
                                      kernel=K, metric='cityblock')
                per_domain_acc = []
                for domain in target_domains:
                    in_domain = d == domain
                    # Not consumed here; kept because a later cell reads the leftover Xt.
                    Xt, yt = X[in_domain], y[in_domain]
                    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
                    per_domain_acc.append(np.mean(accs))
                # Key is "<kernel>,<mu>,<eta>,<bias>"; value is (mean, std).
                MP_cval_result['{},{},{},{}'.format(K, mu, eta, bias)] = (np.mean(per_domain_acc),
                                                                          np.std(per_domain_acc))
                print(search_row.format(K, mu, eta, bias,
                                        np.round(np.mean(per_domain_acc), 6)))
print(search_rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(MP_cval_result, width=1)
# NOTE(review): unlike the OT cells above, this path does not include FEATURES,
# so runs with different feature sets overwrite the same file — confirm intent.
with open('./results/MP_cval.json', 'w') as f:
    f.write(json.dumps(MP_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
print(rule)
print('|{:^77}|'.format('Mapping Estimation'))
print(rule)
print(row.format('Domain', 'Accuracy', 'Std Dev'))

# Hard-coded configuration. NOTE(review): metric='sqeuclidean' was not part of
# the search above, which always used 'cityblock' — confirm.
str_to_latex = ""
clf = SVC(kernel='linear', max_iter=int(1e6))
ot_clf = OTClassifier(clf=clf,
                      ot_solver=ot.da.MappingTransport,
                      norm='max',
                      max_iter=3,
                      mu=0.1,
                      eta=0.001,
                      bias=True,
                      kernel='linear',
                      metric='sqeuclidean')

per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    mean_pct = np.round(100 * np.mean(accs), 3)
    std_pct = np.round(100 * np.std(accs), 3)
    print(row.format(domain, mean_pct, std_pct))
    str_to_latex += r"& {} $\pm$ {}".format(mean_pct, std_pct)
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
# Single placeholder: the std formerly passed as a second argument was ignored.
str_to_latex += r"& {}".format(np.round(100 * np.mean(per_domain_acc), 3))
print(rule)

<h3 id="JDOT">Joint Distribution Optimal Transport</h3>

In [None]:
# JDOT: grid search over alpha, loss function and entropic regularisation,
# followed by a per-domain report for one fixed configuration.
R_grid = [0.0, 1e-2, 1e-1, 1, 1e+1]
# NOTE(review): hinge_loss, cat_loss and mse_loss are not defined in any cell
# visible above. Presumably they are built from the imported HingeLoss,
# CategoricalCrossEntropy and MSE — confirm and define them before this cell.
L_grid = [hinge_loss, cat_loss, mse_loss]
A_grid = [1e-1, 1, 1e+1, 1e+2]
jdot_cval_result = {}
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
search_rule = '-' * (26 * 4) + '-'
search_row = '|{:^25}|{:^25}|{:^25}|{:^25}|'
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(search_rule)
print(search_row.format('Alpha', 'Loss Fn', 'Entropic Reg', 'Score'))
print(search_rule)

for A in A_grid:
    for L in L_grid:
        for R in R_grid:
            # A zero regulariser selects the exact solver, anything else Sinkhorn.
            method = 'sinkhorn' if R > 0.0 else 'emd'
            clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
            jdot_clf = JDOTClassifier(clf=clf,
                                      method=method,
                                      reg=R,
                                      alpha=A,
                                      loss=L,
                                      numItermax=5,
                                      metric='cityblock')
            per_domain_acc = []
            for domain in target_domains:
                in_domain = d == domain
                # Not consumed here; kept because a later cell reads the leftover Xt.
                Xt, yt = X[in_domain], y[in_domain]
                try:
                    accs = cval_performance(jdot_clf, X, y, d, domain,
                                            baseline=False)
                except Exception:
                    # Best-effort search: a failing configuration is scored -1.
                    # Only Exception is caught so Ctrl-C still stops the cell.
                    accs = [-1]
                per_domain_acc.append(np.mean(accs))
            # Key is "<alpha>,<loss>,<reg>"; value is (mean, std).
            jdot_cval_result['{},{},{}'.format(A, L, R)] = (np.mean(per_domain_acc),
                                                            np.std(per_domain_acc))
            # str(L): an alignment spec such as {:^25} raises TypeError for
            # objects that do not implement __format__ themselves.
            print(search_row.format(A, str(L), R, np.mean(per_domain_acc)))
print(search_rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(jdot_cval_result, width=1)
with open(os.path.join(home_dir, './results/{}/JDOT_cval.json'.format(FEATURES)), 'w') as f:
    f.write(json.dumps(jdot_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
print(rule)
print('|{:^77}|'.format('JDOT'))
print(rule)
print(row.format('Domain', 'Accuracy', 'Std Dev'))

# Hard-coded configuration; it is not read from jdot_cval_result.
clf = SVC(kernel='linear', max_iter=int(1e6))
ot_clf = JDOTClassifier(clf=clf, loss=hinge_loss, alpha=10, method='emd', numItermax=5,
                        metric='cityblock')
str_to_latex = ""

per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(ot_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    mean_pct = np.round(100 * np.mean(accs), 3)
    std_pct = np.round(100 * np.std(accs), 3)
    print(row.format(domain, mean_pct, std_pct))
    # Raw strings: "\p" in a normal string literal is an invalid escape sequence.
    str_to_latex += r"& {} $\pm$ {}".format(mean_pct, std_pct)
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
str_to_latex += r"& {} $\pm$ {}".format(np.round(100 * np.mean(per_domain_acc), 3),
                                        np.round(100 * np.std(per_domain_acc), 3))
print(rule)

<h2 id="Feat-Based">Feature-Based Transfer Learning</h2>

<h3 id="PCA">Principal Component Analysis</h3>

In [None]:
# PCA-based classifier: grid search over the number of components, followed by
# a per-domain report for one fixed configuration.
if FEATURES == "ACF":
    D_grid = [1, 2, 5, 10, 13]
elif FEATURES == "Raw":
    D_grid = [2, 5, 10, 15, 20, 25, 30, 100, 200, 300]
else:
    # Without this branch an unknown value would reuse a stale D_grid from
    # another cell, or fail with NameError on a fresh kernel.
    raise ValueError("Unknown FEATURES value {!r}; expected 'ACF' or 'Raw'".format(FEATURES))
pca_cval_result = {}
clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
search_rule = '-' * (21 * 2) + '-'
search_row = '|{:^20}|{:^20}|'
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(search_rule)
print(search_row.format('N Dimensions', 'Score'))
print(search_rule)
for ndims in D_grid:
    pca_clf = PCAClassifier(clf=clf, n_components=ndims)
    per_domain_acc = []
    for domain in target_domains:
        in_domain = d == domain
        # Not consumed here; kept because a later cell reads the leftover Xt.
        Xt, yt = X[in_domain], y[in_domain]
        accs = cval_performance(pca_clf, X, y, d, domain, baseline=False)
        per_domain_acc.append(np.mean(accs))
    # Key is the number of components; value is (mean, std) over target domains.
    pca_cval_result['{}'.format(ndims)] = (np.mean(per_domain_acc),
                                           np.std(per_domain_acc))
    print(search_row.format(ndims, np.mean(per_domain_acc)))
print(search_rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(pca_cval_result, width=1)
with open('./results/{}/PCA_cval.json'.format(FEATURES), 'w') as f:
    f.write(json.dumps(pca_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
# Hard-coded configuration; it is not read from pca_cval_result.
clf = SVC(kernel='linear', max_iter=int(1e6))
pca_clf = PCAClassifier(clf=clf, n_components=25)
str_to_latex = ""

print(rule)
print('|{:^77}|'.format('PCA'))
print(rule)
print(row.format('Domain', 'Accuracy', 'Std Dev'))

per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(pca_clf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    mean_pct = np.round(100 * np.mean(accs), 3)
    std_pct = np.round(100 * np.std(accs), 3)
    print(row.format(domain, mean_pct, std_pct))
    # In a raw string a single backslash is enough: r"\\pm" emitted two
    # backslashes, which LaTeX reads as a line break followed by "pm".
    str_to_latex += r"& {} $\pm$ {}".format(mean_pct, std_pct)
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
str_to_latex += r"& {}".format(np.round(100 * np.mean(per_domain_acc), 3))
print(rule)

<h3 id="TCA">Transfer Component Analysis</h3>

In [None]:
# TCA: grid search over dimensionality, kernel, mu, kernel parameter and
# projection normalisation, followed by a per-domain report for one fixed
# configuration.
D_grid = [1, 2, 5, 10, 13]
K_grid = ['linear', 'rbf']
M_grid = [1e-2, 1e-1, 1, 1e+1, 1e+2]
G_grid = [1e-3, 1e-2, 1e-1, 1e0]
N_grid = ['none', 'scale', 'std']

tca_cval_result = {}
clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
search_rule = '-' * (18 * 6) + '-'
search_row = '|{:^17}|{:^17}|{:^17}|{:^17}|{:^17}|{:^17}|'
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(search_rule)
print(search_row.format('Ndim', 'Kernel', 'Mu', 'Kernel Param', 'Norm', 'Score'))
print(search_rule)
for ndims in D_grid:
    for K in K_grid:
        for M in M_grid:
            for G in G_grid:
                for N in N_grid:
                    tca_clf = TCAClassifier(clf=clf, n_components=ndims, kernel=K, mu=M, gamma=G,
                                            normalize_projections=N)
                    per_domain_acc = []
                    for domain in target_domains:
                        in_domain = d == domain
                        # Not consumed here; kept because a later cell reads the leftover Xt.
                        Xt, yt = X[in_domain], y[in_domain]
                        accs = cval_performance(tca_clf, X, y, d, domain, baseline=False)
                        per_domain_acc.append(np.mean(accs))
                    # Key is "<ndims>,<kernel>,<mu>,<gamma>,<norm>"; value is (mean, std).
                    tca_cval_result['{},{},{},{},{}'.format(ndims, K, M, G, N)] = (
                        np.mean(per_domain_acc), np.std(per_domain_acc))
                    print(search_row.format(ndims, K, M, G, N,
                                            np.round(np.mean(per_domain_acc), 8)))
# The closing rule now has the same width as the header rules (was 17 * 6).
print(search_rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(tca_cval_result, width=1)
with open('./results/{}/TCA_cval.json'.format(FEATURES), 'w') as f:
    f.write(json.dumps(tca_cval_result))

# ------------------------------------- Runs for best parameters ----------------------------------
# Hard-coded configuration. NOTE(review): n_components=300 and gamma=None are
# outside the grids searched above — confirm where they come from.
clf = SVC(kernel='linear', max_iter=int(1e6))
tca_clf = TCAClassifier(clf=clf, n_components=300, kernel='rbf', mu=10.0, gamma=None,
                        normalize_projections='none')
str_to_latex = ""

print(rule)
print('|{:^77}|'.format('TCA'))
print(rule)
print(row.format('Domain', 'Mean Accuracy', 'Std Accuracy'))
print(rule)
per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(tca_clf, X, y, d, domain, baseline=False, normalization="normalization")
    mean_acc = np.round(100 * np.mean(accs), 3)
    std_acc = np.round(100 * np.std(accs), 3)
    print(row.format(domain, mean_acc, std_acc))
    # "& " separates table cells as in the other cells of this notebook;
    # without it consecutive entries ran together. Raw string avoids the
    # invalid "\p" escape.
    str_to_latex += r"& {} $\pm$ {}".format(mean_acc, std_acc)
    per_domain_acc.append(np.mean(accs))
mean_acc = np.round(100 * np.mean(per_domain_acc), 3)
std_acc = np.round(100 * np.std(per_domain_acc), 3)
print(row.format('score', mean_acc, std_acc))
str_to_latex += r"& {}".format(mean_acc)

print(rule)

<h3 id="GFK">Geodesic Flow Kernel</h3>

In [None]:
# Geodesic Flow Kernel: grid search over dimensionality and projection type,
# followed by a per-domain report for one fixed configuration.
if FEATURES == "ACF":
    D_grid = [1, 2, 5, 10, 13]
elif FEATURES == "Raw":
    D_grid = [2, 5, 10, 15, 20, 25, 30, 100, 200, 300]
else:
    # Without this branch an unknown value would silently reuse the D_grid
    # left behind by an earlier cell.
    raise ValueError("Unknown FEATURES value {!r}; expected 'ACF' or 'Raw'".format(FEATURES))
P_grid = ['pca', 'pls']

gfk_cval_result = {}
clf = SVC(kernel='linear', max_iter=int(1e6))  # max_iter must be an int
target_domains = np.unique(d).astype(int)[1:]  # first domain id is skipped
rule = '-' * (26 * 3) + '-'
row = '|{:^25}|{:^25}|{:^25}|'

# ------------------------------------- Grid Search -----------------------------------------------
print(rule)
print(row.format('Ndim', 'Projection', 'Score'))
print(rule)
for ndims in D_grid:
    for P in P_grid:
        gfk_clf = GFKClassifier(clf=clf, n_components=ndims, projection=P)
        per_domain_acc = []
        for domain in target_domains:
            in_domain = d == domain
            # Not consumed here; kept because the next cell reads the leftover Xt.
            Xt, yt = X[in_domain], y[in_domain]
            try:
                accs = cval_performance(gfk_clf, X, y, d, domain, baseline=False)
            except Exception:
                # Best-effort search: a failing configuration is scored -1.
                # Only Exception is caught so Ctrl-C still stops the cell.
                accs = [-1]
            per_domain_acc.append(np.mean(accs))
        # Key is "<ndims>,<projection>"; value is (mean, std) over target domains.
        gfk_cval_result['{},{}'.format(ndims, P)] = (np.mean(per_domain_acc), np.std(per_domain_acc))
        print(row.format(ndims, P, np.round(np.mean(per_domain_acc), 8)))
print(rule)

# ------------------------------------- Saves Results ---------------------------------------------
pprint.pprint(gfk_cval_result, width=1)
# NOTE(review): this path does not include FEATURES, unlike the PCA/TCA cells,
# so runs with different feature sets overwrite the same file — confirm intent.
with open('./results/GFK_cval.json', 'w') as f:
    f.write(json.dumps(gfk_cval_result))


# ------------------------------------- Runs for best parameters ----------------------------------
str_to_latex = ""

print(rule)
print('|{:^77}|'.format('GFK'))
print(rule)
print(row.format('Domain', 'Accuracy', 'Std Dev'))

# Hard-coded configuration; it is not read from gfk_cval_result.
clf = SVC(kernel='linear', max_iter=int(1e6))
gfk_clf = GFKClassifier(clf=clf, n_components=100, projection='pca')

per_domain_acc = []
for domain in target_domains:
    in_domain = d == domain
    Xt, yt = X[in_domain], y[in_domain]
    accs = cval_performance(gfk_clf, X, y, d, domain, baseline=False, normalization='normalization')
    per_domain_acc.append(np.mean(accs))
    mean_pct = np.round(100 * np.mean(accs), 3)
    std_pct = np.round(100 * np.std(accs), 3)
    print(row.format(domain, mean_pct, std_pct))
    str_to_latex += r"& {} $\pm$ {}".format(mean_pct, std_pct)
print(row.format('Score',
                 np.round(100 * np.mean(per_domain_acc), 3),
                 np.round(100 * np.std(per_domain_acc), 3)))
str_to_latex += r"& {}".format(np.round(100 * np.mean(per_domain_acc), 3))
print(rule)

<h2 id="Instance-Based">Instance-Based Transfer Learning</h2>

<h3 id="KLIEP">Kullback-Leibler Importance Estimation Procedure</h3>

In [None]:
# Grid search over the KLIEP importance-weighting hyper-parameters.
# For every (C, kernel, basis vectors, kernel parameter, epsilon) combination a
# linear SVC wrapped in a KLIEP WeightedClassifier is cross-validated on each
# target domain; the mean/std of the per-domain mean accuracies is stored in
# `kliep_cval_result`, printed as a table row and finally dumped to JSON.
K_grid = ['rbf']
BV_grid = ['None', 'Xs', 'Xt', 'XsXt']
G_grid = [[1e-3], [1e-2], [1e-1], None]
E_grid = [1e-1, 1e-2, 1e-3, 1e-4]
kliep_cval_result = {}


def _kliep_basis_vectors(option, Xs, Xt):
    """Return the basis-vector array selected by one entry of ``BV_grid``.

    Parameters
    ----------
    option : str
        One of 'None', 'Xs', 'Xt' or 'XsXt'.
    Xs, Xt : array-like
        Candidate sample matrices; 'XsXt' stacks them row-wise.

    Returns
    -------
    None for 'None', otherwise the selected (or concatenated) array.

    Raises
    ------
    ValueError
        If ``option`` is not one of the four known names. The original
        if/elif chain silently kept the previous value in that case.
    """
    if option == 'None':
        return None
    if option == 'Xs':
        return Xs
    if option == 'Xt':
        return Xt
    if option == 'XsXt':
        return np.concatenate([Xs, Xt], axis=0)
    raise ValueError('Unknown basis vector option: {!r}'.format(option))


# Loop-invariant: every domain id except the first one returned by np.unique.
target_domains = np.unique(d).astype(int)[1:]

print('-' * (18 * 6) + '-')
print('|{:^17}|{:^17}|{:^17}|{:^17}|{:^17}|{:^17}|'.format('C', 'Kernel', 'Basis Vectors', 'G', 'E', 'Score'))
print('-' * (18 * 6) + '-')
for C in [1, 15, 25]:
    for K in K_grid:
        for BV_str in BV_grid:
            for G in G_grid:
                for E in E_grid:
                    # A kernel parameter is only swept for non-linear kernels.
                    if K == 'linear' and G is not None:
                        continue
                    clf = SVC(C=C, kernel='linear')
                    per_domain_acc = []
                    for domain in target_domains:
                        # Slice the rows of the domain being evaluated so the
                        # 'Xt'/'XsXt' options use this domain's samples rather
                        # than whatever `Xt` an earlier cell happened to leave
                        # behind.
                        Xt = X[np.where(d == domain)[0]]
                        # NOTE(review): `Xs` is not defined in this cell and
                        # must come from an earlier one (presumably the
                        # source-domain samples) - confirm.
                        wclf = WeightedClassifier(clf,
                                                  KLIEP,
                                                  kernel=K,
                                                  kernel_param=G,
                                                  basis_vectors=_kliep_basis_vectors(BV_str, Xs, Xt),
                                                  epsilon=E)
                        accs = cval_performance(wclf, X, y, d, domain, baseline=False)
                        per_domain_acc.append(np.mean(accs))
                    mean_acc = np.mean(per_domain_acc)
                    kliep_cval_result['{},{},{},{},{}'.format(C,
                                                              K,
                                                              BV_str,
                                                              G,
                                                              E)] = (mean_acc,
                                                                     np.std(per_domain_acc))
                    print('|{:^17}|{:^17}|{:^17}'
                          '|{:^17}|{:^17}|{:^17}|'.format(C, K, BV_str, str(G),
                                                          E, np.round(mean_acc, 7)))
print('-' * (18 * 6) + '-')

pprint.pprint(kliep_cval_result, width=1)
with open('./results/KLIEP_cval.json', 'w') as f:
    f.write(json.dumps(kliep_cval_result))
    
# Final KLIEP evaluation: cross-validate the KLIEP-weighted linear SVC with a
# fixed hyper-parameter choice on every target domain and print a
# per-domain table followed by the mean/std across domains.
clf = SVC(C=1, kernel='linear')
wclf = WeightedClassifier(clf, KLIEP, kernel='rbf', kernel_param=None, basis_vectors=None, epsilon=1e-3)
print('-' * 79)
# This table reports the KLIEP-weighted classifier, so it is labelled as such
# (it was previously titled 'Baseline').
print('|{:^77}|'.format('KLIEP'))
print('-' * 79)
print("|{:^25}|{:^25}|{:^25}|".format("Domain", "Mean Accuracy", "Std Deviation"))
print('-' * 79)
per_domain_acc = []
for domain in np.unique(d).astype(int)[1:]:
    # Not used by the call below; kept so the notebook-level Xt/yt globals are
    # still refreshed exactly as before.
    Xt = X[np.where(d == domain)[0]]
    yt = y[np.where(d == domain)[0]]
    # Evaluate the weighted classifier built above. The previous call scored
    # the plain `clf` with baseline=True and used a (Xs, ys, Xt, yt, n_folds)
    # argument list that does not match the other cval_performance calls in
    # this notebook.
    accs = cval_performance(wclf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    print("|{:^25}|{:^25}|{:^25}|".format(domain, np.mean(accs), np.std(accs)))
print("|{:^25}|{:^25}|{:^25}|".format('-', np.mean(per_domain_acc), np.std(per_domain_acc)))
print('-' * 79)

<h3 id="uLSIF">Unconstrained Least-Squares Importance Fitting (uLSIF)</h3>

In [None]:
# Hyper-parameter sweep for the uLSIF-weighted linear SVC.
# Each (C, kernel, kernel parameter, regularisation) combination is
# cross-validated on every target domain; the mean and std of the per-domain
# mean accuracies are recorded, printed as a table row and saved as JSON.
K_grid = ['rbf']
G_grid = [[1e-3], [1e-2], [1e-1], None]
R_grid = [1e-1, 1e-2, 1e-3, 1e-9]

ulsif_cval_result = {}
row_fmt = '|{:^20}|{:^20}|{:^20}|{:^20}|{:^20}|'
border = '-' * (21 * 5) + '-'
print(border)
print(row_fmt.format('C', 'Kernel', 'G', 'R', 'Score'))
print(border)
for C in [10, 15, 25]:
    clf = SVC(C=C, kernel='linear')
    for K in K_grid:
        for G in G_grid:
            for R in R_grid:
                # A kernel parameter is only swept for non-linear kernels.
                if K == 'linear' and G is not None:
                    continue
                wclf = WeightedClassifier(clf, uLSIF, kernel=K, kernel_param=G, basis_vectors=None, reg=R)
                # One mean accuracy per target domain (all but the first id).
                per_domain_acc = [
                    np.mean(cval_performance(wclf, X, y, d, domain, baseline=False))
                    for domain in np.unique(d).astype(int)[1:]
                ]
                score = np.mean(per_domain_acc)
                spread = np.std(per_domain_acc)
                ulsif_cval_result['{},{},{},{}'.format(C, K, G, R)] = (score, spread)
                print(row_fmt.format(C, K, str(G), R, score))

pprint.pprint(ulsif_cval_result, width=1)
with open('./results/uLSIF_cval.json', 'w') as f:
    json.dump(ulsif_cval_result, f)
    
# Final uLSIF evaluation: cross-validate the uLSIF-weighted linear SVC with a
# fixed hyper-parameter choice on every target domain and print per-domain
# accuracies (in percent) followed by the mean/std across domains.
clf = SVC(C=1, kernel='linear')
# NOTE(review): kernel='linear' together with kernel_param=[10] is a
# combination the grid search in this cell skips (and 'linear' is not in its
# K_grid) - confirm these are the intended hyper-parameters.
wclf = WeightedClassifier(clf, uLSIF, kernel='linear', kernel_param=[10], basis_vectors=None, reg=1e-1)
print('-' * 79)
# This table reports the uLSIF-weighted classifier, so it is labelled as such
# (it was previously titled 'Baseline').
print('|{:^77}|'.format('uLSIF'))
print('-' * 79)
print("|{:^25}|{:^25}|{:^25}|".format("Domain", "Mean Accuracy", "Std Deviation"))
print('|' + '-' * 77 + '|')
per_domain_acc = []
for domain in np.unique(d).astype(int)[1:]:
    # Not used by the call below; kept so the notebook-level Xt/yt globals are
    # still refreshed exactly as before.
    Xt = X[np.where(d == domain)[0]]
    yt = y[np.where(d == domain)[0]]
    accs = cval_performance(wclf, X, y, d, domain, baseline=False)
    per_domain_acc.append(np.mean(accs))
    # Scale after reducing: `100 * accs` is an element-wise product only for
    # an ndarray; for a plain list it would repeat the list and leave the mean
    # unscaled. This also matches how the std is scaled on the next line.
    print("|{:^25}|{:^25}|{:^25}|".format(domain,
                                          np.round(100 * np.mean(accs), 3),
                                          np.round(100 * np.std(accs), 3)))
print("|{:^25}|{:^25}|{:^25}|".format('Score',
                                      np.round(100 * np.mean(per_domain_acc), 3),
                                      np.round(100 * np.std(per_domain_acc), 3)))
print('-' * 79)

<h3 id="KMM">Kernel Mean Matching</h3>

In [None]:
# Hyper-parameter sweep for the KMM-weighted linear SVC.
# Each (C, kernel, kernel parameter, B) combination is cross-validated on
# every target domain; the mean and std of the per-domain mean accuracies are
# recorded, printed as a table row and saved as JSON.
K_grid = ['linear', 'rbf']
G_grid = [[1e-3], [1e-2], [1e-1], None]
B_grid = [1, 1e+1, 1e+2, 1e+3]
kmm_cval_result = {}

print('-' * (21 * 5) + '-')
print('|{:^20}|{:^20}|{:^20}|{:^20}|{:^20}|'.format('C', 'Kernel', 'G', 'B', 'Score'))
print('-' * (21 * 5) + '-')
for C in [1, 15, 25]:
    clf = SVC(C=C, kernel='linear')
    for K in K_grid:
        for G in G_grid:
            for B in B_grid:
                # A kernel parameter is only swept for non-linear kernels:
                # with 'linear' only the G=None entry is evaluated.
                if K == 'linear' and G is not None:
                    continue
                wclf = WeightedClassifier(clf, KMM, kernel=K, kernel_param=G, B=B)
                per_domain_acc = []
                for domain in np.unique(d).astype(int)[1:]:
                    # Use the same (X, y, d, domain) call form as the other
                    # cval_performance calls in this notebook. The previous
                    # (Xs, ys, Xt, yt, n_folds=5) form relied on Xs/ys globals
                    # from earlier cells.
                    # NOTE(review): n_folds is no longer passed explicitly -
                    # confirm the default fold count is the intended one.
                    accs = cval_performance(wclf, X, y, d, domain, baseline=False)
                    per_domain_acc.append(np.mean(accs))
                kmm_cval_result['{},{},{},{}'.format(C, K, G, B)] = (np.mean(per_domain_acc),
                                                                     np.std(per_domain_acc))
                print('|{:^20}|{:^20}|{:^20}|{:^20}|{:^20}|'.format(C, K, str(G), B,
                                                                    np.mean(per_domain_acc)))

pprint.pprint(kmm_cval_result, width=1)
with open('./results/KMM_cval.json', 'w') as f:
    f.write(json.dumps(kmm_cval_result))
    
    
# Final KMM evaluation: cross-validate the KMM-weighted linear SVC with a
# fixed hyper-parameter choice on every target domain and print per-domain
# accuracies (in percent) followed by the mean/std across domains.
clf = SVC(C=15, kernel='linear')
wclf = WeightedClassifier(clf, KMM, kernel='rbf', kernel_param=[1e-3], B=1000)
print('-' * 79)
# This table reports the KMM-weighted classifier, so it is labelled as such
# (it was previously titled 'Baseline').
print('|{:^77}|'.format('KMM'))
print('-' * 79)
print("|{:^25}|{:^25}|{:^25}|".format("Domain", "Mean Accuracy", "Std Deviation"))
# Rows are 79 characters wide (three 25-character columns plus four bars), so
# the separator must be 79 characters too; it was 77 and left the border short.
print('-' * 79)
acc_per_domain = []
for domain in np.unique(d).astype(int)[1:]:
    # Not used by the call below; kept so the notebook-level Xt/yt globals are
    # still refreshed exactly as before.
    Xt = X[np.where(d == domain)[0]]
    yt = y[np.where(d == domain)[0]]
    accs = cval_performance(wclf, X, y, d, domain, baseline=False, normalization="normalization")
    print("|{:^25}|{:^25}|{:^25}|".format(domain,
                                          np.round(100 * np.mean(accs), 3),
                                          np.round(100 * np.std(accs), 3)))
    acc_per_domain.append(np.mean(accs))
print("|{:^25}|{:^25}|{:^25}|".format('Score',
                                      np.round(100 * np.mean(acc_per_domain), 3),
                                      np.round(100 * np.std(acc_per_domain), 3)))
print('-' * 79)