In [None]:
from scipy.spatial.distance import cdist
from scipy.stats import mode
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from itertools import permutations, combinations

from numpy.random import default_rng
from numpy.linalg import norm
from scipy.special import softmax

In [None]:
def normalize(x):
    """Standardize ``x`` along axis 0 (zero mean, unit standard deviation).

    Parameters
    ----------
    x : array-like
        Data whose first axis indexes samples; statistics are computed
        with ``axis=0``.

    Returns
    -------
    numpy.ndarray
        ``(x - mean) / std`` computed per column. Columns whose standard
        deviation is exactly zero (constant features) are returned as all
        zeros instead of NaN.
    """
    x = np.asarray(x)
    mean = x.mean(axis=0)
    std = x.std(axis=0)
    # A constant column has std == 0; dividing by it would give 0/0 = NaN and
    # poison every downstream distance / classifier. Dividing by 1 instead
    # leaves the (already zero) centered values untouched.
    safe_std = np.where(std == 0, 1.0, std)
    return (x - mean) / safe_std

In [None]:
def knn(sx, sy, tx, k=9):
    """Label each row of ``tx`` by majority vote among its ``k`` nearest rows of ``sx``.

    Distances come from ``cdist(tx, sx)`` with its default metric; the vote
    is ``scipy.stats.mode`` over the labels ``sy`` of the ``k`` closest rows.
    Returns a flat array with one label per row of ``tx``.
    """
    pairwise = cdist(tx, sx)
    # Column indices of the k smallest distances for every query row.
    nearest = np.argsort(pairwise, axis=1)[:, :k]
    neighbour_labels = sy[nearest]
    vote = mode(neighbour_labels, axis=1)
    return vote.mode.flatten()

In [None]:
def cbpl(sx, sy, tx, ty, th=1, alpha=0.9, num_iter=3, num_classes=65):
    """Pick, for every class, the ``th`` rows of ``tx`` closest to a class center.

    Centers start as the per-class means of ``sx`` (classes ``0 ..
    num_classes - 1`` according to ``sy``). On each iteration the ``th``
    nearest rows of ``tx`` are found for every center, and the center is
    moved towards their mean: ``alpha * center + (1 - alpha) * mean``.

    Parameters
    ----------
    sx, sy : ndarray
        Features and labels used to build the initial centers.
    tx, ty : ndarray
        Candidate features and their labels; ``ty`` is only indexed, never
        used for the selection itself.
    th : int
        Number of nearest candidates kept per class.
    alpha : float
        Weight of the previous center in the update.
    num_iter : int
        Number of select/update rounds; must be at least 1.
    num_classes : int
        Number of class ids to build centers for (default 65, the value
        that used to be hard-coded).

    Returns
    -------
    (ndarray, ndarray)
        The selected rows of ``tx`` flattened class by class to
        ``(-1, tx.shape[-1])``, and ``ty`` indexed by the selection (one
        row per class). The selection is the one made in the last
        iteration, i.e. before that iteration's center update.

    Raises
    ------
    ValueError
        If ``num_iter`` is smaller than 1 (no selection would be made).
    """
    if num_iter < 1:
        # Without at least one round ``sorted_idx`` would stay None and the
        # fancy-indexing below would silently return *all* of ``tx``.
        raise ValueError('num_iter must be at least 1')
    center = np.vstack([sx[sy == idx].mean(axis=0) for idx in range(num_classes)])
    sorted_idx = None
    for _ in range(num_iter):
        dist = cdist(center, tx)
        sorted_idx = np.argsort(dist, axis=1)[:, :th]
        # Mean of the th nearest candidates of each class.
        cand = tx[sorted_idx].mean(axis=1)
        center = alpha * center + (1 - alpha) * cand
    return tx[sorted_idx].reshape(-1, tx.shape[-1]), ty[sorted_idx]

In [None]:
def get_features(path, norm=True):
    """Load the ``'s'`` and ``'t'`` arrays from an ``.npz`` archive and split off labels.

    Each array is treated as 2-D with the label stored in the last column
    and the features in all preceding columns.

    Parameters
    ----------
    path : str
        Location of the ``.npz`` archive containing keys ``'s'`` and ``'t'``.
    norm : bool
        When true, both feature matrices are passed through ``normalize``
        (each one standardized independently). Note that inside this
        function the name shadows ``numpy.linalg.norm``; that function is
        not used here.

    Returns
    -------
    tuple
        ``(s_features, s_labels, t_features, t_labels)``.
    """
    # np.load on an .npz returns a lazily-read archive that keeps the file
    # open; the context manager closes it once both arrays are materialized.
    with np.load(path) as data:
        ss, st = data['s'], data['t']
    ssx, ssy = ss[:, :-1], ss[:, -1]
    stx, sty = st[:, :-1], st[:, -1]
    if norm:
        return normalize(ssx), ssy, normalize(stx), sty
    return ssx, ssy, stx, sty

In [None]:
# Baseline: for every ordered pair (s, t), fit a linear SVM on the normalized
# 's' features of s{s}_t{t}.npz and report its accuracy on the 't' features
# of the same archive.
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path)

    c1 = LinearSVC(random_state=1126, C=0.1, max_iter=5000)
    c1.fit(ssx, ssy)
    # Label each score so the twelve printed numbers can be told apart.
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(c1.score(stx, sty))

#### different space center comparison

In [None]:
# Pairwise distances between per-class centers computed from the four feature
# sets of one domain pair (both archives s{s}_t{t} and s{t}_t{s}).
# Only the first ordered pair produced by permutations() is inspected.
s, t = next(permutations(range(4), 2))
s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'

ssx, ssy, stx, sty = get_features(s_path)
ttx, tty, tsx, tsy = get_features(t_path)

class_ids = range(65)
ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in class_ids])
tsc = np.stack([tsx[tsy == k].mean(axis=0) for k in class_ids])

stc = np.stack([stx[sty == k].mean(axis=0) for k in class_ids])
ttc = np.stack([ttx[tty == k].mean(axis=0) for k in class_ids])

l = [('ssc', ssc), ('tsc', tsc), ('stc', stc), ('ttc', ttc)]
for i in class_ids:
    print(f'class{i}')
    # Every unordered pair of center sets, compared on class i only.
    for (al, av), (bl, bv) in combinations(l, 2):
        gap = norm(av[i] - bv[i])
        print(f'{al}<->{bl}: {gap}')

#### cbmix

In [None]:
# Fit a linear SVM on the 's' features of the fixbi archive for the fixed
# pair (0, 3) and score it on both feature sets of the cbmix_perfect archive.
s, t = 0, 3
print(f'source {s}, target {t}')
path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
c_path = f'./OfficeHome/cbmix_perfect/s{s}_t{t}.npz'

sx, sy, _, _ = get_features(path)
csx, csy, ctx, cty = get_features(c_path)

c1 = LinearSVC(random_state=12458, C=0.1, max_iter=5000)
c1.fit(sx, sy)

acc_on_s = c1.score(csx, csy)
acc_on_t = c1.score(ctx, cty)
print(acc_on_s, acc_on_t)

#### mixup_perfect

In [None]:
# Same experiment as the cbmix cell, for the fixed pair (1, 0) and the
# mixup_perfect archive. The loaded labels are printed before fitting.
s, t = 1, 0
print(f'source {s}, target {t}')
path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
c_path = f'./OfficeHome/mixup_perfect/s{s}_t{t}.npz'

sx, sy, _, _ = get_features(path)
csx, csy, ctx, cty = get_features(c_path)
print(csy)

c1 = LinearSVC(random_state=12458, C=0.1, max_iter=5000)
c1.fit(sx, sy)

acc_on_s = c1.score(csx, csy)
acc_on_t = c1.score(ctx, cty)
print(acc_on_s, acc_on_t)

### nearest neighbor for stx center

In [None]:
# For each class, take the mean of the 't' samples the SVM *predicts* as that
# class, find the single real 't' sample closest to that mean, and report how
# often that sample's true label equals the class.
# The loop header must be active: the body is indented and ends in `break`,
# which is a syntax error without it. Like the neighbouring diagnostic cells,
# only the first ordered pair is inspected.
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path)

    c1 = LinearSVC(random_state=12458, C=0.1, max_iter=5000)
    c1.fit(ssx, ssy)

    label = c1.predict(stx)
    # NOTE(review): a class that is never predicted yields a NaN center
    # (mean of an empty slice) - confirm whether that can happen here.
    new_tx = np.stack([stx[label == i].mean(axis=0) for i in range(65)])
    new_ty = np.arange(65)

    dist = cdist(new_tx, stx)
    # Index of the closest real sample to every pseudo center.
    nearest_idx = np.argmin(dist, axis=1)
    ans_y = sty[nearest_idx]
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print((ans_y == new_ty).mean())
    break

#### center comparison for imagenet parameters

In [None]:
# Center comparison on the image_net archives: true-label centers vs centers
# built from cross-predicted labels (c1: fit on s, predicts t; c2: fit on t,
# predicts s). Only the first ordered pair is inspected.
s, t = next(permutations(range(4), 2))
data_path = f'./OfficeHome/image_net/s{s}_t{t}.npz'

sx, sy, tx, ty = get_features(data_path)

c1 = LinearSVC(random_state=12458, C=0.1, max_iter=5000)
c1.fit(sx, sy)
t_pred = c1.predict(tx)
c2 = LinearSVC(random_state=1245, C=0.1, max_iter=5000)
c2.fit(tx, ty)
s_pred = c2.predict(sx)

truth = np.arange(65)
class_ids = range(65)
sc = np.stack([sx[sy == k].mean(axis=0) for k in class_ids])
tpc = np.stack([tx[t_pred == k].mean(axis=0) for k in class_ids])
spc = np.stack([sx[s_pred == k].mean(axis=0) for k in class_ids])
tc = np.stack([tx[ty == k].mean(axis=0) for k in class_ids])

l = [('sc', sc), ('tpc', tpc), ('spc', spc), ('tc', tc)]
for i in class_ids:
    print(f'class{i}')
    for (al, av), (bl, bv) in combinations(l, 2):
        gap = norm(av[i] - bv[i])
        print(f'{al}<->{bl}: {gap}')
    # Mean distance from each true center to the members of its own class.
    s_spread = cdist(sc[i].reshape(1, -1), sx[sy == i]).mean()
    print('sc<->other s:', s_spread)
    t_spread = cdist(tc[i].reshape(1, -1), tx[ty == i]).mean()
    print('tc<->other t:', t_spread)

#### distance between stx center and ssx center

In [None]:
# For every class print three numbers: the distance between the 's' center
# and the center of 't' samples predicted as that class, the mean distance
# from the 's' center to its own class members, and the mean distance from
# the 's' center to the 't' samples predicted as that class.
# Only the first ordered pair is inspected.
s, t = next(permutations(range(4), 2))
s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'

ssx, ssy, stx, sty = get_features(s_path)
ttx, tty, tsx, tsy = get_features(t_path)

c1 = LinearSVC(random_state=1126, C=0.1, max_iter=5000)
c1.fit(ssx, ssy)

label = c1.predict(stx)
new_tx = np.stack([stx[label == k].mean(axis=0) for k in range(65)])
new_ty = np.arange(65)

old_sx = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])

dist = cdist(old_sx, new_tx)
for i in range(65):
    anchor = old_sx[i].reshape(1, -1)
    new_dist = cdist(anchor, ssx[ssy == i]).mean()
    t_dist = cdist(anchor, stx[label == i]).mean()
    print(dist[i, i], new_dist, t_dist)

In [None]:
# Two linear SVMs per ordered pair: c1 is fit on the 's' features of
# s{s}_t{t}.npz and scored on its 't' features (printed as 'source acc');
# c2 is fit on the 't' features of s{t}_t{s}.npz and scored on its 's'
# features (printed as 'target acc'). Only the latter is averaged.
t_acc_history = []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'

    ssx, ssy, stx, sty = get_features(s_path)
    ttx, tty, tsx, tsy = get_features(t_path)

    c1 = LinearSVC(random_state=1126, C=0.1, max_iter=5000)
    c1.fit(ssx, ssy)
    s_acc = c1.score(stx, sty)

    c2 = LinearSVC(random_state=1126, C=0.1, max_iter=5000)
    c2.fit(tsx, tsy)
    t_acc = c2.score(ttx, tty)

    t_acc_history.append(t_acc)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(f'source acc: {s_acc}')
    print(f'target acc: {t_acc}')
# Left-to-right sum starting from 0, then divided by the 12 ordered pairs.
avg = sum(t_acc_history)
print('Avg acc.:', avg/12)

In [None]:
# Settings read by the CBPL evaluation cell below: `th`, `alpha` and
# `num_iter` are forwarded to cbpl(); `ratio` is not referenced by any
# visible cell.
th, alpha, num_iter, ratio = 3, 0.1, 1, 0.2

In [None]:
# Select `th` samples per class with cbpl(), train a linear SVM on just those
# samples (labelled by the class whose center selected them) and score it on
# both feature sets of the s{t}_t{s}.npz archive.
t_acc_history, s_acc_history = [], []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'
    ssx, ssy, stx, sty = get_features(s_path)
    ttx, tty, tsx, tsy = get_features(t_path)

    train_x, ans_y = cbpl(tsx, tsy, ttx, tty, th=th, num_iter=num_iter, alpha=alpha)
    # Class id repeated th times per class: 0,0,..,1,1,.. matching the
    # class-by-class order of train_x.
    train_y = np.repeat(np.arange(65), th)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    # Fraction of selected samples whose true label equals the selecting class.
    selection_hits = ans_y.reshape(-1) == train_y
    print('CBPL acc:', selection_hits.mean())

    c = LinearSVC(random_state=1126, C=0.1, max_iter=5000)
    c.fit(train_x, train_y)

    t_acc = c.score(ttx, tty)
    s_acc = c.score(tsx, tsy)
    t_acc_history.append(t_acc)
    s_acc_history.append(s_acc)
    print('test accuracy:', t_acc)
    print('source accuracy:', s_acc)
avg = sum(t_acc_history)
s_avg = sum(s_acc_history)
print('Avg acc.:', avg/12)
print('S Avg acc.:', s_avg/12)

In [None]:
# Per-class comparison of the 's' center, the true 't' center and the center
# of 't' samples the SVM predicts as that class. Only the first ordered pair
# is inspected.
s, t = next(permutations(range(4), 2))
s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'

ssx, ssy, stx, sty = get_features(s_path)

ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])
stc = np.stack([stx[sty == k].mean(axis=0) for k in range(65)])

c1 = LinearSVC(random_state=13743, C=0.1, max_iter=5000)
c1.fit(ssx, ssy)

st_pred = c1.predict(stx)

for i in range(65):
    print(f'class {i}')
    is_class = sty == i
    predicted = st_pred == i
    # Mean of class-i samples that were predicted correctly / incorrectly.
    # Both are computed but not printed by this cell.
    same_mean = stx[predicted & is_class].mean(axis=0)
    diff_mean = stx[(st_pred != i) & is_class].mean(axis=0)

    pred_mean = stx[predicted].mean(axis=0)
    s_dist = cdist(ssc[i].reshape(1, -1), ssx[ssy == i]).mean()

    src_to_tgt = norm(ssc[i] - stc[i])
    src_to_pred = norm(pred_mean - ssc[i])
    pred_to_tgt = norm(pred_mean - stc[i])
    print(f'Source center <-> target center: {src_to_tgt}')
    print(f'Source center <-> pred target center: {src_to_pred}')
    print(f'Predict target center <-> target center: {pred_to_tgt}')
    print(f'Source avg dist: {s_dist}')
print(ssy.shape, sty.shape)

#### See variance

In [None]:
# Print the per-feature standardized 's' samples of class 0 for the first
# ordered pair, using un-normalized features. The SVM is still fitted and
# used for prediction, although the printed output does not depend on it.
s, t = next(permutations(range(4), 2))
s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'

ssx, ssy, stx, sty = get_features(s_path, norm=False)

ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])
stc = np.stack([stx[sty == k].mean(axis=0) for k in range(65)])

c1 = LinearSVC(random_state=13743, C=0.1, max_iter=5000)
c1.fit(ssx, ssy)

st_pred = c1.predict(stx)

# Only the first class is examined.
i = 0
print(f'class {i}')
# Despite the name, these rows are taken from ssx (the 's' features).
true_tx = ssx[ssy == i]
centered = true_tx - true_tx.mean(axis=0)
norm_true_tx = centered/true_tx.std(axis=0)

print(norm_true_tx)

#### with / without normalization

In [None]:
# For every ordered pair, fit one SVM on raw 's' features and one on
# normalized 's' features, then score each on the 65 true-label 't' class
# centers of the matching (raw / normalized) feature set.
labels = np.arange(65)
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    sx, sy, tx, ty = get_features(s_path, norm=False)
    nsx, nsy, ntx, nty = get_features(s_path)

    # Raw feature space.
    sc = np.stack([sx[sy == k].mean(axis=0) for k in range(65)])
    tc = np.stack([tx[ty == k].mean(axis=0) for k in range(65)])

    c1 = LinearSVC(random_state=13743, C=0.1, max_iter=5000)
    c1.fit(sx, sy)

    # Normalized feature space.
    nsc = np.stack([nsx[nsy == k].mean(axis=0) for k in range(65)])
    ntc = np.stack([ntx[nty == k].mean(axis=0) for k in range(65)])

    c2 = LinearSVC(random_state=13743, C=0.1, max_iter=5000)
    c2.fit(nsx, nsy)

    raw_acc = c1.score(tc, labels)
    normalized_acc = c2.score(ntc, labels)
    print(raw_acc, normalized_acc)

In [None]:
def prototype_classifier(X, C, th=2):
    """Rank the prototypes ``C`` for every sample in ``X`` by distance.

    A softmax over the negated ``cdist(C, X)`` distances (taken along the
    prototype axis) gives one probability column per sample.

    Returns
    -------
    (ndarray, ndarray)
        ``top`` - float array holding, for each sample (column), the indices
        of the ``th`` most probable prototypes in ascending order of
        probability (the best one is in the last row);
        ``prob`` - the full probability matrix, one row per prototype and
        one column per sample.
    """
    prob = softmax(-cdist(C, X), axis=0)
    ranking = np.argsort(prob, axis=0)
    top = ranking[-th:, :]
    return top.astype(float), prob

#### Prototypical Classifier with multiple pseudo center

In [None]:
# Top-2 hit rate of the prototype classifier: a sample counts as correct when
# its true label is among the two closest 's' class centers.
# `label_dict` maps an index to an unordered class pair; the live code of
# this cell does not read it.
label_dict = dict(enumerate(combinations(range(65), 2)))
hit_rates = []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)
    sty = sty.astype(float)
    ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])

    pred, prob = prototype_classifier(stx, ssc, th=2)
    # pred has one row per kept prototype and one column per sample; a sample
    # is a hit when any of its rows equals the true label.
    p = (pred == sty).any(axis=0)
    score = p.mean()
    print(score)
    hit_rates.append(score)
avg = sum(hit_rates)
print('Avg acc.:', avg/12)

#### svm for source @ source space

In [None]:
# Linear SVM fit on the raw 's' features of s{s}_t{t}.npz and evaluated on
# the raw 't' features of the same archive, averaged over all ordered pairs.
pair_scores = []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)

    c1 = LinearSVC(random_state=1128, C=0.1, max_iter=5000)
    c1.fit(ssx, ssy)
    pred = c1.predict(stx)
    is_correct = pred == sty
    score = is_correct.mean()
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    pair_scores.append(score)
avg = sum(pair_scores)
print('Avg acc.:', avg/12)

#### svm for source center @ source space

In [None]:
# Linear SVM trained on the 65 per-class 's' centers only (one sample per
# class) and evaluated on the raw 't' features of the same archive.
labels = np.arange(65)
pair_scores = []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)

    ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])
    c1 = LinearSVC(random_state=1128, C=0.1, max_iter=5000)
    c1.fit(ssc, labels)
    pred = c1.predict(stx)
    is_correct = pred == sty
    score = is_correct.mean()
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    pair_scores.append(score)
avg = sum(pair_scores)
print('Avg acc.:', avg/12)

#### Prototypical Classifier @ Source Space

In [None]:
# Nearest-center (prototype) classification of the raw 't' features using the
# per-class 's' centers of the same archive, averaged over all ordered pairs.
avg = 0
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)

    ssc = np.stack([ssx[ssy == i].mean(axis = 0) for i in range(65)])
    # th=1 keeps only the most probable prototype. With the default th=2 the
    # prediction has two rows per sample, and comparing it against the labels
    # averages over both rows, which caps the reported "accuracy" at 0.5.
    pred, prob = prototype_classifier(stx, ssc, th=1)
    sty = sty.astype(float)
    score = (pred.ravel() == sty).mean()
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    avg += score
print('Avg acc.:', avg/12)

#### Prototypical Network Classifier

In [None]:
# Nearest-center (prototype) classification inside the s{t}_t{s}.npz archive:
# centers are the per-class means of its 't' features (tsx/tsy) and the
# samples being classified are its 's' features (ttx/tty).
avg = 0
for s, t in permutations(range(4), 2):
    t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'
    ttx, tty, tsx, tsy = get_features(t_path, norm=False)

    tsc = np.stack([tsx[tsy == i].mean(axis=0) for i in range(65)])
    print('-'*10, f'source {s}, target {t}', '-'*10)
    # prototype_classifier returns (top indices, probabilities); the tuple
    # must be unpacked before comparing against the labels. th=1 keeps only
    # the single most probable prototype per sample.
    pred, prob = prototype_classifier(ttx, tsc, th=1)
    tty = tty.astype(float)
    score = (pred.ravel() == tty).mean()
    print(score)
    avg += score
print('Avg acc.:', avg/12)

#### without pseudo labeling on target space

In [None]:
# Linear SVM trained on the 65 per-class centers of the 't' features of
# s{t}_t{s}.npz and scored on that archive's 's' features (ttx/tty).
labels = np.arange(65)
pair_scores = []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)
    ttx, tty, tsx, tsy = get_features(t_path, norm=False)

    tsc = np.stack([tsx[tsy == k].mean(axis=0) for k in range(65)])
    ttc = np.stack([ttx[tty == k].mean(axis=0) for k in range(65)])

    c1 = LinearSVC(random_state=14582, C=0.1, max_iter=5000)
    c1.fit(tsc, labels)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    score = c1.score(ttx, tty)
    print(score)
    pair_scores.append(score)
avg = sum(pair_scores)
print('Avg acc.:', avg/12)

In [None]:
# Linear SVM fit on all raw 't' features of s{t}_t{s}.npz (tsx/tsy) and
# scored on that archive's raw 's' features (ttx/tty).
pair_scores = []
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)
    ttx, tty, tsx, tsy = get_features(t_path, norm=False)

    c1 = LinearSVC(random_state=13241, C=0.1, max_iter=5000)
    c1.fit(tsx, tsy)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    score = c1.score(ttx, tty)
    print(score)
    pair_scores.append(score)
avg = sum(pair_scores)
print('Avg acc.:', avg/12)

#### Hyperparameter selection

In [None]:
# Sweep the LinearSVC regularization strength C. For every ordered pair the
# classifier is fit on the raw 't' features of s{t}_t{s}.npz (tsx/tsy) and
# scored on that archive's 's' features (ttx/tty); accuracies are averaged
# over all pairs for each C.
c_list = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
pairs = list(permutations(range(4), 2))

# Pairs form the outer loop so that each archive is read from disk once
# instead of once per C value. Per-C sums are accumulated in the same pair
# order as before, so the reported averages are unchanged.
acc_sum = {c: 0 for c in c_list}
for s, t in pairs:
    t_path = f'./OfficeHome/fixbi/s{t}_t{s}.npz'
    ttx, tty, tsx, tsy = get_features(t_path, norm=False)
    for c in c_list:
        c1 = LinearSVC(random_state=12453, C=c, max_iter=5000)
        c1.fit(tsx, tsy)
        acc_sum[c] += c1.score(ttx, tty)

total_avg = []
for c in c_list:
    mean_acc = acc_sum[c] / len(pairs)
    print(f'C = {c}, Avg acc.:', mean_acc)
    total_avg.append(mean_acc)
# Mean and spread of the per-C averages, i.e. sensitivity to the choice of C.
print('Total Avg acc.:', np.mean(total_avg))
print('std:', np.std(total_avg))