In [2]:
from scipy.spatial.distance import cdist
from scipy.stats import mode
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from itertools import permutations, combinations

from numpy.random import default_rng
from numpy.linalg import norm
from scipy.special import softmax
from collections import Counter
from pathlib import Path
from sklearn.cluster import KMeans

def normalize(x):
    """Standardize ``x`` along axis 0 (zero mean, unit standard deviation).

    Parameters
    ----------
    x : numpy.ndarray
        Array whose entries along axis 0 are standardized (for a 2-D array,
        each column is standardized independently).

    Returns
    -------
    numpy.ndarray
        ``(x - mean) / std`` computed along axis 0. Positions whose standard
        deviation is exactly zero are returned as zeros instead of NaN.
    """
    mean = x.mean(axis=0)
    std = x.std(axis=0)
    # A constant column has std == 0; dividing by it would produce NaN (0/0).
    # Substituting 1 leaves the already-centred zeros untouched.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (x - mean) / safe_std

def get_features(path, norm=True):
    """Load feature matrices and labels from an ``.npz`` archive.

    The archive is read through its ``'s'`` and ``'t'`` entries. Each entry is
    treated as a 2-D array whose last column holds the label and whose
    remaining columns hold the features.

    Parameters
    ----------
    path : str or path-like
        Location of the ``.npz`` file.
    norm : bool, default True
        When true, both feature matrices are passed through ``normalize``.
        (Inside this function the parameter shadows any module-level ``norm``.)

    Returns
    -------
    tuple
        ``(s_features, s_labels, t_features, t_labels)``.
    """
    # np.load keeps the archive's file handle open until it is closed; this
    # function is called inside loops, so release the handle right away.
    with np.load(path) as data:
        ss, st = data['s'], data['t']
    ssx, ssy = ss[:, :-1], ss[:, -1]
    stx, sty = st[:, :-1], st[:, -1]
    if norm:
        return normalize(ssx), ssy, normalize(stx), sty
    return ssx, ssy, stx, sty

def prototype_classifier(X, C, th=1):
    """Classify samples by their distance to class prototypes.

    Parameters
    ----------
    X : array-like
        Samples, one per row.
    C : array-like
        Prototypes, one per row, with the same number of columns as ``X``.
    th : int, default 1
        Number of top-ranked prototypes to return per sample.

    Returns
    -------
    top : numpy.ndarray
        Integer array with ``th`` rows and one column per sample; row 0 holds
        the most probable prototype index, row 1 the next, and so on.
    prob : numpy.ndarray
        Softmax over prototypes of the negative distances, with one row per
        prototype and one column per sample (each column sums to 1).
    """
    dist = cdist(C, X)
    prob = softmax(-dist, axis=0)
#     prob = softmax(1/(1+dist), axis=0)
    # Rank prototypes per sample from most to least probable, then keep the
    # first `th`. Slicing after the reversal (rather than `[-th:]` before it)
    # makes th=0 return an empty selection instead of every prototype.
    ranked = prob.argsort(axis=0)[::-1]
    return ranked[:th], prob

# def prototype_classifier(X, C):
#     dist = cdist(C, X)
#     prob = softmax(-dist, axis=0)
#     return prob.T

def masked_prototypical_classifier(X, C, ratio=0.3, seed=2437, th=2):
    """Run ``prototype_classifier`` on a random subset of the feature columns.

    Each column is independently marked for removal with probability
    ``ratio`` (one Bernoulli draw per column from a generator seeded with
    ``seed``); the same surviving columns are taken from both ``X`` and ``C``
    before classification. Returns whatever ``prototype_classifier`` returns
    for the reduced inputs with the given ``th``.
    """
    generator = np.random.default_rng(seed)
    dropped = generator.binomial(1, ratio, X.shape[1]).astype(bool)
    keep = np.logical_not(dropped)
    return prototype_classifier(X[:, keep], C[:, keep], th=th)

#### EM Algorithm

In [None]:
#     alpha = prob.mean(axis=0)
#     mean = np.stack([np.average(stx, axis=0, weights=prob[:, i]) for i in range(65)])
#     sigma = []
#     for i in range(65):
#         print(i)
#         all_sigma = np.matmul((stx-mean[i])[:, :, np.newaxis], (stx-mean[i])[:, np.newaxis, :])
#         sigma.append(np.average(all_sigma, axis=0, weights=prob[:, i]))



In [10]:
# For every ordered (source, target) pair, classify the target features with
# prototypes that are the per-class means of those same target features under
# their own labels, then report the accuracy per pair and the 12-pair average.
avg = 0
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/source_only/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)

    # Per-class mean feature vectors (65 classes) for each split.
    ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])
    stc = np.stack([stx[sty == k].mean(axis=0) for k in range(65)])

    # Only the target prototypes are used for the prediction in this cell.
    pred = prototype_classifier(stx, stc)[0]

    score = np.mean(pred == sty)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    avg = avg + score
print('Avg acc.:', avg/12)

---------- source 0, target 1 ----------
0.6604651162790698
---------- source 0, target 2 ----------
0.8347050754458162
---------- source 0, target 3 ----------
0.8173345759552656
---------- source 1, target 0 ----------
0.7193056731583404
---------- source 1, target 2 ----------
0.8026977594878829
---------- source 1, target 3 ----------
0.77143522833178
---------- source 2, target 0 ----------
0.7150719729043183
---------- source 2, target 1 ----------
0.6397674418604651
---------- source 2, target 3 ----------
0.8254892823858341
---------- source 3, target 0 ----------
0.7607959356477562
---------- source 3, target 1 ----------
0.6693023255813954
---------- source 3, target 2 ----------
0.8593964334705075
Avg acc.: 0.756313901709036


In [18]:
# Prototype accuracy on the 's' split of each "partial" feature file, using
# class means computed from that same split and its labels.
avg = 0
ratio = 0.2
for s, t in permutations(range(4), 2):
    # NOTE(review): the file name is built as s{t}_t{s}, i.e. with the indices
    # swapped relative to the "source {s}, target {t}" line printed below —
    # confirm this is intentional.
    s_path = f'./OfficeHome/partial/s{t}_t{s}_{ratio}.npz'
    ttx, tty, tsx, tsy = get_features(s_path, norm=False)

    # Per-class mean feature vectors (65 classes); tsc is computed but not
    # used for the prediction below.
    ttc = np.stack([ttx[tty == k].mean(axis=0) for k in range(65)])
    tsc = np.stack([tsx[tsy == k].mean(axis=0) for k in range(65)])

    pred = prototype_classifier(ttx, ttc)[0]
    score = np.mean(pred == tty)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    avg = avg + score
print('Avg acc.:', avg/12)

---------- source 0, target 1 ----------
0.8004651162790698
---------- source 0, target 2 ----------
0.8950617283950617
---------- source 0, target 3 ----------
0.8646318732525629
---------- source 1, target 0 ----------
0.8107535986452159
---------- source 1, target 2 ----------
0.8950617283950617
---------- source 1, target 3 ----------
0.8646318732525629
---------- source 2, target 0 ----------
0.8107535986452159
---------- source 2, target 1 ----------
0.8004651162790698
---------- source 2, target 3 ----------
0.8646318732525629
---------- source 3, target 0 ----------
0.8107535986452159
---------- source 3, target 1 ----------
0.8004651162790698
---------- source 3, target 2 ----------
0.8950617283950617
Avg acc.: 0.8427280791429775


In [None]:
# Scratch check of broadcasting: compare every row of a shuffled 3x5 integer
# grid against its own per-row threshold held in a 3x1 column.
rng = np.random.default_rng(10)
a = np.arange(15)
rng.shuffle(a)  # in-place, seeded, so the grid is reproducible
a = a.reshape(3, 5)

b = np.array([[3], [7], [9]])
print(a)
print(b)
a > b

#### prototype after information minimization

In [26]:
# For every ordered (source, target) pair, classify the target features read
# from the s2t_shot file with prototypes built from the source features read
# from the source_only file.
avg = 0
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/source_only/s{s}_t{t}.npz'
    t_path = f'./OfficeHome/s2t_shot/s{s}_t{t}.npz'
    # Keep the 's' half of the first file and the 't' half of the second.
    ssx, ssy = get_features(s_path, norm=False)[:2]
    stx, sty = get_features(t_path, norm=False)[2:]

    # Per-class mean feature vectors (65 classes); stc is computed but the
    # prediction below uses the source prototypes only.
    ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])
    stc = np.stack([stx[sty == k].mean(axis=0) for k in range(65)])

    pred = prototype_classifier(stx, ssc)[0]
    # Disabled variant: re-estimate prototypes from the pseudo labels.
#     pseudo_c = np.stack([stx[pred.flatten() == i].mean(axis=0) for i in range(65)])
#     pred, _ = prototype_classifier(stx, pseudo_c)
    score = np.mean(pred == sty)
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    avg = avg + score
print('Avg acc.:', avg/12)

---------- source 0, target 1 ----------
0.5516279069767441
---------- source 0, target 2 ----------
0.7347965249199817
---------- source 0, target 3 ----------
0.7732991612301957
---------- source 1, target 0 ----------
0.5804403048264183
---------- source 1, target 2 ----------
0.6943301326017376
---------- source 1, target 3 ----------
0.6926840633737186
---------- source 2, target 0 ----------
0.5948348856900931
---------- source 2, target 1 ----------
0.4874418604651163
---------- source 2, target 3 ----------
0.782851817334576
---------- source 3, target 0 ----------
0.6701947502116851
---------- source 3, target 1 ----------
0.5518604651162791
---------- source 3, target 2 ----------
0.8072702331961591
Avg acc.: 0.6601360088285587


#### K-means prototypical classifier

In [None]:
def center_dist(a, b):
    """Return the Euclidean norm of ``a - b`` taken along axis 1.

    ``a`` and ``b`` must support ``a - b`` and the difference must have an
    axis 1; one distance is returned per row of the difference.
    """
    diff = a - b
    return np.linalg.norm(diff, axis=1)

#### ImageNet prototypical classifier

In [None]:
# Inspect the fixbi features: classify target features with source-class
# prototypes, print per-class source/target prototype gaps and all pairwise
# distances between source prototypes.
avg = 0
n_pairs = 0  # number of (source, target) pairs actually scored
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)
    # Per-class mean feature vectors (65 classes) for each split.
    ssc = np.stack([ssx[ssy == i].mean(axis=0) for i in range(65)])
    stc = np.stack([stx[sty == i].mean(axis=0) for i in range(65)])
    # Global feature means of each split (only used by the disabled print below).
    sc = ssx.mean(axis=0)
    tc = stx.mean(axis=0)
    # Distance between the source and target prototype of each class.
    print(np.linalg.norm(ssc - stc, axis=1))
    pred, _ = prototype_classifier(stx, ssc)
#     pseudo_c = np.stack([stx[pred.flatten() == i].mean(axis=0) for i in range(65)])
#     pred, _ = prototype_classifier(stx, pseudo_c)
    score = (pred == sty).mean()
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
#     print(np.linalg.norm(sc - tc))
    avg += score
    n_pairs += 1
    # Distance between every pair of source prototypes.
    for a, b in combinations(ssc, 2):
        print(np.linalg.norm(a - b))
    # Only the first pair is inspected.
    break
# Average over the pairs that were scored; dividing by a fixed 12 would
# under-report the accuracy because the loop stops after the first pair.
print('Avg acc.:', avg/n_pairs)

#### top-2 pseudo labeling

In [None]:
# For every ordered (source, target) pair, rank source-class prototypes for
# both splits of the fixbi features and save the rankings and scores.
for s, t in permutations(range(4), 2):
    ssx, ssy, stx, sty = get_features(f'./OfficeHome/fixbi/s{s}_t{t}.npz', norm=False)

    # Per-class mean of the source features (65 classes).
    ssc = np.stack([ssx[ssy == k].mean(axis=0) for k in range(65)])

    # NOTE(review): th=3 keeps the three best classes although this section
    # is titled "top-2" — confirm which is intended.
    t_pred, t_dist = prototype_classifier(stx, ssc, th=3)
    s_pred, s_dist = prototype_classifier(ssx, ssc, th=3)

    # NOTE(review): the second return value of prototype_classifier is already
    # a softmax output, so this applies softmax a second time — confirm.
    s_dist = softmax(s_dist, axis=0)
    t_dist = softmax(t_dist, axis=0)

    output_path = Path(f'./OfficeHome/pseudo_label/s{s}_t{t}.npz')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Arrays are transposed so that samples run along the first axis.
    with output_path.open('wb') as f:
        np.savez(
            f,
            s_pred=s_pred.T,
            t_pred=t_pred.T,
            s_prob=s_dist.T,
            t_prob=t_dist.T,
        )

#### Masked Prototypical Classifier for Top-2 Accuracy

In [None]:
# Ensemble of masked prototype classifiers: over 100 random feature masks,
# accumulate the score of each sample's two best classes, then predict the
# class with the largest accumulated score.
avg = 0
ratio=0.8
for s, t in permutations(range(4), 2):
    s_path = f'./OfficeHome/fixbi/s{s}_t{t}.npz'
    ssx, ssy, stx, sty = get_features(s_path, norm=False)
    # Per-class mean of the source features (65 classes).
    ssc = np.stack([ssx[ssy == i].mean(axis=0) for i in range(65)])
    # Accumulated score per (sample, class).
    prob = np.zeros((stx.shape[0], 65))
    sample_idx = np.arange(stx.shape[0])
    for i in range(100):
        # pred: (2, n_samples) class indices; dist: (65, n_samples) scores.
        pred, dist = masked_prototypical_classifier(stx, ssc, ratio=ratio, seed=i, th=2)
        # NOTE(review): `dist` is already a softmax output of the classifier,
        # so this is a second softmax — confirm it is intended.
        dist = softmax(dist, axis=0)
        # Gather the scores of the selected classes so both sides have shape
        # (2, n_samples). Adding the full (65, n_samples) matrix here raises a
        # broadcasting ValueError.
        prob[sample_idx, pred] += dist[pred, sample_idx]/100
    pred = prob.argmax(axis=1)
    score = (pred == sty).mean()
    print('-'*10, f'source {s}, target {t}', '-'*10)
    print(score)
    avg += score
print('Avg acc.:', avg/12)