In [1]:
from matplotlib import pyplot as plt
import numpy as np
import os
import sys
import time
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
import pyemd
feature_dir = '/root/datasets/FSCIL/features/'

In [2]:

ss = ['AWF_775_complete']  # NOTE(review): not read anywhere in this cell.
dataset = 'AWF_775_complete'
# Other dataset names previously used here:
# 'AWF_novel', 'AWF_base', 'tor_time_test3d_200w_100tr'

# Load the pre-extracted per-sample features and labels for `dataset`.
feature = np.load(feature_dir + dataset + '_feature.npy')
label = np.load(feature_dir + dataset + '_labels.npy')

print('Original feature shape: (%d, %d)' % (feature.shape[0], feature.shape[1]))
print('Number of classes: %d' % (len(np.unique(label))))

# Class feature = mean of the features of all samples of that class.
# Class weight  = number of samples of that class.
# The feature width is read from the loaded array instead of being hard-coded,
# so the cell works for any embedding size.
sorted_label = list(np.unique(label))  # unique labels in ascending order
feature_per_class = np.zeros((len(sorted_label), feature.shape[1]), dtype=np.float32)
weight = np.zeros((len(sorted_label), ), dtype=np.float32)
for counter, i in enumerate(sorted_label):
    # Vectorised boolean mask; a per-element Python comparison here costs
    # (number of samples x number of classes) interpreter-level operations.
    idx = (label == i)
    feature_per_class[counter, :] = np.mean(feature[idx, :], axis=0)
    weight[counter] = np.sum(idx)

print('Feature per class shape: (%d, %d)' % (feature_per_class.shape[0], 
                                             feature_per_class.shape[1]))

# Persist the class prototypes and class weights next to the input features.
np.save(feature_dir + dataset + '.npy', feature_per_class)
np.save(feature_dir + dataset + '_weight.npy', weight)


Original feature shape: (1917105, 512)
Number of classes: 774
Feature per class shape: (774, 512)


In [9]:
# Display the `label` array currently bound in the kernel (NumPy abbreviates
# long arrays in the repr).
# NOTE(review): `label` is assigned in more than one cell of this notebook, so
# what is shown here depends on which cell ran last.
label

array([  0, 100,   0, ..., 100, 100, 100])

In [4]:
# Source domains and target domains to compare. For every (source, target)
# pair the cell prints the Earth Mover's Distance between the two sets of
# class prototypes and the derived similarity exp(-gamma * EMD).
sds = ['AWF_775']
tds = ['tor_100w_2500tr','tor_time_test3d_200w_100tr','tor_time_test10d_200w_100tr','tor_time_test2w_200w_100tr','tor_time_test4w_200w_100tr','KNN','DF19','DF95']
# Further target datasets tried previously: 'tor_time_test6w_200w_100tr'
gamma = 0.1
# Minimum class weight (sample count) a source class needs in order to be kept.
# 0 keeps every class, which is what the previous hard-coded `>= 0` test did.
min_num_imgs = 0
for sd in sds:
    # Source prototypes/weights do not depend on the target, so load and
    # filter them once per source instead of once per (source, target) pair.
    f_s = np.load(feature_dir + sd + '.npy')
    w_s = np.load(feature_dir + sd + '_weight.npy')
    # Remove source domain classes with number of images < 'min_num_imgs'.
    idx = w_s >= min_num_imgs
    f_s = f_s[idx, :]
    w_s = w_s[idx]
    for td in tds:
        print('%s --> %s' % (sd, td))
        f_t = np.load(feature_dir + td + '.npy')
        w_t = np.load(feature_dir + td + '_weight.npy')
        # Make sure two histograms have the same length and distance matrix is square:
        # both histograms live on the concatenated (source + target) support,
        # with the source mass in the first part and the target mass in the rest.
        data = np.float64(np.append(f_s, f_t, axis=0))
        w_1 = np.zeros((len(w_s) + len(w_t),), np.float64)
        w_2 = np.zeros((len(w_s) + len(w_t),), np.float64)
        w_1[:len(w_s)] = w_s / np.sum(w_s)
        w_2[len(w_s):] = w_t / np.sum(w_t)
        # `data` is float64, so the pairwise distance matrix is float64 as well;
        # w_1 / w_2 were allocated as float64 above, so no further casts are needed.
        D = euclidean_distances(data, data)
        # `flow` is overwritten on every iteration; after the loops it holds
        # the flow of the last (source, target) pair only.
        emd, flow = pyemd.emd_with_flow(w_1, w_2, D)
        print('EMD: %.3f    Domain Similarity: %.3f\n' % (emd, np.exp(-gamma*emd)))

AWF_775 --> tor_100w_2500tr
EMD: 10.892    Domain Similarity: 0.336

AWF_775 --> tor_time_test3d_200w_100tr
EMD: 10.291    Domain Similarity: 0.357

AWF_775 --> tor_time_test10d_200w_100tr
EMD: 10.343    Domain Similarity: 0.355

AWF_775 --> tor_time_test2w_200w_100tr
EMD: 10.373    Domain Similarity: 0.354

AWF_775 --> tor_time_test4w_200w_100tr
EMD: 10.380    Domain Similarity: 0.354

AWF_775 --> KNN
EMD: 12.845    Domain Similarity: 0.277

AWF_775 --> DF19
EMD: 13.782    Domain Similarity: 0.252

AWF_775 --> DF95
EMD: 13.664    Domain Similarity: 0.255



In [None]:
# Turn the flow left over from the most recent EMD call into an ndarray and
# show the index arrays of its strictly positive entries.
flow = np.array(flow)
np.nonzero(flow > 0)

## Class overlap between AWF_775 and tor_900w_2500tr

In [1]:
import numpy as np

# Build 'AWF_775_complete.npz': every sample of 'tor_900w_2500tr' whose label
# matches one of the AWF_775 class names (after dropping a fixed-length prefix).
data_root = '/root/datasets/FSCIL/'

# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on archives from a trusted source.
# The archives are opened with `with` so the underlying file handles are
# closed as soon as the needed arrays have been read.
file_name = data_root + 'AWF_775' + '.npz'
with np.load(file_name, allow_pickle=True) as train_novel:
    train_data_775 = train_novel['data']
    train_labels_775 = train_novel['labels']
classes = np.unique(train_labels_775)

file_name = data_root + 'tor_900w_2500tr' + '.npz'
with np.load(file_name, allow_pickle=True) as train_novel:
    train_data_900 = train_novel['data']
    train_labels_900 = train_novel['labels']

# Drop the first 19 characters of every AWF_775 class name.
# NOTE(review): presumably a fixed-length prefix that the tor_900w labels do
# not carry - confirm against the label format.
cla = [c[19:] for c in classes]

# Collect the matching samples class by class, in the order of `cla`.
data = []
label = []
for novel_class in cla:
    inds = np.flatnonzero(train_labels_900 == novel_class)
    data.extend(train_data_900[inds])
    label.extend(train_labels_900[inds])
data = np.array(data)
label = np.array(label)

# NOTE(review): written to the current working directory, not to data_root.
np.savez('AWF_775_complete.npz',data=data,labels=label)

In [8]:
# Same selection as the full class-overlap build, restricted to the last two
# class names; relies on `classes`, `train_data_900` and `train_labels_900`
# already being present in the kernel.
cla = [name[19:] for name in classes]
data, label = [], []
for wanted in cla[-2:]:
    # Positions in the tor_900w label array that carry this class name.
    hits = np.flatnonzero(train_labels_900 == wanted)
    data += list(train_data_900[hits])
    label += list(train_labels_900[hits])
data = np.array(data)
label = np.array(label)

array(['zougla.gr', 'zougla.gr', 'zougla.gr', ..., 'zytpirwai.net',
       'zytpirwai.net', 'zytpirwai.net'], dtype='<U13')

: 