In [2]:
import torch
import sys
import os
import time
import numpy as np
import argparse
from scipy import spatial
from sklearn.cross_decomposition import CCA

from torch.utils.data import DataLoader
from torch.utils.data import WeightedRandomSampler
from umap.umap_ import find_ab_params

from singleVis.custom_weighted_random_sampler import CustomWeightedRandomSampler
from singleVis.SingleVisualizationModel import VisModel
from singleVis.losses import UmapLoss, ReconstructionLoss, SingleVisLoss
from singleVis.edge_dataset import DataHandler
from singleVis.trainer import SingleVisTrainer
from singleVis.data import NormalDataProvider
from singleVis.spatial_edge_constructor import kcSpatialAlignmentEdgeConstructor
# from singleVis.temporal_edge_constructor import GlobalTemporalEdgeConstructor
from singleVis.alignment_edge_constructor import LocalAlignmentEdgeConstructor
from singleVis.projector import TimeVisProjector
from singleVis.eval.evaluator import Evaluator

In [3]:
CONTENT_PATH = "/home/yifan/dataset/noisy/pairflip/cifar10/0/"
REF_PATH = "/home/yifan/dataset/noisy/pairflip/cifar10/reference/"
NOISY20 = "/home/yifan/dataset/noisy/pairflip/cifar10/20/"
sys.path.append(REF_PATH)
sys.path.append(CONTENT_PATH)

from config import config

SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
GPU_ID = config["GPU"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
PREPROCESS = VISUALIZATION_PARAMETER["PREPROCESS"]
LAMBDA = VISUALIZATION_PARAMETER["LAMBDA"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
INIT_NUM = VISUALIZATION_PARAMETER["INIT_NUM"]
ALPHA = VISUALIZATION_PARAMETER["ALPHA"]
BETA = VISUALIZATION_PARAMETER["BETA"]
MAX_HAUSDORFF = VISUALIZATION_PARAMETER["MAX_HAUSDORFF"]
# HIDDEN_LAYER = VISUALIZATION_PARAMETER["HIDDEN_LAYER"]
ENCODER_DIMS = VISUALIZATION_PARAMETER["ENCODER_DIMS"]
DECODER_DIMS = VISUALIZATION_PARAMETER["DECODER_DIMS"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
T_N_EPOCHS = VISUALIZATION_PARAMETER["T_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]

VIS_MODEL_NAME = 'contrast'
EVALUATION_NAME = VISUALIZATION_PARAMETER["EVALUATION_NAME"]

SEGMENTS = [(EPOCH_START, EPOCH_END)]
# define hyperparameters
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

import Model.model as subject_model
net = eval("subject_model.{}()".format(NET))


from config import config

ref_provider = NormalDataProvider(REF_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, split=-1, device=DEVICE, classes=CLASSES,verbose=1)

data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, split=-1, device=DEVICE, classes=CLASSES,verbose=1)
# REFERENCE
noisy_data_provider = NormalDataProvider(NOISY20,net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, split=-1, device=DEVICE, classes=CLASSES,verbose=1 )
ref_train_data = ref_provider.train_representation(200).squeeze()

# EPOCH 200
train_data = data_provider.train_representation(200).squeeze()
# EPOCH 199
prev_train_data = data_provider.train_representation(199).squeeze()

noisy_data = noisy_data_provider.train_representation(200).squeeze()


Finish initialization...
Finish initialization...
Finish initialization...


In [3]:

prev_prev_train_data = data_provider.train_representation(198).squeeze()
ref_prev_train_data = ref_provider.train_representation(199).squeeze()
import torch
from numpy import dot
from numpy.linalg import norm
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# res = dot(train_data, prev_train_data) / (norm(train_data) * norm(prev_train_data))
# torch.cosine_similarity(data_provider.train_representation(200).unsqueeze(1), data_provider.train_representation(200).unsqueeze(0),dim=-1)
# print(train_data == train_data)
print(train_data.reshape(1, -1)== train_data.reshape(1, -1))

print("con_epoch200 ==== con_epoch200",cosine_similarity(train_data.reshape(1, -1), train_data.reshape(1, -1)))

print("con_epoch200 === con_epoch199",cosine_similarity(train_data.reshape(1, -1), prev_train_data.reshape(1, -1)))

print("con_epoch200 === con_epoch198",cosine_similarity(train_data.reshape(1, -1), prev_prev_train_data.reshape(1, -1)))

print("con_epoch199 === con_epoch198",cosine_similarity(prev_train_data.reshape(1, -1), prev_prev_train_data.reshape(1, -1)))

print("con_epoch200 === ref_epoch200",cosine_similarity(train_data.reshape(1, -1), ref_train_data.reshape(1, -1)))

print("ref_epoch200 === ref_epoch199",cosine_similarity(ref_train_data.reshape(1, -1), ref_prev_train_data.reshape(1, -1)))

for i in range(100):
    a = train_data[i]
    b = ref_train_data[i]
    print("sample",i,"sim",dot(a, b) / (norm(a) * norm(b)))



# print(spatial.distance.cosine(train_data, prev_train_data))
# print(spatial.distance.cosine(train_data, ref_train_data))

[[ True  True  True ...  True  True  True]]
con_epoch200 ==== con_epoch200 [[0.99998623]]
con_epoch200 === con_epoch199 [[0.99968857]]
con_epoch200 === con_epoch198 [[0.9995958]]
con_epoch199 === con_epoch198 [[0.9995976]]
con_epoch200 === ref_epoch200 [[0.5749188]]
ref_epoch200 === ref_epoch199 [[0.99975353]]
sample 0 sim 0.49421495
sample 1 sim 0.6118853
sample 2 sim 0.60229903
sample 3 sim 0.5415111
sample 4 sim 0.6612824
sample 5 sim 0.5647196
sample 6 sim 0.6146781
sample 7 sim 0.50741494
sample 8 sim 0.5893292
sample 9 sim 0.5513724
sample 10 sim 0.52662176
sample 11 sim 0.5476368
sample 12 sim 0.582332
sample 13 sim 0.61053294
sample 14 sim 0.61789453
sample 15 sim 0.61118007
sample 16 sim 0.6272277
sample 17 sim 0.57631403
sample 18 sim 0.5854247
sample 19 sim 0.58948326
sample 20 sim 0.57800555
sample 21 sim 0.5882432
sample 22 sim 0.5464378
sample 23 sim 0.54514277
sample 24 sim 0.580541
sample 25 sim 0.5819594
sample 26 sim 0.5351117
sample 27 sim 0.58567256
sample 28 sim 0.

In [25]:
from umap.umap_ import fuzzy_simplicial_set
from pynndescent import NNDescent
from sklearn.utils import check_random_state

def _construct_fuzzy_complex(train_data):
        """
        construct a vietoris-rips complex
        """
        # number of trees in random projection forest
        n_trees = min(64, 5 + int(round((train_data.shape[0]) ** 0.5 / 20.0)))
        # max number of nearest neighbor iters to perform
        n_iters = max(5, int(round(np.log2(train_data.shape[0]))))
        # distance metric
        metric = "euclidean"
        # get nearest neighbors
        nnd = NNDescent(
            train_data,
            n_neighbors=100,
            metric=metric,
            n_trees=n_trees,
            n_iters=n_iters,
            max_candidates=60,
            verbose=True
        )
        knn_indices, knn_dists = nnd.neighbor_graph
        # If seed is None, return the RandomState singleton used by np.random.
        random_state = check_random_state(None)

        """Given a set of data X, a neighborhood size, and a measure of distance
    compute the fuzzy simplicial set (here represented as a fuzzy graph in
    the form of a sparse matrix) associated to the data. This is done by
    locally approximating geodesic distance at each point, creating a fuzzy
    simplicial set for each such point, and then combining all the local
    fuzzy simplicial sets into a global one via a fuzzy union.
    """
        complex, sigmas, rhos = fuzzy_simplicial_set(
            X=train_data,
            n_neighbors=30,
            metric=metric,
            random_state=random_state,
            knn_indices=knn_indices,
            knn_dists=knn_dists,
        )
        return complex, sigmas, rhos, knn_indices


complex, sigmas, rhos, knn_idxs = _construct_fuzzy_complex(train_data)
prev_complex, prev_sigmas, prev_rhos, prev_knn_idxs = _construct_fuzzy_complex(prev_train_data)
ref_complex, ref_sigmas, ref_rhos, ref_knn_idxs = _construct_fuzzy_complex(ref_train_data)

# print("knn index 0",knn_idxs[0])
# print("prev knn index 0 ",prev_knn_idxs[0])
# print("ref_knn_idxs index 0 ", ref_knn_idxs[0])
scoreList = []
selfScoreList= []

# for m in range(len(knn_idxs)):
for m in range(10):
    nums1 = knn_idxs[m]
    nums2 = prev_knn_idxs[m]
    reflist = ref_knn_idxs[m]
    result = []
    i, j ,k = 0, 0, 0
    score=0
    refScore = 0
    selfScore = 0
    while k<len(nums1):
        # verify self property
        selfList = knn_idxs[m].tolist()
        selfIndex = selfList.index(knn_idxs[m][k])
       
           
        selfSub = selfIndex - k
        if selfIndex - k < 0:
            selfSub = k - selfIndex
        selfScore += (100 - selfSub)/100
        print("selfIndex",selfIndex,"selfSub",selfSub,selfScore)

        if knn_idxs[m][k] in ref_knn_idxs[m]:
            compList = ref_knn_idxs[m].tolist()
           
            index = compList.index(knn_idxs[m][k])
            sub = index - k
            if index - k < 0:
               sub = k - index
            refScore += (100 - sub)/100
      
        k +=1
    print("score",selfScore,refScore)  
    scoreList.append(refScore/99)
    selfScoreList.append(selfScore/99)

    

print("ref vs knn", scoreList)
print("self",selfScoreList)

Sat Dec  3 23:11:30 2022 Building RP forest with 16 trees
Sat Dec  3 23:11:30 2022 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	Stopping threshold met -- exiting after 3 iterations
Sat Dec  3 23:11:43 2022 Building RP forest with 16 trees
Sat Dec  3 23:11:44 2022 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	Stopping threshold met -- exiting after 3 iterations
Sat Dec  3 23:11:57 2022 Building RP forest with 16 trees
Sat Dec  3 23:11:58 2022 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	Stopping threshold met -- exiting after 3 iterations
selfIndex 0 selfSub 0 1.0
selfIndex 1 selfSub 0 2.0
selfIndex 2 selfSub 0 3.0
selfIndex 3 selfSub 0 4.0
selfIndex 4 selfSub 0 5.0
selfIndex 5 selfSub 0 6.0
selfIndex 6 selfSub 0 7.0
selfIndex 7 selfSub 0 8.0
selfIndex 8 selfSub 0 9.0
selfIndex 9 selfSub 0 10.0
selfIndex 10 selfSub 0 11.0
selfIndex 11 selfSub 0 12.0
selfIndex 12 selfSub 0 13.0
selfIndex 13 selfSub 0 14.0
selfIndex 14 selfSub 0 15.0
se

In [26]:
import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = train_data
Y = ref_train_data
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('referece epoch 200 vs current epoch 200')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数


referece epoch 200 vs current epoch 200
0.9761795450768788


In [24]:
import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = train_data
Y = prev_train_data
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('prev epoch vs current epoch')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数



prev epoch vs current epoch
0.9998723109070308


In [25]:
import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = train_data
Y = train_data
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('current epoch vs current epoch')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数

current epoch vs current epoch
1.0


In [29]:
import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = noisy_data_provider
Y = train_data
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('noisy 20 epoch 200 vs ')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数


noisy 20 epoch 200 vs 
0.96668495055004


In [32]:
epoch1 = data_provider.train_representation(1).squeeze()
import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = epoch1
Y = train_data
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('epoch1 vs epoch200 ')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数

epoch1 vs epoch200 
0.904889917426604


In [36]:
noisepoch1 = noisy_data_provider.train_representation(1).squeeze()
epoch1 = data_provider.train_representation(1).squeeze()
import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = noisepoch1
Y = epoch1
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('noisy20  epoch 1 vs epoch200 ')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数

noisy20  epoch 1 vs epoch200 
0.9581232769450594


In [37]:
noisepoch1 = noisy_data_provider.train_representation(1).squeeze()

import numpy as np
from sklearn.cross_decomposition import CCA
# 建立模型
X = noisepoch1
Y = train_data
cca = CCA(n_components=1)
# 训练数据
cca.fit(X, Y)
# print(X)
X_train_r, Y_train_r = cca.transform(X, Y)
# print(X_train_r)
print('noisy20  epoch 1 vs epoch200 ')
print(np.corrcoef(X_train_r[:, 0], Y_train_r[:, 0])[0, 1]) #输出相关系数

noisy20  epoch 1 vs epoch200 
0.9069525894826751


In [None]:
import torch
import numpy as np
from CKA import CKA, CudaCKA

np_cka = CKA()

X = ref_train_data
Y = train_data

# print('Linear CKA, between reference train_data epoch 200 and train_data epoch 200: {}'.format(np_cka.linear_CKA(X, Y)))

print('RBF Kernel CKA, between reference train_data epoch 200 and train_data epoch 200: {}'.format(np_cka.kernel_CKA(X, Y)))