In [1]:
import numpy as np
import argparse
import faiss
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from tqdm import tqdm as tq
import pandas as pd
from sklearn.decomposition import PCA

Env1 - small, qsmall , rsmall
Env2 - small_1, small_2

PCA_MATRIX1 from small
PCA_MATRIX2 from small_2

qsmall - ref
rsmall - q

In [2]:
TRAINING_SOURCES = [
    "data/small/all_feat.npy",
    "data/small_2/all_feat.npy",
]

TESTING_SOURCES = [
    "data/qsmall/all_feat.npy",
    "data/rsmall/all_feat.npy",
    "data/small_1/all_feat.npy",
]

TRAINING_POSES_FILES = [
    "data/small/all_poses_freq_4.npy",
    "data/small_2/all_poses_freq_5.npy",
]

TESTING_POSES_FILES = [
    "data/qsmall/all_poses_freq_3.npy",
    "data/rsmall/all_poses_freq_3.npy",
    "data/small_1/all_poses_freq_3.npy",
]

In [3]:
def train_pca(src_path, rel_thresh=0.01):
	feat = np.load(src_path)
	pd_feat = pd.DataFrame(feat)
	for col in pd_feat.columns:
		pd_feat[col] = (pd_feat[col] - pd_feat[col].mean() ) / pd_feat[col].std()
	previous_percent = 0
	current_percent = 0
	best_components = -1
	pca_matrix = None
	for i in range(1,min(len(feat),len(pd_feat.columns))):
		pca = PCA(n_components=i)
		pca_result = pca.fit_transform(pd_feat)
		current_percent = np.sum(pca.explained_variance_ratio_)
		if current_percent-previous_percent>rel_thresh:
			previous_percent = current_percent
			best_components = i
			pca_matrix = pca
		else:
			break
	
	print("Relevant top", best_components, " pca-components! They got Cumulative variance = ", 100*current_percent)
	print("Variation per principal component:\n",pca.explained_variance_ratio_)
	return pca_matrix, pca_result

In [4]:
TRAINED_PCA = []
for idx, data in enumerate(TRAINING_SOURCES):
    print("Training for data ",idx+1)
    pca_mat, pca_result = train_pca(data, 0.02)
    TRAINED_PCA.append(pca_mat)

Training for data  1
Relevant top 10  pca-components! They got Cumulative variance =  45.6436425447464
Variation per principal component:
 [0.11938147 0.06771785 0.05131145 0.04062593 0.03595198 0.03080368
 0.02612954 0.02304644 0.02239682 0.02042515 0.01864612]
Training for data  2
Relevant top 11  pca-components! They got Cumulative variance =  47.84126281738281
Variation per principal component:
 [0.1083398  0.06531814 0.06182433 0.04391404 0.03488746 0.03046417
 0.02653001 0.02396005 0.02331732 0.02177596 0.02031511 0.01776625]


In [5]:
TRAINED_PCA_VEC = []
for train_id, train_path in enumerate(TRAINING_SOURCES):
	print("Modifying Train data ", train_id+1)
	pca_vec_list = []
	feat = np.load(train_path)
	pd_feat = pd.DataFrame(feat)
	for col in pd_feat.columns:
		pd_feat[col] = (pd_feat[col] - pd_feat[col].mean() ) / pd_feat[col].std()
	for M in tq(TRAINED_PCA):
		pca_vec_list.append(M.transform(pd_feat))
	TRAINED_PCA_VEC.append(pca_vec_list)
TRAINED_PCA_VEC = np.asarray(TRAINED_PCA_VEC,dtype=object)

Modifying Train data  1


100%|██████████| 2/2 [00:00<00:00,  3.82it/s]


Modifying Train data  2


100%|██████████| 2/2 [00:00<00:00,  3.63it/s]


In [6]:
TEST_PCA_VEC = []
for test_id, test_path in enumerate(TESTING_SOURCES):
	print("Modifying Test data ", test_id+1)
	pca_vec_list = []
	feat = np.load(test_path)
	pd_feat = pd.DataFrame(feat)
	for col in pd_feat.columns:
		pd_feat[col] = (pd_feat[col] - pd_feat[col].mean() ) / pd_feat[col].std()
	for M in tq(TRAINED_PCA):
		pca_vec_list.append(M.transform(pd_feat))
	TEST_PCA_VEC.append(pca_vec_list)
TEST_PCA_VEC = np.asarray(TEST_PCA_VEC,dtype=object)

Modifying Test data  1


100%|██████████| 2/2 [00:00<00:00,  3.87it/s]


Modifying Test data  2


100%|██████████| 2/2 [00:00<00:00,  4.53it/s]


Modifying Test data  3


100%|██████████| 2/2 [00:00<00:00,  4.55it/s]


In [7]:
def computing_basic_recall(GD, poses_file_path, remove_non_gt_query=False, temporal_window = 6, n_value=[1,5,10,20], radius=5):
	POSES = np.load(poses_file_path)
	non_usefull_q = 0
	correct_at_n = np.zeros(len(n_value))
	for gd_idx, gd in enumerate(GD):
		remove_idx = range(max(0,gd_idx-temporal_window),min(len(GD),gd_idx+temporal_window+1))
		ref_idx = np.delete(np.asarray(list(range(len(GD)))),remove_idx)
		if len(ref_idx)<=0:
			non_usefull_q+=1
		else:
			faiss_index = faiss.IndexFlatL2(len(gd))
			faiss_index.add(GD[ref_idx])
			distances, predictions = faiss_index.search(np.array([gd]), max(n_value))
			knn = NearestNeighbors(n_jobs=1)
			ref = POSES[ref_idx]
			knn.fit(ref)
			_, potential_pos = knn.radius_neighbors(np.asarray([POSES[gd_idx]]), radius=radius)
			if remove_non_gt_query:
				if len(potential_pos[0])<=0:
					for i, n in enumerate(n_value):
						if np.any(np.in1d(predictions[0][:n], potential_pos[0])):
							correct_at_n[i:]+=1
							break
				else:
					non_usefull_q+=1
			else:
				for i, n in enumerate(n_value):
						if np.any(np.in1d(predictions[0][:n], potential_pos[0])):
							correct_at_n[i:]+=1
							break
							
	recall_at_n = correct_at_n / (len(GD)-non_usefull_q)
	return recall_at_n, non_usefull_q, len(GD)-non_usefull_q

In [8]:
def check_test(test_pca_vec_list, poses_file_path):
    RECALL, NUQ, UQ = [], [], []
    for id, test_pca_vec in enumerate(test_pca_vec_list):
        recall, nuq, uq = computing_basic_recall(test_pca_vec, poses_file_path)
        RECALL.append(recall)
        NUQ.append(nuq)
        UQ.append(uq)
    return RECALL, NUQ, UQ

In [55]:
for i in range(len(TEST_PCA_VEC)):
    RECALL, NUQ, UQ = check_test([np.load(TESTING_SOURCES[i])], TESTING_POSES_FILES[i])
    print("\nTest data ", i+1)
    for j in range(len(RECALL)):
        print("\ttrain on data=",j+1)
        print("\trecall=",RECALL[j])
        print("\tnuq=",NUQ[j],"\tuq=",UQ[j])


Test data  1
	train on data= 1
	recall= [0.76868327 0.87188612 0.93594306 0.95729537]
	nuq= 0 	uq= 281

Test data  2
	train on data= 1
	recall= [0.58870968 0.84677419 0.90322581 0.9516129 ]
	nuq= 0 	uq= 248

Test data  3
	train on data= 1
	recall= [0.50184502 0.71217712 0.86346863 0.96309963]
	nuq= 0 	uq= 271


In [9]:
for i in range(len(TEST_PCA_VEC)):
    RECALL, NUQ, UQ = check_test(TEST_PCA_VEC[i], TESTING_POSES_FILES[i])
    print("\nTest data ", i+1)
    for j in range(len(RECALL)):
        print("\ttrain on data=",j+1)
        print("\trecall=",RECALL[j])
        print("\tnuq=",NUQ[j],"\tuq=",UQ[j]) 


Test data  1
	train on data= 1
	recall= [0.66903915 0.86120996 0.92882562 0.96797153]
	nuq= 0 	uq= 281
	train on data= 2
	recall= [0.57295374 0.86120996 0.92882562 0.96085409]
	nuq= 0 	uq= 281

Test data  2
	train on data= 1
	recall= [0.4516129  0.78629032 0.875      0.9233871 ]
	nuq= 0 	uq= 248
	train on data= 2
	recall= [0.35887097 0.68951613 0.85887097 0.92741935]
	nuq= 0 	uq= 248

Test data  3
	train on data= 1
	recall= [0.29151292 0.69741697 0.82656827 0.95940959]
	nuq= 0 	uq= 271
	train on data= 2
	recall= [0.43173432 0.7601476  0.87822878 0.96309963]
	nuq= 0 	uq= 271


In [34]:
def computing_primitive_recall(Q, GD, q_poses_file_path, gd_poses_file_path, n_value=[1,5,10,20], radius=3):
	QPOSES = np.load(q_poses_file_path)
	GDPOSES = np.load(gd_poses_file_path)
	# compute predictions
	faiss_index = faiss.IndexFlatL2(len(GD[0]))
	faiss_index.add(GD)
	distances, predictions = faiss_index.search(Q, max(n_value))
	# compute potential positives as ground truth
	knn = NearestNeighbors(n_jobs=1)
	knn.fit(GDPOSES)
	_, potential_pos = knn.radius_neighbors(QPOSES, radius=radius)
	# compute recalls
	correct_at_n = np.zeros(len(n_value))
	for qIx, pred in enumerate(predictions):
		for i, n in enumerate(n_value):
			if np.any(np.in1d(pred[:n], potential_pos[qIx])):
				correct_at_n[i:]+=1
				break
	recall_at_n = correct_at_n / len(Q)
	return recall_at_n

### small, qsmall and rsmall are runs from the same environment and small_1, small_2 are from another.

In [27]:
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TRAINED_PCA_VEC[0][0].copy(order='C')
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TRAINING_POSES_FILES[0])
Q = TEST_PCA_VEC[0][1].copy(order='C')
GD = TRAINED_PCA_VEC[0][1].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TRAINING_POSES_FILES[0])
print(tmpR1)
print(tmpR2)

[0.71530249 0.84697509 0.88967972 0.90747331]
[0.6405694  0.8113879  0.87900356 0.90035587]


In [28]:
Q = TEST_PCA_VEC[1][0].copy(order='C')
GD = TRAINED_PCA_VEC[0][0].copy(order='C')
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[1], TRAINING_POSES_FILES[0])
Q = TEST_PCA_VEC[1][1].copy(order='C')
GD = TRAINED_PCA_VEC[0][1].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[1], TRAINING_POSES_FILES[0])
print(tmpR1)
print(tmpR2)

[0.56854839 0.74596774 0.79435484 0.83870968]
[0.43548387 0.65322581 0.75806452 0.83870968]


In [29]:
Q = TEST_PCA_VEC[2][0].copy(order='C')
GD = TRAINED_PCA_VEC[1][0].copy(order='C')
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
Q = TEST_PCA_VEC[2][1].copy(order='C')
GD = TRAINED_PCA_VEC[1][1].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.23616236 0.40221402 0.50184502 0.60885609]
[0.33210332 0.45756458 0.52767528 0.5904059 ]


In [35]:
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TEST_PCA_VEC[1][0].copy(order='C')
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[1])
Q = TEST_PCA_VEC[0][1].copy(order='C')
GD = TEST_PCA_VEC[1][1].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.60498221 0.79003559 0.86120996 0.90747331]
[0.44128114 0.74021352 0.85765125 0.93238434]


In [36]:
def computing_primitive_precision(Q, GD, q_poses_file_path, gd_poses_file_path, n_value=[1,5,10,20], radius=3):
	QPOSES = np.load(q_poses_file_path)
	GDPOSES = np.load(gd_poses_file_path)
	# compute predictions
	faiss_index = faiss.IndexFlatL2(len(GD[0]))
	faiss_index.add(GD)
	distances, predictions = faiss_index.search(Q, max(n_value))
	# compute potential positives as ground truth
	knn = NearestNeighbors(n_jobs=1)
	knn.fit(GDPOSES)
	_, potential_pos = knn.radius_neighbors(QPOSES, radius=radius)
	# compute recalls
	correct_at_n = np.zeros(len(n_value))
	for qIx, pred in enumerate(predictions):
		for i, n in enumerate(n_value):
			tp = np.in1d(pred[:n], potential_pos[qIx])
			precision = len(pred[:n][tp])/n
			if precision > 0.9:
				correct_at_n[i]+=1
				
	recall_at_n = correct_at_n / len(Q)
	return recall_at_n

In [37]:
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TRAINED_PCA_VEC[0][0].copy(order='C')
tmpR1 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[0], TRAINING_POSES_FILES[0])
Q = TEST_PCA_VEC[0][1].copy(order='C')
GD = TRAINED_PCA_VEC[0][1].copy(order='C')
tmpR2 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[0], TRAINING_POSES_FILES[0])
print(tmpR1)
print(tmpR2)

[0.71530249 0.42704626 0.17081851 0.00355872]
[0.6405694  0.21352313 0.03914591 0.        ]


In [38]:
Q = TEST_PCA_VEC[1][0].copy(order='C')
GD = TRAINED_PCA_VEC[0][0].copy(order='C')
tmpR1 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[1], TRAINING_POSES_FILES[0])
Q = TEST_PCA_VEC[1][1].copy(order='C')
GD = TRAINED_PCA_VEC[0][1].copy(order='C')
tmpR2 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[1], TRAINING_POSES_FILES[0])
print(tmpR1)
print(tmpR2)

[0.56854839 0.31854839 0.06451613 0.00403226]
[0.43548387 0.08064516 0.01612903 0.        ]


In [39]:
Q = TEST_PCA_VEC[2][0].copy(order='C')
GD = TRAINED_PCA_VEC[1][0].copy(order='C')
tmpR1 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
Q = TEST_PCA_VEC[2][1].copy(order='C')
GD = TRAINED_PCA_VEC[1][1].copy(order='C')
tmpR2 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.23616236 0.03321033 0.         0.        ]
[0.33210332 0.09225092 0.02214022 0.01107011]


In [40]:
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TEST_PCA_VEC[1][0].copy(order='C')
tmpR1 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[1])
Q = TEST_PCA_VEC[0][1].copy(order='C')
GD = TEST_PCA_VEC[1][1].copy(order='C')
tmpR2 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.60498221 0.11743772 0.03914591 0.00711744]
[0.44128114 0.05338078 0.         0.        ]


### Comparing GD vs PCA-GD

In [43]:
Q = np.load(TESTING_SOURCES[0])
GD = np.load(TESTING_SOURCES[1])
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[0])
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TEST_PCA_VEC[1][0].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.13879004 0.23843416 0.43060498 0.68683274]
[0.60498221 0.79003559 0.86120996 0.90747331]


In [44]:
Q = np.load(TESTING_SOURCES[0])
GD = np.load(TESTING_SOURCES[1])
tmpR1 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[0])
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TEST_PCA_VEC[1][0].copy(order='C')
tmpR2 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[0], TESTING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.13879004 0.01423488 0.         0.        ]
[0.60498221 0.11743772 0.03914591 0.00711744]


In [52]:
Q = np.load(TESTING_SOURCES[2])
GD = np.load(TRAINING_SOURCES[1])
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
Q = TEST_PCA_VEC[2][0].copy(order='C')
GD = TRAINED_PCA_VEC[1][0].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.37269373 0.5202952  0.5904059  0.64575646]
[0.23616236 0.40221402 0.50184502 0.60885609]


In [53]:
Q = np.load(TESTING_SOURCES[2])
GD = np.load(TRAINING_SOURCES[1])
tmpR1 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
Q = TEST_PCA_VEC[2][0].copy(order='C')
GD = TRAINED_PCA_VEC[1][0].copy(order='C')
tmpR2 = computing_primitive_precision(Q,GD, TESTING_POSES_FILES[2], TRAINING_POSES_FILES[1])
print(tmpR1)
print(tmpR2)

[0.37269373 0.1402214  0.02214022 0.        ]
[0.23616236 0.03321033 0.         0.        ]


In [56]:
Q = np.load(TESTING_SOURCES[0])
GD = np.load(TRAINING_SOURCES[0])
tmpR1 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TRAINING_POSES_FILES[0])
Q = TEST_PCA_VEC[0][0].copy(order='C')
GD = TRAINED_PCA_VEC[0][0].copy(order='C')
tmpR2 = computing_primitive_recall(Q,GD, TESTING_POSES_FILES[0], TRAINING_POSES_FILES[0])
print(tmpR1)
print(tmpR2)

[0.76156584 0.84697509 0.89323843 0.90747331]
[0.71530249 0.84697509 0.88967972 0.90747331]
