-
Notifications
You must be signed in to change notification settings - Fork 73
/
calcu_graph.py
67 lines (58 loc) · 1.79 KB
/
calcu_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
import numpy as np
import h5py
from sklearn.metrics.pairwise import cosine_similarity as cos
from sklearn.metrics import pairwise_distances as pair
from sklearn.preprocessing import normalize
# Number of neighbours per node: construct_graph selects the topk + 1 largest
# similarities in each row and skips the entry that is the node itself.
topk = 10
def construct_graph(features, label, method='heat', k=None,
                    fname='graph/reut10_graph.txt'):
    """Write a k-nearest-neighbour edge list built from ``features``.

    A pairwise similarity matrix is computed with the chosen ``method``.  For
    every row the ``k + 1`` largest entries are selected and each selected
    column other than the row itself is written to ``fname`` as a line
    ``"<row> <neighbour>"``.  The number of written edges whose endpoints
    carry different labels, divided by ``len(label) * k``, is printed as the
    "error rate".

    Args:
        features: 2-D array with one sample per row.  It is never modified;
            the 'cos' and 'ncos' methods binarise a private copy.
        label: Sequence of per-sample labels, indexed like the rows of
            ``features``.
        method: Similarity to use:
            'heat' -- ``exp(-0.5 * d ** 2)`` where ``d`` is the matrix
                returned by ``pairwise_distances``;
            'cos'  -- dot product of the rows after every positive entry has
                been set to 1;
            'ncos' -- as 'cos', with each binarised row L1-normalised before
                the dot product.
        k: Number of neighbours per node.  ``None`` (the default) uses the
            module-level ``topk``.
        fname: Path of the edge-list file to write.  Missing parent
            directories are created.

    Raises:
        ValueError: if ``method`` is not one of the names above, if ``k`` is
            smaller than 1, or if there are not more than ``k`` samples.
    """
    import os  # local import: only needed to prepare the output directory

    if k is None:
        k = topk
    if k < 1:
        raise ValueError('k must be at least 1, got {}'.format(k))

    if method == 'heat':
        dist = np.exp(-0.5 * pair(features) ** 2)
    elif method in ('cos', 'ncos'):
        # Binarise a copy so the caller's array is left untouched.
        binary = np.array(features)
        binary[binary > 0] = 1
        if method == 'ncos':
            binary = normalize(binary, axis=1, norm='l1')
        dist = np.dot(binary, binary.T)
    else:
        raise ValueError(
            "unknown method {!r}; expected 'heat', 'cos' or 'ncos'".format(
                method))

    num = len(label)
    n_nodes = dist.shape[0]
    if n_nodes <= k:
        # argpartition needs at least k + 1 entries per row.
        raise ValueError(
            'need more than k={} samples, got {}'.format(k, n_nodes))

    # Fail early on an unwritable location rather than halfway through.
    parent = os.path.dirname(fname)
    if parent:
        os.makedirs(parent, exist_ok=True)

    counter = 0
    with open(fname, 'w') as f:
        for i, row in enumerate(dist):
            # Indices of the k + 1 largest similarities (order unspecified).
            for vv in np.argpartition(row, -(k + 1))[-(k + 1):]:
                if vv == i:
                    continue  # no self-loops
                if label[vv] != label[i]:
                    counter += 1
                f.write('{} {}\n'.format(i, vv))

    # NOTE: the denominator assumes exactly k edges per node, which holds
    # when every node is among its own k + 1 most similar entries.
    print('error rate: {}'.format(counter / (num * k)))
# Disabled loader that reads data/usps.h5; kept as an inert string literal.
"""
f = h5py.File('data/usps.h5', 'r')
train = f.get('train')
test = f.get('test')
X_tr = train.get('data')[:]
y_tr = train.get('target')[:]
X_te = test.get('data')[:]
y_te = test.get('target')[:]
f.close()
usps = np.concatenate((X_tr, X_te)).astype(np.float32)
label = np.concatenate((y_tr, y_te)).astype(np.int32)
"""

# Disabled loader that reads data/hhar.txt; kept as an inert string literal.
"""
hhar = np.loadtxt('data/hhar.txt', dtype=float)
label = np.loadtxt('data/hhar_label.txt', dtype=int)
"""

# Active run: load the feature matrix and its labels from text files, then
# build the graph with the L1-normalised binary similarity ('ncos').
reut = np.loadtxt('data/reut.txt', dtype=float)
label = np.loadtxt('data/reut_label.txt', dtype=int)
construct_graph(features=reut, label=label, method='ncos')