In [1]:
import pickle
import numpy as np
import scipy.sparse as sp
from sklearn.neighbors import kneighbors_graph

In [2]:
# Load the pre-built graph tuple from disk.
# Pickle streams are binary data, so the file is opened in 'rb' mode; text mode
# can corrupt the stream on platforms that translate line endings.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('dti_store/graph_2.pkl', 'rb') as f:
    graph = pickle.load(f)

# The pickle holds a 7-tuple; unpack it into named pieces.
num_users, num_items, u_nodes, v_nodes, ratings, u_features, v_features = graph
# Stack the three parallel sequences column-wise: one row per entry,
# laid out as [u_node, v_node, rating].
net = np.vstack([u_nodes, v_nodes, ratings]).T

In [3]:
print("Creating k-NN graphs for users and items...")


def _to_dense(features):
    """Return `features.todense()` if the object offers it, else `features` unchanged.

    The guard makes this cell safe to re-run: after the first execution the
    feature variables already hold dense matrices, which have no `.todense()`.
    """
    return features.todense() if hasattr(features, 'todense') else features


# kneighbors_graph is fed dense feature matrices, as before.
u_features = _to_dense(u_features)
v_features = _to_dense(v_features)

# Connect every row to its 10 nearest neighbours (3 parallel jobs each).
k_nn_u = kneighbors_graph(u_features, n_neighbors=10, n_jobs=3)
k_nn_v = kneighbors_graph(v_features, n_neighbors=10, n_jobs=3)
print("Done.")

Creating k-NN graphs for users and items...
Done.


In [4]:
print("Creating adjacency matrix...")
# Dense (num_users x num_items) matrix; 0 means "no entry for this pair".
adj = np.zeros((num_users, num_items))
# Each row of `net` is (row index, column index, rating). The rating is stored
# shifted by +1 so that a rating of 0 stays distinguishable from an empty cell.
for edge in net:
    row_idx, col_idx, rating = edge
    adj[row_idx, col_idx] = rating + 1
print("Done.")

Creating adjacency matrix...
Done.


In [18]:
print("Creating train and test adjacency matrices...")
# Build an 80/20 train/test split of the observed entries of `adj`, stratified
# by class: an entry equal to 1. is a negative sample, an entry equal to 2. is
# a positive sample, and 0 means the pair is unobserved.
SPLIT_SEED = 0         # fixed seed so the split is identical on every re-run
TRAIN_FRACTION = 0.8   # share of each class that goes to the training set

# Private generator: reproducible shuffles without touching the global
# np.random state used elsewhere.
split_rng = np.random.RandomState(SPLIT_SEED)

# (n, 2) arrays of [row, col] indices, in row-major order.
samples = np.argwhere(adj)
pos = np.argwhere(adj == 2.)
neg = np.argwhere(adj == 1.)

# Every non-zero entry must belong to exactly one of the two classes.
if pos.shape[0] + neg.shape[0] != samples.shape[0]:
    raise ValueError('Entry not pos or neg.')

# Shuffle rows in place so the split below is random within each class.
split_rng.shuffle(pos)
split_rng.shuffle(neg)

n_pos_train = int(TRAIN_FRACTION * pos.shape[0])
n_neg_train = int(TRAIN_FRACTION * neg.shape[0])

pos_train, pos_test = pos[:n_pos_train], pos[n_pos_train:]
neg_train, neg_test = neg[:n_neg_train], neg[n_neg_train:]

train_set = np.concatenate([pos_train, neg_train], axis=0)
test_set = np.concatenate([pos_test, neg_test], axis=0)

# Mix positives and negatives within each index list.
split_rng.shuffle(train_set)
split_rng.shuffle(test_set)

# Binary membership masks: 1. where the (row, col) pair belongs to the split,
# regardless of whether the sample itself is positive or negative.
train_adj = np.zeros((num_users, num_items))
test_adj = np.zeros((num_users, num_items))
train_adj[train_set[:, 0], train_set[:, 1]] = 1.
test_adj[test_set[:, 0], test_set[:, 1]] = 1.

print("Done.")

Creating train and test adjacency matrices...
Done.


In [19]:
# Report how many non-zero entries each matrix holds.
sample_counts = [
    ("Total samples in graph =", adj),
    ("Total samples in train set =", train_adj),
    ("Total samples in test set =", test_adj),
]
for caption, matrix in sample_counts:
    print(" ".join([caption, str(np.count_nonzero(matrix))]))

Total samples in graph = 104809
Total samples in train set = 83847
Total samples in test set = 20962


In [20]:
# Persist the full matrix, the two split masks and both k-NN graphs as one tuple.
# Pickle output is binary, so the file must be opened with 'wb'; text mode can
# corrupt the stream on some platforms.
# NOTE(review): the destination is an absolute, machine-specific path --
# consider deriving it from a configurable data directory.
with open('/home/vedang/Documents/Benchmarks/mgcnn/Data/dti.pkl', 'wb') as f:
    pickle.dump((adj, train_adj, test_adj, k_nn_u, k_nn_v), f)

In [21]:
# Echo the five objects that were pickled above as a final visual check;
# NumPy abbreviates the large arrays with "..." in the rendered output.
adj, train_adj, test_adj, k_nn_u, k_nn_v

(array([[0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]), array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]), array([[0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]), <1862x1862 sparse matrix of type '<type 'numpy.float64'>'
 	with 18620 stored elements in Compressed Sparse Row format>, <1554x1554 sparse matrix of type '<type 'numpy.float64'>'
 	with 15540 stored elements in Compressed Sparse Row format>)