In [1]:
import tensorflow as tf 
import pandas as pd
import numpy as np
import scipy.sparse as sp
import script_config as sc
import csv 

# Neighborhood-search settings, read from the shared script configuration
hops, max_list_size = sc._config_hops, sc._config_relation_list_size_neighborhood

# Folder that holds the input data (note the trailing slash: paths are built by concatenation)
data_folder = "/nvme/drive_1/NTDS_Final/"

# Full user table: tab-separated, no header row
users = pd.read_csv(
    data_folder + "usersdata.csv",
    delimiter='\t',
    header=None,
).values

# Filtered user table: comma-separated, first row is the header
filtered_users = pd.read_csv(
    data_folder + "filtered_users.csv",
    delimiter=',',
).values

In [2]:
# Maps the first column of each row of `users` (presumably the user id -- confirm
# against the CSV layout) to that row's position in `users`.
user_idx_dict = {row[0]: position for position, row in enumerate(users)}

# Same mapping, restricted to the keys that appear in the first column of
# `filtered_users`; a key missing from `users` raises KeyError.
filtered_user_idx_dict = {row[0]: user_idx_dict[row[0]] for row in filtered_users}
    
                
def check_symmetric(a, tol=1e-8):
    """Tell whether ``a`` matches its own transpose.

    The comparison is delegated to ``np.allclose`` with ``atol=tol`` (the
    relative tolerance keeps numpy's default).
    """
    mirrored = a.T
    return np.allclose(a, mirrored, atol=tol)

**I. Extract Neighbors**

In [3]:
def load_adj_as_matrix(feature_type):
    """Load the stored adjacency matrix of one feature and return it densely.

    Parameters
    ----------
    feature_type : int or str
        Feature identifier; it is stringified and inserted into the file name
        ``<data_folder>filtering/adjacency_<feature_type>.npz``.

    Returns
    -------
    The ``todense()`` result of the loaded sparse matrix (a ``numpy.matrix``
    when the file holds a scipy sparse *matrix*). The whole matrix is
    materialised in memory.
    """
    # NOTE(review): restricting the matrix to the rows/columns of the filtered
    # users (adj[idx, :][:, idx]) was sketched here but left disabled, so the
    # stored matrix is returned as-is. The index array that only served that
    # disabled line has been removed.
    adj = sp.load_npz(data_folder + "filtering/adjacency_" + str(feature_type) + ".npz")
    return adj.todense()

# Bounded breadth-first search around one node (depth budget: "hops")
def find_neighborhood(adjacency, user_idx, max_hops=None, queue_capacity=None):
    """Collect the nodes reached from ``user_idx`` by a depth-limited BFS.

    Parameters
    ----------
    adjacency : 2-D ``numpy.matrix`` or ``numpy.ndarray``
        Entry ``[a, b] == 1`` is treated as an edge from ``a`` to ``b``; every
        other value is ignored.
    user_idx : int
        Row index of the start node.
    max_hops : int, optional
        Depth budget; defaults to the module-level ``hops``. A node at depth
        ``d`` is expanded only while ``d + 1 < max_hops``, so the result holds
        nodes up to depth ``max_hops - 1`` (``max_hops == 1`` yields only the
        start node).
    queue_capacity : int, optional
        Maximum number of nodes the search may collect; defaults to the
        module-level ``max_list_size``.

    Returns
    -------
    numpy.ndarray of int32
        Node indices in discovery order, start node first, each node once.

    Raises
    ------
    MemoryError
        If more than ``queue_capacity`` nodes would have to be stored.
    """
    depth_limit = hops if max_hops is None else max_hops
    capacity = max_list_size if queue_capacity is None else queue_capacity

    start = int(user_idx)

    # Fixed-size FIFO: column 0 is the node index, column 1 its depth.
    queue = np.empty((capacity, 2), dtype='i4')
    queue[0] = [start, 0]
    queue_head = 0  # index of the next node to expand
    queue_tail = 1  # index of the next free slot

    # Every node ever enqueued. Membership is tested on node ids only, so a
    # node whose id happens to equal a stored depth value is no longer
    # mistaken for "already queued", and processed nodes are never re-added.
    seen = {start}

    while queue_head != queue_tail:
        current_id = int(queue[queue_head, 0])
        current_hops = int(queue[queue_head, 1])
        queue_head += 1

        # Depths are non-decreasing along a BFS queue, so once one node is
        # past the budget every remaining node is too.
        if current_hops + 1 >= depth_limit:
            break

        # np.asarray + flatnonzero handles both a (1, N) np.matrix row and a
        # plain (N,) ndarray row.
        row = np.asarray(adjacency[current_id, :])
        for neigh_id in np.flatnonzero(row == 1):
            neigh_id = int(neigh_id)
            if neigh_id in seen:
                continue

            if queue_tail == capacity:
                raise MemoryError(
                    "Increase _config_relation_list_size_neighborhood in script_config.py"
                )

            seen.add(neigh_id)
            queue[queue_tail] = [neigh_id, current_hops + 1]
            queue_tail += 1

    return queue[:queue_tail, 0]

# Per user: union of the neighborhoods found in each of the seven feature graphs.
# Every set starts with the user itself, so the central node is always a member.
neighborhoods = [{i} for i in range(len(filtered_users))]
for feature in [1,2,3,4,5,6,7]:
    adj = load_adj_as_matrix(feature)
    for i in range(len(filtered_users)):
        # Store plain Python ints so the lists built below are homogeneous
        neighborhoods[i].update(int(node) for node in find_neighborhood(adj, i))
        if i % 1000 == 0:
            print("\r"+str(i) +" users processed for feature "+str(feature), sep=' ', end='', flush=True)

# Make sure that central node has index 0
#  in the local neighbor list
for i in range(len(filtered_users)):
    members = list(neighborhoods[i])
    center = members.index(i)  # always present: each set was seeded with i
    members[0], members[center] = members[center], members[0]
    neighborhoods[i] = members

    assert neighborhoods[i][0] == i
    if i % 1000 == 0:
        print("\r"+str(i) +" users validated", sep=' ', end='', flush=True)
        
    

62000 users validated for feature 7

**II. Create Local Adjacencies from List of Neighbors**

In [5]:
# Each local adjacency matrix has one row/column per (neighbor, feature) pair,
# stored at index  local_position * NUM_FEATURES + (feature - 1).
NUM_FEATURES = 7


def local_adjacency_path(user_position):
    """Return the .npy path of the local adjacency matrix of one user."""
    return data_folder + "local/adjacency_" + str(user_position) + ".npy"


def report_progress(message):
    """Overwrite the current output line with ``message``."""
    print("\r" + message, sep=' ', end='', flush=True)


def initialize_local_adjacencies():
    """Step 1: write one all-zero local adjacency matrix per neighborhood.

    Not called by default (this step used to be disabled inside a string
    literal); run it once when the files under ``local/`` must be rebuilt.
    """
    for i, hood in enumerate(neighborhoods):
        size = len(hood) * NUM_FEATURES
        np.save(local_adjacency_path(i), np.zeros((size, size)))
        if i % 100 == 0:
            report_progress(str(i) + " matrices initialized")


def fill_feature_blocks():
    """Step 2: copy the per-feature global edges into every local matrix.

    Not called by default, same as step 1. For each pair of neighbors the
    global entry ``adj[n1, n2]`` is written symmetrically at the two
    (neighbor, feature) positions of the local matrix.
    """
    for feature in range(1, NUM_FEATURES + 1):
        adj = load_adj_as_matrix(feature)
        layer = feature - 1
        for i, neighboor_list in enumerate(neighborhoods):
            local_adj = np.load(local_adjacency_path(i))

            for idx_1, neighboor_1 in enumerate(neighboor_list):
                # start=idx_1 keeps idx_2 equal to neighboor_2's position in
                # the full list; without it the slice restarts counting at 0
                # and values land in the wrong rows/columns.
                for idx_2, neighboor_2 in enumerate(neighboor_list[idx_1:], start=idx_1):
                    val = adj[neighboor_1, neighboor_2]
                    local_adj[idx_1 * NUM_FEATURES + layer, idx_2 * NUM_FEATURES + layer] = val
                    local_adj[idx_2 * NUM_FEATURES + layer, idx_1 * NUM_FEATURES + layer] = val

            np.save(local_adjacency_path(i), local_adj)

            if i % 100 == 0:
                report_progress(str(i) + " users processed for feature " + str(feature))


def link_feature_layers(local_adj, num_nodes):
    """Step 3 kernel: connect, in place, the feature copies of each node.

    For every node the NUM_FEATURES x NUM_FEATURES block on the diagonal gets
    its off-diagonal entries set to 1; the block's own diagonal is untouched.
    Returns ``local_adj`` for convenience.
    """
    off_diagonal = ~np.eye(NUM_FEATURES, dtype=bool)
    for idx in range(num_nodes):
        start = idx * NUM_FEATURES
        stop = start + NUM_FEATURES
        # Basic slicing yields a view, so the masked assignment writes through.
        local_adj[start:stop, start:stop][off_diagonal] = 1
    return local_adj


# Steps 1 and 2 are expensive and only needed when the files under "local/"
# are missing or were built from a different `neighborhoods` list:
# initialize_local_adjacencies()
# fill_feature_blocks()

# Step 3: link the feature layers of every node in each stored local matrix.
for i, neighboor_list in enumerate(neighborhoods):
    local_adj = np.load(local_adjacency_path(i))

    # Guard against stale files: slicing would silently truncate otherwise.
    expected_size = len(neighboor_list) * NUM_FEATURES
    if local_adj.shape != (expected_size, expected_size):
        raise ValueError(
            "local adjacency " + str(i) + " has shape " + str(local_adj.shape)
            + " but the neighborhood needs " + str((expected_size, expected_size))
            + "; rebuild it with initialize_local_adjacencies() and fill_feature_blocks()"
        )

    link_feature_layers(local_adj, len(neighboor_list))
    #assert(check_symmetric(local_adj))
    np.save(local_adjacency_path(i), local_adj)
    if i % 100 == 0:
        # The stale `feature` value leaked from an earlier cell is no longer appended.
        report_progress(str(i) + " adjacency matrices verified")

62100 adjacency matrices verified7