In [1]:
import numpy as np
import pandas as pd
import scipy.sparse
import tqdm
import xswap

import analysis

In [2]:
# Constraints forwarded to xswap.permute_edge_list when shuffling edges.
allow_self_loops = True
allow_antiparallel = True

# Passed to analysis.edges_to_matrix so adjacency matrices are built as directed.
directed = True

# Number of permuted networks generated; also the divisor used when the
# per-permutation sums are averaged.
n_perms = 1000

# 2. TFTG directed network

In [3]:
# Load the table of node pairs; the columns id_a, id_b and train are used below.
df = pd.read_csv('../data/3.all_nodes/tftg.tsv.xz', sep='\t', compression='xz')

# Only the pairs flagged train == 1 become edges of the network.
train_pairs = df.loc[df['train'] == 1, ['id_a', 'id_b']].values
edges = [tuple(pair) for pair in train_pairs]

# Adjacency matrix of the training network, stored column-compressed.
mat = analysis.edges_to_matrix(edges, directed=directed).tocsc()
print(mat.shape)

out_degree, in_degree = analysis.compute_directed_degrees(mat)

# One more than the largest source id among the training edges; later cells
# use it to restrict feature matrices to the first n_source rows.
n_source = 1 + max(source for source, _ in edges)

(1441, 1441)


In [4]:
# Every feature matrix has one row per source id and one column per matrix column.
feature_shape = (n_source, mat.shape[1])

# Key order matters: features are merged into `df` in this order later on.
feature_dict = {
    # Running sum of permuted adjacency rows (empty sparse matrix to start).
    'edge_prior': scipy.sparse.csc_matrix(feature_shape),

    # Inference feature computed on the unpermuted network.
    'inf': analysis.directed_inference(mat, out_degree, in_degree, n_source),
    # Running sum of the same feature computed on each permuted network.
    'mean_inf': np.zeros(feature_shape, dtype=float),
    # Running count of permutations whose value is >= the unpermuted value.
    'p_inf': np.zeros(feature_shape, dtype=float),
}

In [5]:
# Options shared by every call to the edge-swap permutation.
swap_kwargs = dict(allow_self_loops=allow_self_loops,
                   allow_antiparallel=allow_antiparallel)

# Feature values on the unpermuted network; not modified inside the loop.
observed_inf = feature_dict['inf']

# Each permutation starts from the previous permuted edge list, not from the
# original edges, and uses the iteration number as its seed.
perm_edges = edges.copy()
for seed in tqdm.tnrange(n_perms):
    perm_edges, _swap_stats = xswap.permute_edge_list(perm_edges, seed=seed, **swap_kwargs)
    perm_mat = analysis.edges_to_matrix(perm_edges, directed=directed).tocsc()

    # Accumulate how often each (source, target) pair is an edge after permutation.
    feature_dict['edge_prior'] += perm_mat[:n_source]

    # Degrees of the original network are reused here -- presumably valid because
    # the edge swaps preserve node degrees; confirm against the xswap documentation.
    perm_inf = analysis.directed_inference(perm_mat, out_degree, in_degree, n_source)
    feature_dict['mean_inf'] += perm_inf
    feature_dict['p_inf'] += (perm_inf >= observed_inf)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))




In [6]:
# Features accumulated as sums over the permutations; dividing by n_perms turns
# them into per-permutation averages. 'inf' is not a sum and is left unscaled.
summed_features = ('edge_prior', 'mean_inf', 'p_inf')

# Remove feature columns left behind by an earlier run of this cell so that
# re-running it does not create suffixed duplicates (e.g. inf_x / inf_y).
# On a fresh kernel this selects every column and changes nothing.
df = df[[column for column in df.columns if column not in feature_dict]]

for feature, array in feature_dict.items():
    # Normalize into the local variable only. The accumulators in feature_dict
    # keep their raw sums, so the result does not depend on `/=` mutating the
    # stored object in place and re-running this cell cannot divide twice.
    if feature in summed_features:
        array = array / n_perms

    # Make features dense (for DataFrame)
    if scipy.sparse.issparse(array):
        array = array.toarray()

    # Reshape the (source x target) matrix to long format: one row per pair,
    # with the row index as id_a and the column label as id_b.
    feature_df = (
        pd.DataFrame(array[:n_source])
        .reset_index()
        .melt(id_vars=['index'])
        .rename(columns={'index': 'id_a', 'variable': 'id_b', 'value': feature})
        .assign(
            id_a=lambda frame: frame['id_a'].astype(int),
            id_b=lambda frame: frame['id_b'].astype(int),
        )
    )
    # Left merge keeps every row of df; pairs without a feature value get NaN.
    df = df.merge(feature_df, how='left', on=['id_a', 'id_b'])

df.head(2)

Unnamed: 0,name_a,name_b,id_a,id_b,train,test_recon,test_new,edge_prior,inf,mean_inf,p_inf
0,AHR,AHR,0,0,0,0,0,0.0,0.0,0.0,1.0
1,AHR,AR,0,1,0,0,0,0.0,0.0,0.0,1.0


In [7]:
%%time

# Write the pair table with the merged feature columns as xz-compressed TSV,
# omitting the DataFrame index.
df.to_csv(
    '../data/4.data/tftg.tsv.xz',
    sep='\t',
    compression='xz',
    index=False,
)

CPU times: user 8.92 s, sys: 51.1 ms, total: 8.97 s
Wall time: 13.6 s
