In [1]:
import pathlib

import pandas as pd
import regex
import scipy.stats
import tqdm
import tqdm.auto

## Full networks

In [2]:
# For every prior table matching `<network>_priors/<metaedge>.tsv.gz`, measure
# how closely the XSwap-derived prior tracks two degree-based features
# (Spearman rank correlation) and collect one summary record per table.
root = pathlib.Path()
prior_paths = sorted(root.glob('*_priors/*.tsv.gz'))

rows = list()

# `tqdm.auto` picks the notebook widget when available and falls back to the
# console bar otherwise; it replaces the deprecated `tqdm.tqdm_notebook`.
for prior_path in tqdm.auto.tqdm(prior_paths):
    # Parse each label from a single path component rather than from the
    # joined path string, so the result does not depend on the OS separator.
    network = regex.search(r'.+(?=_priors)', prior_path.parent.name).group()
    metaedge = regex.search(r'.+(?=\.tsv\.gz$)', prior_path.name).group()
    print(network, metaedge)

    df = pd.read_csv(prior_path, sep='\t', usecols=['source_degree', 'target_degree', 'edge', 'xswap_prior'])

    xswap_prior = df['xswap_prior'].values
    del df['xswap_prior']

    degree_product = df['source_degree'].values * df['target_degree'].values
    # Sum of the `edge` column; presumably a 0/1 indicator, making this the
    # number of edges in the network -- TODO confirm against the prior files.
    n_edges = df['edge'].sum()
    analytic_prior = degree_product / (
        degree_product - df['source_degree'].values - df['target_degree'].values + n_edges + 1
    )
    # The table is no longer needed; release it before the rank computations.
    del df

    # Degree product divided by its maximum over all pairs in this table.
    scaled_degree = degree_product / degree_product.max()
    del degree_product

    rows.append({
        'network': network,
        'metaedge': metaedge,
        'xswap_analytic': scipy.stats.spearmanr(xswap_prior, analytic_prior)[0],
        'xswap_degree': scipy.stats.spearmanr(xswap_prior, scaled_degree)[0],
    })
    # Drop the per-table arrays so memory does not accumulate across iterations.
    del xswap_prior, analytic_prior, scaled_degree

HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

full AdG
full AeG
full AlD
full AuG
full BPpG
full CCpG
full CbG
full CcSE
full CdG
full CrC
full CuG
full DaG
full DdG
full DpS
full DuG
full G<rG
full GcG
full GiG
full GpMF
full GpPW
sampled AdG
sampled AeG
sampled AlD
sampled AuG
sampled BPpG
sampled CCpG
sampled CbG
sampled CcSE
sampled CdG
sampled CrC
sampled CuG
sampled DaG
sampled DdG
sampled DpS
sampled DuG
sampled G<rG
sampled GcG
sampled GiG
sampled GpMF
sampled GpPW



In [3]:
# Assemble the per-table correlation records into one frame, save it as a
# tab-separated file without the index column, and preview the first rows.
corr_df = pd.DataFrame.from_records(data=rows)
corr_df.to_csv(
    path_or_buf='hetionet_feature_correlation.tsv',
    sep='\t',
    index=False,
)
corr_df.head()

Unnamed: 0,metaedge,network,xswap_analytic,xswap_degree
0,AdG,full,0.999995,0.999994
1,AeG,full,0.984483,0.984467
2,AlD,full,0.997876,0.997869
3,AuG,full,0.999997,0.999997
4,BPpG,full,0.999925,0.999926
