# Create a table summarizing metapath performance

In [1]:
import configparser
import collections
import os

import pandas
import numpy

import hetio.readwrite

In [2]:
# Read the metapath listing, expose the 'abbreviation' column under the name
# 'metapath', and keep only the two columns used by the rest of the notebook.
metapath_df = pandas.read_table('../all-features/data/metapaths.tsv')
metapath_df = metapath_df.rename(columns={'abbreviation': 'metapath'})
metapath_df = metapath_df[['metapath', 'length']]
metapath_df.head(2)

Unnamed: 0,metapath,length
0,CbGaD,2
1,CbGdD,2


In [3]:
# Load the per-feature AUROC results, derive -log10 of the delta-AUROC p-value,
# and keep just the columns that get attached to the metapath table.
auroc_df = (
    pandas.read_table('../all-features/data/feature-performance/auroc.tsv')
    .assign(mlog10_pval_delta_auroc=lambda df: -numpy.log10(df['pval_delta_auroc']))
    [['metapath', 'delta_auroc', 'mlog10_pval_delta_auroc']]
)
# merge() with no `on=` joins on the columns the two frames share.
metapath_df = metapath_df.merge(auroc_df)
metapath_df.head(2)

Unnamed: 0,metapath,length,delta_auroc,mlog10_pval_delta_auroc
0,CbGaD,2,0.1453,6.213675
1,CbGdD,2,-0.003001,1.526966


In [4]:
# Attach model coefficients to the metapath table.
coef_df = pandas.read_table('./model/coefficient.tsv')
# Keep only features named 'dwpc_<metapath>'. The explicit .copy() makes the
# filtered frame independent of the loaded table, so adding a column below does
# not trigger pandas' SettingWithCopyWarning.
coef_df = coef_df[coef_df.feature.str.startswith('dwpc_')].copy()
# Take the token after the first underscore as the metapath abbreviation.
# Element-wise .str[1] (rather than expand=True followed by [1]) also works when
# no feature matched the prefix and the frame is empty.
coef_df['metapath'] = coef_df.feature.str.split('_').str[1]
coef_df = coef_df[['metapath', 'coef']]
# Left join: metapaths without a coefficient row keep a missing `coef`.
metapath_df = metapath_df.merge(coef_df, how='left')
metapath_df.head(2)

Unnamed: 0,metapath,length,delta_auroc,mlog10_pval_delta_auroc,coef
0,CbGaD,2,0.1453,6.213675,0.198193
1,CbGdD,2,-0.003001,1.526966,


In [5]:
# Load the metagraph for the hetnet, pinned to the commit named in ../config.ini
config = configparser.ConfigParser()
config.read('../config.ini')
hetnet_config = config['hetnet']
commit = hetnet_config['integrate_commit']
url_template = 'https://github.com/dhimmel/integrate/raw/{}/data/metagraph.json'
url = url_template.format(commit)
metagraph = hetio.readwrite.read_metagraph(url)

In [6]:
def _to_verbose(abbrev):
    """Return the spelled-out unicode string for a metapath abbreviation."""
    return metagraph.metapath_from_abbrev(abbrev).get_unicode_str()


# Add a human-readable rendering of every metapath abbreviation.
metapath_df['verbose'] = metapath_df.metapath.map(_to_verbose)

In [7]:
# Preview the first two rows, now including the `verbose` metapath column.
metapath_df.head(2)

Unnamed: 0,metapath,length,delta_auroc,mlog10_pval_delta_auroc,coef,verbose
0,CbGaD,2,0.1453,6.213675,0.198193,Compound–binds–Gene–associates–Disease
1,CbGdD,2,-0.003001,1.526966,,Compound–binds–Gene–downregulates–Disease


In [8]:
# Save the assembled metapath summary as a tab-separated file without the
# index; floats are written with five significant digits.
path = os.path.join('features', 'metapaths.tsv')
metapath_df.to_csv(path, sep='\t', index=False, float_format='%.5g')

## Create a pretty markdown table for select metapaths of interest

In [9]:
# Manually selected metapaths (abbreviations). Rows whose `metapath` appears
# here are kept in the markdown table below.
select = [
    'CbGaD', 'CbGiGaD', 'CbGiGiGaD', 'CbGpPWpGaD', 'CbGpBPpGaD',
    'CcSEcCtD', 'CtDpSpD', 'CbGeAlD', 'CtDlAlD',
    'CuGdD', 'CdGuD',
    'CuGuCtD', 'CdGdCtD', 'CuGdCtD', 'CdGuCtD',
    'CtDuGuD', 'CtDdGdD', 'CtDdGuD', 'CtDuGdD',
]

In [10]:
def df_to_markdown(df, float_format='%.2g'):
    """
    Export a pandas.DataFrame to markdown-formatted text.

    The DataFrame should not contain any `|` characters, since `|` is used
    as the cell delimiter and every occurrence is padded with spaces.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render. The index is not written.
    float_format : str
        Format string handed to `DataFrame.to_csv` for floating point cells.

    Returns
    -------
    str
        A header row, a `----` separator row, and one row per record. Lines
        are separated by a single '\\n' and the text ends with a newline
        whenever the CSV body does. Write it through a text-mode file handle,
        which translates '\\n' to the platform line ending.
    """
    # Column labels are stringified so non-string labels (e.g. integers) work.
    header = '|'.join(str(column) for column in df.columns)
    separator = '|'.join(4 * '-' for _ in df.columns)
    body = df.to_csv(sep='|', index=False, header=False, float_format=float_format)
    # Always use '\n' rather than os.linesep: on Windows, '\r\n' written in text
    # mode becomes '\r\r\n'. to_csv may itself emit the platform terminator, so
    # normalize its output as well.
    body = body.replace('\r\n', '\n')
    table = '\n'.join([header, separator, body])
    return table.replace('|', ' | ')

In [11]:
# Rows for the table: the manually selected metapaths plus any metapath whose
# model coefficient exceeds 0.05. Copy so the column edits below do not touch
# metapath_df.
table_df = metapath_df.query("metapath in @select or coef > 0.05").copy()

# Per-column string formatters: delta AUROC as a percentage with one decimal,
# -log10(p) as a fixed-point number with one decimal.
formatter = {
    'delta_auroc': '{:.1%}'.format,
    'mlog10_pval_delta_auroc': '{:.1f}'.format,
}

# Maps source column -> display header; the insertion order is the column
# order of the rendered table.
renamer = collections.OrderedDict([
    ('metapath', 'Abbrev.'),
    ('length', 'Len.'),
    ('delta_auroc', 'Δ AUROC'),
    ('mlog10_pval_delta_auroc', '−log10(*p*)'),
    ('coef', 'Coef.'),
    ('verbose', 'Metapath'),
])
for column, fxn in formatter.items():
    table_df[column] = table_df[column].map(fxn)
table_df = table_df[list(renamer)]
table_df = table_df.rename(columns=renamer)
# Columns formatted above are already strings; '%.2f' applies to the float
# columns that remain (e.g. the coefficient).
table_md = df_to_markdown(table_df, float_format='%.2f')

# The table contains non-ASCII characters (Δ, −, en dashes in the metapath
# names), so write UTF-8 explicitly instead of the platform default encoding.
path = os.path.join('figure', 'metapath-table.md')
with open(path, 'wt', encoding='utf-8') as write_file:
    write_file.write(table_md)