In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
from grakel import graph_from_pandas
from grakel.kernels import ShortestPath, EdgeHistogram
from grakel.utils import graph_from_networkx

In [2]:
# Organism identifiers, in alphabetical order.
# 'D_longicatena' is intentionally left out of every table in this cell.
taxa = [
    'B_caccae',
    'B_cellulosilyticus_WH2',
    'B_ovatus',
    'B_thetaiotaomicron',
    'B_uniformis',
    'B_vulgatus',
    'C_aerofaciens',
    'C_scindens',
    'C_spiroforme',
    'P_distasonis',
    'R_obeum',
]

# The same organisms in the custom ordering used for `order_pretty` and
# `pretty_abbrv_dir` below.
order = [
    'B_cellulosilyticus_WH2',
    'B_caccae',
    'B_vulgatus',
    'B_thetaiotaomicron',
    'B_ovatus',
    'R_obeum',
    'B_uniformis',
    'P_distasonis',
    'C_scindens',
    'C_aerofaciens',
    'C_spiroforme',
]

# Identifier -> display name wrapped in '$...$'.
pretty_names_dir = {
    'B_caccae': '$B. caccae$',
    'B_cellulosilyticus_WH2': '$B. cellulosilyticus$',
    'B_ovatus': '$B. ovatus$',
    'B_thetaiotaomicron': '$B. thetaiotaomicron$',
    'B_uniformis': '$B. uniformis$',
    'B_vulgatus': '$B. vulgatus$',
    'C_aerofaciens': '$C. aerofaciens$',
    'C_scindens': '$C. scindens$',
    'C_spiroforme': '$C. spiroforme$',
    'P_distasonis': '$P. distasonis$',
    'R_obeum': '$R. obeum$',
}

# Identifier -> three-letter abbreviation.
abbr_names_dir = {
    'B_caccae': 'Bca',
    'B_cellulosilyticus_WH2': 'Bce',
    'B_ovatus': 'Bov',
    'B_thetaiotaomicron': 'Bth',
    'B_uniformis': 'Bun',
    'B_vulgatus': 'Bvu',
    'C_aerofaciens': 'Cae',
    'C_scindens': 'Csc',
    'C_spiroforme': 'Csp',
    'P_distasonis': 'Pdi',
    'R_obeum': 'Rob',
}

# Display names following the order of `taxa`.
pretty_names = [pretty_names_dir[taxon] for taxon in taxa]

# Display names following the order of `order`.
order_pretty = [pretty_names_dir[organism] for organism in order]

# Display name -> abbreviation, inserted in the order given by `order`.
pretty_abbrv_dir = dict(
    zip(order_pretty, (abbr_names_dir[organism] for organism in order))
)

In [3]:
# LF0 node posteriors, with the log10 of the mean self-interaction appended
# as an extra column.
nodes_lf0 = pd.read_table(
    'mcnulty-results/mcnulty-LF0-seed96/posteriors/nodes.tsv',
    index_col=0,
).assign(Self_interaction_mag_log=lambda df: np.log10(df['Self_interaction_mean']))

# Single-column copy whose index is translated through `abbr_names_dir`
# (labels missing from the mapping become NaN) and named 'Node'.
nodes_lf0_just_self = nodes_lf0[['Self_interaction_mag_log']].copy()
nodes_lf0_just_self.index = nodes_lf0.index.map(abbr_names_dir).rename('Node')

# Saved as TSV; a later cell reads this file back.
nodes_lf0_just_self.to_csv(
    'mcnulty-results/mcnulty-LF0-seed96/posteriors/nodes_mod.tsv',
    sep='\t',
)
nodes_lf0_just_self

Unnamed: 0_level_0,Self_interaction_mag_log
Node,Unnamed: 1_level_1
Bca,-8.280115
Bce,-8.883129
Bov,-7.662734
Bth,-7.811491
Bun,-7.732665
Bvu,-8.317414
Cae,-6.485811
Csc,-7.05728
Csp,-6.057891
Pdi,-7.408262


In [4]:
# HF0 node posteriors, with the log10 of the mean self-interaction appended
# as an extra column.
nodes_hf0 = pd.read_table(
    'mcnulty-results/mcnulty-HF0-seed12/posteriors/nodes.tsv',
    index_col=0,
).assign(Self_interaction_mag_log=lambda df: np.log10(df['Self_interaction_mean']))

# Single-column copy whose index is translated through `abbr_names_dir`
# (labels missing from the mapping become NaN) and named 'Node'.
nodes_hf0_just_self = nodes_hf0[['Self_interaction_mag_log']].copy()
nodes_hf0_just_self.index = nodes_hf0.index.map(abbr_names_dir).rename('Node')

# Saved as TSV; a later cell reads this file back.
nodes_hf0_just_self.to_csv(
    'mcnulty-results/mcnulty-HF0-seed12/posteriors/nodes_mod.tsv',
    sep='\t',
)
nodes_hf0_just_self

Unnamed: 0_level_0,Self_interaction_mag_log
Node,Unnamed: 1_level_1
Bca,-7.954483
Bce,-8.495508
Bov,-7.960239
Bth,-8.0664
Bun,-7.72977
Bvu,-8.382326
Cae,-6.625344
Csc,-7.145696
Csp,-5.96571
Pdi,-7.680981


In [5]:
def evidence(x):
    """Bin a scalar (used here on Bayes factors) into an evidence label.

    Returns 'Weak' below sqrt(10), 'Moderate' below 10, 'Strong' below 100
    and 'Very strong' otherwise. Each cutoff is an exclusive upper bound, so
    a value equal to a cutoff falls into the next category up. Any value for
    which every comparison is false (NaN, for instance) therefore ends up as
    'Very strong'.
    """
    # (exclusive upper bound, label), scanned from the smallest bound up.
    cutoffs = (
        (np.sqrt(10), 'Weak'),
        (10, 'Moderate'),
        (100, 'Strong'),
    )
    for upper_bound, label in cutoffs:
        if x < upper_bound:
            return label
    return 'Very strong'

In [6]:
# LF0 edge posteriors -> filtered, labelled and abbreviated edge table.
edges_lf0 = (
    pd.read_table(
        'mcnulty-results/mcnulty-LF0-seed96/posteriors/edges.tsv',
        index_col=[0, 1],
    )
    .drop(columns='Cocluster_probability')
    # Keep only edges whose Bayes factor exceeds sqrt(10), i.e. everything
    # `evidence` would label above 'Weak'.
    .loc[lambda df: df['Interaction_bayes_factor'] > np.sqrt(10)]
    # Ordered categorical so the labels sort Moderate < Strong < Very strong.
    .assign(
        Interaction_evidence=lambda df: pd.Categorical(
            df['Interaction_bayes_factor'].apply(evidence),
            categories=['Moderate', 'Strong', 'Very strong'],
            ordered=True,
        )
    )
    .reset_index()
    # Exchange the 'Source' and 'Target' column labels.
    # NOTE(review): presumably the input file lists the two endpoints in the
    # opposite orientation to the one wanted downstream - confirm.
    .rename(columns={'Source': 'Target', 'Target': 'Source'})
    # Replace the full organism identifiers with their abbreviations.
    .assign(
        Source=lambda df: df['Source'].map(abbr_names_dir),
        Target=lambda df: df['Target'].map(abbr_names_dir),
    )
    .set_index(['Source', 'Target'])
)
edges_lf0.to_csv(
    'mcnulty-results/mcnulty-LF0-seed96/posteriors/edges_mod.tsv',
    index=True,
    sep='\t',
)
edges_lf0

Unnamed: 0_level_0,Unnamed: 1_level_0,Interaction,Interaction_magnitude_log,Interaction_bayes_factor,Interaction_evidence
Source,Target,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bca,Bov,+,-10.158087,6.969866,Moderate
Bca,Cae,-,-9.832596,13.566310,Strong
Bca,Csc,+,-10.483920,4.671291,Moderate
Bca,Csp,-,-9.543032,22.335230,Strong
Bca,Pdi,-,-10.236093,6.434514,Moderate
...,...,...,...,...,...
Rob,Bvu,-,-9.764345,7.047947,Moderate
Rob,Cae,-,-8.797001,42.908928,Strong
Rob,Csc,-,-8.541454,73.823038,Strong
Rob,Csp,-,-8.295361,104.563560,Very strong


In [7]:
# HF0 edge posteriors -> filtered, labelled and abbreviated edge table.
edges_hf0 = (
    pd.read_table(
        'mcnulty-results/mcnulty-HF0-seed12/posteriors/edges.tsv',
        index_col=[0, 1],
    )
    .drop(columns='Cocluster_probability')
    # Keep only edges whose Bayes factor exceeds sqrt(10), i.e. everything
    # `evidence` would label above 'Weak'.
    .loc[lambda df: df['Interaction_bayes_factor'] > np.sqrt(10)]
    # Ordered categorical so the labels sort Moderate < Strong < Very strong.
    .assign(
        Interaction_evidence=lambda df: pd.Categorical(
            df['Interaction_bayes_factor'].apply(evidence),
            categories=['Moderate', 'Strong', 'Very strong'],
            ordered=True,
        )
    )
    .reset_index()
    # Exchange the 'Source' and 'Target' column labels.
    # NOTE(review): presumably the input file lists the two endpoints in the
    # opposite orientation to the one wanted downstream - confirm.
    .rename(columns={'Source': 'Target', 'Target': 'Source'})
    # Replace the full organism identifiers with their abbreviations.
    .assign(
        Source=lambda df: df['Source'].map(abbr_names_dir),
        Target=lambda df: df['Target'].map(abbr_names_dir),
    )
    .set_index(['Source', 'Target'])
)
edges_hf0.to_csv(
    'mcnulty-results/mcnulty-HF0-seed12/posteriors/edges_mod.tsv',
    index=True,
    sep='\t',
)
edges_hf0

Unnamed: 0_level_0,Unnamed: 1_level_0,Interaction,Interaction_magnitude_log,Interaction_bayes_factor,Interaction_evidence
Source,Target,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bca,Bov,-,-9.222653,44.794383,Strong
Bca,Bth,-,-10.428382,3.416409,Moderate
Bca,Bvu,-,-8.889703,106.833798,Very strong
Bca,Cae,-,-10.370939,3.747775,Moderate
Bca,Csp,+,-10.565281,3.634053,Moderate
...,...,...,...,...,...
Rob,Bvu,+,-9.310634,9.795012,Moderate
Rob,Cae,+,-8.614974,38.874289,Strong
Rob,Csc,+,-9.011517,15.126470,Strong
Rob,Csp,-,-8.779090,25.022971,Strong


In [8]:
# Numeric variant of the LF0 edge table: the interaction sign becomes an
# integer ('+' -> 1, anything else -> -1) and the Bayes-factor columns are
# removed, leaving only the sign and the log magnitude.
edges_lf0_2 = (
    edges_lf0
    .assign(
        Interaction=lambda df: df['Interaction'].map(
            lambda sign: 1 if sign == '+' else -1
        )
    )
    .drop(columns=['Interaction_evidence', 'Interaction_bayes_factor'])
)
edges_lf0_2.to_csv(
    'mcnulty-results/mcnulty-LF0-seed96/posteriors/edges_mod_2.tsv',
    index=True,
    sep='\t',
)
edges_lf0_2

Unnamed: 0_level_0,Unnamed: 1_level_0,Interaction,Interaction_magnitude_log
Source,Target,Unnamed: 2_level_1,Unnamed: 3_level_1
Bca,Bov,1,-10.158087
Bca,Cae,-1,-9.832596
Bca,Csc,1,-10.483920
Bca,Csp,-1,-9.543032
Bca,Pdi,-1,-10.236093
...,...,...,...
Rob,Bvu,-1,-9.764345
Rob,Cae,-1,-8.797001
Rob,Csc,-1,-8.541454
Rob,Csp,-1,-8.295361


In [9]:
# Numeric variant of the HF0 edge table: the interaction sign becomes an
# integer ('+' -> 1, anything else -> -1) and the Bayes-factor columns are
# removed, leaving only the sign and the log magnitude.
edges_hf0_2 = (
    edges_hf0
    .assign(
        Interaction=lambda df: df['Interaction'].map(
            lambda sign: 1 if sign == '+' else -1
        )
    )
    .drop(columns=['Interaction_evidence', 'Interaction_bayes_factor'])
)
edges_hf0_2.to_csv(
    'mcnulty-results/mcnulty-HF0-seed12/posteriors/edges_mod_2.tsv',
    index=True,
    sep='\t',
)
edges_hf0_2

Unnamed: 0_level_0,Unnamed: 1_level_0,Interaction,Interaction_magnitude_log
Source,Target,Unnamed: 2_level_1,Unnamed: 3_level_1
Bca,Bov,-1,-9.222653
Bca,Bth,-1,-10.428382
Bca,Bvu,-1,-8.889703
Bca,Cae,-1,-10.370939
Bca,Csp,1,-10.565281
...,...,...,...
Rob,Bvu,1,-9.310634
Rob,Cae,1,-8.614974
Rob,Csc,1,-9.011517
Rob,Csp,-1,-8.779090


In [10]:
# Reload the simplified LF0 edge and node tables written by earlier cells and
# tag every row with the graph it belongs to.
lf0_edges = pd.read_table('mcnulty-results/mcnulty-LF0-seed96/posteriors/edges_mod_2.tsv')
# Per-row "<Source>_<Target>" key. This must be an element-wise concatenation:
# formatting the Series objects inside an f-string would produce ONE string
# (the printed repr of both whole columns) and broadcast it to every row.
lf0_edges['edge_key'] = lf0_edges['Source'].astype(str) + '_' + lf0_edges['Target'].astype(str)
lf0_edges['graph_index'] = 'lf0'
lf0_nodes = pd.read_table('mcnulty-results/mcnulty-LF0-seed96/posteriors/nodes_mod.tsv')
lf0_nodes['graph_index'] = 'lf0'
# {column name: {node: value}} lookup keyed by the 'Node' column.
lf0_attr_dict = lf0_nodes.set_index('Node').to_dict()

# Same preparation for the HF0 tables.
hf0_edges = pd.read_table('mcnulty-results/mcnulty-HF0-seed12/posteriors/edges_mod_2.tsv')
hf0_edges['edge_key'] = hf0_edges['Source'].astype(str) + '_' + hf0_edges['Target'].astype(str)
hf0_edges['graph_index'] = 'hf0'
hf0_nodes = pd.read_table('mcnulty-results/mcnulty-HF0-seed12/posteriors/nodes_mod.tsv')
hf0_nodes['graph_index'] = 'hf0'
# {column name: {node: value}} lookup keyed by the 'Node' column.
hf0_attr_dict = hf0_nodes.set_index('Node').to_dict()

In [11]:
# One directed networkx graph per diet, built from the edge tables. Each edge
# carries its sign ('Interaction') and its log magnitude as attributes.
# A fresh nx.DiGraph() is created on every iteration so that the two graphs
# never share a container.
# NOTE(review): per the networkx documentation `edge_key` is only consulted
# when `create_using` is a multigraph, so it likely has no effect here - confirm.
lf0_graph, hf0_graph = (
    nx.from_pandas_edgelist(
        edge_table,
        'Source',
        'Target',
        edge_attr=['Interaction', 'Interaction_magnitude_log'],
        create_using=nx.DiGraph(),
        edge_key='edge_key',
    )
    for edge_table in (lf0_edges, hf0_edges)
)

In [12]:
# Give every node listed in the node tables a 'label' attribute (its own name)
# and its 'Self_interaction_mag_log' value. `add_node` also creates the node
# when the edge list did not already introduce it.
for graph, attrs in ((lf0_graph, lf0_attr_dict), (hf0_graph, hf0_attr_dict)):
    for key, value in attrs['Self_interaction_mag_log'].items():
        graph.add_node(key, label=key, Self_interaction_mag_log=value)

In [13]:
# Hand each networkx graph to grakel as a one-element list. Both names are
# rebound from the networkx graph to whatever `graph_from_networkx` returns.
lf0_graph, hf0_graph = (
    graph_from_networkx([lf0_graph]),
    graph_from_networkx([hf0_graph]),
)

In [14]:
# Build the grakel graphs straight from the edge tables; this rebinds
# `lf0_graph` and `hf0_graph` once more.
# NOTE(review): the tuple is assumed to follow grakel's edge_df layout
# (frame, graph-id column, (source, target) columns, weight column, label
# column) - confirm against the grakel docs, in particular that a list is
# accepted for the last element.
# Earlier experiments passed 'edge_key' as the last element and also tried to
# supply the node tables; neither variant is active.
lf0_graph, hf0_graph = (
    graph_from_pandas(
        (
            edge_table,
            'graph_index',
            ('Source', 'Target'),
            'Interaction_magnitude_log',
            ['Interaction'],
        ),
        directed=True,
        as_Graph=True,
    )
    for edge_table in (lf0_edges, hf0_edges)
)

In [15]:
# kernel = ShortestPath(normalize=True)
# kernel = EdgeHistogram()
# K1 = kernel.fit_transform(list(lf0_graph['lf0']))[0]
# K2 = kernel.fit_transform(list(hf0_graph['hf0']))[0]