# Figure S9 data: human inulin association maps
This notebook generates XML files for the networks showing changes in associations for Bifidobacterium and Agathobacter rectalis in Figure S9 of the paper. The XML files can be viewed in Cytoscape and combined in Adobe Illustrator to generate Figure S9.

### Before you start
This notebook assumes the analysis of the human inulin dataset has been run and that its results are located in `MCSPACE_paper/results/analysis/human_inulin`. Refer to the README in `scripts/analysis` for the analysis pipeline and more details.

In [1]:
from pathlib import Path
import pandas as pd
from mcspace.utils import pickle_load
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np

from mcspace.visualization import render_assemblages, render_assemblage_proportions, export_association_networks_to_cytoscape
import matplotlib.colors as mcolors
import mcspace.vis_tools as vis
import matplotlib.patches as mpatches

In [2]:
%matplotlib inline 

In [3]:
# Render all figure text in Arial via the sans-serif family. Setting
# svg.fonttype to 'none' leaves SVG text as text instead of converting the
# glyphs to paths.
mpl.rcParams.update(
    {
        "font.sans-serif": "Arial",
        "font.family": "sans-serif",
        "svg.fonttype": "none",
    }
)

# Paths

In [4]:
# Locations are relative to the directory the notebook is run from:
# the notebook's own folder, a root two directory levels up, and the
# datasets folder of the paper repository under that root.
basepath = Path("./")
rootpath = Path("../../")
datapath = rootpath.joinpath("MCSPACE_paper", "datasets")

# Everything this notebook writes goes here; the folder is created on first
# run and left alone if it already exists.
outpath = basepath.joinpath("supplemental_figures")
outpath.mkdir(parents=True, exist_ok=True)

In [5]:
# Folder holding the finished human inulin analysis (see "Before you start").
resultpath = rootpath.joinpath("MCSPACE_paper", "results", "analysis", "human_inulin")

In [7]:
# Load the saved analysis results (a dict; its keys are listed in the next cell).
# NOTE(review): pickle_load presumably unpickles the file, so only point this
# at result files from a trusted source — confirm against mcspace.utils.
results = pickle_load(resultpath / "results.pkl")

# Fix taxonomy (add genus to species names...)

In [8]:
results.keys()

dict_keys(['assemblages', 'assemblage_proportions', 'perturbation_bayes_factors', 'relative_abundances'])

In [9]:
theta = results["assemblages"]

In [10]:
print(theta.shape)

(50, 42)


In [11]:
theta.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,A33,A34,A35,A36,A37,A38,A39,A40,A41,A42
Otu,Domain,Phylum,Order,Class,Family,Genus,Species,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Otu15,Bacteria,Firmicutes,Acidaminococcales,Negativicutes,Acidaminococcaceae,Acidaminococcaceae,na,5.402044e-10,4.566318e-10,7.087878e-10,6.761923e-10,0.01011743,0.004154032,6.815317e-10,0.0006337994,9.157046e-10,1.710469e-08,...,2.75328e-09,9.905521e-10,0.021291,1.765768e-09,6.548908e-08,1.364419e-09,1.39265e-09,0.0008368565,0.006111267,0.0005314949
Otu81,Bacteria,Firmicutes,Clostridia UCG-014,Clostridia,na,na,na,5.993243e-10,1.099598e-09,1.125636e-09,7.870884e-10,7.910649e-10,8.462128e-10,3.655898e-09,3.735886e-05,0.001654418,0.001180011,...,0.0003719087,0.0004524394,0.012844,2.875757e-09,6.175879e-08,0.001043484,0.01019878,1.219343e-09,7.642739e-05,1.901888e-09
Otu109,Bacteria,Firmicutes,Lachnospirales,Clostridia,Lachnospiraceae,na,na,1.22317e-09,1.269643e-09,4.571187e-05,9.508778e-10,2.440368e-09,0.006747477,5.486141e-10,2.234653e-09,7.103887e-10,1.271684e-09,...,0.001121878,6.744422e-10,0.00281,1.796872e-09,1.28023e-07,8.011021e-10,0.03995764,2.639358e-10,0.002562632,0.01772351
Otu52,Bacteria,Firmicutes,Lachnospirales,Clostridia,Lachnospiraceae,Lachnospiraceae,UCG-001,0.001028309,7.469828e-10,3.63041e-06,9.249437e-10,1.098402e-09,0.01015128,0.000165084,0.001012947,1.53072e-09,0.0001035948,...,0.0004415186,2.44639e-09,0.003712,0.002437058,0.001112596,0.003667009,1.304979e-09,2.938695e-09,9.657782e-10,0.0003487537
Otu75,Bacteria,Actinobacteriota,Coriobacteriales,Coriobacteriia,Eggerthellaceae,na,na,0.000753484,2.488229e-05,8.02923e-10,9.289624e-10,1.575106e-09,7.921071e-10,7.187091e-10,6.652289e-10,0.0001214494,0.0009285728,...,0.008868053,0.0004765187,0.002964,5.74049e-09,1.374571e-07,8.202014e-10,0.003937318,0.003687582,0.000289427,0.0006012646


In [12]:
# Level names of theta's row index, in the order the levels are stored.
names = ['Otu', 'Domain', 'Phylum', 'Order', 'Class', 'Family', 'Genus', 'Species']

_MISSING_TAXON = "na"  # placeholder used in the taxonomy for an unassigned rank


def _qualify_species(genus, species):
    """Return *species* prefixed with *genus*, e.g. "UCG-001" -> "Lachnospiraceae UCG-001".

    The label is returned unchanged when there is nothing sensible to prepend:
    the species is unassigned, the genus is unassigned, or the species already
    carries the genus prefix. The last case makes the relabelling idempotent,
    so re-running this cell after the index has been replaced does not produce
    "Genus Genus species".
    """
    if species == _MISSING_TAXON:
        return _MISSING_TAXON
    if genus == _MISSING_TAXON or species.startswith(f"{genus} "):
        return species
    return f"{genus} {species}"


# Rebuild every index entry, changing only the Species level.
new_index = [
    (otu, domain, phylum, order, class_, family, genus, _qualify_species(genus, species))
    for otu, domain, phylum, order, class_, family, genus, species in theta.index
]

In [13]:
# Replace the row index in place. `theta` is the same object as
# results["assemblages"], so the table stored in `results` is relabelled too.
theta.index = pd.MultiIndex.from_tuples(new_index, names=names)

In [14]:
theta.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,A33,A34,A35,A36,A37,A38,A39,A40,A41,A42
Otu,Domain,Phylum,Order,Class,Family,Genus,Species,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Otu15,Bacteria,Firmicutes,Acidaminococcales,Negativicutes,Acidaminococcaceae,Acidaminococcaceae,na,5.402044e-10,4.566318e-10,7.087878e-10,6.761923e-10,0.01011743,0.004154032,6.815317e-10,0.0006337994,9.157046e-10,1.710469e-08,...,2.75328e-09,9.905521e-10,0.021291,1.765768e-09,6.548908e-08,1.364419e-09,1.39265e-09,0.0008368565,0.006111267,0.0005314949
Otu81,Bacteria,Firmicutes,Clostridia UCG-014,Clostridia,na,na,na,5.993243e-10,1.099598e-09,1.125636e-09,7.870884e-10,7.910649e-10,8.462128e-10,3.655898e-09,3.735886e-05,0.001654418,0.001180011,...,0.0003719087,0.0004524394,0.012844,2.875757e-09,6.175879e-08,0.001043484,0.01019878,1.219343e-09,7.642739e-05,1.901888e-09
Otu109,Bacteria,Firmicutes,Lachnospirales,Clostridia,Lachnospiraceae,na,na,1.22317e-09,1.269643e-09,4.571187e-05,9.508778e-10,2.440368e-09,0.006747477,5.486141e-10,2.234653e-09,7.103887e-10,1.271684e-09,...,0.001121878,6.744422e-10,0.00281,1.796872e-09,1.28023e-07,8.011021e-10,0.03995764,2.639358e-10,0.002562632,0.01772351
Otu52,Bacteria,Firmicutes,Lachnospirales,Clostridia,Lachnospiraceae,Lachnospiraceae,Lachnospiraceae UCG-001,0.001028309,7.469828e-10,3.63041e-06,9.249437e-10,1.098402e-09,0.01015128,0.000165084,0.001012947,1.53072e-09,0.0001035948,...,0.0004415186,2.44639e-09,0.003712,0.002437058,0.001112596,0.003667009,1.304979e-09,2.938695e-09,9.657782e-10,0.0003487537
Otu75,Bacteria,Actinobacteriota,Coriobacteriales,Coriobacteriia,Eggerthellaceae,na,na,0.000753484,2.488229e-05,8.02923e-10,9.289624e-10,1.575106e-09,7.921071e-10,7.187091e-10,6.652289e-10,0.0001214494,0.0009285728,...,0.008868053,0.0004765187,0.002964,5.74049e-09,1.374571e-07,8.202014e-10,0.003937318,0.003687582,0.000289427,0.0006012646


In [15]:
# Store the relabelled table under its original key. `theta` was taken from
# this same entry and modified in place, so this only makes the update explicit.
results["assemblages"] = theta

In [16]:
bf = results['perturbation_bayes_factors']

In [17]:
np.amax(bf)

3.0916824

## Aggregate time points by diet phase and output association network data

In [27]:
from mcspace import utils as ut

In [21]:
ra = results['relative_abundances']

In [18]:
# Time points grouped by diet phase. The integers are used as column labels
# when averaging the time-resolved tables per phase.
diet_times = {
    "Pre-Inulin": [1, 2, 4],
    "Inulin": [7, 8, 10],
    "Post-Inulin": [11],
}

In [19]:
def average_diet_times(df, diet_times):
    """Collapse time-point columns into one averaged column per diet phase.

    Parameters
    ----------
    df : DataFrame whose column labels include every time point listed in
        ``diet_times``.
    diet_times : mapping of phase name -> list of column labels of ``df``
        belonging to that phase.

    Returns
    -------
    A new DataFrame with the same rows as ``df`` and one column per phase, in
    the mapping's order, holding the row-wise mean of that phase's columns.
    ``df`` itself is not modified.
    """
    # Row-wise mean of each phase's columns, always computed from the input.
    phase_means = {
        phase: df.loc[:, timepoints].mean(axis=1)
        for phase, timepoints in diet_times.items()
    }

    # Attach the means to a copy, then keep only the phase columns.
    averaged = df.copy()
    for phase, column in phase_means.items():
        averaged[phase] = column
    return averaged[list(phase_means)]

In [23]:
radiets = average_diet_times(ra, diet_times)

In [24]:
thetadf = results['assemblages']
betadf = results['assemblage_proportions']

In [28]:
avebeta = ut.get_subj_averaged_assemblage_proportions(betadf)

In [25]:
# Columns of the taxonomy table: the OTU id followed by the ranks from Domain
# to Species. Class is listed before Order here, unlike the stored index
# order; columns are selected by name, so only their order changes.
taxlevels = ['Otu', 'Domain', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

# Turn the index levels into ordinary columns, keep the taxonomy columns only,
# and key the resulting table by OTU id.
taxonomy = (
    thetadf.reset_index()
    .loc[:, taxlevels]
    .copy()
    .set_index("Otu")
)

In [26]:
# Focal OTUs to export one association network each for.
# NOTE(review): presumably the Bifidobacterium and Agathobacter rectalis OTUs
# named in the introduction — confirm which id is which.
otu_plot = ["Otu1", "Otu6"]

In [30]:
# For each focal OTU, build its association network per diet phase and write
# it out as two CSV tables plus a graph file for viewing in Cytoscape.
for oidx in otu_plot:
    # Taxon label for this OTU; used only for the graph filename below.
    otu_name = ut.get_lowest_level_name(oidx, taxonomy)

    # get edges and node weights
    # Association scores for the focal OTU, then averaged within each diet phase.
    alpha = ut.get_assoc_scores(thetadf, avebeta, oidx)
    alphasub = average_diet_times(alpha, diet_times)
    # Edge weights: the scores that pass filtering with both thresholds set to
    # 0.01 (the exact filtering rule is defined in mcspace.utils).
    ew = ut.filter_assoc_scores(alphasub, radiets, oidx, ra_threshold=0.01, edge_threshold=0.01)
    # Node weights: diet-averaged relative abundances for the rows kept in ew.
    nw = radiets.loc[ew.index,:]

    # update labels for taxa 
    nw3 = ut.update_names(nw, taxonomy)
    ew3 = ut.update_names(ew, taxonomy)
    
    # Tabular copies of the node and edge data, named by OTU id.
    nw3.to_csv(outpath / f"node_data_{oidx}.csv")
    ew3.to_csv(outpath / f"edge_data_{oidx}.csv")

    # output to file
    # NOTE(review): this file is named by taxon label while the CSVs are named
    # by OTU id; two focal OTUs sharing a label would overwrite each other here.
    ut.output_association_network_to_graphML(oidx, nw3, ew3, taxonomy, outpath / f'{otu_name}.xml')