In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.cm as cm

In [11]:
# Taxon labels; later cells use len(taxa) as the size of the Bayes-factor
# tables. Presumably the order matches the table rows/columns — TODO confirm.
taxa = [
    'B_caccae',
    'B_cellulosilyticus_WH2',
    'B_ovatus',
    'B_thetaiotaomicron',
    'B_uniformis',
    'B_vulgatus',
    'C_aerofaciens',
    'C_scindens',
    'C_spiroforme',
    'D_longicatena',
    'P_distasonis',
    'R_obeum',
]

# Root folder the per-seed Bayes-factor tables are read from.
input_path = Path('/hpcfs/home/ciencias_biologicas/a.castellanoss/masters-thesis/data/08-mdsine2-inference')
# Root folder the summary score tables are written to.
output_path = Path('/hpcfs/home/ciencias_biologicas/a.castellanoss/masters-thesis/data/09-mdsine2-rhats_and_rsme')

# Seeds of the runs that are aggregated by default.
seeds = [
    0, 1, 2, 3, 4, 5, 6,
    12, 23, 24, 26, 28, 31, 46,
    51, 69, 77, 78, 89, 96, 127,
]

In [None]:
def _bayes_factor_evidence_scores(bayes_factors):
    """Map Bayes factors element-wise to evidence scores 0, 1, 4 or 9.

    The score is the square of an evidence level (1, 2 or 3) and is symmetric
    around "no evidence":

    * 9 if ``bf >= 100`` or ``bf <= 0.01``
    * 4 if ``10 <= bf < 100`` or ``0.01 < bf <= 0.1``
    * 1 if ``3 < bf < 10`` or ``0.1 < bf < 1/3``
    * 0 otherwise (including NaN)

    The decade boundaries (10, 100, 0.1, 0.01) are inclusive on the stronger
    side, so a value sitting exactly on a boundary is not silently scored 0.

    Parameters
    ----------
    bayes_factors : array-like of float
        Bayes factors of any shape; ``inf`` is allowed and scores 9.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape holding the scores.
    """
    bf = np.asarray(bayes_factors, dtype=float)
    # NaN compares False against every threshold and therefore scores 0.
    with np.errstate(invalid='ignore'):
        level_3 = (bf >= 100) | (bf <= 0.01)
        level_2 = (bf >= 10) | (bf <= 0.1)
        level_1 = (bf > 3) | (bf < 1/3)
    # np.select takes the first matching condition, so order strongest first.
    return np.select([level_3, level_2, level_1], [3.0**2, 2.0**2, 1.0**2], default=0.0)


def bayes_factors_scores(simtype, dataset, seeds=seeds):
    """Summarise interaction and perturbation Bayes factors per seed.

    For every seed, two ``bayes_factors.tsv`` tables are read from the run
    folder under ``input_path``: the interaction table and the table of the
    perturbation associated with ``dataset``. Each Bayes factor is converted
    to an evidence score (see ``_bayes_factor_evidence_scores``) and the
    scores are summed. The per-seed sums are written as a tab-separated table
    to ``{output_path}/{simtype}/{dataset}/bayes_factors_scores.tsv``.

    The diagonal of the interaction table is ignored, as in the exploratory
    cells of this notebook. Every entry of each table is scored; the tables
    are not cropped to ``len(taxa)``.

    Parameters
    ----------
    simtype : str
        Sub-folder of ``input_path``/``output_path`` to work in.
    dataset : {'LF0', 'HF0'}
        Dataset label; selects the perturbation folder ('HF/HS' for 'LF0',
        'LF/HPP' for 'HF0').
    seeds : iterable of int, optional
        Seeds of the runs to summarise. The default is the module-level
        ``seeds`` list as it was when this function was defined.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported labels.
    """
    perturbation_by_dataset = {'LF0': 'HF/HS', 'HF0': 'LF/HPP'}
    if dataset not in perturbation_by_dataset:
        # Fail before any file is read instead of hitting an unbound local later.
        raise ValueError(
            f'Unknown dataset {dataset!r}; expected one of {sorted(perturbation_by_dataset)}'
        )
    perturbation = perturbation_by_dataset[dataset]

    seeds = list(seeds)
    inter_bayes_fact_scores = []
    pertur_bayes_fact_scores = []
    for seed in seeds:
        # NOTE(review): the folder prefix is spelled 'mcunlty' here while other
        # cells read from 'mcnulty-results' — confirm against the on-disk layout.
        run_dir = f'{input_path}/{simtype}/mcunlty-{dataset}-seed{seed}'

        inter_bayes_fact = pd.read_table(f'{run_dir}/interactions/bayes_factors.tsv', sep='\t', index_col=0)
        inter_bayes_fact = inter_bayes_fact.to_numpy(dtype=float, copy=True)
        # Blank the i == j entries so they contribute 0 to the sum.
        np.fill_diagonal(inter_bayes_fact, np.nan)
        inter_bayes_fact_scores.append(_bayes_factor_evidence_scores(inter_bayes_fact).sum())

        pertur_bayes_fact = pd.read_table(f'{run_dir}/{perturbation}/bayes_factors.tsv', sep='\t', index_col=0)
        # Only the first data row of the perturbation table is scored.
        pertur_bayes_fact = pertur_bayes_fact.to_numpy(dtype=float)[0]
        pertur_bayes_fact_scores.append(_bayes_factor_evidence_scores(pertur_bayes_fact).sum())

    table = pd.DataFrame({'seed': seeds, 'inter_bayes_fact_score': inter_bayes_fact_scores, 'pertur_bayes_fact_score': pertur_bayes_fact_scores})
    out_file = Path(f'{output_path}/{simtype}/{dataset}/bayes_factors_scores.tsv')
    # to_csv does not create missing folders.
    out_file.parent.mkdir(parents=True, exist_ok=True)
    table.to_csv(out_file, sep='\t', index=False)

In [52]:
# Exploratory check on one LF0 result: load the interaction Bayes factors and
# blank the i == j entries so they are ignored by the scoring cell below.
interactions_bayes_factors = pd.read_table('mcnulty-results/LF0/posteriors/interactions/bayes_factors.tsv', index_col=0)
# Work on a float copy; NaN on the diagonal later scores 0.
interactions_bayes_factors = interactions_bayes_factors.to_numpy(dtype=float, copy=True)
np.fill_diagonal(interactions_bayes_factors, np.nan)
interactions_bayes_factors

array([[         nan,   0.6727098 , 466.2062925 ,  11.13135612,
          4.07700444,   1.63810163,  53.33947711,  11.7900235 ,
         57.59171658,  14.91006735,   9.35087795,  76.71843407],
       [  2.7227952 ,          nan, 495.4390507 ,  47.97991038,
         28.08924949,   3.83902604, 177.19490286,  22.95652174,
         56.74516554,  17.6       ,  28.37229477, 552.91593605],
       [  6.81552402,  14.91006735,          nan,   4.38359878,
         39.31753555,  10.71383975, 108.07117271,  23.77762638,
         33.64645089,  16.88841601,  31.06822521,  46.34482759],
       [  1.23777629,   2.52084297,  30.47852761,          nan,
          4.66810838,   1.71608304,  59.44683241,  13.37162369,
         30.08215363,  14.62796834,  22.32111709,  47.5197404 ],
       [  2.52084297,   1.08241988,  18.83189001,   3.96362867,
                 nan,   3.34177215,  45.22401171,  13.42020229,
         21.76583078,  14.8977903 ,   9.58694244,  25.44723618],
       [  1.77181208,  14.26086957,

In [53]:
# Replace every Bayes factor by its evidence score (square of level 1, 2 or 3):
#   9 if bf >= 100 or bf <= 0.01
#   4 if 10 <= bf < 100 or 0.01 < bf <= 0.1
#   1 if 3 < bf < 10 or 0.1 < bf < 1/3
#   0 otherwise (NaN included)
# The decade boundaries are inclusive so that a value of exactly 10, 100, 0.1
# or 0.01 is not silently scored 0.
# This rebinds the name to the scores: re-run the loading cell before
# re-running this one, otherwise the scores themselves get re-scored.
with np.errstate(invalid='ignore'):
    interactions_bayes_factors = np.select(
        [
            (interactions_bayes_factors >= 100) | (interactions_bayes_factors <= 0.01),
            (interactions_bayes_factors >= 10) | (interactions_bayes_factors <= 0.1),
            (interactions_bayes_factors > 3) | (interactions_bayes_factors < 1/3),
        ],
        [3.0**2, 2.0**2, 1.0**2],
        default=0.0,
    )
interactions_bayes_factors

array([[0., 0., 9., 4., 1., 0., 4., 4., 4., 4., 1., 4.],
       [0., 0., 9., 4., 4., 1., 9., 4., 4., 4., 4., 9.],
       [1., 4., 0., 1., 4., 4., 9., 4., 4., 4., 4., 4.],
       [0., 0., 4., 0., 1., 0., 4., 4., 4., 4., 4., 4.],
       [0., 0., 4., 1., 0., 1., 4., 4., 4., 4., 1., 4.],
       [0., 4., 1., 1., 4., 0., 4., 4., 4., 4., 1., 1.],
       [4., 1., 4., 4., 9., 4., 0., 4., 4., 4., 4., 4.],
       [1., 0., 4., 1., 4., 1., 4., 0., 4., 4., 4., 4.],
       [4., 1., 4., 4., 4., 1., 4., 4., 0., 4., 9., 9.],
       [4., 1., 4., 4., 9., 4., 4., 4., 4., 0., 4., 4.],
       [4., 4., 4., 4., 1., 0., 4., 4., 4., 4., 0., 4.],
       [1., 0., 4., 4., 4., 1., 4., 4., 4., 4., 4., 0.]])

In [54]:
# Total of all entries currently held in interactions_bayes_factors.
np.sum(interactions_bayes_factors)

456.0

In [62]:
# Exploratory check on one LF0 result: load the Bayes factors of the HF/HS
# perturbation and keep the first data row as a 1-D array.
perturbations_bayes_factors = (
    pd.read_table('mcnulty-results/LF0/posteriors/HF/HS/bayes_factors.tsv', index_col=0)
    .to_numpy()[0]
)
perturbations_bayes_factors

array([5.67101092e-02, 5.99900000e+03,            inf, 2.03429775e-01,
       5.66635847e-02, 1.10596946e-01, 2.91900718e+00, 1.28296742e-01,
       1.79492256e+00, 6.92644051e-01, 1.91403594e+00,            inf])

In [64]:
# Replace every Bayes factor by its evidence score (square of level 1, 2 or 3):
#   9 if bf >= 100 or bf <= 0.01
#   4 if 10 <= bf < 100 or 0.01 < bf <= 0.1
#   1 if 3 < bf < 10 or 0.1 < bf < 1/3
#   0 otherwise (NaN included)
# The decade boundaries are inclusive so that a value of exactly 10, 100, 0.1
# or 0.01 is not silently scored 0; inf scores 9.
# This rebinds the name to the scores: re-run the loading cell before
# re-running this one, otherwise the scores themselves get re-scored.
with np.errstate(invalid='ignore'):
    perturbations_bayes_factors = np.select(
        [
            (perturbations_bayes_factors >= 100) | (perturbations_bayes_factors <= 0.01),
            (perturbations_bayes_factors >= 10) | (perturbations_bayes_factors <= 0.1),
            (perturbations_bayes_factors > 3) | (perturbations_bayes_factors < 1/3),
        ],
        [3.0**2, 2.0**2, 1.0**2],
        default=0.0,
    )
perturbations_bayes_factors

array([1., 9., 9., 1., 4., 1., 0., 1., 0., 0., 0., 9.])

In [65]:
# Total of all entries currently held in perturbations_bayes_factors.
np.sum(perturbations_bayes_factors)

35.0