In [1]:
import itertools as it
import os

from matplotlib import rcParams
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import skbio
import statsmodels.api as sms
import statsmodels.formula.api as smf
import seaborn as sn

from qiime2 import Artifact, Metadata, Visualization

import qiime2.plugins.diversity.actions as q2_diversity
import qiime2.plugins.deicode.actions as q2_deicode

rcParams['font.sans-serif'] = ['Helvetica', 'Arial']
rcParams['pdf.fonttype'] = 42
np.set_printoptions(precision=5)

%matplotlib inline

In [2]:
# Sample metadata, kept both as the QIIME 2 object and as a DataFrame.
# `long_survival` is cast to int; it is the response of the logit models below.
meta_q2 = Metadata.load('data/metadata_paired.tsv')
meta = meta_q2.to_dataframe().astype({'long_survival': int})

In [3]:
# Beta-diversity metrics stored under data/diversity/beta/. The files use
# hyphens where the metric names use underscores.
beta_metrics = ['unweighted_unifrac', 'weighted_unifrac', 'braycurtis',
                'aitchison', 'jaccard']

# Hyphenate only the metric name, so an underscore elsewhere in the path can
# never be rewritten by accident.
beta_unfiltered = {
    metric: Artifact.load(f"data/diversity/beta/{metric.replace('_', '-')}.qza")
    for metric in beta_metrics
}
# The CTF distance matrix lives in its own directory.
beta_unfiltered['ctf'] = Artifact.load('data/diversity/ctf/distance_matrix.qza')

# Build the filtering metadata once rather than once per metric, then restrict
# every distance matrix to the samples described by `meta`.
paired_metadata = Metadata(meta)
beta = {
    metric: q2_diversity.filter_distance_matrix(
        dm, metadata=paired_metadata
    ).filtered_distance_matrix
    for metric, dm in beta_unfiltered.items()
}

In [4]:
# Long table of pairwise distances: one row per sample pair, one column per
# metric, with the two sample ids exposed as regular columns.
dist_delta = (
    pd.DataFrame({
        metric: dm.view(skbio.DistanceMatrix).filter(meta.index).to_series()
        for metric, dm in beta.items()
    })
    .rename_axis(['sample0', 'sample1'])
    .reset_index()
)
# Look up the subject of each sample in the pair. The index is dropped so the
# values line up positionally with the fresh RangeIndex of `dist_delta`.
dist_delta['hsi0'] = meta.loc[dist_delta['sample0'], 'host_subject_id'].reset_index(drop=True)
dist_delta['hsi1'] = meta.loc[dist_delta['sample1'], 'host_subject_id'].reset_index(drop=True)

# Keep only pairs where both samples come from the same subject, indexed by
# that subject.
same_subject = dist_delta['hsi0'] == dist_delta['hsi1']
single_dist = (
    dist_delta.loc[same_subject]
    .set_index('hsi0')
    .drop(columns=['sample0', 'sample1', 'hsi1'])
)
# The per-subject join below needs exactly one distance row per subject.
assert single_dist.index.is_unique, (
    'expected exactly one within-subject sample pair per host_subject_id'
)

# Express each distance in the unit its coefficient is reported for
# (Aitchison per 10 units, the others per 0.1 unit). Metrics not listed here
# (ctf) are left on their original scale.
metric_units = {
    'aitchison': 10,
    'unweighted_unifrac': 0.1,
    'weighted_unifrac': 0.1,
    'braycurtis': 0.1,
    'jaccard': 0.1,
}
scaled_metrics = list(metric_units)
single_dist[scaled_metrics] = single_dist[scaled_metrics].div(
    pd.Series(metric_units), axis='columns'
)

In [5]:
# One metadata row per subject (the first row seen for each host_subject_id),
# joined column-wise with that subject's within-pair distances.
subject_meta = meta.drop_duplicates('host_subject_id').set_index('host_subject_id')
meta_paired = pd.concat([subject_meta, single_dist], axis=1)

In [6]:
# Right-hand-side terms added to the distance metric in each model: the crude
# model adds only the intercept, the full model adds the listed covariates.
ref_eqs = {
    'crude': '1',
    'full': ('age_cat + sex + ana_location + surgery_year + asa_cat'
                ' + differentiation_grade + stage_tnm + radical_surgery'),
}
# One logistic regression of long_survival per (metric, model) combination,
# keyed by that pair. Fitting output is silenced with disp=False.
fits = {
    (metric, model): smf.logit(
        f'long_survival ~ {metric} + {ref_eq}', data=meta_paired
    ).fit(disp=False)
    for metric in beta
    for model, ref_eq in ref_eqs.items()
}

In [7]:
def summarize_fit(metric, model, fit):
    """
    Pulls the estimate for a single term out of a fitted model

    Parameters
    ----------
    metric : str
        Label of the term (row of `fit.params`) to report.
    model : str
        Model label stored in the `model` column of the result.
    fit : object
        Fitted result exposing `params`, `conf_int()` and `pvalues`,
        all indexed by term.

    Returns
    -------
    pd.DataFrame
        A single row indexed by `metric` with the columns `coeff`,
        `ci_lo`, `ci_hi`, `p-value` and `model`.
    """
    estimates = pd.concat([fit.params, fit.conf_int(), fit.pvalues], axis=1)
    estimates.columns = ['coeff', 'ci_lo', 'ci_hi', 'p-value']
    # A list indexer keeps the result a DataFrame rather than a Series.
    term_row = estimates.loc[[metric]].copy()
    term_row['model'] = model
    return term_row

In [8]:
# Stack the one-row summaries of every fit into a single table, indexed by
# metric with the model label in the `model` column.
fit_rows = [
    summarize_fit(metric, model, fit)
    for (metric, model), fit in fits.items()
]
summary = pd.concat(fit_rows, axis=0)
summary

Unnamed: 0,coeff,ci_lo,ci_hi,p-value,model
unweighted_unifrac,-0.645436,-1.233767,-0.057105,0.031539,crude
unweighted_unifrac,-0.696904,-1.614205,0.220398,0.136475,full
weighted_unifrac,-0.305314,-0.507704,-0.102925,0.003109,crude
weighted_unifrac,-0.391514,-0.708193,-0.074835,0.015388,full
braycurtis,-0.49026,-0.815087,-0.165432,0.003095,crude
braycurtis,-0.530409,-1.005788,-0.055031,0.028753,full
aitchison,-0.647653,-1.184014,-0.111292,0.01795,crude
aitchison,-0.352819,-1.234462,0.528824,0.432838,full
jaccard,-0.422651,-0.848343,0.003041,0.051659,crude
jaccard,-0.477922,-1.148151,0.192306,0.162234,full


In [9]:
# Row order and display labels of the final table.
metric_order = ['jaccard', 'aitchison', 'ctf', 'braycurtis',
                'unweighted_unifrac', 'weighted_unifrac']
metric_labels = {
    'aitchison': 'Aitchison (10 units)',
    'braycurtis': 'Bray Curtis (0.1 unit)',
    'ctf': 'CTF (1 unit)',
    'jaccard': 'Binary Jaccard (0.1 unit)',
    'unweighted_unifrac': 'unweighted UniFrac (0.1 unit)',
    'weighted_unifrac': 'weighted UniFrac (0.1 unit)',
}
model_labels = {'crude': 'Crude', 'full': 'Fully adjusted1'}

# Exponentiate the *negated* estimates. Negation swaps the interval bounds,
# which is why `ci_hi` is printed first, as the lower bound. The p-value column
# goes through the same transform but is not used in the formatted text.
# NOTE(review): presumably the sign flip reports the odds ratio in the opposite
# direction of the fitted coefficient - confirm this is the intended direction.
odds_ratios = np.exp(-summary.set_index('model', append=True))
formatted = odds_ratios.apply(
    lambda row: '{coeff:2.2f} ({ci_hi:2.2f}, {ci_lo:2.2f})'.format(**row.to_dict()),
    axis=1,
)
# Pivot the models into columns, order the metrics and apply display labels.
tidy_beta = (
    formatted
    .unstack()
    .loc[metric_order]
    .rename(index=metric_labels, columns=model_labels)
    .rename_axis(index='Metric', columns='')
)

In [10]:
# Write the formatted table as a tab-separated file. The target directory is
# created first so the cell also runs on a checkout where it does not exist.
os.makedirs('output/tables', exist_ok=True)
tidy_beta.to_csv('output/tables/table_s4.tsv', sep='\t')

In [11]:
# Display the formatted table that was written to output/tables/table_s4.tsv.
tidy_beta

Unnamed: 0_level_0,Crude,Fully adjusted1
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Binary Jaccard (0.1 unit),"1.53 (1.00, 2.34)","1.61 (0.83, 3.15)"
Aitchison (10 units),"1.91 (1.12, 3.27)","1.42 (0.59, 3.44)"
CTF (1 unit),"1.20 (0.54, 2.67)","1.94 (0.54, 6.98)"
Bray Curtis (0.1 unit),"1.63 (1.18, 2.26)","1.70 (1.06, 2.73)"
unweighted UniFrac (0.1 unit),"1.91 (1.06, 3.43)","2.01 (0.80, 5.02)"
weighted UniFrac (0.1 unit),"1.36 (1.11, 1.66)","1.48 (1.08, 2.03)"
