In [1]:
import os
import qiime2
import numpy as np
import pandas as pd
import scipy as sp

from qiime2.plugins.feature_table.methods import group, filter_samples, rarefy, filter_features
from qiime2.plugins.feature_table.visualizers import summarize
from qiime2.plugins.diversity.methods import alpha_phylogenetic, alpha, beta_phylogenetic, pcoa as pcoa_method
from qiime2.plugins.taxa.methods import collapse
from qiime2.plugins.sample_classifier.pipelines import metatable as metatable_pipeline, regress_samples, classify_samples
from qiime2.plugins.longitudinal.visualizers import anova
from qiime2.plugins.metadata.visualizers import tabulate



In [None]:
# Root of the shared analysis results. The original author's local Dropbox
# path is kept as the default so existing runs behave the same; set the
# EXMP_BASE_DIR environment variable to run the notebook on another machine.
base_dir = os.environ.get(
    "EXMP_BASE_DIR",
    "/Users/gregcaporaso/Dropbox/shared-analysis-results/2019.09-exmp/combined/",
)
sample_md_fp = os.path.join(base_dir, "sample-metadata", "sample-metadata.tsv")
data_dir = os.path.join(base_dir, "dada2-paired-long/")


# Input artifacts, all located under data_dir.
table_fp = os.path.join(data_dir, "table-sepp-no-excludes.qza")
taxonomy_fp = os.path.join(data_dir, "taxonomy-gtdb.qza")
phylogeny_fp = os.path.join(data_dir, "rooted-tree-sepp.qza")
# Precomputed PCoA results; not read by the cells visible in this notebook,
# which recompute the ordinations from the filtered table instead.
uu_pcoa_fp = os.path.join(data_dir, "cm5000/unweighted_unifrac_pcoa_results.qza")
wu_pcoa_fp = os.path.join(data_dir, "cm5000/weighted_unifrac_pcoa_results.qza")

In [None]:
# The sample metadata is a TSV file, so it is read with the Metadata loader
# rather than the Artifact loader.
metadata = qiime2.Metadata.load(sample_md_fp)

# The feature table, phylogeny and taxonomy are stored as .qza artifacts.
table, phylogeny, taxonomy = (
    qiime2.Artifact.load(artifact_fp)
    for artifact_fp in (table_fp, phylogeny_fp, taxonomy_fp)
)

In [None]:
# Outcome column used by the modelling cells further down.
dep_variable = 'VO2max_change'
project = 'exmp1'

# Keep only non-excluded samples of the selected project from weeks 1.0-3.0.
where = "[project]='%s' and [exclude]='no' and [week] IN ('1.0', '2.0', '3.0')" % project
ids_to_keep = metadata.get_ids(where=where)

# NOTE: `metadata` and `table` are rebound in place below, so this cell is not
# idempotent; re-run the load cell before re-running this one.
metadata = metadata.filter_ids(ids_to_keep=ids_to_keep)
table = filter_samples(table, metadata=metadata).filtered_table
# Require each feature to be present in at least 10 of the retained samples.
table = filter_features(table, min_samples=10).filtered_table

# Collapse the per-sample table to one entry per subject ('median-ceiling' mode).
table = group(table, axis='sample', metadata=metadata.get_column('subject-id'), mode='median-ceiling').grouped_table

# Build subject-level metadata: first row per subject, indexed by subject id.
# The index is named 'id' so the frame can be wrapped in qiime2.Metadata.
_df = metadata.to_dataframe().drop_duplicates('subject-id').set_index('subject-id')
_df.index.name = 'id'

# Columns to keep for the selected project, and the hyphen-free names they are
# given (the renamed columns are later used inside a model formula).
if project == 'exmp1':
    _columns = ['BMI-change', 'VO2max-change', 'BMI_Pre', 'PreTest_VO2max', 'body-fat-change',
                '%Body_Fat_Pre', 'RER-change']
    _renames = {'BMI-change': 'BMI_change',
                'VO2max-change': 'VO2max_change',
                'body-fat-change': 'body_fat_change',
                'RER-change': 'RER_change'}
else:
    _columns = ['BMI-change', 'BMI_Pre', 'body-fat-change',
                '%Body_Fat_Pre', 'row-change',
                'bench-press-change', '3RM-squat-change']
    _renames = {'BMI-change': 'BMI_change',
                'row-change': 'row_change',
                'body-fat-change': 'body_fat_change',
                'bench-press-change': 'bench_press_change',
                '3RM-squat-change': 'three_rep_max_squat_change'}

# Subjects with any missing value are dropped before the numeric cast.
# Builtin `float` replaces `np.float`, which was only an alias for it and was
# deprecated in NumPy 1.20 and removed in 1.24.
_df = _df[_columns].dropna().astype(float).rename(columns=_renames)
metadata = qiime2.Metadata(_df)

# Restrict the grouped table to the subjects that survived the dropna above.
table = filter_samples(table, metadata=metadata).filtered_table

In [None]:
# Rarefy to an even depth of 8500 before computing any diversity metric.
rarefied_table = rarefy(table, 8500).rarefied_table


def _unifrac_pcoa_axes(metric, label):
    """Compute a UniFrac distance matrix and its first three PCoA axes.

    Uses the notebook-level ``rarefied_table`` and ``phylogeny``.

    Parameters
    ----------
    metric : str
        Metric name passed to ``beta_phylogenetic``.
    label : str
        Prefix for the output columns (``<label>_PC1`` .. ``<label>_PC3``).

    Returns
    -------
    tuple
        ``(distance_matrix, axes)`` where ``axes`` is a DataFrame holding the
        renamed 'Axis 1'..'Axis 3' columns of the ordination.
    """
    distance_matrix = beta_phylogenetic(rarefied_table, phylogeny, metric=metric).distance_matrix
    ordination = pcoa_method(distance_matrix).pcoa
    axes = ordination.view(qiime2.Metadata).to_dataframe()[['Axis 1', 'Axis 2', 'Axis 3']]
    axes = axes.rename(columns={'Axis %d' % i: '%s_PC%d' % (label, i) for i in (1, 2, 3)})
    return distance_matrix, axes


# Alpha diversity on the rarefied table.
faith_pd = alpha_phylogenetic(rarefied_table, phylogeny, metric='faith_pd').alpha_diversity
shannon = alpha(rarefied_table, metric='shannon').alpha_diversity
pielou_e = alpha(rarefied_table, metric='pielou_e').alpha_diversity

# Beta diversity: distance matrix plus leading PCoA axes for each UniFrac variant.
wu_dm, wu_pcoa = _unifrac_pcoa_axes('weighted_unifrac', 'Weighted_UniFrac')
uu_dm, uu_pcoa = _unifrac_pcoa_axes('unweighted_unifrac', 'Unweighted_UniFrac')

# Merge one block of columns at a time, in the same order as before, so the
# column order of `df` is unchanged.
for _extra_columns in (qiime2.Metadata(wu_pcoa),
                       qiime2.Metadata(uu_pcoa),
                       faith_pd.view(qiime2.Metadata),
                       shannon.view(qiime2.Metadata),
                       pielou_e.view(qiime2.Metadata)):
    metadata = metadata.merge(_extra_columns)

# Flat table consumed by the statistics cells below.
df = metadata.to_dataframe()

In [None]:
# Pairwise Spearman rank correlations between all columns of df.
df.corr(method='spearman')


In [2]:
import statsmodels.api as sm
# Alternative entry point (disabled): rebuild `df` and `dep_variable` from a
# previously exported CSV instead of running the QIIME 2 cells above.
# The path is specific to the original author's machine.
# df = pd.read_csv('/Users/gregcaporaso/temp/exmp1-for-derek.csv')
# df = df.rename(columns={'subject-id':'id'})
# df = df.set_index('id')
# dep_variable = 'VO2max_change'

In [3]:
# Predictors: the three alpha diversity metrics plus the first three PCoA axes
# of weighted and unweighted UniFrac.
glm_predictors = [
    'faith_pd', 'shannon', 'pielou_e',
    'Weighted_UniFrac_PC1', 'Weighted_UniFrac_PC2', 'Weighted_UniFrac_PC3',
    'Unweighted_UniFrac_PC1', 'Unweighted_UniFrac_PC2', 'Unweighted_UniFrac_PC3',
]

# NOTE: despite the `gamma_` prefix this is a Gaussian GLM. Gaussian is the
# family statsmodels uses when none is given; it is spelled out here so the
# code matches the "Model Family: Gaussian" line in the summary output.
# The names are kept because later cells refer to them.
# No constant column is added, so the model is fit without an intercept.
gamma_model = sm.GLM(df[dep_variable], df[glm_predictors], family=sm.families.Gaussian())
gamma_results = gamma_model.fit()

In [4]:
# Plain-text summary table of the fitted GLM.
print(gamma_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:          VO2max_change   No. Observations:                   28
Model:                            GLM   Df Residuals:                       19
Model Family:                Gaussian   Df Model:                            8
Link Function:               identity   Scale:                          5.0016
Method:                          IRLS   Log-Likelihood:                -56.838
Date:                Tue, 28 Jan 2020   Deviance:                       95.031
Time:                        16:43:42   Pearson chi2:                     95.0
No. Iterations:                     3                                         
Covariance Type:            nonrobust                                         
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
faith_pd                  -1

In [5]:
# Same response and predictors as the GLM cell, fit by ordinary least squares.
ols_predictors = [
    'faith_pd', 'shannon', 'pielou_e',
    'Weighted_UniFrac_PC1', 'Weighted_UniFrac_PC2', 'Weighted_UniFrac_PC3',
    'Unweighted_UniFrac_PC1', 'Unweighted_UniFrac_PC2', 'Unweighted_UniFrac_PC3',
]

# No constant column is added to the design matrix, which is why the summary
# reports an uncentered R-squared.
mod = sm.OLS(df[dep_variable], df[ols_predictors])
res = mod.fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:          VO2max_change   R-squared (uncentered):                   0.585
Model:                            OLS   Adj. R-squared (uncentered):              0.388
Method:                 Least Squares   F-statistic:                              2.973
Date:                Tue, 28 Jan 2020   Prob (F-statistic):                      0.0218
Time:                        16:43:43   Log-Likelihood:                         -56.838
No. Observations:                  28   AIC:                                      131.7
Df Residuals:                      19   BIC:                                      143.7
Df Model:                           9                                                  
Covariance Type:            nonrobust                                                  
                             coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------

In [6]:
# Formula: the outcome regressed on every alpha diversity metric and PCoA axis.
# The continuation lines are part of the string literal and are left exactly
# as they were, so the formula text passed to the plugin does not change.
anova_formula = dep_variable + " ~ faith_pd + shannon + pielou_e + \
               Weighted_UniFrac_PC1 + Weighted_UniFrac_PC2 + Weighted_UniFrac_PC3 + \
               Unweighted_UniFrac_PC1 + Unweighted_UniFrac_PC2 + Unweighted_UniFrac_PC3"
anova_results = anova(metadata=qiime2.Metadata(df), formula=anova_formula)
# Last expression of the cell, so the visualization is rendered inline.
anova_results.visualization

In [None]:
%matplotlib inline
df[dep_variable].hist()

In [None]:
# Export the modelling table for Derek: the outcome plus every predictor,
# with the index labelled 'subject-id', written to the working directory.
export_columns = [
    dep_variable,
    'faith_pd', 'shannon', 'pielou_e',
    'Weighted_UniFrac_PC1', 'Weighted_UniFrac_PC2', 'Weighted_UniFrac_PC3',
    'Unweighted_UniFrac_PC1', 'Unweighted_UniFrac_PC2', 'Unweighted_UniFrac_PC3',
]
derek_metadata = df[export_columns].rename_axis('subject-id')
derek_metadata.to_csv('exmp1-for-derek.csv')

In [7]:
# Show the statsmodels version that produced the model fits in this notebook.
import statsmodels
statsmodels.__version__

'0.11.0'

In [12]:
# Display the table the models were fit on.
df

Unnamed: 0,subject-id,VO2max_change,faith_pd,shannon,pielou_e,Weighted_UniFrac_PC1,Weighted_UniFrac_PC2,Weighted_UniFrac_PC3,Unweighted_UniFrac_PC1,Unweighted_UniFrac_PC2,Unweighted_UniFrac_PC3
0,01cd,-3.4,12.528185,5.244139,0.796381,0.08081,0.181229,0.027694,-0.140004,0.105009,0.063339
1,0966,6.45,9.96681,3.680577,0.604616,-0.251197,-0.062315,0.176309,-0.051075,0.074521,0.124318
2,1464,-2.54,9.98516,4.011937,0.654552,0.109282,0.174704,0.052001,-0.038935,0.243928,0.03034
3,2fa8,-3.3,8.877454,3.795,0.623412,-0.171856,0.021406,-0.179844,0.021226,-0.048051,-0.144785
4,3543,2.31,8.070896,4.234963,0.71407,-0.188909,0.057729,-0.140894,0.115245,-0.115495,-0.091143
5,4004,3.41,8.933249,3.32069,0.551393,-0.398626,-0.146385,0.023705,-0.020146,-0.007051,-0.022157
6,415f,3.55,11.601411,5.153588,0.793855,0.097658,0.119372,0.098439,-0.0575,0.163225,0.133197
7,4216,-1.52,8.619923,4.257344,0.72676,-0.071624,0.14228,-0.093678,0.045535,0.094355,-0.113993
8,5840,0.62,9.02511,3.059147,0.504303,0.303211,-0.050269,-0.070291,-0.087641,-0.082028,-0.100019
9,5a38,-2.71,7.867468,2.185372,0.393435,0.372743,-0.328398,-0.166601,0.1262,0.116814,-0.060188
