In [1]:
import qiime2
import exmp
from pathlib import Path
# replace os.path.join calls with Path functionality
import os.path
import pandas as pd

from qiime2.plugins.diversity.actions import filter_distance_matrix, pcoa as pcoa_action
from qiime2.plugins.longitudinal.actions import first_distances, first_differences

In [2]:
# --- Analysis configuration ---
# time_column: sample-metadata column passed as the state column to the
#              longitudinal actions and to the plotting helper.
# project:     value matched against the [project] metadata column and used
#              as the prefix of every output file name.
time_column = 'week'
project = 'exmp2'

# Reject unsupported settings before touching any data.
if time_column not in ('period', 'week'):
    raise ValueError("Invalid value for time_column.")

if time_column == 'week':
    data_dir = exmp.cm_path
    sample_metadata = exmp.load_sample_metadata()
    # need to assess how these values will be used
    baseline_time_value = 1.0
    reference_time_value = 2.0
else:
    # time_column == 'period'
    data_dir = exmp.cm_grouped_by_period_path
    sample_metadata = exmp.load_sample_metadata_grouped_by_period()
    # need to assess how these values will be used
    baseline_time_value = 1
    reference_time_value = 1

# All figures and tables produced by this notebook go under this directory;
# create it (and any missing parents) if it does not exist yet.
output_dir = os.path.join(data_dir, 'longitudinal-boxplots')
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [3]:
def _data_file(filename):
    """Return the path of `filename` inside `data_dir`."""
    return os.path.join(data_dir, filename)


# Each entry pairs a display label with the path of a .qza artifact in data_dir.

# Distance matrices
uu = ("unweighted UniFrac", _data_file("unweighted_unifrac_distance_matrix.qza"))
wu = ("weighted UniFrac", _data_file("weighted_unifrac_distance_matrix.qza"))
bc = ("Bray-Curtis", _data_file("bray_curtis_distance_matrix.qza"))
bj = ("Jaccard", _data_file("jaccard_distance_matrix.qza"))

# Alpha diversity vectors
faithpd = ("Faith PD", _data_file("faith_pd_vector.qza"))
evenness = ("Evenness", _data_file("evenness_vector.qza"))
shannon = ("Shannon", _data_file("shannon_vector.qza"))

In [4]:
# Restrict the sample metadata to the selected project, dropping every sample
# whose [exclude] column is not 'no'.
# NOTE: this rebinds sample_metadata to the filtered subset.
where = "[project]='%s' and [exclude]='no'" % (project,)
ids_to_keep = sample_metadata.get_ids(where=where)
sample_metadata = sample_metadata.filter_ids(ids_to_keep=ids_to_keep)

In [5]:
# Build the per-sample table `data` used for plotting:
#   * for each distance matrix: distance to the baseline time point and the
#     first three PCoA axes;
#   * for each alpha diversity vector: difference from the baseline time point.
#
# NOTE(review): this cell rebinds `sample_metadata` (alpha vectors are merged
# in inside the second loop, everything else at the end). Re-running it
# without first re-running the cells above therefore starts from
# already-merged metadata; restart from the top instead.


def _format_time_value(value):
    """Render a time-point value for use in a column label.

    Whole-number values such as 1 or 1.0 are rendered without a fractional
    part ("1"). Any other value is rendered with str(), so a baseline such as
    1.5 is kept intact instead of being truncated to "1" as '%d' would do.
    """
    as_float = float(value)
    if as_float.is_integer():
        return str(int(as_float))
    return str(value)


baseline_label = _format_time_value(baseline_time_value)

metadata_to_merge = []
distance_columns = []
pcoa_columns = []
alpha_columns = []

for metric, dm_fp in [uu, wu, bc, bj]:
    dm = qiime2.Artifact.load(dm_fp)
    # filter the distance matrix against the (already filtered) sample metadata
    dm = filter_distance_matrix(dm, metadata=sample_metadata).filtered_distance_matrix

    # add distances to baseline to sample metadata
    dists_to_baselines = first_distances(distance_matrix=dm, metadata=sample_metadata,
                                         state_column=time_column,
                                         individual_id_column='subject-id',
                                         baseline=baseline_time_value,
                                         replicate_handling='random').first_distances
    dists_to_baselines = dists_to_baselines.view(qiime2.Metadata).get_column('Distance').to_dataframe()
    column_name = '%s distance to %s %s' % (metric, time_column, baseline_label)
    dists_to_baselines = dists_to_baselines.rename(columns={'Distance': column_name})
    metadata_to_merge.append(qiime2.Metadata(dists_to_baselines))
    distance_columns.append(column_name)

    # add first three PCoA axes to sample metadata
    pcoa = pcoa_action(dm).pcoa
    pcoa = pcoa.view(qiime2.Metadata).to_dataframe()[['Axis 1', 'Axis 2', 'Axis 3']]
    column_names = {'Axis 1': '%s PCoA 1' % metric,
                    'Axis 2': '%s PCoA 2' % metric,
                    'Axis 3': '%s PCoA 3' % metric}
    pcoa = pcoa.rename(columns=column_names)

    metadata_to_merge.append(qiime2.Metadata(pcoa))
    pcoa_columns.extend(column_names.values())

for metric, alpha_vector_fp in [faithpd, evenness, shannon]:
    alpha_vector = qiime2.Artifact.load(alpha_vector_fp)
    # the Series name is the column name the vector gets once merged into the
    # metadata, which is what first_differences is told to read via `metric`
    alpha_vector_name = alpha_vector.view(pd.Series).name
    sample_metadata = sample_metadata.merge(alpha_vector.view(qiime2.Metadata))
    diffs_to_baselines = first_differences(metadata=sample_metadata,
                                           state_column=time_column, metric=alpha_vector_name,
                                           individual_id_column='subject-id',
                                           baseline=baseline_time_value,
                                           replicate_handling='random').first_differences
    diffs_to_baselines = diffs_to_baselines.view(qiime2.Metadata).get_column('Difference').to_dataframe()
    column_name = '%s difference from %s %s' % (metric, time_column, baseline_label)
    diffs_to_baselines = diffs_to_baselines.rename(columns={'Difference': column_name})
    metadata_to_merge.append(qiime2.Metadata(diffs_to_baselines))
    alpha_columns.append(column_name)

# fold every derived column into the sample metadata, then flatten to a DataFrame
for e in metadata_to_merge:
    sample_metadata = sample_metadata.merge(e)
data = sample_metadata.to_dataframe()



In [6]:
# Display the merged per-sample table (sample metadata plus the derived
# distance, PCoA and alpha-difference columns) as a sanity check.
data

Unnamed: 0_level_0,subject-id,week,project,exclude,activity,Age,Weight_Pre (kg),Weight_Post (kg),Fat_Free_Mass_Pre (kg),Fat_Free_Mass_Post (kg),...,Bray-Curtis PCoA 1,Bray-Curtis PCoA 2,Bray-Curtis PCoA 3,Jaccard distance to week 1,Jaccard PCoA 1,Jaccard PCoA 2,Jaccard PCoA 3,Faith PD difference from week 1,Evenness difference from week 1,Shannon difference from week 1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10be8,137b2,2.0,exmp2,no,baseline,18,75.07,75.60,47.49,48.22,...,-0.009827,0.183792,-0.264170,0.370370,0.020433,0.229559,0.065157,1.021310,-0.002333,0.074702
a6bdb,5c55c,2.0,exmp2,no,baseline,20,79.38,75.20,51.57,51.56,...,0.526308,-0.101704,-0.130404,0.500000,-0.222828,-0.223351,0.058822,-4.871112,-0.296667,-2.624660
3189a71,70911,2.0,exmp2,no,baseline,19,72.26,74.10,59.28,59.84,...,-0.178778,0.185967,-0.002802,0.509677,0.027557,0.186622,0.018310,0.734895,0.085662,0.730645
e937b,843eb,2.0,exmp2,no,baseline,25,72.40,70.35,57.28,57.19,...,-0.123795,-0.178322,-0.033053,0.588448,-0.208867,-0.171110,-0.016037,5.299400,0.194990,1.847311
aec31,96189,2.0,exmp2,no,baseline,22,65.45,64.50,45.27,45.60,...,-0.160178,0.041649,0.075352,0.636842,-0.083435,0.098746,-0.043679,1.226498,-0.087156,-0.540159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
bb52c8f,b6643,14.0,exmp2,no,washout,18,96.93,97.10,64.36,64.08,...,-0.225947,0.146109,-0.138179,0.330882,-0.092312,0.216392,-0.092679,-3.429067,-0.054578,-0.582767
4e15d76,dca0e,14.0,exmp2,no,washout,19,43.32,44.15,33.52,35.15,...,-0.046448,0.172655,-0.232922,0.418803,0.054670,0.184685,0.075650,1.218468,0.022865,0.380751
01c78ef,e1127,14.0,exmp2,no,washout,19,54.34,56.45,40.01,42.08,...,-0.002141,0.076948,0.212240,0.590909,0.251417,-0.019335,0.087408,0.491717,0.245947,1.639102
337f01c,e5b0a,14.0,exmp2,no,washout,19,55.16,55.35,40.32,40.46,...,-0.110263,-0.056343,0.085083,0.354167,-0.153011,0.055136,0.049815,2.850298,0.014116,0.260994


In [7]:
# Call the plotting helper once per derived column, in the order: distances to
# baseline, alpha diversity differences, PCoA axes. Each call receives a .pdf
# figure path and a .csv table path inside output_dir, both named
# '<project>-<column name with spaces replaced by dashes>'.
columns_to_plot = distance_columns + alpha_columns + pcoa_columns
for column in columns_to_plot:
    output_base_filename = '%s-%s' % (project, column.replace(' ', '-'))
    output_paths = {
        'output_figure_filepath': os.path.join(output_dir, '%s.pdf' % output_base_filename),
        'output_table_filepath': os.path.join(output_dir, '%s.csv' % output_base_filename),
    }
    exmp.plot_week_data_with_stats(data, metric=column, time_column=time_column,
                                   reference_time=reference_time_value,
                                   **output_paths)