In [1]:
import qiime2
import exmp
from pathlib import Path
# TODO: replace the os.path.join calls below with pathlib.Path operations
import os.path
import pandas as pd

from qiime2.plugins.diversity.actions import filter_distance_matrix, pcoa as pcoa_action
from qiime2.plugins.longitudinal.actions import first_distances, first_differences

In [2]:
# --- Analysis configuration --------------------------------------------------
# `time_column` selects which branch below supplies the data directory, the
# sample metadata loader and the baseline/reference time values.
time_column = 'period'
project = 'exmp2'

# Reject unsupported settings before anything is loaded.
if time_column not in ('period', 'week'):
    raise ValueError("Invalid value for time_column.")

if time_column == 'week':
    # Baseline value 1.0, reference value 2.0.
    data_dir, sample_metadata = exmp.cm_path, exmp.load_sample_metadata()
    baseline_time_value, reference_time_value = 1.0, 2.0
else:
    # Period-grouped inputs: the value 1 serves as both baseline and reference.
    data_dir, sample_metadata = (exmp.cm_grouped_by_period_path,
                                 exmp.load_sample_metadata_grouped_by_period())
    baseline_time_value, reference_time_value = 1, 1

# Outputs go in a 'longitudinal-boxplots' directory beneath the selected data
# directory; it is created (with parents) if it does not exist yet.
output_dir = os.path.join(data_dir, 'longitudinal-boxplots')
Path(output_dir).mkdir(parents=True, exist_ok=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_sample_metadata['period'])]


In [3]:
# Every name below is a (display label, file path) pair; the path points at a
# .qza file inside `data_dir`.

# Distance matrices.
uu, wu, bc, bj = (
    (label, os.path.join(data_dir, filename))
    for label, filename in [
        ("unweighted UniFrac", "unweighted_unifrac_distance_matrix.qza"),
        ("weighted UniFrac", "weighted_unifrac_distance_matrix.qza"),
        ("Bray-Curtis", "bray_curtis_distance_matrix.qza"),
        ("Jaccard", "jaccard_distance_matrix.qza"),
    ]
)

# Per-sample vectors.
faithpd, evenness, shannon = (
    (label, os.path.join(data_dir, filename))
    for label, filename in [
        ("Faith PD", "faith_pd_vector.qza"),
        ("Evenness", "evenness_vector.qza"),
        ("Shannon", "shannon_vector.qza"),
    ]
)

In [4]:
# Narrow the sample metadata to IDs whose `project` column matches the
# configured project and whose `exclude` column is 'no'.
where = "[project]='%s' and [exclude]='no'" % project
ids_to_keep = sample_metadata.get_ids(where)
sample_metadata = sample_metadata.filter_ids(ids_to_keep)

In [5]:
# NOTE(review): this cell rebinds `sample_metadata` (the alpha vectors and every
# derived column are merged into it), so re-running it on its own starts from
# already-merged metadata -- confirm before executing cells out of order.

# Derived metadata tables, merged into `sample_metadata` at the end of the cell.
metadata_to_merge = []
# Names of the derived columns, grouped by kind, for use when plotting.
distance_columns, pcoa_columns, alpha_columns = [], [], []

for metric, dm_fp in (uu, wu, bc, bj):
    # Load the distance matrix and filter it against the current sample metadata.
    dm = filter_distance_matrix(
        qiime2.Artifact.load(dm_fp), metadata=sample_metadata
    ).filtered_distance_matrix

    # Distances to the baseline state, one column per metric.
    baseline_results = first_distances(
        distance_matrix=dm,
        metadata=sample_metadata,
        state_column=time_column,
        individual_id_column='subject-id',
        baseline=baseline_time_value,
        replicate_handling='random',
    )
    column_name = '%s distance to %s %d' % (metric, time_column, baseline_time_value)
    dists_to_baselines = (
        baseline_results.first_distances
        .view(qiime2.Metadata)
        .get_column('Distance')
        .to_dataframe()
        .rename(columns={'Distance': column_name})
    )
    # NOTE: IDs from `ids_to_keep` that are missing from this result are not
    # back-filled (an earlier 0.0 back-fill was disabled).
    metadata_to_merge.append(qiime2.Metadata(dists_to_baselines))
    distance_columns.append(column_name)

    # First three PCoA axes, renamed so that each metric gets its own columns.
    pcoa_axes = ['Axis 1', 'Axis 2', 'Axis 3']
    column_names = {'Axis 1' : '%s PCoA 1' % metric,
                    'Axis 2' : '%s PCoA 2' % metric,
                    'Axis 3' : '%s PCoA 3' % metric}
    pcoa = (
        pcoa_action(dm).pcoa
        .view(qiime2.Metadata)
        .to_dataframe()[pcoa_axes]
        .rename(columns=column_names)
    )
    metadata_to_merge.append(qiime2.Metadata(pcoa))
    pcoa_columns += list(column_names.values())

for metric, alpha_vector_fp in (faithpd, evenness, shannon):
    alpha_vector = qiime2.Artifact.load(alpha_vector_fp)
    alpha_vector_name = alpha_vector.view(pd.Series).name

    # The vector is merged into the metadata first because first_differences
    # is given the metric by column name together with that metadata.
    sample_metadata = sample_metadata.merge(alpha_vector.view(qiime2.Metadata))
    difference_results = first_differences(
        metadata=sample_metadata,
        state_column=time_column,
        metric=alpha_vector_name,
        individual_id_column='subject-id',
        baseline=baseline_time_value,
        replicate_handling='random',
    )
    column_name = '%s difference from %s %d' % (metric, time_column, baseline_time_value)
    diffs_to_baselines = (
        difference_results.first_differences
        .view(qiime2.Metadata)
        .get_column('Difference')
        .to_dataframe()
        .rename(columns={'Difference': column_name})
    )
    # NOTE: IDs from `ids_to_keep` that are missing from this result are not
    # back-filled (an earlier 0.0 back-fill was disabled).
    metadata_to_merge.append(qiime2.Metadata(diffs_to_baselines))
    alpha_columns.append(column_name)

# Fold every derived table into the sample metadata, one at a time and in the
# order they were produced, then expose the result as a DataFrame.
for derived_metadata in metadata_to_merge:
    sample_metadata = sample_metadata.merge(derived_metadata)
data = sample_metadata.to_dataframe()



In [6]:
# One figure per derived column. When the time column is not 'period', the
# stats variant of the plot is used and additionally receives a CSV table path.
for column in [*distance_columns, *alpha_columns, *pcoa_columns]:
    # File names are "<project>-<column name with spaces turned into dashes>".
    output_base_filename = '%s-%s' % (project, column.replace(' ', '-'))
    output_figure_filepath = os.path.join(output_dir, '%s.pdf' % output_base_filename)
    output_table_filepath = os.path.join(output_dir, '%s.csv' % output_base_filename)

    # Keyword arguments common to both plotting helpers.
    shared_kwargs = dict(metric=column,
                         time_column=time_column,
                         output_figure_filepath=output_figure_filepath)

    if time_column != 'period':
        exmp.plot_week_data_with_stats(data,
                                       reference_time=reference_time_value,
                                       output_table_filepath=output_table_filepath,
                                       **shared_kwargs)
    else:
        exmp.plot_week_data(data, **shared_kwargs)