In [None]:
import qiime2
import pandas as pd
import os.path
import exmp

# Project whose samples are summarized; it is used both in the metadata
# filter and as the prefix of the output file name.
project = 'exmp1'

# The summary CSV is written to <exmp.base_dir>/data/<project>-samples-per-subject-by-week.csv
output_filename = '%s-samples-per-subject-by-week.csv' % project
output_filepath = os.path.join(exmp.base_dir, 'data', output_filename)

In [None]:
# Load the study-wide sample metadata (presumably a qiime2 Metadata object,
# given the get_ids / filter_ids / to_dataframe calls below -- confirm in exmp).
sample_md = exmp.load_sample_metadata()

# Restrict to samples that belong to the selected project and are not
# flagged for exclusion.
where = "[project]='%s' and [exclude]='no'" % project
ids_to_keep = sample_md.get_ids(where=where)
sample_md = sample_md.filter_ids(ids_to_keep=ids_to_keep)

# Convert to a DataFrame with a numeric 'week' column; values that cannot be
# parsed as numbers become NaN (errors='coerce').
df = sample_md.to_dataframe().assign(
    week=lambda frame: pd.to_numeric(frame['week'], errors='coerce'))

# Distinct subjects and weeks, in order of first appearance (the weeks may
# include NaN when a week value could not be parsed).
subject_ids = df['subject-id'].unique()
weeks = df['week'].unique()

In [None]:
# Count samples per subject per week in one grouped pass.
# groupby skips rows whose week is NaN (non-numeric weeks were coerced to NaN
# when the metadata was loaded), so the table no longer gets a spurious
# all-zero column labelled NaN, and the week columns come out in ascending
# order instead of first-appearance order.
data = df.groupby(['subject-id', 'week']).size().unstack(fill_value=0)
data.columns.name = None

# Keep one (zero-filled) row for every subject seen in the metadata, including
# subjects whose samples all lack a numeric week.
data = data.reindex(subject_ids, fill_value=0)
data.index.name = 'subject-id'

# Per-subject summary: first and last sampled week, plus the number of
# distinct weeks with at least one sample.
week_summary = (
    df.groupby('subject-id')['week']
      .agg(['min', 'max'])
      .add_prefix('week ')      # -> 'week min', 'week max'
)
week_summary['week count'] = data.astype(bool).sum(axis=1)

# Combine the per-week counts with the summary columns and list the most
# frequently sampled subjects first.
result = pd.merge(data, week_summary,
                  left_index=True, right_index=True).sort_values(by="week count", ascending=False)
result.to_csv(output_filepath)

In [None]:
# Display the per-subject summary table (the same frame that was written to
# output_filepath).
result