In [1]:
import os

import numpy as np
import pandas as pd
import qiime2 as q2
from biom import load_table, Table
from qiime2.plugins.gemelli.actions import ctf
from qiime2.plugins.longitudinal.actions import volatility


ModuleNotFoundError: No module named 'qiime2'

# Skin

In [3]:
# subset skin: load the right-forearm feature table and its sample metadata
bt_tmp = q2.Artifact.load('../data/split-data/Baby-Right_Forearm/table.qza')
mf_tmp = q2.Metadata.load('../data/split-data/Baby-Right_Forearm/metadata.qza').to_dataframe()

# restrict the metadata to samples taken on or before day 360
# (rows whose day is missing compare False and are dropped too)
mf_tmp = mf_tmp[mf_tmp.date_sampling_category_days_continuous.astype(float) <= 360]

# work on a biom Table copy limited to the retained samples
bt_tmp = bt_tmp.view(Table).copy()
bt_tmp = bt_tmp.filter(mf_tmp.index)
# prevalence filter: per matrix row (observation), count the non-zero entries
# and keep rows exceeding int(1% of bt_tmp.shape[1]) -- presumably the sample count
freq_filts = np.count_nonzero(bt_tmp.matrix_data.toarray(), axis=1) \
              > int(.01 * bt_tmp.shape[1])
bt_tmp = bt_tmp.filter(bt_tmp.ids('observation')[freq_filts], axis='observation')
# align the metadata rows with the ids left in the filtered table
mf_tmp = mf_tmp.reindex(bt_tmp.ids())

# import into qiime2
q2bt_tmp = q2.Artifact.import_data('FeatureTable[Frequency]', bt_tmp)
q2mf_tmp = q2.Metadata(mf_tmp)

# run ctf (increased iterations)
skin_res = ctf(q2bt_tmp,
               q2mf_tmp,
               'subjectid_unique',
               'date_sampling_category_days_continuous',
               n_components=6,
               max_iterations_als=200,
               max_iterations_rptm=200,
               n_initializations=200)

# save results; create the output folder first so the long ctf run above is
# not lost to a missing directory on a fresh checkout
os.makedirs('../data/ctf-results/skin', exist_ok=True)
for name_, art_ in skin_res.__dict__.items():
    # '_fields' is not an output artifact (presumably the tuple of output
    # names kept on the results object), so there is nothing to save for it
    if name_ == '_fields':
        continue
    art_.save('../data/ctf-results/skin/%s' % name_)




# Fecal

In [4]:
# subset fecal: load the feces feature table and its sample metadata
bt_tmp = q2.Artifact.load('../data/split-data/Baby-Feces/table.qza')
mf_tmp = q2.Metadata.load('../data/split-data/Baby-Feces/metadata.qza').to_dataframe()

# restrict the metadata to samples taken on or before day 360
# (rows whose day is missing compare False and are dropped too)
mf_tmp = mf_tmp[mf_tmp.date_sampling_category_days_continuous.astype(float) <= 360]
# NOTE(review): indexing a DataFrame with a boolean DataFrame masks individual
# cells (NaN stays NaN); it does not drop rows, so this line looks like a
# no-op. If the intent was to remove rows with missing values, a
# dropna(subset=[...]) on the relevant columns is needed -- confirm intent.
mf_tmp = mf_tmp[~mf_tmp.isin([np.nan])]

# work on a biom Table copy limited to the retained samples
bt_tmp = bt_tmp.view(Table).copy()
bt_tmp = bt_tmp.filter(mf_tmp.index)
# prevalence filter: per matrix row (observation), count the non-zero entries
# and keep rows exceeding int(1% of bt_tmp.shape[1]) -- presumably the sample count
freq_filts = np.count_nonzero(bt_tmp.matrix_data.toarray(), axis=1) > int(.01 * bt_tmp.shape[1])
bt_tmp = bt_tmp.filter(bt_tmp.ids('observation')[freq_filts], axis='observation')
# align the metadata rows with the ids left in the filtered table
mf_tmp = mf_tmp.reindex(bt_tmp.ids())

# import into qiime2
q2bt_tmp = q2.Artifact.import_data('FeatureTable[Frequency]', bt_tmp)
q2mf_tmp = q2.Metadata(mf_tmp)

# run ctf (increased iterations)
fecal_res = ctf(q2bt_tmp,
                q2mf_tmp,
                'subjectid_unique',
                'date_sampling_category_days_continuous',
                n_components=3,
                max_iterations_als=55,
                max_iterations_rptm=55,
                n_initializations=55)

# save results; create the output folder first so the ctf run above is not
# lost to a missing directory on a fresh checkout
os.makedirs('../data/ctf-results/fecal', exist_ok=True)
for name_, art_ in fecal_res.__dict__.items():
    # '_fields' is not an output artifact (presumably the tuple of output
    # names kept on the results object), so there is nothing to save for it
    if name_ == '_fields':
        continue
    art_.save('../data/ctf-results/fecal/%s' % name_)




# Oral

In [5]:
# subset oral: load the mouth feature table and its sample metadata
bt_tmp = q2.Artifact.load('../data/split-data/Baby-Mouth/table.qza')
mf_tmp = q2.Metadata.load('../data/split-data/Baby-Mouth/metadata.qza').to_dataframe()

# restrict the metadata to samples taken on or before day 360
# (rows whose day is missing compare False and are dropped too)
mf_tmp = mf_tmp[mf_tmp.date_sampling_category_days_continuous.astype(float) <= 360]

# work on a biom Table copy limited to the retained samples
bt_tmp = bt_tmp.view(Table).copy()
bt_tmp = bt_tmp.filter(mf_tmp.index)
# prevalence filter: per matrix row (observation), count the non-zero entries
# and keep rows exceeding int(5% of bt_tmp.shape[1]) -- presumably the sample count.
# NOTE(review): the cut-off in this cell is 5%; confirm that is intentional.
freq_filts = np.count_nonzero(bt_tmp.matrix_data.toarray(), axis=1) > int(.05 * bt_tmp.shape[1])
bt_tmp = bt_tmp.filter(bt_tmp.ids('observation')[freq_filts], axis='observation')
# align the metadata rows with the ids left in the filtered table
mf_tmp = mf_tmp.reindex(bt_tmp.ids())

# import into qiime2
q2bt_tmp = q2.Artifact.import_data('FeatureTable[Frequency]', bt_tmp)
q2mf_tmp = q2.Metadata(mf_tmp)

# run ctf with explicit iteration / initialization settings
oral_res = ctf(q2bt_tmp,
               q2mf_tmp,
               'subjectid_unique',
               'date_sampling_category_days_continuous',
               n_components=4,
               max_iterations_als=25,
               max_iterations_rptm=25,
               n_initializations=25)

# save results; create the output folder first so the ctf run above is not
# lost to a missing directory on a fresh checkout
os.makedirs('../data/ctf-results/oral', exist_ok=True)
for name_, art_ in oral_res.__dict__.items():
    # '_fields' is not an output artifact (presumably the tuple of output
    # names kept on the results object), so there is nothing to save for it
    if name_ == '_fields':
        continue
    art_.save('../data/ctf-results/oral/%s' % name_)


