In [1]:
import pandas as pd
import biom
import qiime2
import numpy as np
import os
import skbio

from qiime2.plugins.emperor.actions import (plot, biplot)
from qiime2.plugins.diversity.actions import (beta_phylogenetic, pcoa, beta)
from qiime2.plugins.feature_table.actions import (rarefy, filter_samples)

from qiime2.plugins.taxa.actions import collapse

import matplotlib.pyplot as plt
plt.rcParams['svg.fonttype'] = 'none'

import seaborn as sns

# from skbio.stats.ordination import pcoa
from scipy import stats
%matplotlib inline



# Processing the input data

We will use data from Allaband et. al. 20XX. The table needs to be collapsed and matched to the taxonomy for downstream analyses. We will also aggregate time points to make for hull visualizations with more points.

One of the most useful applications of a convex hull calculation in the context of a metagenomic study is quantifying the dispersion of different conditions over time.

In [2]:
qtree = qiime2.Artifact.load('data/chfig-IHIC-new-rooted-tree.qza')

qtax = qiime2.Artifact.load('data/chfig-IHIC_taxonomy.qza')
tax = qtax.view(pd.DataFrame).to_dict()['Taxon']

btab = biom.load_table('data/chfig-Haddad_IHIC_reference-hit.biom')
qtab = qiime2.Artifact.import_data('FeatureTable[Frequency]', btab)

meta = pd.read_csv('data/chfig-10_week_IH_IC_MetaData.txt', sep='\t')
meta = meta.drop(columns=['dna_extracted', 'physical_specimen_remaining'])
ID = 'sample_name'

In [3]:
!mkdir data/tab/

mkdir: data/tab/: File exists


### Match biom table to taxonomy

In [4]:
bids = btab.ids('observation')
tids = list(tax.keys())
ids = set(bids) & set(tids)

fbtab = btab.filter(ids, axis='observation')

qfbtab = qiime2.Artifact.import_data('FeatureTable[Frequency]', fbtab)

In [5]:
taxdf = pd.DataFrame(
    {'Feature ID': list(tax.keys()),
    'Taxon':list(tax.values())}
)
taxdf.set_index('Feature ID', inplace=True)
qtax = qiime2.Artifact.import_data('FeatureData[Taxonomy]', taxdf)
qtax.save('data/taxonomy.qza')

'data/taxonomy.qza'

### Bin ages 

In [6]:
def age_map(x):
    if x<=12:
        return 12
    if x >=12 and x < 14:
        return 14
    if x >=14 and x < 16:
        return 16
    if x >=16 and x < 18:
        return 18
    if x >=18:
        return 20

def age_match(x):
    if x == 10.0:
        return 10.0
    if x == 10.5:
        return 10.5
    if x == 15.0:
        return 15.0
    if x == 20.0:
        return 20.0
    return 'remove'
    
meta['age_bin'] = meta['host_age'].map(age_map)
meta['age_match'] = meta['host_age'].map(age_match)
meta['age_bin']

0       12.0
1       16.0
2       16.0
3       16.0
4       18.0
        ... 
1032     NaN
1033     NaN
1034     NaN
1035     NaN
1036     NaN
Name: age_bin, Length: 1037, dtype: float64

### Match metadata to table 

Filter out blanks from metadata as well.

In [7]:
def match_meta_to_table(meta, btab, ID='sample_name'):
    mids = list(meta[ID])
    tids = btab.ids('sample')
    
    ids = list(set(mids) & set(tids))
    
    meta = meta.set_index(ID).loc[ids].reset_index()
    fbtab = btab.filter(ids, 'sample', inplace=False)
    return meta, fbtab
    
# SUBSET
feces = meta['sample_type'] == 'stool'
noblanks = ~(meta[ID].str.contains('BLANK'))
timepoints = ~(meta['age_match'] == 'remove')
m = meta[feces & noblanks & timepoints]

# MATCH
m, fbtab = match_meta_to_table(m, btab)

# SAVE METADATA
m.to_csv('data/metadata.tsv', sep='\t', index=False)


### Collapse table and save

In [9]:
qtab = qiime2.Artifact.import_data('FeatureTable[Frequency]', fbtab)

res = collapse(
    table=qtab,
    taxonomy=qtax,
    level=7
)
res.collapsed_table.save('data/table.qza')

btab = res.collapsed_table.view(biom.Table)
with biom.util.biom_open('data/table.biom', 'w') as f:
    btab.to_hdf5(f, "example")