#### Adam Klie<br>04/08/2020
## Process downloaded data into necessary Qiime2 artifacts
 - Takes as input samples.biom table and metadata.tsv files generated from download_data.ipynb
 - Generates a filtered frequency table, filtered rep seqs, a fragment insertion tree, and taxonomy

## Requirements
 - qiime2 kernel

In [40]:
import biom
import qiime2
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from qiime2.plugins import metadata, feature_table

In [41]:
# Working directory for this run: the input files (samples.biom, metadata.tsv)
# are read from here and rep_seqs.fna is written here.
data_dir = '../data/test/2020_05_05'

### Load FeatureTable[Frequency] and Metadata

In [5]:
# Import the BIOM v2.1.0 file in data_dir as a FeatureTable[Frequency] artifact.
biom_path = '{}/samples.biom'.format(data_dir)
table = qiime2.Artifact.import_data(
    type='FeatureTable[Frequency]',
    view=biom_path,
    view_type='BIOMV210Format',
)

In [73]:
# Load the sample metadata TSV from data_dir.
# NOTE: this rebinds the name `metadata`, shadowing the `metadata` plugin
# imported from qiime2.plugins at the top of the notebook.
metadata_path = '{}/metadata.tsv'.format(data_dir)
metadata = qiime2.Metadata.load(metadata_path)

In [7]:
# Summarize the feature table, passing the sample metadata to the visualizer.
table_vis = feature_table.visualizers.summarize(
    table=table,
    sample_metadata=metadata,
)

In [8]:
# Bare expression as the last line of the cell: the notebook renders the
# `visualization` output of the summarize results.
table_vis.visualization

```python
%%time
h_map = feature_table.visualizers.heatmap(table=table,
                                  sample_metadata=metadata.get_column('exercise_frequency'),
                                  normalize=True,
                                  cluster='features')
```

In [68]:
# Write the table's column labels to a FASTA file, one record per feature.
# Each column label of the table's DataFrame view is written as the sequence
# body, under a generated header >seq1, >seq2, ... in column order.
# NOTE(review): the generated IDs (seqN) differ from the table's own feature IDs
# (the column labels) -- confirm downstream steps do not need them to match.
feature_seqs = table.view(pd.DataFrame).columns

# Build all records first and join once: this avoids quadratic string
# concatenation and, with '\n' as the separator, leaves no trailing newline
# to trim. The file is opened only after the records are built, so a failure
# while viewing the table does not truncate an existing rep_seqs.fna.
fasta_records = ['>seq{}\n{}'.format(i, seq)
                 for i, seq in enumerate(feature_seqs, start=1)]

with open('{}/rep_seqs.fna'.format(data_dir), 'w') as f:
    f.write('\n'.join(fasta_records))

In [69]:
# Import the rep_seqs.fna FASTA file in data_dir as a FeatureData[Sequence] artifact.
rep_seqs_path = '{}/rep_seqs.fna'.format(data_dir)
rep_seqs = qiime2.Artifact.import_data(type='FeatureData[Sequence]',
                                       view=rep_seqs_path)

In [70]:
# Run the tabulate_seqs visualizer on the imported representative sequences.
tabulate_seqs_vis = feature_table.visualizers.tabulate_seqs(data=rep_seqs)

In [71]:
# Bare expression as the last line of the cell: the notebook renders the
# `visualization` output of the tabulate_seqs results.
tabulate_seqs_vis.visualization

In [None]:
# NOTE(review): this is `qiime` command-line syntax, not Python, so it will not
# run as-is in a Python cell. Presumably kept as a reference for the Artifact
# API call in the next cell -- confirm. The file names and the [subject] column
# are placeholders, not files from data_dir.
qiime feature-table filter-samples \
  --i-table table.qza \
  --m-metadata-file sample-metadata.tsv \
  --p-where "[subject]='subject-1'" \
  --o-filtered-table subject-1-filtered-table.qza

In [None]:
# Call the filter_samples method on the imported table.
# NOTE(review): no filter criteria (e.g. metadata / where) are passed here,
# unlike the CLI example in the previous cell -- presumably unfinished; confirm.
# NOTE(review): Artifact API actions appear to return a results object (cf. the
# `.visualization` access on visualizer results above), so the table artifact
# itself is presumably `filtered_table.filtered_table` -- verify.
filtered_table = feature_table.methods.filter_samples(table=table)