In this notebook we analyze differential abundance in our samples, i.e. we test whether individual ASVs/taxa differ in abundance between sample groups. :)

We will first explore the data (finding out that the data is not normally distributed, shocking) and then use ANCOM as an appropriate statistical test. 

In [17]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
import seaborn as sns
from scipy.stats import shapiro, kruskal, f_oneway

# Directory from which all artifact and metadata paths in this notebook are built
data_dir = 'CE'
# Render matplotlib figures inline in the notebook output
%matplotlib inline

Artifacts and files we need to run this notebook:
1. feature table = 'dada2_table_align_filtered.qza'
2. metadata table = 'food-metadata.tsv'
3. taxonomic classification = 'taxonomy_v4.qza'


In [18]:
## Data Exploration

In [19]:
# Load the feature table artifact (dada2_table_align_filtered.qza) from data_dir
feature_table_path = f'{data_dir}/dada2_table_align_filtered.qza'
data = q2.Artifact.load(feature_table_path)

In [20]:
# Show the feature table as a pandas DataFrame (rows: sample IDs, columns: feature IDs)
data.view(pd.DataFrame)

Unnamed: 0,b3893f7cc45caaa29ff9090021c98822,e1cafc36b65b17e4fde098189e9b7c7c,016c2c8bde3fb34cd8ad2ffe06bc46dc,851b5f19bc4c0d8030cd84e5107df306,67299ef82f06d47199dc55707c703470,ddcf8ac73ead02bf250d42fb791b44b3,c588453eafcc05c64576433a7c6ad7a1,490ba71cee0e74368ea4292c2ae4e3aa,d1d8b40053e9ac1c556ebdc0b0c16ca8,ae50fd09413b611de93cc744bf8a6ef8,...,21e643dfb99023c640efa1f55a77a19c,4f22f4408b840595732e68a76bb182b6,9287cd911bd5b22611f5e0013eed7fa9,596b99d88f77c05066427e325a88fa22,7c5b8d7d55b20ebe3998eb76371ee182,1dbcda0f1308ec18e4b7816298611e50,9b6d8c6d8a1de6c8b7c1ec24228c9737,66f682ebd56a1a79da8925b580bd4b78,8a6a9f3addc305db3e3e4d165a573fdb,1a6f2dbd655c66d0f4ae5dac6a1cef14
11488.CSB279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB280,0.0,280.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB281,0.0,3367.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB282,0.0,53.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,163.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11488.CSB980,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,49.0,0.0,0.0,0.0,0.0,0.0
11488.CSB981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0
11488.CSB984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB985,0.0,3661.0,0.0,0.0,0.0,0.0,0.0,138888.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## ANCOM

In [21]:
# Remove rare features before testing: only retain features that reach a minimal
# frequency of 25 and are present in at least 4 samples.
# The filtered table (table_abund254.qza) is the starting point for every comparison below.
! qiime feature-table filter-features \
--i-table $data_dir/dada2_table_align_filtered.qza \
--p-min-frequency 25 \
--p-min-samples 4 \
--o-filtered-table $data_dir/table_abund254.qza

[32mSaved FeatureTable[Frequency] to: CE/table_abund254.qza[0m
[0m

In [22]:
# Example: comparing differential abundance between continents.
# Keep only the samples whose metadata column `continent` is North_America or Europe.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[continent]='North_America' or [continent]='Europe'" \
--o-filtered-table $data_dir/table_abund254_continent.qza

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_continent.qza[0m
[0m

In [7]:
# Add a pseudocount to the continent-filtered table; this produces the
# FeatureTable[Composition] artifact that is passed to ANCOM in the next cell.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_continent.qza \
--o-composition-table $data_dir/table_abund254_continent_comp.qza

[32mSaved FeatureTable[Composition] to: CE/table_abund254_continent_comp.qza[0m
[0m

In [8]:
# Run ANCOM on the composition table, grouping samples by the metadata column
# `continent` and using the log transform; the result is saved as a visualization.
! qiime composition ancom \
--i-table $data_dir/table_abund254_continent_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column continent \
--p-transform-function log \
--o-visualization $data_dir/ancom254_continent.qzv

[32mSaved Visualization to: CE/ancom254_continent.qzv[0m
[0m

In [9]:
# Display the ANCOM result for the continent comparison (North_America vs. Europe)
Visualization.load(f'{data_dir}/ancom254_continent.qzv')

In [10]:
# Example: comparing differential abundance between rind types (washed vs. natural).
# Keep only the samples whose metadata column `rindtype` is washed or natural.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[rindtype]='washed' or [rindtype]='natural'" \
--o-filtered-table $data_dir/table_abund254_rindtype1.qza

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_rindtype1.qza[0m
[0m

In [11]:
# Add a pseudocount to the washed/natural table; this produces the
# FeatureTable[Composition] artifact that is passed to ANCOM in the next cell.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_rindtype1.qza \
--o-composition-table $data_dir/table_abund254_rindtype1_comp.qza

[32mSaved FeatureTable[Composition] to: CE/table_abund254_rindtype1_comp.qza[0m
[0m

In [12]:
# Run ANCOM on the washed/natural composition table, grouping samples by the
# metadata column `rindtype` and using the log transform.
! qiime composition ancom \
--i-table $data_dir/table_abund254_rindtype1_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column rindtype \
--p-transform-function log \
--o-visualization $data_dir/ancom254_rindtype1.qzv

[32mSaved Visualization to: CE/ancom254_rindtype1.qzv[0m
[0m

In [14]:
# Display the ANCOM result for the rind type comparison: washed vs. natural
Visualization.load(f'{data_dir}/ancom254_rindtype1.qzv')

In [4]:
#example: comparing diff. abundance within rindtype (bloomy or natural)
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[rindtype]='bloomy' or [rindtype]='natural'" \
--o-filtered-table $data_dir/table_abund254_rindtype2.qza

! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_rindtype1.qza \
--o-composition-table $data_dir/table_abund254_rindtype2_comp.qza

! qiime composition ancom \
--i-table $data_dir/table_abund254_rindtype2_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column rindtype \
--p-transform-function log \
--o-visualization $data_dir/ancom254_rindtype2.qzv

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_rindtype2.qza[0m
[0m[32mSaved FeatureTable[Composition] to: CE/table_abund254_rindtype2_comp.qza[0m
[0m[32mSaved Visualization to: CE/ancom254_rindtype2.qzv[0m
[0m

In [5]:
# Display the ANCOM result for the rind type comparison: bloomy vs. natural
Visualization.load(f'{data_dir}/ancom254_rindtype2.qzv')

In [8]:
# Example: comparing differential abundance between rind types (bloomy vs. washed).
# Step 1: keep only the samples whose metadata column `rindtype` is bloomy or washed.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[rindtype]='bloomy' or [rindtype]='washed'" \
--o-filtered-table $data_dir/table_abund254_rindtype3.qza

# Step 2: add a pseudocount to the bloomy/washed table created in step 1.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_rindtype3.qza \
--o-composition-table $data_dir/table_abund254_rindtype3_comp.qza

# Step 3: run ANCOM on the composition table, grouping samples by the metadata
# column `rindtype` and using the log transform.
! qiime composition ancom \
--i-table $data_dir/table_abund254_rindtype3_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column rindtype \
--p-transform-function log \
--o-visualization $data_dir/ancom254_rindtype3.qzv

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_rindtype3.qza[0m
[0m[32mSaved FeatureTable[Composition] to: CE/table_abund254_rindtype3_comp.qza[0m
[0m[32mSaved Visualization to: CE/ancom254_rindtype3.qzv[0m
[0m

In [9]:
# Display the ANCOM result for the rind type comparison: bloomy vs. washed
Visualization.load(f'{data_dir}/ancom254_rindtype3.qzv')

In [10]:
# Example: comparing differential abundance between styles (blue vs. clothbound).
# Step 1: keep only the samples whose metadata column `style` is blue or clothbound.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[style]='blue' or [style]='clothbound'" \
--o-filtered-table $data_dir/table_abund254_style1.qza

# Step 2: add a pseudocount to the blue/clothbound table created in step 1.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_style1.qza \
--o-composition-table $data_dir/table_abund254_style1_comp.qza

# Step 3: run ANCOM on the composition table, grouping samples by the metadata
# column `style` and using the log transform.
! qiime composition ancom \
--i-table $data_dir/table_abund254_style1_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column style \
--p-transform-function log \
--o-visualization $data_dir/ancom254_style1.qzv

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_style1.qza[0m
[0m[32mSaved FeatureTable[Composition] to: CE/table_abund254_style1_comp.qza[0m
[0m[32mSaved Visualization to: CE/ancom254_style1.qzv[0m
[0m

In [11]:
# Display the ANCOM result for the style comparison: blue vs. clothbound
Visualization.load(f'{data_dir}/ancom254_style1.qzv')

In [12]:
# Example: comparing differential abundance by pasteurization (Y vs. N).
# Step 1: keep only the samples whose metadata column `pasteurized` is Y or N.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[pasteurized]='Y' or [pasteurized]='N'" \
--o-filtered-table $data_dir/table_abund254_pasteurized.qza

# Step 2: add a pseudocount to the pasteurized-filtered table created in step 1.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_pasteurized.qza \
--o-composition-table $data_dir/table_abund254_pasteurized_comp.qza

# Step 3: run ANCOM on the composition table, grouping samples by the metadata
# column `pasteurized` and using the log transform.
! qiime composition ancom \
--i-table $data_dir/table_abund254_pasteurized_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column pasteurized \
--p-transform-function log \
--o-visualization $data_dir/ancom254_pasteurized.qzv

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_pasteurized.qza[0m
[0m[32mSaved FeatureTable[Composition] to: CE/table_abund254_pasteurized_comp.qza[0m
[0m[32mSaved Visualization to: CE/ancom254_pasteurized.qzv[0m
[0m

In [13]:
# Display the ANCOM result for the pasteurization comparison: Y vs. N
Visualization.load(f'{data_dir}/ancom254_pasteurized.qzv')