In this notebook we analyze differential abundance in our samples, i.e. we test whether individual ASVs/taxa differ in abundance between sample groups. :)

We will first explore the data (finding out that the data is not normally distributed, shocking) and then use ANCOM as an appropriate statistical test.

In [1]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
import seaborn as sns
from scipy.stats import shapiro, kruskal, f_oneway

# Directory that the cells below read artifacts/metadata from and write results to.
data_dir = 'CE'
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Artifacts we need to run this Notebook:
1. feature table = 'dada2_table_align_filtered.qza'
2. metadata table = 'food-metadata.tsv'
3. taxonomic classification = 'taxonomy_v4.qza'


In [2]:
## Data Exploration

In [2]:
# Load the feature table artifact from data_dir.
data = q2.Artifact.load(f'{data_dir}/dada2_table_align_filtered.qza')

In [4]:
# View the artifact as a pandas DataFrame; in the rendered output below,
# rows are sample IDs and columns are feature IDs.
data.view(pd.DataFrame)

Unnamed: 0,709f9e4f47beb9eeac4dca4c0f279946,010318c557e27329d8ba4adbb2b7a4d5,d8805a58ee0553d4947a5697b758f581,1ae2286d0dd37c642adbc48139b03f81,152e13bd47890e8cf55b1a36c3106ea8,2bfe904710dbb886d40b939d46e23907,33d3a19706adae6023b345ea446ab938,e68ee64c4e6fdb1559af6c599e350160,813dd987a8e673038be4998fab0cbf35,feaae025a83a49935db1c8df8b8f5adc,...,f4b682e863fab1a479222e9abbf01561,67e83e672480975a7702cefc9f55637c,0c2828e2beb18ca39f40232b61522e13,dcf632540e8c8a20f81271979d895675,423b81f05724d61e7acda8946831e2e6,3412f71aa55edbec2dc3ec3a5e4c4b13,b4b1cad1578f75d8f95fdbe02125da6c,77a71fb632620380a1c9a9fedf14a769,e10c690b8ef551bb09ba7dd4d4c4393a,ba50da6157955c5189257fa30f34df00
11488.CSB279,0.0,15.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB280,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB281,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11488.CSB980,0.0,18.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB981,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB984,0.0,6.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11488.CSB985,0.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,631.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## ANCOM

In [5]:
# Drop rare features before testing: only retain features that are present at
# some minimal frequency (25, --p-min-frequency) and in at least 4 samples
# (--p-min-samples). The filtered table is written to table_abund254.qza and
# is the input of both sample-filtering cells below.
! qiime feature-table filter-features \
--i-table $data_dir/dada2_table_align_filtered.qza \
--p-min-frequency 25 \
--p-min-samples 4 \
--o-filtered-table $data_dir/table_abund254.qza

[32mSaved FeatureTable[Frequency] to: CE/table_abund254.qza[0m
[0m

In [6]:
# Example 1: compare differential abundance between continents.
# Keep only the samples whose `continent` metadata value is 'North_America'
# or 'Europe'; all other samples are removed from the table.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[continent]='North_America' or [continent]='Europe'" \
--o-filtered-table $data_dir/table_abund254_continent.qza

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_continent.qza[0m
[0m

In [7]:
# Add a pseudocount to the continent-filtered table. This turns the
# FeatureTable[Frequency] into the FeatureTable[Composition] artifact that the
# ANCOM cell below takes as input.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_continent.qza \
--o-composition-table $data_dir/table_abund254_continent_comp.qza

[32mSaved FeatureTable[Composition] to: CE/table_abund254_continent_comp.qza[0m
[0m

In [8]:
# Run ANCOM on the composition table, grouping samples by the `continent`
# metadata column and using the log transform function. The result is saved
# as a visualization (.qzv).
! qiime composition ancom \
--i-table $data_dir/table_abund254_continent_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column continent \
--p-transform-function log \
--o-visualization $data_dir/ancom254_continent.qzv

[32mSaved Visualization to: CE/ancom254_continent.qzv[0m
[0m

In [9]:
# Display the ANCOM visualization for the continent comparison
# (North_America vs. Europe) inline.
Visualization.load(f'{data_dir}/ancom254_continent.qzv')

In [10]:
# Example 2: compare differential abundance between rind types.
# Keep only the samples whose `rindtype` metadata value is 'washed' or
# 'natural'; all other samples are removed from the table.
! qiime feature-table filter-samples \
--i-table $data_dir/table_abund254.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--p-where "[rindtype]='washed' or [rindtype]='natural'" \
--o-filtered-table $data_dir/table_abund254_rindtype1.qza

[32mSaved FeatureTable[Frequency] to: CE/table_abund254_rindtype1.qza[0m
[0m

In [11]:
# Add a pseudocount to the rindtype-filtered table. This turns the
# FeatureTable[Frequency] into the FeatureTable[Composition] artifact that the
# ANCOM cell below takes as input.
! qiime composition add-pseudocount \
--i-table $data_dir/table_abund254_rindtype1.qza \
--o-composition-table $data_dir/table_abund254_rindtype1_comp.qza

[32mSaved FeatureTable[Composition] to: CE/table_abund254_rindtype1_comp.qza[0m
[0m

In [12]:
# Run ANCOM on the composition table, grouping samples by the `rindtype`
# metadata column and using the log transform function. The result is saved
# as a visualization (.qzv).
! qiime composition ancom \
--i-table $data_dir/table_abund254_rindtype1_comp.qza \
--m-metadata-file $data_dir/food-metadata.tsv \
--m-metadata-column rindtype \
--p-transform-function log \
--o-visualization $data_dir/ancom254_rindtype1.qzv

[32mSaved Visualization to: CE/ancom254_rindtype1.qzv[0m
[0m

In [14]:
# Display the ANCOM visualization for the rind type comparison
# (washed vs. natural) inline.
Visualization.load(f'{data_dir}/ancom254_rindtype1.qzv')