# Prepare the Microbiome Feature Tables for Analysis with Metabolomics data

In [82]:
import pandas as pd
import numpy as np
import biom
import os
from qiime2 import Visualization


%matplotlib inline

# ITS

### 1. Subset to PostMLF 

|          	| ASV table 	| 90% closed ref OTUs 	| OTUs collapsed on genus 	|
|----------	|-----------	|---------------------	|-------------------------	|
| features 	| 333       	| 97                  	| 68                      	|


> Rarefy to the minimum sampling depth (2835) so that all samples are kept.



In [78]:
# Run every ITS step from the ITS results directory; relative paths used by the
# cells below (exports, clustered tables, TSVs) resolve against it.
# The directory is created on first use so the notebook also runs on a fresh
# checkout (this replaces the manual, commented-out `mkdir`).
ITS_results_dir = '/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/ITS'
os.makedirs(ITS_results_dir, exist_ok=True)
os.chdir(ITS_results_dir)

#### 1.1. Rarefied ASV table

In [56]:
%%bash 

# Abort at the first failing step so later steps never run on stale inputs.
set -euo pipefail

ITS_DIR=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux
MV_DIR="$ITS_DIR/spatio-temporal/microvinification"
# Post-MLF table written by step 1 and consumed by every later step.
POSTMLF_TABLE="$ITS_DIR/mv_PostMLF_filtered_table.qza"

# 1. Subset to PostMLF (samples whose sample_type is 'Post-MLF'),
#    then keep only the representative sequences still present in that table.
qiime feature-table filter-samples \
    --i-table "$ITS_DIR/mv_filtered_table.qza" \
    --m-metadata-file /home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv \
    --p-where "[sample_type]='Post-MLF'" \
    --o-filtered-table "$POSTMLF_TABLE"

qiime feature-table filter-seqs \
    --i-data /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/Sven-denoising-test/ITS/revcomp-trimmed-denoised/with-adjusted-parameters/ITS-revcomp-trimmed-ER-0.05-denoised-trunc-190-PFA-4.0-maxee-4.0-rep-seqs.qza \
    --i-table "$POSTMLF_TABLE" \
    --o-filtered-data "$ITS_DIR/mv_PostMLF_filtered_rep_seqs.qza"

# NOTE(review): summarize and rarefy used to read a same-named table under
# spatio-temporal/microvinification/ that this cell never writes. They now read
# the table produced in step 1; output locations are unchanged. Confirm that
# the two tables were meant to be the same.
qiime feature-table summarize \
    --i-table "$POSTMLF_TABLE" \
    --o-visualization "$MV_DIR/mv_PostMLF_filtered_table.qzv"

# 2. Rarefy
qiime feature-table rarefy \
    --i-table "$POSTMLF_TABLE" \
    --p-sampling-depth 2835 \
    --o-rarefied-table "$MV_DIR/mv_PostMLF_rarefied2835.qza"

# 3. Export (BIOM directory is written relative to the current working directory)
qiime tools export \
    --input-path "$MV_DIR/mv_PostMLF_rarefied2835.qza" \
    --output-path mv_PostMLF_rarefied2835

Saved FeatureTable[Frequency] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_table.qza
Saved FeatureData[Sequence] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_rep_seqs.qza
Saved Visualization to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_filtered_table.qzv
Saved FeatureTable[Frequency] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_rarefied2835.qza
Exported /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_rarefied2835.qza as BIOMV210DirFmt to directory mv_PostMLF_rarefied2835


> **333 features**


In [57]:
# Load the summary of the Post-MLF ITS table; as the cell's last expression it is rendered inline.
ITS_filtered_table_qzv = '/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_filtered_table.qzv'
Visualization.load(ITS_filtered_table_qzv)

In [85]:
# Load the exported rarefied ASV table (feature ids on the index, sample ids as columns)
biom_table = biom.load_table("mv_PostMLF_rarefied2835/feature-table.biom")
df_ITS = biom_table.to_dataframe()

# Load the metadata and index it by sample id ONCE (it used to be re-indexed for
# every column), then rename each sample to 'PostMLF_<Year>_Plot<Plot>'.
# NOTE(review): samples sharing Year and Plot would receive identical labels - confirm none do.
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
ITS_md_by_id = ITS_md.set_index('id')
df_ITS.columns = [
    'PostMLF_' + str(ITS_md_by_id.loc[col, 'Year']) + '_Plot' + str(ITS_md_by_id.loc[col, 'Plot'])
    for col in df_ITS.columns
]

# Load the taxonomy (indexed by 'Feature ID') and replace each feature id by its 'Taxon' string.
# NOTE(review): several ASVs can share one taxon string, so labels are not unique;
# ids missing from the taxonomy become NaN.
ITS_taxonomy = pd.read_csv('/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/taxonomy/taxonomy.tsv', sep='\t').set_index('Feature ID')
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# Transpose to samples x features; the sample labels end up in the 'index' column
df_ITS = df_ITS.T.reset_index()

df_ITS.to_csv('PostMLF_ASVs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,index,k__Fungi;p__Ascomycota,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Hyaloscyphaceae;g__Pseudaegerita;s__Pseudaegerita_sp;sh__SH1255515.10FU,k__Fungi;p__Ascomycota.1,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria;s__Alternaria_subcucurbitae;sh__SH1206908.10FU,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Capnodiales;f__Capnodiales_fam_Incertae_sedis;g__Ramimonilia;s__Ramimonilia_apicalis;sh__SH1394102.10FU,k__Fungi;p__Ascomycota.2,k__Fungi;p__Basidiomycota;c__Malasseziomycetes;o__Malasseziales;f__Malasseziaceae;g__Malassezia;s__Malassezia_restricta;sh__SH1382014.10FU,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp;sh__SH1213166.10FU,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria;s__Alternaria_eureka;sh__SH1206790.10FU,...,k__Fungi;p__Basidiomycota;c__Exobasidiomycetes;o__Microstromatales;f__Microstromataceae;g__Microstroma;s__Microstroma_bacarum,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Glomerellales;f__Plectosphaerellaceae;g__Verticillium;s__Verticillium_dahliae;sh__SH1269301.10FU,k__Fungi;p__Ascomycota.3,k__Fungi;p__Ascomycota;c__Lecanoromycetes;o__Teloschistales;f__Teloschistaceae;g__Gallowayella;s__Gallowayella_poeltii,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__Saccharomycetales;f__Saccharomycodaceae;g__Hanseniaspora;s__Hanseniaspora_uvarum;sh__SH1339598.10FU,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Cladosporiales;f__Cladosporiaceae;g__Cladosporium;s__Cladosporium_herbarum;sh__SH1335104.10FU,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Neophaeothecales;f__Neophaeothecaceae;g__Nothophaeotheca;s__Nothophaeotheca_mirabibensis;sh__SH1394800.10FU,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__Saccharomycetales;f__Saccharomycodaceae;g__Hanseniaspora;s__H
anseniaspora_uvarum;sh__SH1339598.10FU.1,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Cladosporiales;f__Cladosporiaceae;g__Cladosporium;s__Cladosporium_herbarum;sh__SH1335104.10FU.1
0,PostMLF_2023_Plot4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,PostMLF_2023_Plot13,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,PostMLF_2023_Plot9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,PostMLF_2023_Plot5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,PostMLF_2021_Plot12,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### 1.2. Closed Ref OTU clustering 

> note. UNITE release_10.05.2021

In [58]:
%%bash 

# Closed-reference OTU clustering of the Post-MLF ITS features at 90% identity
# against the UNITE 99% reference sequences; outputs go to the working directory.
lavaux_dir=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux
unite_ref_seqs=/home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/unite-ver8-99-seqs-10.05.2021.qza

qiime vsearch cluster-features-closed-reference \
    --i-sequences "${lavaux_dir}/mv_PostMLF_filtered_rep_seqs.qza" \
    --i-table "${lavaux_dir}/mv_PostMLF_filtered_table.qza" \
    --i-reference-sequences "${unite_ref_seqs}" \
    --p-perc-identity 0.9 \
    --p-threads 10 \
    --o-clustered-table PostMLF_closedOTU_table.qza \
    --o-clustered-sequences PostMLF_closedOTU_seq.qza \
    --o-unmatched-sequences PostMLF_closedOTU_unmatched_seq.qza

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_table.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_seq.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_unmatched_seq.qza


In [59]:
# Summarise the closed-reference OTU table into a .qzv visualization (written to the working directory)
!qiime feature-table summarize --i-table PostMLF_closedOTU_table.qza --o-visualization PostMLF_closedOTU_table.qzv

[32mSaved Visualization to: PostMLF_closedOTU_table.qzv[0m
[0m

> **97 features**


In [60]:
# Load the closed-reference OTU table summary; as the last expression it is rendered inline.
ITS_closedOTU_qzv = 'PostMLF_closedOTU_table.qzv'
Visualization.load(ITS_closedOTU_qzv)

In [61]:
%%bash 

# Shared stem of the rarefied artifact and its export directory
rarefied_stem=PostMLF_closedOTU_rarefied_table

# Step 2: rarefy the closed-reference OTU table to a sampling depth of 2835
qiime feature-table rarefy \
    --i-table PostMLF_closedOTU_table.qza \
    --p-sampling-depth 2835 \
    --o-rarefied-table "${rarefied_stem}.qza"

# Step 3: export the rarefied artifact into a directory of the same name
qiime tools export \
    --input-path "${rarefied_stem}.qza" \
    --output-path "${rarefied_stem}"

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_table.qza
Exported PostMLF_closedOTU_rarefied_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_table


In [87]:
# Export the UNITE taxonomy artifact (release 10.05.2021, ver8 99%) to ./unite-ver8-99-taxonomy;
# the exported taxonomy.tsv is read with pandas to label the closed-reference OTUs.
!qiime tools export --input-path /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/sh_taxonomy_qiime_ver8_99_10.05.2021.qza --output-path unite-ver8-99-taxonomy

[32mExported /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/sh_taxonomy_qiime_ver8_99_10.05.2021.qza as TSVTaxonomyDirectoryFormat to directory unite-ver8-99-taxonomy[0m
[0m

In [90]:
# Load the exported rarefied closed-reference OTU table (feature ids on the index, sample ids as columns)
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_table/feature-table.biom")
df_ITS = biom_table.to_dataframe()

# Load the metadata and index it by sample id ONCE (it used to be re-indexed for
# every column), then rename each sample to 'PostMLF_<Year>_Plot<Plot>'.
# NOTE(review): samples sharing Year and Plot would receive identical labels - confirm none do.
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
ITS_md_by_id = ITS_md.set_index('id')
df_ITS.columns = [
    'PostMLF_' + str(ITS_md_by_id.loc[col, 'Year']) + '_Plot' + str(ITS_md_by_id.loc[col, 'Plot'])
    for col in df_ITS.columns
]

# Load the exported UNITE reference taxonomy (indexed by 'Feature ID') and
# replace each OTU id by its 'Taxon' string; ids missing from it become NaN.
ITS_taxonomy = pd.read_csv('unite-ver8-99-taxonomy/taxonomy.tsv', sep='\t').set_index('Feature ID')
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# Transpose to samples x features; the sample labels end up in the 'index' column
df_ITS = df_ITS.T.reset_index()

df_ITS.to_csv('PostMLF_cOTUs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,index,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Hyaloscyphaceae;g__Hyaloscypha;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Capnodiales;f__Cladosporiaceae;g__Cladosporium;s__Cladosporium_austrohemisphaericum,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria;s__Alternaria_subcucurbitae,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Botryosphaeriales;f__Planistromellaceae;g__Ramimonilia;s__Ramimonilia_apicalis,k__Fungi;p__Basidiomycota;c__Malasseziomycetes;o__Malasseziales;f__Malasseziaceae;g__Malassezia;s__Malassezia_restricta,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria;s__Alternaria_eureka,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Sporidiobolales;f__Sporidiobolaceae;g__Rhodotorula;s__Rhodotorula_graminis,...,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paracamarosporium;s__Paracamarosporium_hawaiiense,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__unidentified;s__unidentified,k__Fungi;p__Basidiomycota;c__Pucciniomycetes;o__Pucciniales;f__Melampsoraceae;g__Melampsora;s__Melampsora_epitea,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Dothideales;f__Dothioraceae;g__Hormonema;s__Hormonema_macrosporum,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Hypocreales;f__Clavicipitaceae;g__Claviceps;s__Claviceps_purpurea,k__Fungi;p__Ascomycota;c__Eurotiomycetes;o__Chaetothyriales;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Dothideales;f__Dothideaceae;g__Endoconidioma;s__Endoconidioma_populi,k__Fungi;p__Ascomycota;c__Arthoniomycetes;o__Lichenostigmatales;f__Phaeococcomycetaceae;g__Phaeococcomyces;s__unidentified,k__Fungi;p__Ascomyco
ta;c__Sordariomycetes;o__Hypocreales;f__Hypocreales_fam_Incertae_sedis;g__Acremonium;s__unidentified,k__Fungi;p__Ascomycota;c__Lecanoromycetes;o__Teloschistales;f__Teloschistaceae;g__Gallowayella;s__Gallowayella_poeltii
0,PostMLF_2023_Plot4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,PostMLF_2023_Plot13,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,PostMLF_2023_Plot9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,PostMLF_2023_Plot5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,PostMLF_2021_Plot12,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### 1.3. Collapse on Genus Level

In [62]:
%%bash 

# Shared stem of the genus-level artifact, its summary and its export directory
genus_stem=PostMLF_closedOTU_rarefied_genus_table
unite_taxonomy=/home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/sh_taxonomy_qiime_ver8_99_10.05.2021.qza

# Collapse the rarefied closed-reference OTU table at taxonomic level 6
qiime taxa collapse \
    --i-table PostMLF_closedOTU_rarefied_table.qza \
    --i-taxonomy "${unite_taxonomy}" \
    --p-level 6 \
    --o-collapsed-table "${genus_stem}.qza"

# Summarise the collapsed table, then export it next to the artifact
qiime feature-table summarize \
    --i-table "${genus_stem}.qza" \
    --o-visualization "${genus_stem}.qzv"

qiime tools export \
    --input-path "${genus_stem}.qza" \
    --output-path "${genus_stem}"

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_genus_table.qza
Saved Visualization to: PostMLF_closedOTU_rarefied_genus_table.qzv
Exported PostMLF_closedOTU_rarefied_genus_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_genus_table


> **68 features** 

In [63]:
# Load the genus-level table summary; as the last expression it is rendered inline.
ITS_genus_qzv = 'PostMLF_closedOTU_rarefied_genus_table.qzv'
Visualization.load(ITS_genus_qzv)

In [93]:
# Load the exported genus-collapsed table; its feature ids are already taxonomy
# strings, so only the samples need renaming.
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_genus_table/feature-table.biom")
df_ITS = biom_table.to_dataframe()

# Load the metadata and index it by sample id ONCE (it used to be re-indexed for
# every column), then rename each sample to 'PostMLF_<Year>_Plot<Plot>'.
# NOTE(review): samples sharing Year and Plot would receive identical labels - confirm none do.
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
ITS_md_by_id = ITS_md.set_index('id')
df_ITS.columns = [
    'PostMLF_' + str(ITS_md_by_id.loc[col, 'Year']) + '_Plot' + str(ITS_md_by_id.loc[col, 'Plot'])
    for col in df_ITS.columns
]

# Transpose to samples x features; the sample labels end up in the 'index' column
df_ITS = df_ITS.T.reset_index()

df_ITS.to_csv('PostMLF_cOTUs_genus_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,index,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__unidentified;f__unidentified;g__unidentified,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Hyaloscyphaceae;g__Hyaloscypha,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Capnodiales;f__Cladosporiaceae;g__Cladosporium,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Botryosphaeriales;f__Planistromellaceae;g__Ramimonilia,k__Fungi;p__Basidiomycota;c__Malasseziomycetes;o__Malasseziales;f__Malasseziaceae;g__Malassezia,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Sporidiobolales;f__Sporidiobolaceae;g__Rhodotorula,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__Saccharomycetales;f__Saccharomycodaceae;g__Hanseniaspora,...,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Botryosphaeriales;f__Botryosphaeriaceae;g__Dothiorella,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paracamarosporium,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Dothideales;f__Dothioraceae;g__Hormonema,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Hypocreales;f__Clavicipitaceae;g__Claviceps,k__Fungi;p__Ascomycota;c__Eurotiomycetes;o__Chaetothyriales;f__unidentified;g__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Dothideales;f__Dothideaceae;g__Endoconidioma,k__Fungi;p__Ascomycota;c__Arthoniomycetes;o__Lichenostigmatales;f__Phaeococcomycetaceae;g__Phaeococcomyces,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Hypocreales;f__Hypocreales_fam_Incertae_sedis;g__Acremonium,k__Fungi;p__Ascomycota;c__Lecanoromycetes;o__Teloschistales;f__Teloschistaceae;g__Gallowayella
0,PostMLF_2023_Plot4,0,0,0,0,0,0,0,0,2349.0,...,0,0,0,0,0,0,0,0,0,0
1,PostMLF_2023_Plot13,0,0,0,0,0,0,0,0,421.0,...,0,0,0,0,0,0,0,0,0,0
2,PostMLF_2023_Plot9,0,0,0,0,0,0,0,0,2641.0,...,0,0,0,0,0,0,0,0,0,0
3,PostMLF_2023_Plot5,0,0,0,0,0,0,0,0,2722.0,...,0,0,0,0,0,0,0,0,0,0
4,PostMLF_2021_Plot12,0,0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0


<hr>

# 16S

|          	| ASV table 	| 99% closed ref OTUs 	| OTUs collapsed on genus 	|
|----------	|-----------	|---------------------	|-------------------------	|
| features 	| 326       	| 203                 	| 56                      	|

### 1. Subset to PostMLF 


In [97]:
# Run every 16S step from the 16S results directory; relative paths used by the
# cells below (exports, clustered tables, TSVs) resolve against it.
# The directory is created on first use so the notebook also runs on a fresh
# checkout (this replaces the manual, commented-out `mkdir`).
Bac_results_dir = '/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/16S'
os.makedirs(Bac_results_dir, exist_ok=True)
os.chdir(Bac_results_dir)

#### 1.1. Rarefied ASV table

> Rarefy to a sampling depth of 100 to keep as many samples as possible.


In [52]:
%%bash 

# Locations of the 16S artifacts and of the Post-MLF table produced below
bac_dir=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S
postmlf_table="${bac_dir}/lavaux/mv_PostMLF_filtered_table.qza"

# Step 1a: keep only the samples whose sample_type is 'Post-MLF'
qiime feature-table filter-samples \
    --i-table "${bac_dir}/lavaux/mv_filtered_table.qza" \
    --m-metadata-file /home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv \
    --p-where "[sample_type]='Post-MLF'" \
    --o-filtered-table "${postmlf_table}"

# Step 1b: keep only the representative sequences still present in that table
qiime feature-table filter-seqs \
    --i-data "${bac_dir}/bac-dada2-single/dada-rep-seqs-220-ee4-fa4.qza" \
    --i-table "${postmlf_table}" \
    --o-filtered-data "${bac_dir}/lavaux/mv_PostMLF_filtered_rep_seqs.qza"

Saved FeatureTable[Frequency] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza
Saved FeatureData[Sequence] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_rep_seqs.qza


In [41]:
# Summarise the Post-MLF 16S feature table into a .qzv visualization stored next to the table
!qiime feature-table summarize --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza --o-visualization /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qzv

[32mSaved Visualization to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qzv[0m
[0m

> **326 features**


In [42]:
# Load the summary of the Post-MLF 16S table; as the cell's last expression it is rendered inline.
Bac_filtered_table_qzv = '/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qzv'
Visualization.load(Bac_filtered_table_qzv)

In [53]:
%%bash 

# Directory holding the Post-MLF 16S artifacts
lavaux_dir=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux

# Step 2: rarefy the Post-MLF ASV table to a sampling depth of 100
qiime feature-table rarefy \
    --i-table "${lavaux_dir}/mv_PostMLF_filtered_table.qza" \
    --p-sampling-depth 100 \
    --o-rarefied-table "${lavaux_dir}/mv_PostMLF_rarefied.qza"

# Step 3: export the rarefied artifact into ./mv_PostMLF_rarefied
qiime tools export \
    --input-path "${lavaux_dir}/mv_PostMLF_rarefied.qza" \
    --output-path mv_PostMLF_rarefied

Saved FeatureTable[Frequency] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_rarefied.qza
Exported /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_rarefied.qza as BIOMV210DirFmt to directory mv_PostMLF_rarefied


In [95]:
# Export the 16S taxonomy artifact to a directory containing taxonomy.tsv.
# Re-enabled (it was commented out) because the ASV labelling step reads
# .../artifacts/16S/taxonomy/taxonomy.tsv, which only exists after this export.
!qiime tools export --input-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy.qza --output-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy

[32mExported /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy.qza as TSVTaxonomyDirectoryFormat to directory /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy[0m
[0m

In [103]:
# Load the exported rarefied 16S ASV table (feature ids on the index, sample ids as columns)
biom_table = biom.load_table("mv_PostMLF_rarefied/feature-table.biom")
df_16S = biom_table.to_dataframe()

# Load the metadata and index it by sample id ONCE (it used to be re-indexed for
# every column), then rename each sample to 'PostMLF_<Year>_Plot<Plot>'.
# NOTE(review): samples sharing Year and Plot would receive identical labels - confirm none do.
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
Bac_md_by_id = Bac_md.set_index('id')
df_16S.columns = [
    'PostMLF_' + str(Bac_md_by_id.loc[col, 'Year']) + '_Plot' + str(Bac_md_by_id.loc[col, 'Plot'])
    for col in df_16S.columns
]

# Load the taxonomy (indexed by 'Feature ID') and rename the index of df_16S
# based on the 'Taxon' column of Bac_taxonomy.
# NOTE(review): several ASVs can share one taxon string, so labels are not unique;
# ids missing from the taxonomy become NaN.
Bac_taxonomy = pd.read_csv('/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy/taxonomy.tsv', sep='\t').set_index('Feature ID')
df_16S.index = df_16S.index.map(Bac_taxonomy['Taxon'])

# Transpose to samples x features; the sample labels end up in the 'index' column
df_16S = df_16S.T.reset_index()

df_16S.to_csv('PostMLF_ASVs_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,index,d__Bacteria;p__Planctomycetota;c__Planctomycetes;o__Pirellulales;f__Pirellulaceae;g__uncultured;s__uncultured_bacterium,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia-Shigella;s__Escherichia_sp.,d__Bacteria;p__Chloroflexi;c__Anaerolineae;o__Caldilineales;f__Caldilineaceae;g__uncultured;s__metagenome,d__Bacteria;p__Planctomycetota;c__Phycisphaerae;o__Tepidisphaerales;f__Tepidisphaeraceae;g__Tepidisphaeraceae;s__uncultured_bacterium,d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Solirubrobacter,d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Acetobacterales;f__Acetobacteraceae;g__Acetobacter;s__Acetobacter_cerevisiae,d__Bacteria;p__Chloroflexi;c__Anaerolineae;o__Anaerolineales;f__Anaerolineaceae;g__uncultured,d__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Pseudonocardiales;f__Pseudonocardiaceae;g__Actinomycetospora;s__uncultured_bacterium,d__Bacteria;p__Planctomycetota;c__Planctomycetes;o__Gemmatales;f__Gemmataceae;g__uncultured,...,d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhodobacterales;f__Rhodobacteraceae,d__Bacteria;p__Acidobacteriota;c__Blastocatellia;o__Pyrinomonadales;f__Pyrinomonadaceae;g__RB41,d__Bacteria;p__Chloroflexi;c__TK10;o__TK10;f__TK10;g__TK10;s__uncultured_Chloroflexus,d__Bacteria;p__Acidobacteriota;c__Vicinamibacteria;o__Vicinamibacterales;f__Vicinamibacteraceae;g__Vicinamibacteraceae,d__Bacteria;p__Chloroflexi;c__Anaerolineae;o__SBR1031;f__A4b;g__A4b;s__metagenome,d__Bacteria;p__Acidobacteriota;c__Vicinamibacteria;o__Vicinamibacterales;f__Vicinamibacteraceae;g__Vicinamibacteraceae.1,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae,d__Bacteria;p__Acidobacteriota;c__Vicinamibacteria;o__Vicinamibacterales;f__Vicinamibacteraceae;g__Vicinamibacteraceae;s__uncultured_bacterium,d__Bacteria;p__Acidobacteriota;c__Vicinamibacteria;o__Vicinamibacterales;
f__uncultured;g__uncultured,d__Bacteria;p__Chloroflexi;c__Chloroflexia;o__Thermomicrobiales;f__JG30-KF-CM45;g__JG30-KF-CM45;s__uncultured_bacterium
0,PostMLF_2023_Plot4,0.0,0.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0
1,PostMLF_2023_Plot13,0.0,5.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,1.0,0,0,0
2,PostMLF_2023_Plot9,0.0,0.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0
3,PostMLF_2023_Plot5,0.0,0.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0
4,PostMLF_2021_Plot17,1.0,0.0,0,0,1.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0


#### 1.2. Closed Ref OTU clustering 

In [68]:
%%bash 

# Closed-reference OTU clustering of the Post-MLF 16S features at 99% identity
# against the SILVA 138 99% reference sequences; outputs go to the working directory.
lavaux_dir=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux
silva_ref_seqs=/home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-seqs.qza

qiime vsearch cluster-features-closed-reference \
    --i-sequences "${lavaux_dir}/mv_PostMLF_filtered_rep_seqs.qza" \
    --i-table "${lavaux_dir}/mv_PostMLF_filtered_table.qza" \
    --i-reference-sequences "${silva_ref_seqs}" \
    --p-perc-identity 0.99 \
    --p-threads 10 \
    --o-clustered-table PostMLF_closedOTU_table.qza \
    --o-clustered-sequences PostMLF_closedOTU_seq.qza \
    --o-unmatched-sequences PostMLF_closedOTU_unmatched_seq.qza

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_table.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_seq.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_unmatched_seq.qza


In [69]:
# Summarise the 16S closed-reference OTU table into a .qzv visualization (written to the working directory)
!qiime feature-table summarize --i-table PostMLF_closedOTU_table.qza --o-visualization PostMLF_closedOTU_table.qzv

[32mSaved Visualization to: PostMLF_closedOTU_table.qzv[0m
[0m

> **203 features**


In [71]:
# Load the 16S closed-reference OTU table summary; as the last expression it is rendered inline.
Bac_closedOTU_qzv = 'PostMLF_closedOTU_table.qzv'
Visualization.load(Bac_closedOTU_qzv)

In [72]:
%%bash 

# Shared stem of the rarefied artifact and its export directory
rarefied_stem=PostMLF_closedOTU_rarefied_table

# Step 2: rarefy the closed-reference OTU table to a sampling depth of 100
qiime feature-table rarefy \
    --i-table PostMLF_closedOTU_table.qza \
    --p-sampling-depth 100 \
    --o-rarefied-table "${rarefied_stem}.qza"

# Step 3: export the rarefied artifact into a directory of the same name
qiime tools export \
    --input-path "${rarefied_stem}.qza" \
    --output-path "${rarefied_stem}"

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_table.qza
Exported PostMLF_closedOTU_rarefied_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_table


In [105]:
# Export the SILVA 138 (99%) taxonomy artifact to ./silva-138-99-tax;
# the exported taxonomy.tsv is read with pandas to label the closed-reference OTUs.
!qiime tools export --input-path /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-tax.qza --output-path silva-138-99-tax

[32mExported /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-tax.qza as TSVTaxonomyDirectoryFormat to directory silva-138-99-tax[0m
[0m

In [106]:
# Load the exported rarefied 16S closed-reference OTU table (feature ids on the index, sample ids as columns)
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_table/feature-table.biom")
df_16S = biom_table.to_dataframe()

# Load the metadata and index it by sample id ONCE (it used to be re-indexed for
# every column), then rename each sample to 'PostMLF_<Year>_Plot<Plot>'.
# NOTE(review): samples sharing Year and Plot would receive identical labels - confirm none do.
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
Bac_md_by_id = Bac_md.set_index('id')
df_16S.columns = [
    'PostMLF_' + str(Bac_md_by_id.loc[col, 'Year']) + '_Plot' + str(Bac_md_by_id.loc[col, 'Plot'])
    for col in df_16S.columns
]

# Load the exported SILVA reference taxonomy (indexed by 'Feature ID') and rename
# the index of df_16S based on the 'Taxon' column of Bac_taxonomy.
# NOTE(review): these taxon strings use '; ' between ranks, whereas the ASV and
# genus tables use ';' - normalise before matching labels across tables.
Bac_taxonomy = pd.read_csv('silva-138-99-tax/taxonomy.tsv', sep='\t').set_index('Feature ID')
df_16S.index = df_16S.index.map(Bac_taxonomy['Taxon'])

# Transpose to samples x features; the sample labels end up in the 'index' column
df_16S = df_16S.T.reset_index()

df_16S.to_csv('PostMLF_cOTUs_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,index,d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacterales; f__Enterobacteriaceae; g__Escherichia-Shigella; s__Escherichia_coli,d__Bacteria; p__Actinobacteriota; c__Thermoleophilia; o__Solirubrobacterales; f__Solirubrobacteraceae; g__Solirubrobacter; s__bacterium_Ellin6048,d__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Acetobacterales; f__Acetobacteraceae; g__Acetobacter; s__Acetobacter_cerevisiae,d__Bacteria; p__Actinobacteriota; c__Actinobacteria; o__Pseudonocardiales; f__Pseudonocardiaceae; g__Actinomycetospora; s__uncultured_bacterium,d__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Leuconostocaceae; g__Oenococcus; s__Oenococcus_oeni,d__Bacteria; p__Planctomycetota; c__Planctomycetes; o__Pirellulales; f__Pirellulaceae; g__Pirellula; s__uncultured_Planctomycetaceae,d__Bacteria; p__Methylomirabilota; c__Methylomirabilia; o__Rokubacteriales; f__Rokubacteriales; g__Rokubacteriales; s__uncultured_bacterium,d__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Acetobacterales; f__Acetobacteraceae; g__Acetobacter; s__uncultured_Alphaproteobacteria,d__Bacteria; p__Acidobacteriota; c__Vicinamibacteria; o__Vicinamibacterales; f__Vicinamibacteraceae; g__uncultured; s__uncultured_bacterium,...,d__Bacteria; p__Actinobacteriota; c__Actinobacteria; o__Corynebacteriales; f__Mycobacteriaceae; g__Mycobacterium; s__Mycobacterium_sp.,d__Bacteria; p__Planctomycetota; c__Planctomycetes; o__Pirellulales; f__Pirellulaceae; g__uncultured; s__uncultured_bacterium,d__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridia; f__Hungateiclostridiaceae; g__Ruminiclostridium; s__Ruminiclostridium_hungatei,d__Bacteria; p__Chloroflexi; c__TK10; o__TK10; f__TK10; g__TK10; s__uncultured_bacterium,d__Bacteria; p__Acidobacteriota; c__Vicinamibacteria; o__Vicinamibacterales; f__Vicinamibacteraceae; g__Vicinamibacteraceae; s__uncultured_bacterium,d__Bacteria; p__Chloroflexi; c__Anaerolineae; o__SBR1031; f__A4b; g__A4b; 
s__metagenome,d__Bacteria; p__Actinobacteriota; c__Thermoleophilia; o__Gaiellales; f__uncultured; g__uncultured; s__uncultured_microorganism,d__Bacteria; p__Planctomycetota; c__Planctomycetes; o__Pirellulales; f__Pirellulaceae; g__Pirellula; s__uncultured_bacterium,d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacterales; f__Enterobacteriaceae; g__Salmonella; s__Salmonella_enterica,d__Bacteria; p__Acidobacteriota; c__Vicinamibacteria; o__Vicinamibacterales; f__Vicinamibacteraceae; g__Vicinamibacteraceae; s__uncultured_bacterium.1
0,PostMLF_2023_Plot4,0.0,0.0,0,0,84.0,0,0,0,0,...,0.0,0,0,0,0,0,0.0,0,0.0,0
1,PostMLF_2023_Plot13,4.0,0.0,0,0,69.0,0,0,0,0,...,0.0,0,0,0,0,0,1.0,0,2.0,0
2,PostMLF_2023_Plot9,0.0,0.0,0,0,97.0,0,0,0,0,...,0.0,0,0,0,0,0,0.0,0,0.0,0
3,PostMLF_2023_Plot5,0.0,0.0,0,0,100.0,0,0,0,0,...,0.0,0,0,0,0,0,0.0,0,0.0,0
4,PostMLF_2021_Plot17,0.0,2.0,0,0,1.0,0,0,0,0,...,1.0,0,0,0,0,0,0.0,0,0.0,0


#### 1.3. Collapse on Genus Level

In [74]:
%%bash 

# Shared stem of the genus-level artifact, its summary and its export directory
genus_stem=PostMLF_closedOTU_rarefied_genus_table
silva_taxonomy=/home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-tax.qza

# Collapse the rarefied closed-reference OTU table at taxonomic level 6
qiime taxa collapse \
    --i-table PostMLF_closedOTU_rarefied_table.qza \
    --i-taxonomy "${silva_taxonomy}" \
    --p-level 6 \
    --o-collapsed-table "${genus_stem}.qza"

# Summarise the collapsed table, then export it next to the artifact
qiime feature-table summarize \
    --i-table "${genus_stem}.qza" \
    --o-visualization "${genus_stem}.qzv"

qiime tools export \
    --input-path "${genus_stem}.qza" \
    --output-path "${genus_stem}"

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_genus_table.qza
Saved Visualization to: PostMLF_closedOTU_rarefied_genus_table.qzv
Exported PostMLF_closedOTU_rarefied_genus_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_genus_table


> **56 features** 

In [75]:
# Load the 16S genus-level table summary; as the last expression it is rendered inline.
Bac_genus_qzv = 'PostMLF_closedOTU_rarefied_genus_table.qzv'
Visualization.load(Bac_genus_qzv)

In [108]:
# Load the exported genus-collapsed 16S table; its feature ids are already
# taxonomy strings, so only the samples need renaming.
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_genus_table/feature-table.biom")
df_16S = biom_table.to_dataframe()

# Load the metadata and index it by sample id ONCE (it used to be re-indexed for
# every column), then rename each sample to 'PostMLF_<Year>_Plot<Plot>'.
# NOTE(review): samples sharing Year and Plot would receive identical labels - confirm none do.
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
Bac_md_by_id = Bac_md.set_index('id')
df_16S.columns = [
    'PostMLF_' + str(Bac_md_by_id.loc[col, 'Year']) + '_Plot' + str(Bac_md_by_id.loc[col, 'Plot'])
    for col in df_16S.columns
]

# Transpose to samples x features; the sample labels end up in the 'index' column
df_16S = df_16S.T.reset_index()

df_16S.to_csv('PostMLF_cOTUs_genus_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,index,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia-Shigella,d__Bacteria;p__Actinobacteriota;c__Thermoleophilia;o__Solirubrobacterales;f__Solirubrobacteraceae;g__Solirubrobacter,d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Acetobacterales;f__Acetobacteraceae;g__Acetobacter,d__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Pseudonocardiales;f__Pseudonocardiaceae;g__Actinomycetospora,d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Leuconostocaceae;g__Oenococcus,d__Bacteria;p__Planctomycetota;c__Planctomycetes;o__Pirellulales;f__Pirellulaceae;g__Pirellula,d__Bacteria;p__Methylomirabilota;c__Methylomirabilia;o__Rokubacteriales;f__Rokubacteriales;g__Rokubacteriales,d__Bacteria;p__Acidobacteriota;c__Vicinamibacteria;o__Vicinamibacterales;f__Vicinamibacteraceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured,...,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Alcaligenaceae;g__Achromobacter,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Chthoniobacterales;f__Chthoniobacteraceae;g__Chthoniobacter,d__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Micromonosporales;f__Micromonosporaceae;g__Actinoplanes,d__Bacteria;p__Acidobacteriota;c__Blastocatellia;o__Blastocatellales;f__Blastocatellaceae;g__JGI_0001001-H03,d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Acetobacterales;f__Acetobacteraceae;g__Gluconobacter,d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus,d__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Corynebacteriales;f__Mycobacteriaceae;g__Mycobacterium,d__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridia;f__Hungateiclostridiaceae;g__Ruminiclostridium,d__Bacteria;p__Chloroflexi;c__Anaerolineae;o__SBR1031;f__A4b;g__A4b,d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriacea
e;g__Salmonella
0,PostMLF_2023_Plot4,0.0,0.0,13.0,0,84.0,0,0.0,0,0,...,1.0,0,0,0,0,0.0,0.0,0,0,0.0
1,PostMLF_2023_Plot13,4.0,0.0,0.0,0,69.0,0,0.0,0,0,...,0.0,0,0,0,0,2.0,0.0,0,0,2.0
2,PostMLF_2023_Plot9,0.0,0.0,3.0,0,97.0,0,0.0,0,0,...,0.0,0,0,0,0,0.0,0.0,0,0,0.0
3,PostMLF_2023_Plot5,0.0,0.0,0.0,0,100.0,0,0.0,0,0,...,0.0,0,0,0,0,0.0,0.0,0,0,0.0
4,PostMLF_2021_Plot17,0.0,2.0,0.0,0,1.0,0,1.0,0,0,...,0.0,0,0,0,0,0.0,1.0,0,0,0.0
