# Prepare the Microbiome Feature Tables for Analysis with Metabolomics data

In [5]:
import pandas as pd
import numpy as np
import biom
import os
from qiime2 import Visualization
import re

%matplotlib inline

In [17]:
## function to rename columns 

def rename_columns_by_taxonomy(df):
    def extract_reliable_taxon(taxon_str):
        levels = taxon_str.split(';')
        
        # Remove 'sh' level if present
        levels = [level for level in levels if not level.startswith('sh__')]
        
        # Iterate backwards through taxonomic levels
        for level in reversed(levels):
            taxon_name = re.sub(r'^[a-z]__', '', level)  # Remove prefix (e.g., 's__')
            if taxon_name and not any(keyword in taxon_name.lower() for keyword in ["metagenome", "unidentified", "uncultured"]):
                return taxon_name
        
        # If all levels are unreliable, return the highest available taxon
        return re.sub(r'^[a-z]__', '', levels[0]) if levels else "Unknown"
    
    df_renamed = df.rename(columns={col: extract_reliable_taxon(col) for col in df.columns})
    return df_renamed

# ITS

### 1. Subset to PostMLF 

|          	| ASV table 	| 90% closed ref OUTs 	| OTUs collapsed on genus 	|
|----------	|-----------	|---------------------	|-------------------------	|
| features 	| 333       	| 97                  	| 68                      	|


> rarefy to the min. 2835 to keep all features



In [7]:
#!mkdir /home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/ITS

os.chdir('/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/ITS')

#### 1.1. Rarefied ASV table

In [8]:
%%bash 

# 1. Subset to PostMLF 
qiime feature-table filter-samples \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_filtered_table.qza \
    --m-metadata-file /home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv \
    --p-where "[sample_type]='Post-MLF'" \
    --o-filtered-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_table.qza

qiime feature-table filter-seqs \
    --i-data /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/Sven-denoising-test/ITS/revcomp-trimmed-denoised/with-adjusted-parameters/ITS-revcomp-trimmed-ER-0.05-denoised-trunc-190-PFA-4.0-maxee-4.0-rep-seqs.qza \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_table.qza \
    --o-filtered-data /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_rep_seqs.qza

qiime feature-table summarize --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_filtered_table.qza --o-visualization /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_filtered_table.qzv

# 2. Rarefy 
qiime feature-table rarefy \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_filtered_table.qza \
    --p-sampling-depth 2835 \
    --o-rarefied-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_rarefied2835.qza

# 3. Export 
qiime tools export --input-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_rarefied2835.qza --output-path mv_PostMLF_rarefied2835

Error while terminating subprocess (pid=3110671): 


> **333 features**


In [57]:
Visualization.load('/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/microvinification/mv_PostMLF_filtered_table.qzv')

In [13]:
# Load the BIOM table
biom_table = biom.load_table("mv_PostMLF_rarefied2835/feature-table.biom")
df_ITS = biom_table.to_dataframe()

# load the metadata to rename the samples accordingly 
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
df_ITS.columns = ['PostMLF_' + str(ITS_md.set_index('id').loc[col, 'Year']) + '_Plot' + str(ITS_md.loc[ITS_md['id'] == col, 'Plot'].values[0]) for col in df_ITS.columns]

# load the taxonomy to rename the features
ITS_taxonomy = pd.read_csv('/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/taxonomy/taxonomy.tsv', sep='\t')
ITS_taxonomy.set_index('Feature ID', inplace=True)
# Rename the index of df_ITS based on the 'Taxon' column of ITS_taxonomy
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# transpose and reset index
df_ITS = df_ITS.T.reset_index()

# rename columns 
df_ITS = rename_columns_by_taxonomy(df_ITS)

df_ITS.to_csv('PostMLF_ASVs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,index,Ascomycota,Pseudaegerita_sp,Ascomycota.1,Alternaria_subcucurbitae,Ramimonilia_apicalis,Ascomycota.2,Malassezia_restricta,Fungi_sp,Alternaria_eureka,...,Phaeococcomyces_sp,Microstroma_bacarum,Alternaria,Ascomycota.3,Gallowayella_poeltii,Hanseniaspora_uvarum,Cladosporium_herbarum,Nothophaeotheca_mirabibensis,Hanseniaspora_uvarum.1,Cladosporium_herbarum.1
0,PostMLF_2023_Plot4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,PostMLF_2023_Plot13,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,PostMLF_2023_Plot9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,PostMLF_2023_Plot5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,PostMLF_2021_Plot12,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### 1.2. Closed Ref OTU clustering 

> note. UNITE release_10.05.2021

In [58]:
%%bash 

qiime vsearch cluster-features-closed-reference \
    --i-sequences /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_rep_seqs.qza \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/mv_PostMLF_filtered_table.qza \
    --i-reference-sequences /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/unite-ver8-99-seqs-10.05.2021.qza \
    --p-perc-identity 0.9 \
    --p-threads 10 \
    --o-clustered-table PostMLF_closedOTU_table.qza \
    --o-clustered-sequences PostMLF_closedOTU_seq.qza \
    --o-unmatched-sequences PostMLF_closedOTU_unmatched_seq.qza

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_table.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_seq.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_unmatched_seq.qza


In [59]:
!qiime feature-table summarize --i-table PostMLF_closedOTU_table.qza --o-visualization PostMLF_closedOTU_table.qzv

[32mSaved Visualization to: PostMLF_closedOTU_table.qzv[0m
[0m

> **97 features**


In [60]:
Visualization.load('PostMLF_closedOTU_table.qzv')

In [61]:
%%bash 

# 2. Rarefy 
qiime feature-table rarefy \
    --i-table PostMLF_closedOTU_table.qza \
    --p-sampling-depth 2835 \
    --o-rarefied-table PostMLF_closedOTU_rarefied_table.qza

# 3. Export 
qiime tools export --input-path PostMLF_closedOTU_rarefied_table.qza --output-path PostMLF_closedOTU_rarefied_table

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_table.qza
Exported PostMLF_closedOTU_rarefied_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_table


In [87]:
# export corresponding taxonomy
!qiime tools export --input-path /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/sh_taxonomy_qiime_ver8_99_10.05.2021.qza --output-path unite-ver8-99-taxonomy

[32mExported /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/sh_taxonomy_qiime_ver8_99_10.05.2021.qza as TSVTaxonomyDirectoryFormat to directory unite-ver8-99-taxonomy[0m
[0m

In [14]:
# Load the BIOM table
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_table/feature-table.biom")
df_ITS = biom_table.to_dataframe()

df_ITS.head()

Unnamed: 0,364525_567-LP3-ITS-0567,364525_570-LP3-ITS-0570,364525_571-LP3-ITS-0571,364525_572-LP3-ITS-0572,364526_154-LP3-ITS-0730,364526_156-LP3-ITS-0732,364526_159-LP3-ITS-0735,364526_163-LP3-ITS-0739,364526_165-LP3-ITS-0741,364526_188-LP3-ITS-0764,...,364526_549-LP3-ITS-1125,364526_550-LP3-ITS-1126,364526_559-LP3-ITS-1135,364526_562-LP3-ITS-1138,364526_565-LP3-ITS-1141,364526_566-LP3-ITS-1142,364526_568-LP3-ITS-1144,364526_570-LP3-ITS-1146,364526_572-LP3-ITS-1148,364526_574-LP3-ITS-1150
SH1846583.08FU_UDB0119152_reps,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SH1685246.08FU_UDB0736585_reps,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SH1744677.08FU_MN509277_reps,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SH1688430.08FU_KC584249_refs,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SH1729342.08FU_AY843135_refs,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Load the BIOM table
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_table/feature-table.biom")
df_ITS = biom_table.to_dataframe()

# load the metadata to rename the samples accordingly 
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
df_ITS.columns = ['PostMLF_' + str(ITS_md.set_index('id').loc[col, 'Year']) + '_Plot' + str(ITS_md.loc[ITS_md['id'] == col, 'Plot'].values[0]) for col in df_ITS.columns]

# load the taxonomy to rename the features
ITS_taxonomy = pd.read_csv('unite-ver8-99-taxonomy/taxonomy.tsv', sep='\t')
ITS_taxonomy.set_index('Feature ID', inplace=True)
# Rename the index of df_ITS based on the 'Taxon' column of ITS_taxonomy
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# transpose and reset index
df_ITS = df_ITS.T.reset_index()

# rename columns 
df_ITS = rename_columns_by_taxonomy(df_ITS)

df_ITS.to_csv('PostMLF_cOTUs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,index,Dothideomycetes,Hyaloscypha,Cladosporium_austrohemisphaericum,Alternaria_subcucurbitae,Ramimonilia_apicalis,Malassezia_restricta,Ascomycota,Alternaria_eureka,Rhodotorula_graminis,...,Paracamarosporium_hawaiiense,Pleosporaceae,Melampsora_epitea,Hormonema_macrosporum,Claviceps_purpurea,Chaetothyriales,Endoconidioma_populi,Phaeococcomyces,Acremonium,Gallowayella_poeltii
0,PostMLF_2023_Plot4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,PostMLF_2023_Plot13,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,PostMLF_2023_Plot9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,PostMLF_2023_Plot5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,PostMLF_2021_Plot12,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### 1.3. Collapse on Genus Level

In [62]:
%%bash 

qiime taxa collapse \
    --i-table PostMLF_closedOTU_rarefied_table.qza \
    --i-taxonomy /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/sh_taxonomy_qiime_ver8_99_10.05.2021.qza \
    --p-level 6 \
    --o-collapsed-table PostMLF_closedOTU_rarefied_genus_table.qza
    
qiime feature-table summarize --i-table PostMLF_closedOTU_rarefied_genus_table.qza --o-visualization PostMLF_closedOTU_rarefied_genus_table.qzv

qiime tools export --input-path PostMLF_closedOTU_rarefied_genus_table.qza --output-path PostMLF_closedOTU_rarefied_genus_table

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_genus_table.qza
Saved Visualization to: PostMLF_closedOTU_rarefied_genus_table.qzv
Exported PostMLF_closedOTU_rarefied_genus_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_genus_table


> **68 features** 

In [63]:
Visualization.load('PostMLF_closedOTU_rarefied_genus_table.qzv')

In [20]:
# Load the BIOM table
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_genus_table/feature-table.biom")
df_ITS = biom_table.to_dataframe()

# load the metadata to rename the samples accordingly 
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
df_ITS.columns = ['PostMLF_' + str(ITS_md.set_index('id').loc[col, 'Year']) + '_Plot' + str(ITS_md.loc[ITS_md['id'] == col, 'Plot'].values[0]) for col in df_ITS.columns]

# transpose and reset index
df_ITS = df_ITS.T.reset_index()

# rename columns 
df_ITS = rename_columns_by_taxonomy(df_ITS)

df_ITS.to_csv('PostMLF_cOTUs_genus_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,index,Dothideomycetes,Hyaloscypha,Cladosporium,Alternaria,Ramimonilia,Malassezia,Ascomycota,Rhodotorula,Hanseniaspora,...,Dothiorella,Paracamarosporium,Pleosporaceae,Hormonema,Claviceps,Chaetothyriales,Endoconidioma,Phaeococcomyces,Acremonium,Gallowayella
0,PostMLF_2023_Plot4,0,0,0,0,0,0,0,0,2349.0,...,0,0,0,0,0,0,0,0,0,0
1,PostMLF_2023_Plot13,0,0,0,0,0,0,0,0,421.0,...,0,0,0,0,0,0,0,0,0,0
2,PostMLF_2023_Plot9,0,0,0,0,0,0,0,0,2641.0,...,0,0,0,0,0,0,0,0,0,0
3,PostMLF_2023_Plot5,0,0,0,0,0,0,0,0,2722.0,...,0,0,0,0,0,0,0,0,0,0
4,PostMLF_2021_Plot12,0,0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0


<hr>

# 16S

|          	| ASV table 	| 99% closed ref OUTs 	| OTUs collapsed on genus 	|
|----------	|-----------	|---------------------	|-------------------------	|
| features 	| 326       	| 203                 	| 56                      	|

### 1. Subset to PostMLF 


In [21]:
#!mkdir /home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/16S

os.chdir('/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/16S')

#### 1.1. Rarefied ASV table

> rarefy to 100 to keep as many as possible


In [52]:
%%bash 

# 1. Subset to PostMLF 
qiime feature-table filter-samples \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_filtered_table.qza \
    --m-metadata-file /home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv \
    --p-where "[sample_type]='Post-MLF'" \
    --o-filtered-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza

qiime feature-table filter-seqs \
    --i-data /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/bac-dada2-single/dada-rep-seqs-220-ee4-fa4.qza \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza \
    --o-filtered-data /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_rep_seqs.qza

Saved FeatureTable[Frequency] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza
Saved FeatureData[Sequence] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_rep_seqs.qza


In [41]:
!qiime feature-table summarize --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza --o-visualization /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qzv

[32mSaved Visualization to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qzv[0m
[0m

> **326 features**


In [42]:
Visualization.load('/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qzv')

In [53]:
%%bash 

# 2. Rarefy 
qiime feature-table rarefy \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza \
    --p-sampling-depth 100 \
    --o-rarefied-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_rarefied.qza

# 3. Export 
qiime tools export --input-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_rarefied.qza --output-path mv_PostMLF_rarefied

Saved FeatureTable[Frequency] to: /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_rarefied.qza
Exported /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_rarefied.qza as BIOMV210DirFmt to directory mv_PostMLF_rarefied


In [95]:
#!qiime tools export --input-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy.qza --output-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy

[32mExported /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy.qza as TSVTaxonomyDirectoryFormat to directory /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy[0m
[0m

In [23]:
# Load the BIOM table
biom_table = biom.load_table("mv_PostMLF_rarefied/feature-table.biom")
df_16S = biom_table.to_dataframe()


# load the metadata to rename the samples accordingly 
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
df_16S.columns = ['PostMLF_' + str(Bac_md.set_index('id').loc[col, 'Year']) + '_Plot' + str(Bac_md.loc[Bac_md['id'] == col, 'Plot'].values[0]) for col in df_16S.columns]

# load the taxonomy to rename the features
Bac_taxonomy = pd.read_csv('/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/taxonomy/taxonomy.tsv', sep='\t')
Bac_taxonomy.set_index('Feature ID', inplace=True)
# Rename the index of df_ITS based on the 'Taxon' column of ITS_taxonomy
df_16S.index = df_16S.index.map(Bac_taxonomy['Taxon'])


# transpose and reset index
df_16S = df_16S.T.reset_index()

# rename columns 
df_16S = rename_columns_by_taxonomy(df_16S)

df_16S.to_csv('PostMLF_ASVs_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,index,Pirellulaceae,Escherichia_sp.,Caldilineaceae,Tepidisphaeraceae,Solirubrobacter,Acetobacter_cerevisiae,Anaerolineaceae,Actinomycetospora,Gemmataceae,...,Rhodobacteraceae,RB41,TK10,Vicinamibacteraceae,A4b,Vicinamibacteraceae.1,Enterobacteriaceae,Vicinamibacteraceae.2,Vicinamibacterales,JG30-KF-CM45
0,PostMLF_2023_Plot4,0.0,0.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0
1,PostMLF_2023_Plot13,0.0,5.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,1.0,0,0,0
2,PostMLF_2023_Plot9,0.0,0.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0
3,PostMLF_2023_Plot5,0.0,0.0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0
4,PostMLF_2021_Plot17,1.0,0.0,0,0,1.0,0,0,0,0,...,0,0,0,0,0,0,0.0,0,0,0


#### 1.2. Closed Ref OTU clustering 

In [68]:
%%bash 

qiime vsearch cluster-features-closed-reference \
    --i-sequences /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_rep_seqs.qza \
    --i-table /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux/mv_PostMLF_filtered_table.qza \
    --i-reference-sequences /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-seqs.qza \
    --p-perc-identity 0.99 \
    --p-threads 10 \
    --o-clustered-table PostMLF_closedOTU_table.qza \
    --o-clustered-sequences PostMLF_closedOTU_seq.qza \
    --o-unmatched-sequences PostMLF_closedOTU_unmatched_seq.qza

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_table.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_seq.qza
Saved FeatureData[Sequence] to: PostMLF_closedOTU_unmatched_seq.qza


In [69]:
!qiime feature-table summarize --i-table PostMLF_closedOTU_table.qza --o-visualization PostMLF_closedOTU_table.qzv

[32mSaved Visualization to: PostMLF_closedOTU_table.qzv[0m
[0m

> **203 features**


In [71]:
Visualization.load('PostMLF_closedOTU_table.qzv')

In [72]:
%%bash 

# 2. Rarefy 
qiime feature-table rarefy \
    --i-table PostMLF_closedOTU_table.qza \
    --p-sampling-depth 100 \
    --o-rarefied-table PostMLF_closedOTU_rarefied_table.qza

# 3. Export 
qiime tools export --input-path PostMLF_closedOTU_rarefied_table.qza --output-path PostMLF_closedOTU_rarefied_table

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_table.qza
Exported PostMLF_closedOTU_rarefied_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_table


In [105]:
# export corresponding taxonomy
!qiime tools export --input-path /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-tax.qza --output-path silva-138-99-tax

[32mExported /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-tax.qza as TSVTaxonomyDirectoryFormat to directory silva-138-99-tax[0m
[0m

In [None]:
# Load the BIOM table
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_table/feature-table.biom")
df_16S = biom_table.to_dataframe()


# load the metadata to rename the samples accordingly 
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
df_16S.columns = ['PostMLF_' + str(Bac_md.set_index('id').loc[col, 'Year']) + '_Plot' + str(Bac_md.loc[Bac_md['id'] == col, 'Plot'].values[0]) for col in df_16S.columns]

# load the taxonomy to rename the features
Bac_taxonomy = pd.read_csv('silva-138-99-tax/taxonomy.tsv', sep='\t')
Bac_taxonomy.set_index('Feature ID', inplace=True)
# Rename the index of df_ITS based on the 'Taxon' column of ITS_taxonomy
df_16S.index = df_16S.index.map(Bac_taxonomy['Taxon'])

# transpose and reset index
df_16S = df_16S.T.reset_index()

# rename columns 
df_16S = rename_columns_by_taxonomy(df_16S)

df_16S.to_csv('PostMLF_cOTUs_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,index,s__Escherichia_coli,s__bacterium_Ellin6048,s__Acetobacter_cerevisiae,g__Actinomycetospora,s__Oenococcus_oeni,g__Pirellula,g__Rokubacteriales,g__Acetobacter,f__Vicinamibacteraceae,...,s__Mycobacterium_sp.,f__Pirellulaceae,s__Ruminiclostridium_hungatei,g__TK10,g__Vicinamibacteraceae,g__A4b,o__Gaiellales,g__Pirellula.1,s__Salmonella_enterica,g__Vicinamibacteraceae.1
0,PostMLF_2023_Plot4,0.0,0.0,0,0,84.0,0,0,0,0,...,0.0,0,0,0,0,0,0.0,0,0.0,0
1,PostMLF_2023_Plot13,4.0,0.0,0,0,69.0,0,0,0,0,...,0.0,0,0,0,0,0,1.0,0,2.0,0
2,PostMLF_2023_Plot9,0.0,0.0,0,0,97.0,0,0,0,0,...,0.0,0,0,0,0,0,0.0,0,0.0,0
3,PostMLF_2023_Plot5,0.0,0.0,0,0,100.0,0,0,0,0,...,0.0,0,0,0,0,0,0.0,0,0.0,0
4,PostMLF_2021_Plot17,0.0,2.0,0,0,1.0,0,0,0,0,...,1.0,0,0,0,0,0,0.0,0,0.0,0


#### 1.3. Collapse on Genus Level

In [74]:
%%bash 

qiime taxa collapse \
    --i-table PostMLF_closedOTU_rarefied_table.qza \
    --i-taxonomy /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-tax.qza \
    --p-level 6 \
    --o-collapsed-table PostMLF_closedOTU_rarefied_genus_table.qza
    
qiime feature-table summarize --i-table PostMLF_closedOTU_rarefied_genus_table.qza --o-visualization PostMLF_closedOTU_rarefied_genus_table.qzv

qiime tools export --input-path PostMLF_closedOTU_rarefied_genus_table.qza --output-path PostMLF_closedOTU_rarefied_genus_table

Saved FeatureTable[Frequency] to: PostMLF_closedOTU_rarefied_genus_table.qza
Saved Visualization to: PostMLF_closedOTU_rarefied_genus_table.qzv
Exported PostMLF_closedOTU_rarefied_genus_table.qza as BIOMV210DirFmt to directory PostMLF_closedOTU_rarefied_genus_table


> **56 features** 

In [75]:
Visualization.load('PostMLF_closedOTU_rarefied_genus_table.qzv')

In [25]:
# Load the BIOM table
biom_table = biom.load_table("PostMLF_closedOTU_rarefied_genus_table/feature-table.biom")
df_16S = biom_table.to_dataframe()

# load the metadata to rename the samples accordingly 
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
df_16S.columns = ['PostMLF_' + str(Bac_md.set_index('id').loc[col, 'Year']) + '_Plot' + str(Bac_md.loc[Bac_md['id'] == col, 'Plot'].values[0]) for col in df_16S.columns]

# transpose and reset index
df_16S = df_16S.T.reset_index()

# rename columns 
df_16S = rename_columns_by_taxonomy(df_16S)

df_16S.to_csv('PostMLF_cOTUs_genus_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,index,Escherichia-Shigella,Solirubrobacter,Acetobacter,Actinomycetospora,Oenococcus,Pirellula,Rokubacteriales,Vicinamibacteraceae,Pedosphaeraceae,...,Achromobacter,Chthoniobacter,Actinoplanes,JGI_0001001-H03,Gluconobacter,Lactobacillus,Mycobacterium,Ruminiclostridium,A4b,Salmonella
0,PostMLF_2023_Plot4,0.0,0.0,13.0,0,84.0,0,0.0,0,0,...,1.0,0,0,0,0,0.0,0.0,0,0,0.0
1,PostMLF_2023_Plot13,4.0,0.0,0.0,0,69.0,0,0.0,0,0,...,0.0,0,0,0,0,2.0,0.0,0,0,2.0
2,PostMLF_2023_Plot9,0.0,0.0,3.0,0,97.0,0,0.0,0,0,...,0.0,0,0,0,0,0.0,0.0,0,0,0.0
3,PostMLF_2023_Plot5,0.0,0.0,0.0,0,100.0,0,0.0,0,0,...,0.0,0,0,0,0,0.0,0.0,0,0,0.0
4,PostMLF_2021_Plot17,0.0,2.0,0.0,0,1.0,0,1.0,0,0,...,0.0,0,0,0,0,0.0,1.0,0,0,0.0
