# Prepare microbiome files for combined analysis

In [2]:
import pandas as pd
import numpy as np
import biom
import os
from qiime2 import Visualization

%matplotlib inline

# ITS

## Soil

Using the table rarefied to a sampling depth of 5000.

In [3]:
workdir = '/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux'
%cd $workdir

/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux


In [4]:
%%bash
# Build the rarefied closed-reference OTU table for the ITS soil samples.
# Stop at the first failing command so later steps never run on stale artifacts.
set -euo pipefail

# Make sure the output directory exists (-p: no error if it is already there).
mkdir -p soil-closedRefOTU

# 1. Cluster to OTUs against the UNITE reference sequences
# NOTE(review): identity is 0.9 here while the 16S section uses 0.99 - confirm this is intended.
qiime vsearch cluster-features-closed-reference \
    --i-sequences soil_filtered_rep_seqs.qza \
    --i-table soil_filtered_table.qza \
    --i-reference-sequences /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/unite-ver8-99-seqs-10.05.2021.qza \
    --p-perc-identity 0.9 \
    --p-threads 10 \
    --o-clustered-table soil-closedRefOTU/soil_table.qza \
    --o-clustered-sequences soil-closedRefOTU/soil_seq.qza \
    --o-unmatched-sequences soil-closedRefOTU/soil_unmatched_seq.qza

# 2. Rarefy the clustered table to 5000 reads per sample
qiime feature-table rarefy \
    --i-table soil-closedRefOTU/soil_table.qza \
    --p-sampling-depth 5000 \
    --o-rarefied-table soil-closedRefOTU/soil_rarefied_table.qza

# 3. Export the rarefied table (writes soil-closedRefOTU/feature-table.biom)
qiime tools export --input-path soil-closedRefOTU/soil_rarefied_table.qza --output-path soil-closedRefOTU/

Saved FeatureTable[Frequency] to: soil-closedRefOTU/soil_table.qza
Saved FeatureData[Sequence] to: soil-closedRefOTU/soil_seq.qza
Saved FeatureData[Sequence] to: soil-closedRefOTU/soil_unmatched_seq.qza
Saved FeatureTable[Frequency] to: soil-closedRefOTU/soil_rarefied_table.qza
Exported soil-closedRefOTU/soil_rarefied_table.qza as BIOMV210DirFmt to directory soil-closedRefOTU/


In [83]:
# Read the exported rarefied closed-reference OTU table
# (features end up as rows and samples as columns of the DataFrame below)
biom_table = biom.load_table("soil-closedRefOTU/feature-table.biom")

# Lookup: sample id -> SAMPLE_NAME, taken from the ITS metadata sheet
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
column_mapping = ITS_md.set_index("id")["SAMPLE_NAME"].to_dict()

# Lookup: Feature ID -> Taxon
# (taxonomy was exported in the 00_Prep_MicrobiomeDataForMetabolomics notebook)
ITS_taxonomy = pd.read_csv(
    '/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/ITS/unite-ver8-99-taxonomy/taxonomy.tsv',
    sep='\t',
).set_index('Feature ID')

# Relabel the samples (columns of df_ITS) ...
df_ITS = biom_table.to_dataframe().rename(columns=column_mapping)
# ... and the features (rows of df_ITS) with their taxon string.
# NOTE(review): IDs absent from the taxonomy become NaN labels, and features that
# share a taxon string produce duplicate labels - confirm downstream code copes.
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# Flip so that samples are rows and taxa are columns
df_ITS = df_ITS.T

df_ITS.to_csv('soil-closedRefOTU/soil_cOTUs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Epicoccum;s__Epicoccum_nigrum,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Sporormiaceae;g__Preussia;s__unidentified,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Sclerotiniaceae;g__Botrytis;s__Botrytis_cinerea,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Stephanosporaceae;g__Lindtneria;s__unidentified,k__Fungi;p__unidentified;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Hypocreales;f__Nectriaceae;g__Fusicolla;s__Fusicolla_violacea,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Hypocreales;f__Nectriaceae;g__Stephanonectria;s__Stephanonectria_keithii,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__unidentified;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified.1,...,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Basidiomycota;c__Atractiellomycetes;o__Atractiellales;f__Hoehnelomycetaceae;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__unidentified;g__unidentified;s__unidentified.1,k__Fungi;p__Ascomycota;c__Pezizomycetes;o__Pezizales;f__Ascobolaceae;g__Ascobolus;s__unidentified,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified.1,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Eurotiomycetes;o__Onygenales;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Sordariales;f__Lasiosphaeriaceae;g__Podospora;s__unidentified,k__Fungi;p__Ascomycota;c__Pezizomycetes;o__Pezizales;f__Peziz
aceae;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Helotiales_fam_Incertae_sedis;g__Cadophora;s__unidentified
Lavaux_2022-08-10_soil_Plot17,18.0,0.0,4.0,0,0.0,0,13.0,0.0,0.0,282.0,...,0.0,0,0.0,0,0,0,0,0,0.0,0
Lavaux_2022-08-10_soil_Plot5,20.0,0.0,8.0,0,0.0,0,7.0,0.0,37.0,29.0,...,0.0,0,19.0,0,0,0,0,0,9.0,0
Lavaux_2022-08-10_soil_Plot11,53.0,0.0,12.0,0,53.0,0,45.0,39.0,0.0,114.0,...,3.0,0,181.0,0,0,0,0,0,0.0,0
Lavaux_2022-08-10_soil_Plot14,36.0,0.0,0.0,0,0.0,0,19.0,0.0,0.0,17.0,...,0.0,0,7.0,0,0,0,0,0,0.0,0
Lavaux_2022-08-10_soil_Plot20,0.0,6.0,4.0,0,22.0,0,0.0,1.0,0.0,0.0,...,0.0,0,0.0,0,0,0,0,0,0.0,0


## Berries

In [7]:
%%bash
# Build the rarefied closed-reference OTU table for the ITS berry (must) samples.
# Stop at the first failing command so later steps never run on stale artifacts.
set -euo pipefail

# Make sure the output directory exists (-p: no error if it is already there).
mkdir -p berries-closedRefOTU

# 1. Cluster to OTUs against the UNITE reference sequences
# NOTE(review): identity is 0.9 here while the 16S section uses 0.99 - confirm this is intended.
qiime vsearch cluster-features-closed-reference \
    --i-sequences must_filtered_rep_seqs.qza \
    --i-table must_filtered_table.qza \
    --i-reference-sequences /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/unite-ver8-99-seqs-10.05.2021.qza \
    --p-perc-identity 0.9 \
    --p-threads 10 \
    --o-clustered-table berries-closedRefOTU/berries_table.qza \
    --o-clustered-sequences berries-closedRefOTU/berries_seq.qza \
    --o-unmatched-sequences berries-closedRefOTU/berries_unmatched.qza

# 2. Rarefy the clustered table to 10000 reads per sample
qiime feature-table rarefy \
    --i-table berries-closedRefOTU/berries_table.qza \
    --p-sampling-depth 10000 \
    --o-rarefied-table berries-closedRefOTU/berries_rarefied_table.qza

# 3. Export the rarefied table (writes berries-closedRefOTU/feature-table.biom)
qiime tools export --input-path berries-closedRefOTU/berries_rarefied_table.qza --output-path berries-closedRefOTU/

mkdir: das Verzeichnis „berries-closedRefOTU“ kann nicht angelegt werden: File exists


Saved FeatureTable[Frequency] to: berries-closedRefOTU/berries_table.qza
Saved FeatureData[Sequence] to: berries-closedRefOTU/berries_seq.qza
Saved FeatureData[Sequence] to: berries-closedRefOTU/berries_unmatched.qza
Saved FeatureTable[Frequency] to: berries-closedRefOTU/berries_rarefied_table.qza
Exported berries-closedRefOTU/berries_rarefied_table.qza as BIOMV210DirFmt to directory berries-closedRefOTU/


In [8]:
# Read the exported rarefied closed-reference OTU table
# (features end up as rows and samples as columns of the DataFrame below)
biom_table = biom.load_table("berries-closedRefOTU/feature-table.biom")

# Lookup: sample id -> SAMPLE_NAME, taken from the ITS metadata sheet
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
column_mapping = ITS_md.set_index("id")["SAMPLE_NAME"].to_dict()

# Lookup: Feature ID -> Taxon
# (taxonomy was exported in the 00_Prep_MicrobiomeDataForMetabolomics notebook)
ITS_taxonomy = pd.read_csv(
    '/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/ITS/unite-ver8-99-taxonomy/taxonomy.tsv',
    sep='\t',
).set_index('Feature ID')

# Relabel the samples (columns of df_ITS) ...
df_ITS = biom_table.to_dataframe().rename(columns=column_mapping)
# ... and the features (rows of df_ITS) with their taxon string.
# NOTE(review): IDs absent from the taxonomy become NaN labels, and features that
# share a taxon string produce duplicate labels - confirm downstream code copes.
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# Flip so that samples are rows and taxa are columns
df_ITS = df_ITS.T

df_ITS.to_csv('berries-closedRefOTU/berries_cOTUs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Schizophyllaceae;g__Schizophyllum;s__Schizophyllum_commune,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Crepidotaceae;g__Crepidotus;s__Crepidotus_stenocystis,k__Fungi;p__Basidiomycota;c__Tremellomycetes;o__Filobasidiales;f__Filobasidiaceae;g__Filobasidium;s__Filobasidium_magnum,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Sporidiobolales;f__Sporidiobolaceae;g__Sporobolomyces;s__Sporobolomyces_roseus,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Microbotryomycetes_ord_Incertae_sedis;f__Microbotryomycetes_fam_Incertae_sedis;g__Curvibasidium;s__Curvibasidium_cygneicollum,k__Fungi;p__Basidiomycota;c__Tremellomycetes;o__Tremellales;f__Bulleribasidiaceae;g__Derxomyces;s__Derxomyces_mrakii,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Phaeosphaeriaceae;g__Leptospora;s__unidentified,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Russulales;f__Bondarzewiaceae;g__Heterobasidion;s__Heterobasidion_parviporum,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Neoascochyta;s__Neoascochyta_exitialis,...,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Tricholomataceae;g__Macrocystidia;s__Macrocystidia_cucumis,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Tricholomataceae;g__Macrocystidia;s__Macrocystidia_cucumis.1,k__Fungi;p__Ascomycota;c__Pezizomycetes;o__Pezizales;f__Pezizaceae;g__Iodophanus;s__Iodophanus_testaceus,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Hypocreales;f__Hypocreaceae;g__Trichoderma;s__Trichoderma_atroviride,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Pezizomycetes;o__Pezizales;f__Pyronemataceae;g__Wilcoxina;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alterna
ria;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Capnodiales;f__Mycosphaerellaceae;g__Passalora;s__Passalora_passaloroides,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Pterulaceae;g__Pterula;s__Pterula_gracilis,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Auriculariales;f__unidentified;g__unidentified;s__unidentified
Lavaux_2021-09-30_must_Plot4_Row9/Vine20_ID6,0,0,57.0,0.0,0.0,0,0.0,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot15_Row3/Vine20_ID121b,0,0,0.0,0.0,13.0,0,0.0,4.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot17_Row13/Vine20_ID171,0,0,0.0,3.0,213.0,0,0.0,6.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot5_Row6/Vine30_ID37b,0,0,215.0,0.0,0.0,0,0.0,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot11_Row6/Vine22_ID66,0,0,53.0,0.0,82.0,0,7.0,9.0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Berries - Harvest

In [9]:
# Summarize the rarefied harvest OTU table into a .qzv visualization.
# NOTE: the input berries_harvest_rarefied_table.qza is written by the rarefy step
# of the %%bash cell that follows, so that cell has to be run first on a fresh setup.
!qiime feature-table summarize --i-table berries-closedRefOTU/berries_harvest_rarefied_table.qza --o-visualization berries-closedRefOTU/berries_harvest_rarefied_table.qzv

[32mSaved Visualization to: berries-closedRefOTU/berries_harvest_rarefied_table.qzv[0m
[0m

In [None]:
%%bash
# Build the rarefied closed-reference OTU table for the harvest subset of the berry (must) samples.
# Stop at the first failing command so later steps never run on stale artifacts.
set -euo pipefail

# Make sure the output directory exists (-p: no error if it is already there).
mkdir -p berries-closedRefOTU

# 0. Filter the representative sequences using the harvest feature table
qiime feature-table filter-seqs \
    --i-data must_filtered_rep_seqs.qza \
    --i-table must_harvest_filtered_table.qza \
    --o-filtered-data must_harvest_filtered_rep_seqs.qza

# 1. Cluster to OTUs against the UNITE reference sequences
# NOTE(review): identity is 0.9 here while the 16S section uses 0.99 - confirm this is intended.
qiime vsearch cluster-features-closed-reference \
    --i-sequences must_harvest_filtered_rep_seqs.qza \
    --i-table must_harvest_filtered_table.qza \
    --i-reference-sequences /home/lfloerl/public/Data/Databases/QIIME2/UNITE/sh_qiime_release_10.05.2021/unite-ver8-99-seqs-10.05.2021.qza \
    --p-perc-identity 0.9 \
    --p-threads 10 \
    --o-clustered-table berries-closedRefOTU/berries_harvest_table.qza \
    --o-clustered-sequences berries-closedRefOTU/berries_harvest_seq.qza \
    --o-unmatched-sequences berries-closedRefOTU/berries_harvest_unmatched.qza

# 2. Rarefy the clustered table to 10000 reads per sample
qiime feature-table rarefy \
    --i-table berries-closedRefOTU/berries_harvest_table.qza \
    --p-sampling-depth 10000 \
    --o-rarefied-table berries-closedRefOTU/berries_harvest_rarefied_table.qza

# 3. Export into a separate sub-directory so the feature-table.biom of the
#    full berries export in berries-closedRefOTU/ is not overwritten
qiime tools export --input-path berries-closedRefOTU/berries_harvest_rarefied_table.qza --output-path berries-closedRefOTU/feature-table-harvest

In [15]:
# Read the exported rarefied closed-reference OTU table of the harvest subset
# (features end up as rows and samples as columns of the DataFrame below)
biom_table = biom.load_table("berries-closedRefOTU/feature-table-harvest/feature-table.biom")

# Lookup: sample id -> SAMPLE_NAME, taken from the ITS metadata sheet
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_lavaux.tsv', sep='\t')
column_mapping = ITS_md.set_index("id")["SAMPLE_NAME"].to_dict()

# Lookup: Feature ID -> Taxon
# (taxonomy was exported in the 00_Prep_MicrobiomeDataForMetabolomics notebook)
ITS_taxonomy = pd.read_csv(
    '/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/ITS/unite-ver8-99-taxonomy/taxonomy.tsv',
    sep='\t',
).set_index('Feature ID')

# Relabel the samples (columns of df_ITS) ...
df_ITS = biom_table.to_dataframe().rename(columns=column_mapping)
# ... and the features (rows of df_ITS) with their taxon string.
# NOTE(review): IDs absent from the taxonomy become NaN labels, and features that
# share a taxon string produce duplicate labels - confirm downstream code copes.
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# Flip so that samples are rows and taxa are columns
df_ITS = df_ITS.T

df_ITS.to_csv('berries-closedRefOTU/berries_harvest_cOTUs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Schizophyllaceae;g__Schizophyllum;s__Schizophyllum_commune,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Crepidotaceae;g__Crepidotus;s__Crepidotus_stenocystis,k__Fungi;p__Basidiomycota;c__Tremellomycetes;o__Filobasidiales;f__Filobasidiaceae;g__Filobasidium;s__Filobasidium_magnum,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Sporidiobolales;f__Sporidiobolaceae;g__Sporobolomyces;s__Sporobolomyces_roseus,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Microbotryomycetes_ord_Incertae_sedis;f__Microbotryomycetes_fam_Incertae_sedis;g__Curvibasidium;s__Curvibasidium_cygneicollum,k__Fungi;p__Basidiomycota;c__Tremellomycetes;o__Tremellales;f__Bulleribasidiaceae;g__Derxomyces;s__Derxomyces_mrakii,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Phaeosphaeriaceae;g__Leptospora;s__unidentified,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Russulales;f__Bondarzewiaceae;g__Heterobasidion;s__Heterobasidion_parviporum,k__Fungi;p__Ascomycota;c__unidentified;o__unidentified;f__unidentified;g__unidentified;s__unidentified.1,...,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Xylariales;f__Xylariaceae;g__Barrmaelia;s__Barrmaelia_rappazii,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Tricholomataceae;g__Macrocystidia;s__Macrocystidia_cucumis,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Tricholomataceae;g__Macrocystidia;s__Macrocystidia_cucumis.1,k__Fungi;p__Ascomycota;c__Pezizomycetes;o__Pezizales;f__Pezizaceae;g__Iodophanus;s__Iodophanus_testaceus,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__unidentified;g__unidentified;s__unidentified,k__Fungi;p__Ascomycota;c__Pezizomycetes;o__Pezizales;f__Pyronemataceae;g__Wilcoxina;s__unidentified,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Pleosporaceae;g__Alternaria;s__unidentifie
d,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Capnodiales;f__Mycosphaerellaceae;g__Passalora;s__Passalora_passaloroides,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Pterulaceae;g__Pterula;s__Pterula_gracilis,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Auriculariales;f__unidentified;g__unidentified;s__unidentified
Lavaux_2021-09-30_must_Plot4_Row9/Vine20_ID6,0,0,0,57.0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot15_Row3/Vine20_ID121b,0,0,0,0.0,0.0,7.0,0,0.0,4.0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot17_Row13/Vine20_ID171,0,0,0,0.0,3.0,227.0,0,0.0,6.0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot5_Row6/Vine30_ID37b,0,0,0,209.0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
Lavaux_2021-09-30_must_Plot11_Row6/Vine22_ID66,0,0,0,64.0,0.0,106.0,0,3.0,6.0,0,...,0,0,0,0,0,0,0,0,0,0


### Annotated ASV table 

In [8]:
# Export the rarefied ASV table artifact to a directory; the feature-table.biom
# written there is what the next cell loads.
!qiime tools export --input-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/InterIntra/cm10000/rarefied_table.qza --output-path /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/InterIntra/cm10000/rarefied_table

[32mExported /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/InterIntra/cm10000/rarefied_table.qza as BIOMV210DirFmt to directory /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/InterIntra/cm10000/rarefied_table[0m
[0m

In [4]:
# Read the exported rarefied ASV table
# (features end up as rows and samples as columns of the DataFrame below)
biom_table = biom.load_table("/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/spatio-temporal/InterIntra/cm10000/rarefied_table/feature-table.biom")

# Lookup: sample id -> SAMPLE_NAME, taken from the ITS metadata sheet
ITS_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/ITS_md.tsv', sep='\t')
column_mapping = ITS_md.set_index("id")["SAMPLE_NAME"].to_dict()

# Lookup: Feature ID -> Taxon for the ASVs
# NOTE(review): the earlier comment said this file was exported in the
# 00_Prep_MicrobiomeDataForMetabolomics notebook - confirm for this path.
ITS_taxonomy = pd.read_csv(
    '/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/taxonomy/taxonomy.tsv',
    sep='\t',
).set_index('Feature ID')

# Relabel the samples (columns of df_ITS) ...
df_ITS = biom_table.to_dataframe().rename(columns=column_mapping)
# ... and the features (rows of df_ITS) with their taxon string.
# NOTE(review): IDs absent from the taxonomy become NaN labels, and ASVs that
# share a taxon string produce duplicate labels - confirm downstream code copes.
df_ITS.index = df_ITS.index.map(ITS_taxonomy['Taxon'])

# Flip so that samples are rows and taxa are columns
df_ITS = df_ITS.T

# Relative path: the file lands in the current working directory
df_ITS.to_csv('berries_harvest_ASVs_rarefied_labled.tsv', sep='\t')

df_ITS.head()

Unnamed: 0,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp;sh__SH1302074.10FU,k__Fungi;p__Basidiomycota;c__Tremellomycetes;o__Cystofilobasidiales;f__Cystofilobasidiaceae;g__Cystofilobasidium;s__Cystofilobasidium_macerans;sh__SH1083411.10FU,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Torulaceae;g__Torula;s__Torula_fici;sh__SH1349958.10FU,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Neoascochyta;s__Neoascochyta_exitialis;sh__SH1213549.10FU,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp;sh__SH1302074.10FU.1,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Boletales;f__Boletaceae;g__Boletus;s__Boletus_satanas;sh__SH1216224.10FU,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Sclerotiniaceae,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Sclerotiniaceae.1,k__Fungi;p__Basidiomycota;c__Agaricomycetes;o__Agaricales;f__Lycoperdaceae;g__Lycoperdon;s__Lycoperdon_pyriforme;sh__SH1397992.10FU,...,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp;sh__SH1302074.10FU.2,k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Neoascochyta;s__Neoascochyta_tardicrescens;sh__SH1213171.10FU,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp.1,k__Fungi;p__Ascomycota;c__Leotiomycetes;o__Helotiales;f__Sclerotiniaceae.2,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Xylariales;f__Diatrypaceae;g__Cryptovalsa;s
__Cryptovalsa_ampelina;sh__SH1052393.10FU,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Sporidiobolales;f__Sporidiobolaceae;g__Sporobolomyces;s__Sporobolomyces_patagonicus;sh__SH1273307.10FU,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp;sh__SH1302074.10FU.3,k__Fungi;p__Ascomycota;c__Sordariomycetes;o__Glomerellales;f__Plectosphaerellaceae,k__Fungi;p__Basidiomycota;c__Microbotryomycetes;o__Microbotryomycetes_ord_Incertae_sedis;f__Microbotryomycetes_fam_Incertae_sedis;g__Curvibasidium;s__Curvibasidium_cygneicollum;sh__SH1189363.10FU,k__Fungi;p__Fungi_phy_Incertae_sedis;c__Fungi_cls_Incertae_sedis;o__Fungi_ord_Incertae_sedis;f__Fungi_fam_Incertae_sedis;g__Fungi_gen_Incertae_sedis;s__Fungi_sp;sh__SH1302074.10FU.4
Valais_7,0,0,0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Valais_30,0,0,0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Valais_2,0,0,0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Valais_33,0,0,0,0,0,0.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Valais_26,0,0,0,0,0,68.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# 16S

## Soil

In [84]:
workdir = '/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux'
%cd $workdir

/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/16S/lavaux


In [23]:
%%bash
# Build the rarefied closed-reference OTU table for the 16S soil samples.
# Stop at the first failing command so later steps never run on stale artifacts.
set -euo pipefail

# Make sure the output directory exists (-p: no error if it is already there,
# so the cell can be re-run).
mkdir -p soil-closedRefOTU

# 1. Cluster to OTUs against the SILVA 138 reference sequences
qiime vsearch cluster-features-closed-reference \
    --i-sequences soil_filtered_rep_seqs.qza \
    --i-table soil_filtered_table.qza \
    --i-reference-sequences /home/lfloerl/public/Data/Databases/QIIME2/SILVA/silva-138-99-seqs.qza \
    --p-perc-identity 0.99 \
    --p-threads 10 \
    --o-clustered-table soil-closedRefOTU/soil_table.qza \
    --o-clustered-sequences soil-closedRefOTU/soil_seq.qza \
    --o-unmatched-sequences soil-closedRefOTU/soil_unmatched_seq.qza

# 2. Rarefy the clustered table to 1500 reads per sample
qiime feature-table rarefy \
    --i-table soil-closedRefOTU/soil_table.qza \
    --p-sampling-depth 1500 \
    --o-rarefied-table soil-closedRefOTU/soil_rarefied_table.qza

# 3. Export the rarefied table (writes soil-closedRefOTU/feature-table.biom)
qiime tools export --input-path soil-closedRefOTU/soil_rarefied_table.qza --output-path soil-closedRefOTU/

Saved FeatureTable[Frequency] to: soil-closedRefOTU/soil_table.qza
Saved FeatureData[Sequence] to: soil-closedRefOTU/soil_seq.qza
Saved FeatureData[Sequence] to: soil-closedRefOTU/soil_unmatched_seq.qza
Saved FeatureTable[Frequency] to: soil-closedRefOTU/soil_rarefied_table.qza
Exported soil-closedRefOTU/soil_rarefied_table.qza as BIOMV210DirFmt to directory soil-closedRefOTU/


In [85]:
# Read the exported rarefied closed-reference OTU table
# (features end up as rows and samples as columns of the DataFrame below)
biom_table = biom.load_table("soil-closedRefOTU/feature-table.biom")

# Lookup: sample id -> SAMPLE_NAME, taken from the 16S metadata sheet
Bac_md = pd.read_csv('/home/lfloerl/microterroir/Microbiome/Metadata/16S_lavaux.tsv', sep='\t')
column_mapping = Bac_md.set_index("id")["SAMPLE_NAME"].to_dict()

# Lookup: Feature ID -> Taxon
Bac_taxonomy = pd.read_csv(
    '/home/lfloerl/cloud/lfloerl/Microterroir/LC-MS_data/Results/MicrobiomeMetabolome/16S/silva-138-99-tax/taxonomy.tsv',
    sep='\t',
).set_index('Feature ID')

# Relabel the samples (columns of df_16S) ...
df_16S = biom_table.to_dataframe().rename(columns=column_mapping)
# ... and the features (rows of df_16S) with their taxon string.
# NOTE(review): IDs absent from the taxonomy become NaN labels, and features that
# share a taxon string produce duplicate labels - confirm downstream code copes.
df_16S.index = df_16S.index.map(Bac_taxonomy['Taxon'])

# Flip so that samples are rows and taxa are columns
df_16S = df_16S.T

df_16S.to_csv('soil-closedRefOTU/soil_cOTUs_rarefied_labled.tsv', sep='\t')

df_16S.head()

Unnamed: 0,d__Bacteria; p__Myxococcota; c__Polyangia; o__Polyangiales; f__Polyangiaceae; g__Pajaroellobacter; s__uncultured_Polyangiaceae,d__Bacteria; p__Actinobacteriota; c__Actinobacteria; o__Frankiales; f__Geodermatophilaceae; g__Blastococcus; s__uncultured_actinobacterium,d__Bacteria; p__Myxococcota; c__Polyangia; o__Haliangiales; f__Haliangiaceae; g__Haliangium; s__uncultured_bacterium,d__Bacteria; p__Verrucomicrobiota; c__Verrucomicrobiae; o__Chthoniobacterales; f__Chthoniobacteraceae; g__Chthoniobacter; s__uncultured_bacterium,d__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Rubellimicrobium; s__uncultured_bacterium,d__Bacteria; p__Planctomycetota; c__Planctomycetes; o__Gemmatales; f__Gemmataceae; g__Gemmata; s__Gemmata_sp.,d__Bacteria; p__Planctomycetota; c__OM190; o__OM190; f__OM190; g__OM190; s__uncultured_bacterium,d__Bacteria; p__Planctomycetota; c__Planctomycetes; o__Gemmatales; f__Gemmataceae; g__Fimbriiglobus; s__uncultured_Pietermaritzburg,d__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Elsterales; f__uncultured; g__uncultured; s__uncultured_Alphaproteobacteria,d__Bacteria; p__Chloroflexi; c__Anaerolineae; o__Anaerolineales; f__Anaerolineaceae; g__uncultured; s__uncultured_bacterium,...,d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Lautropia; s__uncultured_bacterium,d__Bacteria; p__Gemmatimonadota; c__Gemmatimonadetes; o__Gemmatimonadales; f__Gemmatimonadaceae; g__uncultured; s__uncultured_bacterium,d__Bacteria; p__Actinobacteriota; c__Thermoleophilia; o__Solirubrobacterales; f__67-14; g__67-14; s__uncultured_bacterium,d__Bacteria; p__Verrucomicrobiota; c__Verrucomicrobiae; o__Chthoniobacterales; f__Chthoniobacteraceae; g__Chthoniobacter; s__uncultured_bacterium.1,d__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Rhizobiales_Incertae_Sedis; g__uncultured; s__uncultured_bacterium,d__Bacteria; p__Bacteroidota; 
c__Bacteroidia; o__Chitinophagales; f__Chitinophagaceae; g__Terrimonas; s__uncultured_Bacteroidetes,d__Bacteria; p__Firmicutes; c__Bacilli; o__Paenibacillales; f__Paenibacillaceae; g__Paenibacillus; s__Paenibacillus_sp.,d__Bacteria; p__Firmicutes; c__Bacilli; o__Paenibacillales; f__Paenibacillaceae; g__Paenibacillus; s__Paenibacillus_wynnii,d__Bacteria; p__Chloroflexi; c__Chloroflexia; o__Chloroflexales; f__Roseiflexaceae; g__uncultured; s__uncultured_bacterium,d__Bacteria; p__Actinobacteriota; c__Actinobacteria; o__Streptomycetales; f__Streptomycetaceae; g__Streptomyces; s__Streptomyces_rochei
Lavaux_2022-08-10_soil_Plot17,0,0.0,2.0,0,0,1.0,0.0,0.0,0,0.0,...,0,3.0,0.0,0,4.0,1.0,0,0.0,0,1.0
Lavaux_2022-08-10_soil_Plot5,0,0.0,1.0,0,0,3.0,0.0,0.0,0,0.0,...,0,6.0,0.0,0,0.0,0.0,0,0.0,0,0.0
Lavaux_2022-08-10_soil_Plot11,0,0.0,2.0,0,0,0.0,0.0,0.0,0,5.0,...,0,3.0,0.0,0,1.0,0.0,0,1.0,0,0.0
Lavaux_2022-08-10_soil_Plot13,0,4.0,0.0,0,0,0.0,1.0,2.0,0,2.0,...,0,3.0,0.0,0,0.0,0.0,0,0.0,0,0.0
Lavaux_2022-08-10_soil_Plot14,0,0.0,0.0,0,0,0.0,0.0,0.0,0,0.0,...,0,2.0,1.0,0,1.0,0.0,0,0.0,0,0.0
