# 5. Influence factors: Berry Chemistry

In [1]:
!mkdir /home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/climate

In [2]:
workdir = '/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/climate'
%cd $workdir

/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/climate


In [3]:
# Set TMPDIR for this kernel to a directory under /scratch.
# NOTE(review): presumably so temporary files from the QIIME 2 commands below
# land on scratch storage rather than the default temp location — confirm.
%env TMPDIR=/scratch/lfloerl/tmpdata

env: TMPDIR=/scratch/lfloerl/tmpdata


In [4]:
from qiime2 import Visualization
import qiime2 as q2
from qiime2 import Visualization
from qiime2 import Artifact
from qiime2 import Metadata
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from qiime2.plugins.diversity.visualizers import alpha_group_significance
from qiime2.plugins.feature_table.methods import (filter_samples, filter_seqs)

from qiime2.plugins import diversity as q2d

%matplotlib inline

# Mantel: climate Euclidean

In [None]:
%%bash 

# For each time point: subset the must feature table to that time point's
# samples with berry chemistry (HPLC) data, subset the representative sequences
# to the features that remain, then run the standard and the k-mer based core
# diversity metrics on the subset.
#
# Everything is written to ./<time_point>/ below the current directory:
#   must_<time_point>_filtered_table.qza
#   must_<time_point>_filtered_rep_seqs.qza
#   cm5000/        (qiime diversity core-metrics)
#   kmer_cm5000/   (qiime kmerizer core-metrics)

# Stop at the first failing step. Without this, a failed filter would let the
# diversity commands run against a missing table or one left over from an
# earlier run, and the loop would carry on as if nothing had happened.
# NOTE(review): QIIME 2 is expected to refuse an already existing --output-dir,
# so remove old cm5000/ and kmer_cm5000/ folders before re-running — confirm.
set -euo pipefail

# Inputs shared by every step (previously repeated inline in each command).
metadata=/home/lfloerl/microterroir/Microbiome/Metadata/ITS_Lavaux_BerryChemistry.tsv
full_table=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/must_filtered_table.qza
rep_seqs=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/Sven-denoising-test/ITS/revcomp-trimmed-denoised/with-adjusted-parameters/ITS-revcomp-trimmed-ER-0.05-denoised-trunc-190-PFA-4.0-maxee-4.0-rep-seqs.qza

# Sampling depth passed to both core-metrics runs.
sampling_depth=5000

# Define the time points
time_points=('Veraison' 'Harvest')

# Loop through each time point
for time_point in "${time_points[@]}"
do
    echo "Processing $time_point"

    # Create directory for the time point and navigate into it
    mkdir -p "./$time_point"
    pushd "./$time_point" > /dev/null

    filtered_table="must_${time_point}_filtered_table.qza"
    filtered_seqs="must_${time_point}_filtered_rep_seqs.qza"

    # Keep only samples of this time point that have a Tartrate_gL value,
    # i.e. samples for which HPLC data exist.
    qiime feature-table filter-samples \
        --i-table "$full_table" \
        --m-metadata-file "$metadata" \
        --p-where "[Time_point]='$time_point' AND [Tartrate_gL] IS NOT NULL" \
        --o-filtered-table "$filtered_table"

    # Restrict the representative sequences to features left in the table.
    qiime feature-table filter-seqs \
        --i-data "$rep_seqs" \
        --i-table "$filtered_table" \
        --o-filtered-data "$filtered_seqs"

    # Core diversity metrics
    qiime diversity core-metrics \
        --i-table "$filtered_table" \
        --p-sampling-depth "$sampling_depth" \
        --m-metadata-file "$metadata" \
        --p-n-jobs 5 \
        --output-dir cm5000/

    # Kmer diversity
    qiime kmerizer core-metrics \
        --i-sequences "$filtered_seqs" \
        --i-table "$filtered_table" \
        --p-sampling-depth "$sampling_depth" \
        --m-metadata-file "$metadata" \
        --p-n-jobs auto \
        --p-max-features 5000 \
        --output-dir kmer_cm5000/

    # Return to the parent directory
    popd > /dev/null
done


### PERMANOVA on the resulting distance matrices

In [69]:
%%bash

# Run PERMANOVA (qiime diversity adonis) with the berry chemistry formula on
# the Bray-Curtis and Jaccard distance matrices of both core-metrics result
# folders (cm5000 and kmer_cm5000), for each time point. Each result is saved
# next to its distance matrix as adonis_<metric>.qzv.

# Stop at the first failing adonis run instead of silently continuing, so a
# missing distance matrix or a bad formula term is not overlooked.
set -euo pipefail

metadata=/home/lfloerl/microterroir/Microbiome/Metadata/ITS_Lavaux_BerryChemistry.tsv

# Formula for PERMANOVA with berry chemistry variables
formula='Plot_ID + Year + Glucose_gL + Tartrate_gL + Malate_gL + Fructose_gL'

time_points=('Veraison' 'Harvest')
# Result folders to analyse: standard core metrics first, then k-mer core metrics.
result_dirs=('cm5000' 'kmer_cm5000')
beta_metrics=('bray_curtis' 'jaccard')

# Loop through each time point
for time_point in "${time_points[@]}"
do
    echo "Processing PERMANOVA for $time_point"

    for result_dir in "${result_dirs[@]}"
    do
        div_dir="./$time_point/$result_dir"

        for m in "${beta_metrics[@]}"
        do
            qiime diversity adonis \
                --i-distance-matrix "${div_dir}/${m}_distance_matrix.qza" \
                --m-metadata-file "$metadata" \
                --p-formula "$formula" \
                --p-n-jobs 5 \
                --o-visualization "${div_dir}/adonis_${m}.qzv"
        done
    done
done


Processing PERMANOVA for Veraison
Saved Visualization to: ./Veraison/cm5000/adonis_bray_curtis.qzv
Saved Visualization to: ./Veraison/cm5000/adonis_jaccard.qzv
Saved Visualization to: ./Veraison/kmer_cm5000/adonis_bray_curtis.qzv
Saved Visualization to: ./Veraison/kmer_cm5000/adonis_jaccard.qzv
Processing PERMANOVA for Harvest
Saved Visualization to: ./Harvest/cm5000/adonis_bray_curtis.qzv
Saved Visualization to: ./Harvest/cm5000/adonis_jaccard.qzv
Saved Visualization to: ./Harvest/kmer_cm5000/adonis_bray_curtis.qzv
Saved Visualization to: ./Harvest/kmer_cm5000/adonis_jaccard.qzv


### Check it out! 

#### Harvest

In [70]:
# Harvest: adonis (PERMANOVA) result for the Bray-Curtis distances in cm5000.
Visualization.load('Harvest/cm5000/adonis_bray_curtis.qzv')

In [71]:
# Harvest: adonis (PERMANOVA) result for the Jaccard distances in cm5000.
Visualization.load('Harvest/cm5000/adonis_jaccard.qzv')

In [72]:
# Harvest: adonis (PERMANOVA) result for the Bray-Curtis distances in kmer_cm5000.
Visualization.load('Harvest/kmer_cm5000/adonis_bray_curtis.qzv')

In [73]:
# Harvest: adonis (PERMANOVA) result for the Jaccard distances in kmer_cm5000.
Visualization.load('Harvest/kmer_cm5000/adonis_jaccard.qzv')

#### Veraison

In [74]:
# Veraison: adonis (PERMANOVA) result for the Bray-Curtis distances in cm5000.
Visualization.load('Veraison/cm5000/adonis_bray_curtis.qzv')

In [75]:
# Veraison: adonis (PERMANOVA) result for the Jaccard distances in cm5000.
Visualization.load('Veraison/cm5000/adonis_jaccard.qzv')

In [76]:
# Veraison: adonis (PERMANOVA) result for the Bray-Curtis distances in kmer_cm5000.
Visualization.load('Veraison/kmer_cm5000/adonis_bray_curtis.qzv')

In [77]:
# Veraison: adonis (PERMANOVA) result for the Jaccard distances in kmer_cm5000.
Visualization.load('Veraison/kmer_cm5000/adonis_jaccard.qzv')

## Regress Samples

> Regression targets: Glucose_gL, Tartrate_gL, Malate_gL, Fructose_gL


In [19]:
#!rm -r Harvest/Fructose_gL Harvest/Glucose_gL Harvest/Malate_gL Harvest/Tartrate_gL

In [20]:
%%bash 

# Fit one sample regressor per berry chemistry measurement on the Harvest
# table. The results of each run are written to Harvest/<measurement>/.
berry_metadata=/home/lfloerl/microterroir/Microbiome/Metadata/ITS_Lavaux_BerryChemistry.tsv
harvest_table=Harvest/must_Harvest_filtered_table.qza

for target in Glucose_gL Tartrate_gL Malate_gL Fructose_gL; do
    printf 'Processing regression for %s\n' "$target"

    # The metadata column named by $target is the value to be predicted.
    qiime sample-classifier regress-samples \
        --i-table "$harvest_table" \
        --m-metadata-file "$berry_metadata" \
        --m-metadata-column "$target" \
        --p-n-jobs 10 \
        --output-dir "Harvest/$target"
done


Processing regression for Glucose_gL
Saved SampleEstimator[Regressor] to: Harvest/Glucose_gL/sample_estimator.qza
Saved FeatureData[Importance] to: Harvest/Glucose_gL/feature_importance.qza
Saved SampleData[RegressorPredictions] to: Harvest/Glucose_gL/predictions.qza
Saved Visualization to: Harvest/Glucose_gL/model_summary.qzv
Saved Visualization to: Harvest/Glucose_gL/accuracy_results.qzv
Processing regression for Tartrate_gL
Saved SampleEstimator[Regressor] to: Harvest/Tartrate_gL/sample_estimator.qza
Saved FeatureData[Importance] to: Harvest/Tartrate_gL/feature_importance.qza
Saved SampleData[RegressorPredictions] to: Harvest/Tartrate_gL/predictions.qza
Saved Visualization to: Harvest/Tartrate_gL/model_summary.qzv
Saved Visualization to: Harvest/Tartrate_gL/accuracy_results.qzv
Processing regression for Malate_gL
Saved SampleEstimator[Regressor] to: Harvest/Malate_gL/sample_estimator.qza
Saved FeatureData[Importance] to: Harvest/Malate_gL/feature_importance.qza
Saved SampleData[Regr

In [30]:
# Harvest: accuracy results of the regressor trained on the Glucose_gL column.
Visualization.load('Harvest/Glucose_gL/accuracy_results.qzv')

In [24]:
# Harvest: accuracy results of the regressor trained on the Tartrate_gL column.
Visualization.load('Harvest/Tartrate_gL/accuracy_results.qzv')

In [26]:
# Harvest: accuracy results of the regressor trained on the Fructose_gL column.
Visualization.load('Harvest/Fructose_gL/accuracy_results.qzv')

In [28]:
# Harvest: accuracy results of the regressor trained on the Malate_gL column.
Visualization.load('Harvest/Malate_gL/accuracy_results.qzv')

In [33]:
%%bash 

# Fit one sample regressor per berry chemistry measurement on the Veraison
# table. The results of each run are written to Veraison/<measurement>/.
berry_metadata=/home/lfloerl/microterroir/Microbiome/Metadata/ITS_Lavaux_BerryChemistry.tsv
veraison_table=Veraison/must_Veraison_filtered_table.qza

for target in Glucose_gL Tartrate_gL Malate_gL Fructose_gL; do
    printf 'Processing regression for %s\n' "$target"

    # The metadata column named by $target is the value to be predicted.
    qiime sample-classifier regress-samples \
        --i-table "$veraison_table" \
        --m-metadata-file "$berry_metadata" \
        --m-metadata-column "$target" \
        --p-n-jobs 10 \
        --output-dir "Veraison/$target"
done


Processing regression for Glucose_gL
Saved SampleEstimator[Regressor] to: Veraison/Glucose_gL/sample_estimator.qza
Saved FeatureData[Importance] to: Veraison/Glucose_gL/feature_importance.qza
Saved SampleData[RegressorPredictions] to: Veraison/Glucose_gL/predictions.qza
Saved Visualization to: Veraison/Glucose_gL/model_summary.qzv
Saved Visualization to: Veraison/Glucose_gL/accuracy_results.qzv
Processing regression for Tartrate_gL
Saved SampleEstimator[Regressor] to: Veraison/Tartrate_gL/sample_estimator.qza
Saved FeatureData[Importance] to: Veraison/Tartrate_gL/feature_importance.qza
Saved SampleData[RegressorPredictions] to: Veraison/Tartrate_gL/predictions.qza
Saved Visualization to: Veraison/Tartrate_gL/model_summary.qzv
Saved Visualization to: Veraison/Tartrate_gL/accuracy_results.qzv
Processing regression for Malate_gL
Saved SampleEstimator[Regressor] to: Veraison/Malate_gL/sample_estimator.qza
Saved FeatureData[Importance] to: Veraison/Malate_gL/feature_importance.qza
Saved Sam

In [34]:
# Veraison: accuracy results of the regressor trained on the Glucose_gL column.
Visualization.load('Veraison/Glucose_gL/accuracy_results.qzv')

In [35]:
# Veraison: accuracy results of the regressor trained on the Tartrate_gL column.
Visualization.load('Veraison/Tartrate_gL/accuracy_results.qzv')

In [36]:
# Veraison: accuracy results of the regressor trained on the Fructose_gL column.
Visualization.load('Veraison/Fructose_gL/accuracy_results.qzv')

In [37]:
# Veraison: accuracy results of the regressor trained on the Malate_gL column.
Visualization.load('Veraison/Malate_gL/accuracy_results.qzv')

# Brix 

In [5]:
# List the current directory to see which artifacts and result folders exist.
!ls

Harvest				   Lavaux_berry_samples_table.qza  Veraison
Lavaux_berry_samples_rep_seqs.qza  Lavaux_berry_samples_table.qzv


In [8]:
%%bash 

# Subset the must feature table, once per time point, to the samples that have
# a Brix_degree value. Each subset is stored in that time point's folder.
source_table=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts/ITS/lavaux/must_filtered_table.qza
berry_metadata=/home/lfloerl/microterroir/Microbiome/Metadata/ITS_Lavaux_BerryChemistry.tsv

for stage in Harvest Veraison; do
    qiime feature-table filter-samples \
        --i-table "$source_table" \
        --m-metadata-file "$berry_metadata" \
        --p-where "[Time_point]='$stage' AND [Brix_degree] IS NOT NULL" \
        --o-filtered-table "$stage/must_Brix_filtered_table.qza"
done

Saved FeatureTable[Frequency] to: Harvest/must_Brix_filtered_table.qza
Saved FeatureTable[Frequency] to: Veraison/must_Brix_filtered_table.qza


### Regress samples for Brix 

In [9]:
%%bash

# Fit a sample regressor for Brix_degree on each time point's Brix-filtered
# table. Results are written to <time point>/Brix_degree/.
berry_metadata=/home/lfloerl/microterroir/Microbiome/Metadata/ITS_Lavaux_BerryChemistry.tsv

for stage in Harvest Veraison; do
    qiime sample-classifier regress-samples \
        --i-table "$stage/must_Brix_filtered_table.qza" \
        --m-metadata-file "$berry_metadata" \
        --m-metadata-column "Brix_degree" \
        --p-n-jobs 10 \
        --output-dir "$stage/Brix_degree"
done

Saved SampleEstimator[Regressor] to: Harvest/Brix_degree/sample_estimator.qza
Saved FeatureData[Importance] to: Harvest/Brix_degree/feature_importance.qza
Saved SampleData[RegressorPredictions] to: Harvest/Brix_degree/predictions.qza
Saved Visualization to: Harvest/Brix_degree/model_summary.qzv
Saved Visualization to: Harvest/Brix_degree/accuracy_results.qzv
Saved SampleEstimator[Regressor] to: Veraison/Brix_degree/sample_estimator.qza
Saved FeatureData[Importance] to: Veraison/Brix_degree/feature_importance.qza
Saved SampleData[RegressorPredictions] to: Veraison/Brix_degree/predictions.qza
Saved Visualization to: Veraison/Brix_degree/model_summary.qzv
Saved Visualization to: Veraison/Brix_degree/accuracy_results.qzv


In [10]:
# Harvest: accuracy results of the regressor trained on the Brix_degree column.
Visualization.load('Harvest/Brix_degree/accuracy_results.qzv')

In [11]:
# Veraison: accuracy results of the regressor trained on the Brix_degree column.
# This cell used to load the Harvest result a second time, so the Veraison
# result written by the regression cell was never displayed.
Visualization.load('Veraison/Brix_degree/accuracy_results.qzv')