# 2019-08-08 Biomphalaria tissues

## Aim

Internship project of Lauren Carruthers: compare the microbiomes of different tissues from two *Biomphalaria* species.


In [None]:
# Project directory
# `mkdir -p` keeps the cell re-runnable: the MiSeq output is said to have been
# uploaded beforehand, so the project directory may already exist.
project_dir="$HOME/analyses/11-Microbiome/2019-08-08_Biomphalaria_tissues"
mkdir -p "$project_dir"
cd "$project_dir"

# FASTQ generation (MiSeq output previously uploaded)
# bcl2fastq is started from inside the run folder. The subshell keeps the
# directory change local: if the `cd` fails, the conversion is not started from
# the wrong place and the session stays in the project directory (the former
# `cd ../..` would have moved it two levels above the project).
# NOTE(review): the fastq files are written to "0-Raw data/fastq_files/", while
# the linking cell reads ../0-Libraries/ - confirm how the files get there.
(
  cd "0-Raw data/190806_M01370_0001_000000000-CKY27/" &&
    nohup bcl2fastq --output-dir ../fastq_files/
)

# Working directory
mkdir -p 1-Qiime
cd 1-Qiime

In [1]:
cd "$HOME/analyses/11-Microbiome/2019-08-08_Biomphalaria_tissues/1-Qiime"

# Link data
mkdir -p data

# Expand the glob once so that an empty match can be detected instead of
# creating a bogus link literally named "*.fastq.gz".
fastq_files=( ../0-Libraries/*.fastq.gz )
if [[ -e "${fastq_files[0]}" ]]; then
  # -r (GNU ln): compute each target relative to the link location. A plain
  # `ln -s ../0-Libraries/x data/` stores "../0-Libraries/x", which is resolved
  # from inside data/ and therefore dangles.
  # -f: replace links left by a previous run.
  ln -s -f -r -- "${fastq_files[@]}" data/
else
  echo "No fastq.gz file found in ../0-Libraries" >&2
fi

In [5]:
# Create the manifest for importing data in artefact
## source: https://docs.qiime2.org/2019.4/tutorials/importing/#fastq-manifest-formats

# build_manifest DATA_DIR FIELDS
#   Print a paired-end manifest (header, then one row per sample) on stdout.
#   DATA_DIR  directory holding the fastq files; it is written as-is in front
#             of each file name, so pass an absolute path.
#   FIELDS    "_"-separated fields of the file name (cut -f syntax) forming
#             the sample id; "_" is turned into "." in the id.
#   Each file is assigned to a sample from its own name, not through a
#   "*id*" glob, so ids that are substrings of one another (1 vs 11) do not
#   pick up each other's files. Files of one sample are expected to be
#   adjacent in the sorted listing, forward read first.
build_manifest() {
  local data_dir=$1 fields=$2
  local fq key id prev_id="" open_row=0

  # Header first, so that it is present even when no file is found
  printf 'sample-id\tforward-absolute-filepath\treverse-absolute-filepath\n'

  for fq in "$data_dir"/*; do
    # An unmatched glob stays literal: skip it (dangling links are kept)
    [[ -e "$fq" || -L "$fq" ]] || continue

    key=$(cut -d "_" -f "$fields" <<<"${fq##*/}")
    id=${key//_/.}
    if [[ -z "$id" ]]; then
      printf 'build_manifest: cannot derive a sample id from %s\n' "$fq" >&2
      continue
    fi

    # Start a new row whenever the sample id changes
    if [[ "$id" != "$prev_id" ]]; then
      if (( open_row )); then printf '\n'; fi
      printf '%s' "$id"
      prev_id=$id
      open_row=1
    fi
    printf '\t%s' "$fq"
  done

  if (( open_row )); then printf '\n'; fi
}

# Sample id = fields 3 to 5 of the file name
build_manifest "$PWD/data" 3-5 > manifest

In [None]:
# Load conda's shell integration for this session
. ~/local/pckg/python/miniconda3/etc/profile.d/conda.sh

# Clear PYTHONPATH to avoid any potential conflict
export PYTHONPATH=

# Switch to the QIIME 2 environment
conda activate qiime2-2019.4

# Import the reads listed in the manifest as a paired-end artefact
## source: https://docs.qiime2.org/2019.4/tutorials/importing/
import_args=(
  --type 'SampleData[PairedEndSequencesWithQuality]'
  --input-path manifest
  --input-format PairedEndFastqManifestPhred33V2
  --output-path demux-paired-end.qza
)
qiime tools import "${import_args[@]}"

In [9]:
# Summarise the imported reads to inspect their quality
qiime demux summarize --i-data demux-paired-end.qza --o-visualization demux-paired-end.qzv

(qiime2-2019.4) [32mSaved Visualization to: demux-paired-end2.qzv[0m
(qiime2-2019.4) 

: 1

In [12]:
# Denoise the paired-end reads with DADA2
dada2_args=(
  --i-demultiplexed-seqs demux-paired-end.qza
  # Truncation length of the forward / reverse reads
  --p-trunc-len-f 250
  --p-trunc-len-r 250
  # Bases trimmed at the start of the forward / reverse reads
  --p-trim-left-f 0
  --p-trim-left-r 13
  # 0 = use all available cores (per the DADA2 plugin documentation)
  --p-n-threads 0
  --o-table table.qza
  --o-representative-sequences rep-seqs.qza
  --o-denoising-stats denoising-stats.qza
)
qiime dada2 denoise-paired "${dada2_args[@]}"

[32mSaved FeatureTable[Frequency] to: table.qza[0m
[32mSaved FeatureData[Sequence] to: rep-seqs.qza[0m
[32mSaved SampleData[DADA2Stats] to: denoising-stats.qza[0m
(qiime2-2019.4) 

: 1

In [13]:
# Reuse the reference database of the 2019-05-09_paper project through a symlink
ln --symbolic ../../2019-05-09_paper/1-Qiime/database/ .

# Taxonomic assignment of the representative sequences (vsearch consensus,
# 97 % identity, one thread per available processing unit)
taxonomy_args=(
  --i-query rep-seqs.qza
  --i-reference-reads database/silva_132_99_16S.qza
  --i-reference-taxonomy database/silva_132_99_16S_taxa.qza
  --p-perc-identity 0.97
  --p-threads "$(nproc)"
  --o-classification rep-seqs_taxa.qza
)
qiime feature-classifier classify-consensus-vsearch "${taxonomy_args[@]}"

(qiime2-2019.4) 

: 1

In [None]:
# Create the sample metadata table from the fastq file names

# build_sample_metadata DATA_DIR
#   Print the metadata table (header, then one row per sample) on stdout.
#   Only the first five "_"-separated fields of each file name are used; they
#   are joined with "." and re-split on ".", then:
#     sample-id = fields 3-5, Species = field 4,
#     Comb      = fields 4-5, Tissue  = field 5
#   Each sample id is written once (forward and reverse reads share it).
build_sample_metadata() {
  local data_dir=$1
  local fq prefix id
  local -a fld
  local -A seen=()

  # Header first, so that it is present even when no file is found
  printf 'sample-id\tSpecies\tComb\tTissue\n'

  for fq in "$data_dir"/*; do
    # An unmatched glob stays literal: skip it (dangling links are kept)
    [[ -e "$fq" || -L "$fq" ]] || continue

    prefix=$(cut -d "_" -f -5 <<<"${fq##*/}")
    IFS=. read -r -a fld <<<"${prefix//_/.}"
    if (( ${#fld[@]} < 5 )); then
      printf 'build_sample_metadata: unexpected file name %s\n' "$fq" >&2
      continue
    fi

    id="${fld[2]}.${fld[3]}.${fld[4]}"
    if [[ -n "${seen[$id]:-}" ]]; then
      continue
    fi
    seen[$id]=1

    printf '%s\t%s\t%s\t%s\n' "$id" "${fld[3]}" "${fld[3]}.${fld[4]}" "${fld[4]}"
  done
}

build_sample_metadata data > sample-metadata.tsv

In [19]:
## !! WARNING To be removed when samplesheet corrected
# Temporary patch: in every column after the first, a value starting with "Ca"
# is rewritten to start with "Ba" (the file is edited in place).
sed --in-place --expression='s/\tCa/\tBa/g' sample-metadata.tsv

(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) 

: 1

In [20]:
# source: https://chmi-sops.github.io/mydoc_qiime2.html

# 1. Multiple sequence alignment of the representative sequences with MAFFT
qiime alignment mafft --i-sequences rep-seqs.qza --o-alignment aligned-rep-seqs.qza

# 2. Mask (filter) the alignment to drop highly variable positions, which are
#    generally considered to add noise to the resulting phylogenetic tree
qiime alignment mask --i-alignment aligned-rep-seqs.qza --o-masked-alignment masked-aligned-rep-seqs.qza

# 3. Build the unrooted tree with FastTree
qiime phylogeny fasttree --i-alignment masked-aligned-rep-seqs.qza --o-tree unrooted-tree.qza

# 4. Root the tree with midpoint rooting
qiime phylogeny midpoint-root --i-tree unrooted-tree.qza --o-rooted-tree rooted-tree.qza

(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved FeatureData[AlignedSequence] to: aligned-rep-seqs.qza[0m
(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved FeatureData[AlignedSequence] to: masked-aligned-rep-seqs.qza[0m
(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved Phylogeny[Unrooted] to: unrooted-tree.qza[0m
(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved Phylogeny[Rooted] to: rooted-tree.qza[0m
(qiime2-2019.4) 

: 1

In [32]:
# Alpha rarefaction curves up to a depth of 33000 sequences
rarefaction_args=(
  --i-table table.qza
  --i-phylogeny rooted-tree.qza
  --p-max-depth 33000
  --m-metadata-file sample-metadata.tsv
  --o-visualization alpha-rarefaction.qzv
)
qiime diversity alpha-rarefaction "${rarefaction_args[@]}"

[32mSaved Visualization to: alpha-rarefaction.qzv[0m
(qiime2-2019.4) 

: 1

In [22]:
# Core diversity metrics at a sampling depth of 5000
core_metrics_args=(
  --i-phylogeny rooted-tree.qza
  --i-table table.qza
  --p-sampling-depth 5000
  --m-metadata-file sample-metadata.tsv
  --output-dir core-metrics-results
)
qiime diversity core-metrics-phylogenetic "${core_metrics_args[@]}"

[32mSaved FeatureTable[Frequency] to: core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] % Properties('phylogenetic') to: core-metrics-results/faith_pd_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/observed_otus_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix % Properties('phylogenetic') to: core-metrics-results/unweighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix % Properties('phylogenetic') to: core-metrics-results/weighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: core-metrics-results/unweighted_unifrac_pcoa_results.qza[0m
[32mSaved PCoAResults to: core-me

: 1

In [34]:
# Core diversity metrics at a sampling depth of 20000, kept in a separate
# output directory
core_metrics_args=(
  --i-phylogeny rooted-tree.qza
  --i-table table.qza
  --p-sampling-depth 20000
  --m-metadata-file sample-metadata.tsv
  --output-dir core-metrics-results_20000
)
qiime diversity core-metrics-phylogenetic "${core_metrics_args[@]}"

[32mSaved FeatureTable[Frequency] to: core-metrics-results_20000/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] % Properties('phylogenetic') to: core-metrics-results_20000/faith_pd_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results_20000/observed_otus_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results_20000/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results_20000/evenness_vector.qza[0m
[32mSaved DistanceMatrix % Properties('phylogenetic') to: core-metrics-results_20000/unweighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix % Properties('phylogenetic') to: core-metrics-results_20000/weighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results_20000/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results_20000/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: core-metrics-results_20000/unweighted_uni

: 1

In [None]:
## NOT RUN
# Pairwise test of group differences on the unweighted UniFrac distances.
# beta-group-significance needs the metadata column that defines the groups
# (--m-metadata-column); it was missing from the command.
# NOTE(review): "Tissue" is chosen to match the "body-site" output name and the
# aim of the project - switch to Species or Comb if another grouping is wanted.
qiime diversity beta-group-significance \
  --i-distance-matrix core-metrics-results/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file sample-metadata.tsv \
  --m-metadata-column Tissue \
  --o-visualization core-metrics-results/unweighted-unifrac-body-site-significance.qzv \
  --p-pairwise

In [26]:
# -p: do not fail when the cell is run again and the directory already exists
mkdir -p core-metrics-visu

# Disabled option kept for reference on both plots: --p-custom-axes <column>
# (previously left as dangling commented-out continuation lines after each
# command).

#first, use the unweighted unifrac data as input
qiime emperor plot \
    --i-pcoa core-metrics-results/unweighted_unifrac_pcoa_results.qza \
    --m-metadata-file sample-metadata.tsv \
    --o-visualization core-metrics-visu/unweighted-unifrac-emperor-DaysSinceExperimentStart.qzv

#now repeat with bray curtis
qiime emperor plot \
    --i-pcoa core-metrics-results/bray_curtis_pcoa_results.qza \
    --m-metadata-file sample-metadata.tsv \
    --o-visualization core-metrics-visu/bray-curtis-emperor-DaysSinceExperimentStart.qzv

(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved Visualization to: core-metrics-visu/unweighted-unifrac-emperor-DaysSinceExperimentStart.qzv[0m
(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved Visualization to: core-metrics-visu/bray-curtis-emperor-DaysSinceExperimentStart.qzv[0m
(qiime2-2019.4) (qiime2-2019.4) 

: 1

In [35]:
# Directory receiving the Emperor visualizations of the 20000-depth run
output_dir="core-metrics-visu_20000"
if [[ ! -d "$output_dir" ]]; then
  mkdir -p "$output_dir"
fi

# One Emperor plot per PCoA: unweighted UniFrac first, then Bray-Curtis.
# The output name is the metric name with "_" turned into "-".
# (Disabled option kept for reference: --p-custom-axes <column>)
for metric in unweighted_unifrac bray_curtis; do
  qiime emperor plot \
    --i-pcoa "core-metrics-results_20000/${metric}_pcoa_results.qza" \
    --m-metadata-file sample-metadata.tsv \
    --o-visualization "$output_dir/${metric//_/-}-emperor.qzv"
done

(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved Visualization to: core-metrics-visu_20000/unweighted-unifrac-emperor.qzv[0m
(qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) (qiime2-2019.4) [32mSaved Visualization to: core-metrics-visu_20000/bray-curtis-emperor.qzv[0m
(qiime2-2019.4) (qiime2-2019.4) 

: 1

## New trial for poster

### Preparation of the environment

In [None]:
# Load conda's shell integration for this session
. ~/local/pckg/python/miniconda3/etc/profile.d/conda.sh

# Move to the Qiime working directory of the project
# NOTE(review): this path is relative to the directory the kernel was started
# in, and differs from the earlier cells
# ($HOME/analyses/11-Microbiome/2019-08-08_Biomphalaria_tissues) - confirm.
cd analysis/11-Microbiome/2019-08_Biomphalaria_tissues/1-Qiime

# Clear PYTHONPATH to avoid any potential conflict
export PYTHONPATH=



In [None]:
# Installation of Qiime2 and picrust2
# An environment YAML file is read by the `conda env` subcommand; `conda create`
# does not build an environment from it.
# NOTE(review): the next cell activates `ubiome_organs`, so env.yml presumably
# declares that name - confirm.
conda env create -f env.yml

In [None]:
# Switch to the project environment
conda activate ubiome_organs

# Load the QIIME 2 tab-completion script into this shell
. tab-qiime

In [None]:
# Qiime output directory
qdir="results/1-qiime"
# `mkdir -p` already succeeds when the directory exists; the former
# `[[ ! -d ... ]] &&` guard made the cell end with status 1 on every re-run.
mkdir -p "$qdir"

In [None]:
# Preparing data

In [None]:
# Create the manifest for importing data in artefact
## source: https://docs.qiime2.org/2019.4/tutorials/importing/#fastq-manifest-formats

# build_manifest DATA_DIR FIELDS
#   Print a paired-end manifest (header, then one row per sample) on stdout.
#   DATA_DIR  directory holding the fastq files; it is written as-is in front
#             of each file name, so pass an absolute path.
#   FIELDS    "_"-separated fields of the file name (cut -f syntax) forming
#             the sample id; "_" is turned into "." in the id.
#   Each file is assigned to a sample from its own name, not through an
#   "id*" glob, so ids that are prefixes of one another (1 vs 11) do not
#   pick up each other's files. Files of one sample are expected to be
#   adjacent in the sorted listing, forward read first.
build_manifest() {
  local data_dir=$1 fields=$2
  local fq key id prev_id="" open_row=0

  # Header first, so that it is present even when no file is found
  printf 'sample-id\tforward-absolute-filepath\treverse-absolute-filepath\n'

  for fq in "$data_dir"/*; do
    # An unmatched glob stays literal: skip it (dangling links are kept)
    [[ -e "$fq" || -L "$fq" ]] || continue

    key=$(cut -d "_" -f "$fields" <<<"${fq##*/}")
    id=${key//_/.}
    if [[ -z "$id" ]]; then
      printf 'build_manifest: cannot derive a sample id from %s\n' "$fq" >&2
      continue
    fi

    # Start a new row whenever the sample id changes
    if [[ "$id" != "$prev_id" ]]; then
      if (( open_row )); then printf '\n'; fi
      printf '%s' "$id"
      prev_id=$id
      open_row=1
    fi
    printf '\t%s' "$fq"
  done

  if (( open_row )); then printf '\n'; fi
}

# Sample id = first three fields of the file name
build_manifest "$PWD/data" -3 > manifest

In [None]:
# Prepare database

#Import data

# Make summary

### Clustering and denoising

In [None]:
# Denoise the paired-end reads with DADA2; every artefact lives under $qdir
dada2_args=(
  --i-demultiplexed-seqs "$qdir/demux-paired-end.qza"
  # Truncation length of the forward / reverse reads
  --p-trunc-len-f 177
  --p-trunc-len-r 202
  # Bases trimmed at the start of the forward / reverse reads
  --p-trim-left-f 0
  --p-trim-left-r 13
  # Maximum expected errors allowed in the forward / reverse reads
  --p-max-ee-f 5
  --p-max-ee-r 10
  # 0 = use all available cores (per the DADA2 plugin documentation)
  --p-n-threads 0
  --o-table "$qdir/table.qza"
  --o-representative-sequences "$qdir/rep-seqs.qza"
  --o-denoising-stats "$qdir/denoising-stats.qza"
)
qiime dada2 denoise-paired "${dada2_args[@]}"

In [None]:
# Taxonomic assignment of the representative sequences (vsearch consensus,
# 97 % identity, one thread per available processing unit)
taxonomy_args=(
  --i-query "$qdir/rep-seqs.qza"
  --i-reference-reads database/silva_132_99_16S.qza
  --i-reference-taxonomy database/silva_132_99_16S_taxa.qza
  --p-perc-identity 0.97
  --p-threads "$(nproc)"
  --o-classification "$qdir/rep-seqs_taxa.qza"
)
qiime feature-classifier classify-consensus-vsearch "${taxonomy_args[@]}"

In [None]:
# source: https://chmi-sops.github.io/mydoc_qiime2.html

# Artefacts of the tree-building chain, each one feeding the next step
rep_seqs="$qdir/rep-seqs.qza"
aligned="$qdir/aligned-rep-seqs.qza"
masked="$qdir/masked-aligned-rep-seqs.qza"
unrooted="$qdir/unrooted-tree.qza"
rooted="$qdir/rooted-tree.qza"

# 1. Multiple sequence alignment with MAFFT
qiime alignment mafft --i-sequences "$rep_seqs" --o-alignment "$aligned"

# 2. Mask (filter) the alignment to drop highly variable positions, which are
#    generally considered to add noise to the resulting phylogenetic tree
qiime alignment mask --i-alignment "$aligned" --o-masked-alignment "$masked"

# 3. Build the unrooted tree with FastTree
qiime phylogeny fasttree --i-alignment "$masked" --o-tree "$unrooted"

# 4. Root the tree with midpoint rooting
qiime phylogeny midpoint-root --i-tree "$unrooted" --o-rooted-tree "$rooted"