# Phylogeny with empress

<a id='setup'></a>

## 1. Setup

In [5]:
from qiime2 import Visualization
import os
import pandas as pd
import numpy as np

import qiime2 as q2

%matplotlib inline
# Directory layout used throughout this notebook. Both paths are relative to
# the current working directory.
#   data_dir - receives every artifact and visualization written by this notebook
#   data_seq - holds the input sequence artifact read by the alignment step
data_dir = 'phylogeny_data'
data_seq = 'seq_data_new'

# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.isdir() test.
os.makedirs(data_dir, exist_ok=True)

## 2. De-novo Phylogeny analysis


### 2.1 Sequence alignment

In [13]:
# Run the `qiime alignment mafft` action on the representative sequences stored
# in data_seq/dada2_rep_set.qza; the resulting alignment artifact is written to
# data_dir/aligned-rep-seqs.qza.
! qiime alignment mafft \
    --i-sequences $data_seq/dada2_rep_set.qza \
    --o-alignment $data_dir/aligned-rep-seqs.qza

[32mSaved FeatureData[AlignedSequence] to: phylogeny_data/aligned-rep-seqs.qza[0m
[0m

### 2.2 Alignment masking

In [14]:
# Run the `qiime alignment mask` action on the alignment produced by the mafft
# cell; the masked alignment is written to data_dir/masked-aligned-rep-seqs.qza
# and is the input of the tree-building cells below.
! qiime alignment mask \
    --i-alignment $data_dir/aligned-rep-seqs.qza \
    --o-masked-alignment $data_dir/masked-aligned-rep-seqs.qza

[32mSaved FeatureData[AlignedSequence] to: phylogeny_data/masked-aligned-rep-seqs.qza[0m
[0m

### 2.3 Tree construction and Visualization

In [3]:
# Step 1: build a tree from the masked alignment with `qiime phylogeny fasttree`.
# The saved cell output reports the result as Phylogeny[Unrooted].
! qiime phylogeny fasttree \
    --i-alignment $data_dir/masked-aligned-rep-seqs.qza \
    --o-tree $data_dir/fasttree-tree.qza

# Step 2: root that tree with `qiime phylogeny midpoint-root`; the rooted tree
# (Phylogeny[Rooted] per the saved output) is what the Empress plot consumes.
! qiime phylogeny midpoint-root \
    --i-tree $data_dir/fasttree-tree.qza \
    --o-rooted-tree $data_dir/fasttree-tree-rooted.qza

[32mSaved Phylogeny[Unrooted] to: phylogeny_data/fasttree-tree.qza[0m
[0m[32mSaved Phylogeny[Rooted] to: phylogeny_data/fasttree-tree-rooted.qza[0m
[0m

In [None]:
#### 2.3.1 Visualization using the QIIME 2 Empress plugin

In [4]:
# Create an Empress tree plot of the midpoint-rooted FastTree tree, passing
# data_dir/taxonomy.qza as feature metadata; the visualization is written to
# data_dir/fasttree-tree-rooted.qzv.
# NOTE(review): the saved output of this cell reports that
# phylogeny_data/taxonomy.qza does not exist, so no .qzv was produced. Place
# taxonomy.qza in data_dir (or point the option at its real location) and re-run.
! qiime empress tree-plot \
    --i-tree $data_dir/fasttree-tree-rooted.qza \
    --m-feature-metadata-file $data_dir/taxonomy.qza \
    --o-visualization $data_dir/fasttree-tree-rooted.qzv

[31m[1mThere was an issue with loading the file phylogeny_data/taxonomy.qza as metadata:

  Metadata file path doesn't exist, or the path points to something other than a file. Please check that the path exists, has read permissions, and points to a regular file (not a directory): phylogeny_data/taxonomy.qza

  There may be more errors present in the metadata file. To get a full report, sample/feature metadata files can be validated with Keemei: https://keemei.qiime2.org

  Find details on QIIME 2 metadata requirements here: https://docs.qiime2.org/2022.2/tutorials/metadata/[0m

[0m

In [None]:
# Display the Empress plot of the midpoint-rooted FastTree tree inline. The
# .qzv file is the one the preceding tree-plot cell writes into data_dir.
fasttree_qzv = f'{data_dir}/fasttree-tree-rooted.qzv'
Visualization.load(fasttree_qzv)

#### 2.3.2 Bootstrapping

In [None]:
# Build a tree from the masked alignment with the
# `qiime phylogeny raxml-rapid-bootstrap` action: 100 bootstrap replicates,
# GTRCAT substitution model, 3 threads. Both seeds are fixed so that repeated
# runs start from the same random state.
! qiime phylogeny raxml-rapid-bootstrap \
    --i-alignment $data_dir/masked-aligned-rep-seqs.qza \
    --p-seed 1723 \
    --p-rapid-bootstrap-seed 9384 \
    --p-bootstrap-replicates 100 \
    --p-substitution-model GTRCAT \
    --p-n-threads 3 \
    --o-tree $data_dir/raxml-cat-bootstrap-tree.qza

In [None]:
# Midpoint-root the RAxML bootstrap tree; the rooted tree is written to
# data_dir/raxml-cat-bootstrap-tree-rooted.qza for plotting.
! qiime phylogeny midpoint-root \
    --i-tree $data_dir/raxml-cat-bootstrap-tree.qza \
    --o-rooted-tree $data_dir/raxml-cat-bootstrap-tree-rooted.qza

In [None]:
# Create an Empress tree plot of the rooted RAxML bootstrap tree, passing
# data_dir/taxonomy.qza as feature metadata.
# NOTE(review): requires taxonomy.qza to be present in data_dir - confirm the
# file exists there before running this cell.
! qiime empress tree-plot \
    --i-tree $data_dir/raxml-cat-bootstrap-tree-rooted.qza \
    --m-feature-metadata-file $data_dir/taxonomy.qza \
    --o-visualization $data_dir/raxml-cat-bootstrap-tree-rooted.qzv

In [None]:
# Display the Empress plot of the rooted RAxML bootstrap tree inline. The
# .qzv file is the one the preceding tree-plot cell writes into data_dir.
raxml_qzv = f'{data_dir}/raxml-cat-bootstrap-tree-rooted.qzv'
Visualization.load(raxml_qzv)

### 2.4 Fragment insertion

In [None]:
# Download the SEPP reference database artifact into data_dir
# (-nv: non-verbose output, -O: explicit output file, overwritten on each run).
# NOTE(review): the URL pins the 2021.4 release, while the saved error output
# earlier in this notebook links to the 2022.2 docs - confirm this reference
# is compatible with the installed QIIME 2 version.
! wget -nv -O $data_dir/sepp-refs-gg-13-8.qza https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza

In [None]:
# Run the `qiime fragment-insertion sepp` action with the downloaded reference
# database, using 2 threads; it writes both a tree (sepp-tree.qza) and a
# placements artifact (sepp-tree-placements.qza) into data_dir.
# NOTE(review): rep-seqs-filtered.qza is not created by any cell visible in
# this notebook, and the alignment step reads data_seq/dada2_rep_set.qza
# instead - confirm this input path.
! qiime fragment-insertion sepp \
    --i-representative-sequences $data_dir/rep-seqs-filtered.qza \
    --i-reference-database $data_dir/sepp-refs-gg-13-8.qza \
    --p-threads 2 \
    --o-tree $data_dir/sepp-tree.qza \
    --o-placements $data_dir/sepp-tree-placements.qza

In [None]:
# Create an Empress tree plot of the SEPP tree, passing data_dir/taxonomy.qza
# as feature metadata.
# NOTE(review): requires taxonomy.qza to be present in data_dir - confirm the
# file exists there before running this cell.
! qiime empress tree-plot \
    --i-tree $data_dir/sepp-tree.qza \
    --m-feature-metadata-file $data_dir/taxonomy.qza \
    --o-visualization $data_dir/sepp-tree.qzv

In [None]:
# Display the Empress plot of the SEPP tree inline. The .qzv file is the one
# the preceding tree-plot cell writes into data_dir.
sepp_qzv = f'{data_dir}/sepp-tree.qzv'
Visualization.load(sepp_qzv)