In [33]:
import qiime2
import pandas as pd
import os
from qiime2.plugins import (feature_table, alignment, phylogeny, diversity)
from tqdm import tqdm

# Project layout, resolved relative to the directory the notebook is run from.
base_dir = os.getcwd()

# Inputs (sample metadata) live under <base_dir>/data.
data_dir = os.path.join(base_dir, 'data')

# Outputs live under <base_dir>/results; merged-dataset artifacts go in 'merged'.
results_dir = os.path.join(base_dir, 'results')
merged_dir = os.path.join(results_dir, 'merged')

# merged_alpha_dir: directory where the alpha diversity results are saved.
merged_alpha_dir = os.path.join(merged_dir, 'alpha')

def _alpha_result_to_frame(alpha_result):
    '''Convert the result of a q2-diversity alpha pipeline into a DataFrame indexed by sample ID.'''
    return alpha_result.alpha_diversity.view(qiime2.Metadata).to_dataframe()


def cal_alpha_diversity(sample_metadata_df, table_clean, rooted_tree,
                        metadata_columns=('ProjectID', 'HostName', 'BodySite', 'BodysiteClass')):
    '''Calculate alpha diversity metrics and return a DataFrame with results.

    Computes the 'shannon', 'pielou_e' and 'observed_features' metrics with
    diversity.pipelines.alpha and 'faith_pd' with
    diversity.pipelines.alpha_phylogenetic, then left-joins each metric onto
    the selected metadata columns by index (sample ID).

    Args:
        sample_metadata_df (pd.DataFrame): DataFrame containing sample metadata,
            indexed by sample ID.
        table_clean (qiime2.Artifact): Cleaned feature table artifact.
        rooted_tree (qiime2.Artifact): Rooted phylogenetic tree artifact.
        metadata_columns (iterable of str): Metadata columns to carry into the
            result. Defaults to the columns this notebook has always used.
    Returns:
        pd.DataFrame: One row per row of sample_metadata_df, holding the selected
            metadata columns followed by one column per metric. Because the joins
            are left joins, samples without a value for a metric get NaN there.
    Raises:
        KeyError: If any of metadata_columns is missing from sample_metadata_df.'''
    metadata_columns = list(metadata_columns)

    # Validate before running the pipelines so a bad metadata file fails fast
    # instead of after all four diversity computations have finished.
    missing_columns = [col for col in metadata_columns
                       if col not in sample_metadata_df.columns]
    if missing_columns:
        raise KeyError(f'sample metadata is missing required columns: {missing_columns}')

    # Non-phylogenetic metrics share one pipeline and differ only by metric name.
    metric_frames = [
        _alpha_result_to_frame(diversity.pipelines.alpha(table=table_clean, metric=metric))
        for metric in ('shannon', 'pielou_e', 'observed_features')
    ]
    # Faith's PD additionally needs the rooted phylogeny.
    metric_frames.append(
        _alpha_result_to_frame(
            diversity.pipelines.alpha_phylogenetic(table=table_clean,
                                                   phylogeny=rooted_tree,
                                                   metric='faith_pd')))

    alpha_df = sample_metadata_df[metadata_columns]
    for metric_frame in metric_frames:
        # Left join on the index keeps every metadata row, in metadata order.
        alpha_df = pd.merge(alpha_df, metric_frame,
                            left_index=True, right_index=True, how='left')
    return alpha_df

In [34]:
# Construct a rooted phylogenetic tree from the merged representative sequences:
# MAFFT alignment -> alignment masking -> FastTree -> midpoint rooting.
# Each intermediate artifact is saved into merged_dir (save() appends '.qza').
N_THREADS = 16  # threads handed to both MAFFT and FastTree

merged_rep_seqs = qiime2.Artifact.load(os.path.join(merged_dir, 'merged_rep_seqs.qza'))

aligned_rep_seqs = alignment.methods.mafft(merged_rep_seqs, n_threads=N_THREADS).alignment
masked_aligned_rep_seqs = alignment.methods.mask(aligned_rep_seqs).masked_alignment
# The in-memory artifact is passed straight to FastTree below; there is no need
# to read the file that was just written back from disk.
masked_aligned_rep_seqs.save(os.path.join(merged_dir, 'masked_aligned_rep_seqs'))

unrooted_tree = phylogeny.methods.fasttree(masked_aligned_rep_seqs, n_threads=N_THREADS).tree
unrooted_tree.save(os.path.join(merged_dir, 'unrooted_tree'))

rooted_tree = phylogeny.methods.midpoint_root(unrooted_tree).rooted_tree
# To reuse a previously built tree instead of rebuilding, replace the steps above with:
#   rooted_tree = qiime2.Artifact.load(os.path.join(merged_dir, 'rooted_tree.qza'))
rooted_tree.save(os.path.join(merged_dir, 'rooted_tree'))


Running external command line application. This may print messages to stdout and/or stderr.
The command being run is below. This command cannot be manually re-run as it will depend on temporary files that no longer exist.

Command: mafft --preservecase --inputorder --thread 16 /tmp/qiime2/gaoyuze/data/35829b82-f43b-4bb9-9067-8a82217f6a33/data/dna-sequences.fasta



inputfile = orig
1131 x 390 - 145 d
nthread = 16
nthreadpair = 16
nthreadtb = 16
ppenalty_ex = 0
stacksize: 8192 kb
generating a scoring matrix for nucleotide (dist=200) ... done
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
 1101 / 1131 (thread   12)
done.

Constructing a UPGMA tree (efffree=0) ... 
 1120 / 1131
done.

Progressive alignment 1/2... 
STEP   103 / 1130 (thread   11)
Reallocating..done. *alloclen = 1781
STEP  1101 / 1130 (thread    7)
done.

Making a distance matrix from msa.. 
 1100 / 1131 (thread    3)
done.

Constructing a UPGMA tree (efffree=1) ... 
 1120 / 1131
done.

Progressive alignment 2/2... 
STEP   382 / 1130 (thread   13)
Reallocating..done. *alloclen = 1782
STEP  1101 / 1130 (thread    7) h
done.

disttbfast (nuc) Version 7.520
alg=A, model=DNA200 (2), 1.53 (4.59), -0.00 (-0.00), noshift, amax=0.0
16 thread(s)


Strategy:
 FFT-NS-2 (Fast but rough)
 Progressive method (guide trees were built 2 times.)

If unsure which option to use, try 'ma

Running external command line application. This may print messages to stdout and/or stderr.
The command being run is below. This command cannot be manually re-run as it will depend on temporary files that no longer exist.

Command: FastTreeMP -quote -nt /tmp/qiime2/gaoyuze/data/f52b7e00-570b-4fe2-89f9-9b3de90c9d0a/data/aligned-dna-sequences.fasta



FastTree Version 2.1.11 Double precision (No SSE3), OpenMP (16 threads)
Alignment: /tmp/qiime2/gaoyuze/data/f52b7e00-570b-4fe2-89f9-9b3de90c9d0a/data/aligned-dna-sequences.fasta
Nucleotide distances: Jukes-Cantor Joins: balanced Support: SH-like 1000
Search: Normal +NNI +SPR (2 rounds range 10) +ML-NNI opt-each=1
TopHits: 1.00*sqrtN close=default refresh=0.80
ML Model: Jukes-Cantor, CAT approximation with 20 rate categories
      0.13 seconds: Joined    200 of   1113
      0.23 seconds: Joined    400 of   1113
      0.34 seconds: Joined    600 of   1113
      0.44 seconds: Joined    800 of   1113
      0.54 seconds: Joined   1000 of   1113
Initial topology in 0.62 seconds
Refining topology: 40 rounds ME-NNIs, 2 rounds ME-SPRs, 20 rounds ML-NNIs
      0.65 seconds: ME NNI round 2 of 40, 101 of 1114 splits, 12 changes (max delta 0.015)
      0.80 seconds: SPR round   1 of   2, 101 of 2230 nodes
      0.93 seconds: SPR round   1 of   2, 401 of 2230 nodes
      1.03 seconds: SPR round   1 

'/home/gaoyuze/symbiotic_microbiota_analysis/results/merged/rooted_tree.qza'

In [35]:
# Load the sample metadata and the cleaned, merged feature table.
sample_metadata = qiime2.Metadata.load(os.path.join(data_dir, 'sample_metadata.txt'))
merged_table_clean = qiime2.Artifact.load(os.path.join(merged_dir, 'merged_table_clean.qza'))

# rooted_tree comes from the tree-building cell above.
# reset_index() turns the sample-ID index into an ordinary column so that it is
# still written out by to_csv(index=False).
alpha_df = cal_alpha_diversity(
    sample_metadata.to_dataframe(), merged_table_clean, rooted_tree
).reset_index()

# The output directory is not created anywhere else, so make sure it exists.
os.makedirs(merged_alpha_dir, exist_ok=True)
alpha_df.to_csv(os.path.join(merged_alpha_dir, 'alpha_diversity.csv'), index=False)

print('STEP 5  Done!')

Running external command line application. This may print messages to stdout and/or stderr.
The command being run is below. This command cannot be manually re-run as it will depend on temporary files that no longer exist.

Command:

faithpd -i /tmp/qiime2/gaoyuze/data/0036ba6b-337e-4a6e-b5f4-0309d2b4dc06/data/feature-table.biom -t /tmp/qiime2/gaoyuze/data/73382eed-be36-45ce-b307-2c12fafee5a4/data/tree.nwk -o /tmp/q2-AlphaDiversityFormat-a2h0g6u7

STEP 5  Done!


In [36]:
# Preview the first rows only; the full table is in alpha_diversity.csv.
alpha_df.head(10)

Unnamed: 0,SampleID,ProjectID,HostName,BodySite,BodysiteClass,shannon_entropy,pielou_evenness,observed_features,faith_pd
0,SRR23216662,PRJNA924021,Terrapene carolina,Cloaca,CloacaReptilia,4.46131,0.618814,148.0,10.186701
1,SRR23216663,PRJNA924021,Terrapene carolina,Cloaca,CloacaReptilia,4.299742,0.608504,134.0,9.457109
2,SRR23216664,PRJNA924021,Terrapene carolina,Cloaca,CloacaReptilia,4.197208,0.610915,117.0,8.93719
3,SRR23216665,PRJNA924021,Terrapene carolina,Cloaca,CloacaReptilia,4.419209,0.618093,142.0,9.666447
4,SRR23216666,PRJNA924021,Terrapene carolina,Cloaca,CloacaReptilia,4.248253,0.588468,149.0,10.92974
5,SRR23216667,PRJNA924021,Terrapene carolina,Cloaca,CloacaReptilia,4.695012,0.650354,149.0,10.92974
6,ERR1474235,PRJEB14602,Varanus komodoensis,feces,FecesReptilia,5.288433,0.71792,165.0,11.020333
7,ERR1474236,PRJEB14602,Varanus komodoensis,oral cavity,Oral_cavityReptilia,6.785875,0.779326,418.0,24.741341
8,ERR1474237,PRJEB14602,Varanus komodoensis,skin,SkinReptilia,5.491313,0.780745,131.0,10.85253
9,ERR1474238,PRJEB14602,Varanus komodoensis,feces,FecesReptilia,4.663354,0.746384,76.0,5.79836
