In [None]:
#activate qiime2 environment
#Must be run first so that the qiime commands in the cells below resolve to the qiime2-2019.10 conda environment
conda activate qiime2-2019.10

In [None]:
#Merge the FASTA sequences and their matching QUAL scores into a single FASTQ file.
#The result is written into the fastq_files/ directory.
convert_fastaqual_fastq.py \
  -f JB2020/091416TM1-full.fasta \
  -q JB2020/091416TM1-full.qual \
  -o fastq_files/

In [None]:
#Extract barcodes: an 8-base barcode is split off each read (-c barcode_single_end -l 8).
#The input is the FASTQ produced by the fasta/qual conversion cell, which writes an
#un-gzipped file into fastq_files/ (the previous path JB2020/...fastq.gz is never created).
#The files written to barcodes/ still have to be renamed to "sequences.fastq.gz" and
#"barcodes.fastq.gz" (and gzipped) before they can be imported into QIIME 2.
extract_barcodes.py \
  -f fastq_files/091416TM1-full.fastq \
  -c barcode_single_end \
  -l 8 \
  -o barcodes/


In [None]:
#Rename reads to sequences and gzip
#extract_barcodes.py writes reads.fastq and barcodes.fastq into barcodes/; the QIIME 2 import
#reads the directory 091416/ and needs the files to be called sequences.fastq.gz and barcodes.fastq.gz.
#(gzip skips files that already end in .gz, so the un-compressed .fastq files are the ones to compress.)
mkdir -p 091416
mv barcodes/reads.fastq 091416/sequences.fastq
mv barcodes/barcodes.fastq 091416/barcodes.fastq
gzip 091416/sequences.fastq 091416/barcodes.fastq

In [None]:
#Validate Metadata file
#Validate the sample metadata with the Keemei Google Sheets add-on, then download the sheet as a tab-separated (.tsv) file


In [None]:
#Import the sequences and barcodes found in 091416/ as a QIIME 2 artifact (.qza)
#of semantic type EMPSingleEndSequences.
qiime tools import \
  --type EMPSingleEndSequences \
  --input-path 091416/ \
  --output-path 091416.qza


In [None]:
#Demultiplex the imported reads using the barcodes listed in the metadata file (column BarcodeSequence).
#Golay error correction is switched off because of the barcode format (8-base barcodes were extracted,
#not 12-base Golay barcodes).
#The input is 091416.qza, the artifact written by the import cell (jb2020.qza is never created).
qiime demux emp-single \
  --i-seqs 091416.qza \
  --m-barcodes-file JBmetadata091416.tsv \
  --m-barcodes-column BarcodeSequence \
  --p-no-golay-error-correction \
  --o-per-sample-sequences demuxJB.qza \
  --o-error-correction-details errors.qza


In [None]:
#Summarize the demultiplexed reads per sample; the resulting visualization is used
#to choose the trim/truncation positions for denoising.
qiime demux summarize \
  --i-data demuxJB.qza \
  --o-visualization demuxJB.qzv


In [None]:
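#Reads are trimmed on the left at position 293 and truncated at position 480 to keep the higher-quality bases (quality score > 20). The lower whisker at position 293 is 18, i.e. roughly a 1.6% error rate (Q20 corresponds to 1%).
#Random sampling of 10,000 reads without replacement loses many reads. Elected to retain the lowest-quality ~10% of sequences to avoid losing data (91% of reads at the left-trim position have a quality score of at least 20; the lowest is 18).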


In [None]:
#Denoise with DADA2 (corrects amplicon errors rather than discarding the affected reads).
#Bases before position 293 are trimmed off and reads are truncated at position 480.
qiime dada2 denoise-single \
  --i-demultiplexed-seqs demuxJB.qza \
  --p-trim-left 293 \
  --p-trunc-len 480 \
  --o-table table-dada2.qza \
  --o-representative-sequences dada2-seqs.qza \
  --o-denoising-stats stats-dada2.qza


In [None]:
#Turn the DADA2 denoising statistics into a viewable table to inspect the effect of the truncation
qiime metadata tabulate \
  --m-input-file stats-dada2.qza \
  --o-visualization stats-dada2.qzv


In [None]:
#Summarize the feature table (features per sample), annotated with the sample metadata
qiime feature-table summarize \
  --i-table table-dada2.qza \
  --m-sample-metadata-file JBmetadata091416.tsv \
  --o-visualization table-dada2.qzv


In [None]:
#Tabulate the DADA2 representative sequences into a viewable table (rep-dada2seqs.qzv).
#Sampling depth chosen: 94000, which retained most features in all samples (69.17%).
#NOTE(review): the sampling depth is presumably read from the interactive feature-table
#summary (table-dada2.qzv) rather than from this visualization - confirm.
qiime feature-table tabulate-seqs \
  --i-data dada2-seqs.qza \
  --o-visualization rep-dada2seqs.qzv


In [None]:
#Phylogenetic Analysis via high speed multiple sequence alignment (MAFFT alignment + FastTree tree).
#The rooted tree is written into phylo/ because the core-metrics and alpha-rarefaction cells
#read it from phylo/rooted-tree.qza; the directory is created first in case it does not exist yet.
mkdir -p phylo
qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences dada2-seqs.qza \
  --o-alignment aligned-rep-seqs.qza \
  --o-masked-alignment masked-rep-set.qza \
  --o-tree unrooted-tree.qza \
  --o-rooted-tree phylo/rooted-tree.qza


In [None]:
#Compute the core alpha and beta diversity metrics, rarefying every sample to a depth of 94,000.
#All results are written into core-diversity-results/.
qiime diversity core-metrics-phylogenetic \
  --i-table table-dada2.qza \
  --i-phylogeny phylo/rooted-tree.qza \
  --p-sampling-depth 94000 \
  --m-metadata-file JBmetadataCat.tsv \
  --output-dir core-diversity-results


In [None]:
#Alpha diversity: compare the Faith PD vector between the metadata groups
qiime diversity alpha-group-significance \
  --i-alpha-diversity core-diversity-results/faith_pd_vector.qza \
  --m-metadata-file JBmetadataCat.tsv \
  --o-visualization core-diversity-results/faith_pd_group_significance.qzv


In [None]:
#Alpha diversity: compare the evenness vector between the metadata groups
qiime diversity alpha-group-significance \
  --i-alpha-diversity core-diversity-results/evenness_vector.qza \
  --m-metadata-file JBmetadataCat.tsv \
  --o-visualization core-diversity-results/evenness-group-significance.qzv

In [None]:
#Beta diversity: test the weighted UniFrac distances for differences between the groups of the
#metadata column "Categorical", including pairwise comparisons
qiime diversity beta-group-significance \
  --i-distance-matrix core-diversity-results/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file JBmetadataCat.tsv \
  --m-metadata-column Categorical \
  --p-pairwise \
  --o-visualization core-diversity-results/weighted_unifrac_project_singificance.qzv


In [None]:
#Rarefaction plot: alpha diversity as a function of sampling depth, up to a maximum depth of 94,000
qiime diversity alpha-rarefaction \
  --i-table table-dada2.qza \
  --i-phylogeny phylo/rooted-tree.qza \
  --p-max-depth 94000 \
  --m-metadata-file JBmetadataCat.tsv \
  --o-visualization alpha-rarefraction.qzv


In [None]:
#Build the SILVA rRNA reference used for taxonomic analysis (SILVA release 128, 97% OTUs).
#Step 1: import the 97% OTU representative sequences (16S only) as FeatureData[Sequence].
qiime tools import \
  --type 'FeatureData[Sequence]' \
  --input-path '/home/qiime/Desktop/JB2020/Reiteration2/SSUdivers/Silva_128_release (1)/SILVA_128_QIIME_release/rep_set/rep_set_16S_only/97/97_otus_16S.fasta' \
  --output-path SSUdivers/97otus_16s.qza


In [None]:
#Importing Taxonomy classification
#The command is continued with backslashes: without them --output-path ends up on a line of its
#own, so the import runs without an output path and the second line is run as a separate command.
qiime tools import \
  --type 'FeatureData[Taxonomy]' \
  --input-format HeaderlessTSVTaxonomyFormat \
  --input-path '/home/qiime/Desktop/JB2020/Reiteration2/SSUdivers/Silva_128_release (1)/SILVA_128_QIIME_release/taxonomy/16S_only/97/consensus_taxonomy_7_levels.txt' \
  --output-path SSUdivers/97ref_taxonomy.qza

#Extract the region amplified by the 515F/806R primers from the reference sequences
#(these primers target the 16S V4 region), truncating the extracted reads to 100 bases
qiime feature-classifier extract-reads \
  --i-sequences SSUdivers/97otus_16s.qza \
  --p-f-primer GTGCCAGCMGCCGCGGTAA \
  --p-r-primer GGACTACHVGGGTWTCTAAT \
  --p-trunc-len 100 \
  --o-reads SSUdivers/97ref-seqs.qza

#Train the Naive Bayes classifier on the extracted reference reads and their taxonomy.
#This produces SSUdivers/classifier.qza, which the classify-sklearn cell takes as --i-classifier.
qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads SSUdivers/97ref-seqs.qza \
  --i-reference-taxonomy SSUdivers/97ref_taxonomy.qza \
  --o-classifier SSUdivers/classifier.qza



In [None]:
#Taxonomic analysis: assign taxonomy to the DADA2 representative sequences with the
#SILVA 97% OTU classifier
qiime feature-classifier classify-sklearn \
  --i-classifier SSUdivers/classifier.qza \
  --i-reads dada2-seqs.qza \
  --o-classification SSUdivers/97taxonomy.qza

#Render the taxonomy assignments as a viewable table
qiime metadata tabulate \
  --m-input-file SSUdivers/97taxonomy.qza \
  --o-visualization SSUdivers/taxonomy.qzv


In [None]:
#Taxa bar plots
#The taxonomy input is SSUdivers/97taxonomy.qza, the file written by the classify-sklearn cell
#(no taxonomy.qza is ever written into core-diversity-results/).
qiime taxa barplot \
  --i-table table-dada2.qza \
  --i-taxonomy SSUdivers/97taxonomy.qza \
  --m-metadata-file JBmetadata091416.tsv \
  --o-visualization core-diversity-results/taxa-bar-plots.qzv
