-
Notifications
You must be signed in to change notification settings - Fork 0
/
Qiime2_script_Góngora_v2.txt
230 lines (187 loc) · 19.9 KB
/
Qiime2_script_Góngora_v2.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#Activating Qiime2
source activate qiime2-2020.8
source tab-qiime
#Converting dada2 files to BIOM format and importing them to Qiime2
biom convert -i ASVs_counts.txt -o counts.biom --to-hdf5
qiime tools import --type 'FeatureTable[Frequency]' --input-path counts.biom --output-path feature_table_dada2.qza
qiime tools import --type 'FeatureData[Sequence]' --input-path ASVs.fasta --output-path rep-seqs_dada2.qza
qiime feature-table summarize --i-table feature_table_dada2.qza --m-sample-metadata-file sample-metadata-full.txt --o-visualization feature_table_dada2.qzv
qiime tools view feature_table_dada2.qzv
#Negative control removal and data cleaning
#Samples were amplififed in two different PCR batches depending on the number of PCR cycles
#Samples were separated them according to the negative control of each batch
#Batch 1 (Negative Control 1) has samples the least amount of cycles
#Batch 2 (Negative Control 1) has samples the least amount of cycles
mkdir neg_cont
cd neg_cont
qiime feature-table filter-samples --i-table ../feature_table_dada2.qza --m-metadata-file control2_and_samples.txt --o-filtered-table neg2_and_samples.qza
qiime tools export --input-path neg2_and_samples.qza --output-path exported/
biom convert -i exported/feature-table-2.biom -o exported/neg2_and_samples.tsv --to-tsv
qiime feature-table filter-samples --i-table ../feature_table_dada2.qza --m-metadata-file control2_and_samples.txt --o-filtered-table neg1_and_other_samples.qza --p-exclude-ids
qiime tools export --input-path neg1_and_other_samples.qza --output-path exported/
biom convert -i exported/feature-table-1.biom -o exported/neg1_and_other-samples.tsv --to-tsv
###Remove "#Constructed from biom file" header, transpose table using Excel, and save as .txt file
#Running negative control sequence removal scripts
##remove.sh flags individual ASVs for a check if they are present in the negative controls
##remove.sh flags individual ASVs to be deleted if the total number of sequences of said ASV is equal to the number of sequences of the ASV in the controls
##If remove.sh is not executable, use:
chmod +x remove.sh
./remove.sh
###Add the ASVs that were manually chosen for removal into the asvs_to_remove.txt file that was produced by remove.sh
#Making table with only removed samples for sanity check
qiime feature-table filter-features --i-table ../feature_table_dada2.qza --m-metadata-file asvs_to_remove.txt --o-filtered-table removed-features.qza
qiime feature-table summarize --i-table removed-features.qza --m-sample-metadata-file ../sample-metadata-full.txt --o-visualization removed-features.qzv
qiime tools view removed-features.qzv
###Download the Frequency per sample detail csv table and check that most of the removed sequences belonged to the negative controls
#If everything is okay, remove the features from the table:
qiime feature-table filter-features --i-table ../feature_table_dada2.qza --m-metadata-file asvs_to_remove.txt --o-filtered-table filtered_table_removed.qza --p-exclude-ids
qiime feature-table summarize --i-table filtered_table_removed.qza --m-sample-metadata-file ../sample-metadata-full.txt --o-visualization filtered_table_removed.qzv
qiime tools view filtered_table_removed.qzv
#Making a filtered table of only the features that need to be checked:
qiime feature-table filter-features --i-table filtered_table_removed.qza --m-metadata-file asvs_to_check.txt --o-filtered-table check_features.qza
qiime feature-table summarize --i-table check_features.qza --m-sample-metadata-file ../sample-metadata-full.txt --o-visualization check_features.qzv
qiime tools view check_features.qzv
qiime tools export --input-path check_features.qza --output-path exported/
biom convert -i exported/feature-table-check.biom -o exported/check.tsv --to-tsv
###Remove "#Constructed from biom file" header and transpose table using Excel (use Ctrl+C or it won't work)
###Before running check.sh, go to the "Feature Detail" tab of "qiime tools view check_features.qzv" and copy full table to extract "# of Samples Observed In"
###Save into detail.tsv, delete all columns using Excel except for "# of Samples Observed In", remove header and save to detail.txt so that it can be used in check.sh
##check.sh determines what proportion of the total number of sequences per ASV belongs to the negative controls
##check.sh flags samples in term of total proportion of all samples (T) or in terms of proportion of individual samples (P)
##If check.sh is not executable, use:
chmod +x check.sh
./check.sh
##The ASVs that were flagged by check.sh are checked manually based on their taxonomic assignment
##This was done to make sure that they did not have any biological reason to be present in feces (e.g. bacteria normally associated with skin)
###Save ASVs to be removed to asvs_to_remove_from_check.txt
qiime feature-table filter-features --i-table filtered_table_removed.qza --m-metadata-file asvs_to_remove_from_check.txt --o-filtered-table filtered_table_removed_checked.qza --p-exclude-ids
qiime feature-table summarize --i-table filtered_table_removed_checked.qza --m-sample-metadata-file ../sample-metadata-full.txt --o-visualization filtered_table_removed_checked.qzv
qiime tools view filtered_table_removed_checked.qzv
#Samples with less than 1000 sequences were removed and checked to see which samples were removed:
qiime feature-table filter-samples --i-table filtered_table_removed_checked.qza --p-min-frequency 1000 --o-filtered-table filtered_table_1000.qza
qiime feature-table summarize --i-table filtered_table_1000.qza --m-sample-metadata-file ../sample-metadata-full.txt --o-visualization filtered_table_1000.qzv
qiime tools view filtered_table_1000.qzv
#Negative controls were removed from the dataset
##This step was needed as the negative control 2 had more than 1000 sequences
qiime feature-table filter-samples --i-table filtered_table_1000.qza --m-metadata-file control2.txt --p-exclude-ids --o-filtered-table filtered_table_1000_no_controls.qza
#Singletons were removed from the dataset
qiime feature-table filter-features --i-table filtered_table_1000_no_controls.qza --p-min-frequency 2 --o-filtered-table filter-table.qza
###A new metadata file was created where excluded samples were removed and the new dataset was checked
qiime feature-table summarize --i-table filter-table.qza --m-sample-metadata-file sample-metadata-clean.txt --o-visualization filter-table.qzv
qiime tools view filter-table.qzv
qiime feature-table filter-seqs --i-data rep-seqs_dada2.qza --i-table filter-table.qza --o-filtered-data filter-rep-seqs.qza
#Taxonomic classification and phylogeny
#The q2-classifier was used to perform the final taxonomic classification of the dataset
qiime feature-classifier classify-sklearn --i-reads neg_cont/filter-rep-seqs.qza --i-classifier silva-132-99-nb-classifier.qza --p-n-jobs 16 --p-confidence 0.8 --o-classification taxonomy-16S-silva-132.qza
qiime metadata tabulate --m-input-file taxonomy-16S-silva-132.qza --o-visualization taxonomy-16S-silva-132.qzv
qiime tools view taxonomy-16S-silva-132.qzv
#Features that were classified as mitochondrial or chloroplastic were removed as well as those unclassified
qiime taxa filter-table --i-table neg_cont/filter-table.qza --i-taxonomy taxonomy-16S-silva-132.qza --p-include D_1__ --p-exclude mitochondria,chloroplast --o-filtered-table filtered-table.qza
qiime feature-table filter-seqs --i-data neg_cont/filter-rep-seqs.qza --i-table filtered-table.qza --o-filtered-data filtered-rep-seqs.qza
qiime taxa barplot --i-table filtered-table.qza --i-taxonomy taxonomy-16S-silva-132.qza --m-metadata-file sample-metadata-clean.txt --o-visualization barplots.qzv
qiime tools view barplots.qzv
#A phylogenetic tree was created for the sequences that were kept
qiime alignment mafft --i-sequences filtered-rep-seqs.qza --p-n-threads 16 --p-parttree --o-alignment aligned-filtered-rep-seqs.qza
qiime alignment mask --i-alignment aligned-filtered-rep-seqs.qza --o-masked-alignment masked-aligned-filtered-rep-seqs.qza
qiime phylogeny fasttree --i-alignment masked-aligned-filtered-rep-seqs.qza --p-n-threads 1 --o-tree unrooted-tree-fasttree.qza --verbose
qiime phylogeny midpoint-root --i-tree unrooted-tree-fasttree.qza --o-rooted-tree rooted-tree-fasttree.qza
#Alpha diversity analysis
#Created a relative frequency table to use proportions as a normalization method for the dataset
qiime feature-table relative-frequency --i-table filtered-table.qza --o-relative-frequency-table relative-frequency-table.qza
mkdir metrics
##Faith's PD
#Given that two samples were taken for each bird, it is necessary to know if the alpha diversity metrics differ among the two sample points to know if they are independent or not
cd metrics
mkdir longitudinal
cd longitudinal/
qiime diversity-lib faith-pd --i-table ../../relative-frequency-table.qza --i-phylogeny ../..rooted-tree-fasttree.qza --o-vector faith_pd_vector.qza
###A new sample metadata file was created that included only paired data (two sample points for the same bird)
qiime longitudinal pairwise-differences --m-metadata-file sample-metadata-clean-paired.txt --m-metadata-file faith_pd_vector.qza --p-metric faith_pd --p-state-column Replicate --p-state-1 1 --p-state-2 2 --p-individual-id-column BirdID --p-replicate-handling random --o-visualization pairwise-differences-faith_pd.qzv
qiime tools view pairwise-differences-faith_pd.qzv
#As there were no differences between sampling points in terms of Faith's PD, only samples from the first sampling point will be used for the comparison between sexes or seasons
cd ../..
#The feature table was filtered to include only the samples from the first sampling point
qiime feature-table filter-samples --i-table relative-frequency-table.qza --m-metadata-file sample-metadata-clean.txt --p-where "[Replicate]='1'" --o-filtered-table rep1-relative-frequency-table.qza
qiime diversity-lib faith-pd --i-table rep1-relative-frequency-table.qza --i-phylogeny rooted-tree-fasttree.qza --o-vector metrics/rep1-faith_pd_vector.qza
qiime diversity alpha-group-significance --i-alpha-diversity metrics/rep1-faith_pd_vector.qza --m-metadata-file sample-metadata-clean.txt --o-visualization metrics/rep1-faith_pd_vector.qzv
qiime tools view metrics/rep1-faith_pd_vector.qzv
##Shannon diversity
#Checking for differences between replicates
cd metrics/longitudinal
qiime diversity-lib shannon-entropy --i-table ../../relative-frequency-table.qza --o-vector shannon_vector.qza
qiime longitudinal pairwise-differences --m-metadata-file sample-metadata-clean-paired.txt --m-metadata-file shannon_vector.qza --p-metric shannon_entropy --p-state-column Replicate --p-state-1 1 --p-state-2 2 --p-individual-id-column BirdID --p-replicate-handling random --o-visualization pairwise-differences-shannon.qzv
qiime tools view pairwise-differences-shannon.qzv
#As there were no differences between sampling points in terms of Shannon diversity, only samples from the first sampling point will be used for the comparison between sexes or seasons
cd ../..
qiime diversity-lib shannon-entropy --i-table rep1-relative-frequency-table.qza --o-vector metrics/rep1-shannon_vector.qza
qiime diversity alpha-group-significance --i-alpha-diversity metrics/rep1-shannon_vector.qza --m-metadata-file sample-metadata-clean.txt --o-visualization metrics/rep1-shannon_vector.qzv
qiime tools view metrics/rep1-shannon_vector.qzv
#Community composition analysis
##Unweighted UniFrac
#Repeatability analysis to determine if both data points can be used
qiime diversity beta-phylogenetic --i-table relative-frequency-table.qza --i-phylogeny rooted-tree-fasttree.qza --p-metric unweighted_unifrac --o-distance-matrix metrics/unweighted_unifrac_distance_matrix_relative_frequency.qza
qiime diversity beta-group-significance --i-distance-matrix metrics/unweighted_unifrac_distance_matrix_relative_frequency.qza --m-metadata-file sample-metadata-clean.txt --m-metadata-column Replicate --p-method permdisp --p-pairwise --o-visualization metrics/unweighted_unifrac_permdisp_replicate_relative_frequency.qzv
qiime tools view metrics/unweighted_unifrac_permdisp_replicate_relative_frequency.qzv
qiime diversity adonis --i-distance-matrix metrics/unweighted_unifrac_distance_matrix_relative_frequency.qza --m-metadata-file sample-metadata-clean.txt --p-formula "Replicate" --o-visualization metrics/unweighted_unifrac_adonis_replicate_relative_frequency.qzv
qiime tools view metrics/unweighted_unifrac_adonis_replicate_relative_frequency.qzv
#As there were no differences between sampling points in terms of unweighted UniFrac distances, only samples from the first sampling point will be used for the comparison between sexes or seasons
#Filtered out samples that are not sexed
qiime feature-table filter-samples --i-table rep1-relative-frequency-table.qza --m-metadata-file sample-metadata-clean.txt --p-where "[Sex] IN ('F', 'M')" --o-filtered-table rep1-filtered-relative-frequency-table-sex.qza
#Created unweighted UniFrac distance matrix
qiime diversity beta-phylogenetic --i-table rep1-relative-frequency-table.qza --i-phylogeny rooted-tree-fasttree.qza --p-metric unweighted_unifrac --o-distance-matrix metrics/unweighted_unifrac_distance_matrix_rep1.qza
qiime diversity beta-phylogenetic --i-table rep1-filtered-relative-frequency-table-sex.qza --i-phylogeny rooted-tree-fasttree.qza --p-metric unweighted_unifrac --o-distance-matrix metrics/unweighted_unifrac_distance_matrix-filtered_rep1.qza
#PERMDISP and PERMANOVA (adonis) for differences between sexes
qiime diversity beta-group-significance --i-distance-matrix metrics/unweighted_unifrac_distance_matrix_rep1.qza --m-metadata-file sample-metadata-clean.txt --m-metadata-column Sex --p-method permdisp --p-pairwise --o-visualization metrics/unweighted_unifrac_permdisp_sex_rep1.qzv
qiime tools view metrics/unweighted_unifrac_permdisp_sex_rep1.qzv
qiime diversity adonis --i-distance-matrix metrics/unweighted_unifrac_distance_matrix-filtered_rep1.qza --m-metadata-file sample-metadata-clean.txt --p-formula "Sex" --o-visualization metrics/unweighted_unifrac_adonis_sex_rep1.qzv
qiime tools view metrics/unweighted_unifrac_adonis_sex_rep1.qzv
#PERMDISP and PERMANOVA for differences between seasons
qiime diversity beta-group-significance --i-distance-matrix metrics/unweighted_unifrac_distance_matrix_rep1.qza --m-metadata-file sample-metadata-clean.txt --m-metadata-column Status --p-method permdisp --p-pairwise --o-visualization metrics/unweighted_unifrac_permdisp_status_rep1.qzv
qiime tools view metrics/unweighted_unifrac_permdisp_status_rep1.qzv
qiime diversity adonis --i-distance-matrix metrics/unweighted_unifrac_distance_matrix_rep1.qza --m-metadata-file sample-metadata-clean.txt --p-formula "Status" --o-visualization metrics/unweighted_unifrac_adonis_status_rep1.qzv
qiime tools view metrics/unweighted_unifrac_adonis_status_rep1.qzv
#PCoAs and biplots
qiime diversity pcoa --i-distance-matrix metrics/unweighted_unifrac_distance_matrix_rep1.qza --p-number-of-dimensions 46 --o-pcoa metrics/unweighted_unifrac_emperor_rep1.qza
qiime diversity pcoa-biplot --i-pcoa metrics/unweighted_unifrac_emperor_rep1.qza --i-features rep1-relative-frequency-table.qza --o-biplot metrics/unweighted_unifrac_biplot_rep1.qza
qiime emperor biplot --i-biplot metrics/unweighted_unifrac_biplot_rep1.qza --m-sample-metadata-file sample-metadata-clean.txt --o-visualization metrics/unweighted_unifrac_biplot_rep1.qzv
qiime tools view metrics/unweighted_unifrac_biplot_rep1.qzv
##Weighted UniFrac
#Repeatability analysis to determine if both data points can be used
qiime diversity beta-phylogenetic --i-table relative-frequency-table.qza --i-phylogeny rooted-tree-fasttree.qza --p-metric weighted_unifrac --o-distance-matrix metrics/weighted_unifrac_distance_matrix_relative_frequency.qza
qiime diversity beta-group-significance --i-distance-matrix metrics/weighted_unifrac_distance_matrix_relative_frequency.qza --m-metadata-file sample-metadata-clean.txt --m-metadata-column Replicate --p-method permdisp --p-pairwise --o-visualization metrics/weighted_unifrac_permdisp_replicate_relative_frequency.qzv
qiime tools view metrics/weighted_unifrac_permdisp_replicate_relative_frequency.qzv
qiime diversity adonis --i-distance-matrix metrics/weighted_unifrac_distance_matrix_relative_frequency.qza --m-metadata-file sample-metadata-clean.txt --p-formula "Replicate" --o-visualization metrics/weighted_unifrac_adonis_replicate_relative_frequency.qzv
qiime tools view metrics/weighted_unifrac_adonis_replicate_relative_frequency.qzv
#As there were no differences between sampling points in terms of weighted UniFrac distances, only samples from the first sampling point will be used for the comparison between sexes or seasons
#Created unweighted UniFrac distance matrix
qiime diversity beta-phylogenetic --i-table rep1-relative-frequency-table.qza --i-phylogeny rooted-tree-fasttree.qza --p-metric weighted_unifrac --o-distance-matrix metrics/weighted_unifrac_distance_matrix_rep1.qza
qiime diversity beta-phylogenetic --i-table rep1-filtered-relative-frequency-table-sex.qza --i-phylogeny rooted-tree-fasttree.qza --p-metric weighted_unifrac --o-distance-matrix metrics/weighted_unifrac_distance_matrix-filtered_rep1.qza
#PERMDISP and PERMANOVA (adonis) for differences between sexes
qiime diversity beta-group-significance --i-distance-matrix metrics/weighted_unifrac_distance_matrix_rep1.qza --m-metadata-file sample-metadata-clean.txt --m-metadata-column Sex --p-method permdisp --p-pairwise --o-visualization metrics/weighted_unifrac_permdisp_sex_rep1.qzv
qiime tools view metrics/weighted_unifrac_permdisp_sex_rep1.qzv
qiime diversity adonis --i-distance-matrix metrics/weighted_unifrac_distance_matrix-filtered_rep1.qza --m-metadata-file sample-metadata-clean.txt --p-formula "Sex" --o-visualization metrics/weighted_unifrac_adonis_sex_rep1.qzv
qiime tools view metrics/weighted_unifrac_adonis_sex_rep1.qzv
#PERMDISP and PERMANOVA for differences between seasons
qiime diversity beta-group-significance --i-distance-matrix metrics/weighted_unifrac_distance_matrix_rep1.qza --m-metadata-file sample-metadata-clean.txt --m-metadata-column Status --p-method permdisp --p-pairwise --o-visualization metrics/weighted_unifrac_permdisp_status_rep1.qzv
qiime tools view metrics/weighted_unifrac_permdisp_status_rep1.qzv
qiime diversity adonis --i-distance-matrix metrics/weighted_unifrac_distance_matrix_rep1.qza --m-metadata-file sample-metadata-clean.txt --p-formula "Status" --o-visualization metrics/weighted_unifrac_adonis_status_rep1.qzv
qiime tools view metrics/weighted_unifrac_adonis_status_rep1.qzv
#PCoAs and biplots
qiime diversity pcoa --i-distance-matrix metrics/weighted_unifrac_distance_matrix_rep1.qza --p-number-of-dimensions 46 --o-pcoa metrics/weighted_unifrac_emperor_rep1.qza
qiime diversity pcoa-biplot --i-pcoa metrics/weighted_unifrac_emperor_rep1.qza --i-features rep1-relative-frequency-table.qza --o-biplot metrics/weighted_unifrac_biplot_rep1.qza
qiime emperor biplot --i-biplot metrics/weighted_unifrac_biplot_rep1.qza --m-sample-metadata-file sample-metadata-clean.txt --o-visualization metrics/weighted_unifrac_biplot_rep1.qzv
qiime tools view metrics/weighted_unifrac_biplot_rep1.qzv
#Exporting fles for subsequent analyses
qiime tools export --input-path relative-frequency-table.qza --output-path .
biom convert -i feature-table.biom -o feature-table.txt --to-tsv
###The first row of the file that says "# Constructed from biom file" was removed and "#OTU ID" was replaced by "SampleID"
##The relative frequency table was manually filtered to remove the ASVs that had a mean relative abundance of less than 0.01%
##The table was transposed so that ASVs were on separate columns with each row being the relative abundance of a given ASV per individuals
##Individuals with no stable isotope table (See Table S1 of Góngora et al., 2020) were removed
qiime tools export --input-path taxonomy-16S-silva-132.qza --output-path .
###The confidence column was removed
sed -i -r 's/(.*)\s+[^\s]+$/\1/' taxonomy.tsv
###The taxonomy was separated and the "D_#__" header for each taxonomic level was removed
sed -i "s/;/\\t/g; s/D_[0-9]__//g" taxonomy.tsv
##The header of the file was changed to:
#ASV Domain Phylum Class Order Family Genus Species