# 1. MAG Quality Control
## 1.1 Fetching datasets from BUSCO

In [None]:
# Download the BUSCO lineage databases (odb12) that the BUSCO evaluation
# commands take as --i-db. Same code as in W4.
# Every shell command needs its own "!" prefix: a command line without it is
# parsed as Python and the whole cell fails with a SyntaxError.

# Bacteria
! qiime annotate fetch-busco-db \
    --p-lineages bacteria_odb12 \
    --o-db $data_dir/busco-db-bacteria.qza

# Archaea
! qiime annotate fetch-busco-db \
    --p-lineages archaea_odb12 \
    --o-db $data_dir/busco-db-archaea.qza

# Fungi
! qiime annotate fetch-busco-db \
    --p-lineages fungi_odb12 \
    --o-db $data_dir/busco-db-fungi.qza

## 1.2 Run BUSCO

In [None]:
# Evaluate the MAGs against each lineage's BUSCO database.
# Each run writes one results artifact (busco-results-<lineage>.qza) and one
# visualization (mags-busco-<lineage>.qzv). The results artifacts are the
# metadata files consumed by the filter-mags commands in section 1.3, so all
# three lineages must be evaluated here.

# Bacteria
! qiime annotate evaluate-busco \
    --i-mags $data_dir/updog_mags.qza \
    --i-db $data_dir/busco-db-bacteria.qza \
    --p-lineage-dataset bacteria_odb12 \
    --p-cpu 3 \
    --o-results $data_dir/busco-results-bacteria.qza \
    --o-visualization $data_dir/mags-busco-bacteria.qzv

# Archaea
! qiime annotate evaluate-busco \
    --i-mags $data_dir/updog_mags.qza \
    --i-db $data_dir/busco-db-archaea.qza \
    --p-lineage-dataset archaea_odb12 \
    --p-cpu 3 \
    --o-results $data_dir/busco-results-archaea.qza \
    --o-visualization $data_dir/mags-busco-archaea.qzv

# Fungi
! qiime annotate evaluate-busco \
    --i-mags $data_dir/updog_mags.qza \
    --i-db $data_dir/busco-db-fungi.qza \
    --p-lineage-dataset fungi_odb12 \
    --p-cpu 3 \
    --o-results $data_dir/busco-results-fungi.qza \
    --o-visualization $data_dir/mags-busco-fungi.qzv

## 1.3 MAGs filtering
Now that we have evaluated the quality of our MAGs, we can use this information to keep only the best ones and discard the rest.

In [None]:
# Keep only the MAGs that pass the quality thresholds for each lineage,
# using the per-lineage BUSCO results as metadata.
#   --p-where           : MAGs with complete > 50 AND contamination < 10
#   --p-no-exclude-ids  : MAGs matching the condition are retained, not removed
#   --p-on mag          : the condition is applied per MAG
# The input is updog_mags.qza, the same artifact that evaluate-busco scored,
# so the MAG IDs in the BUSCO results match the MAGs being filtered.
# Commands use the "!" shell escape and the `qiime` entry point, like the
# other cells of this notebook.

# --- Bacteria ---
! qiime annotate filter-mags \
    --i-mags $data_dir/updog_mags.qza \
    --m-metadata-file $data_dir/busco-results-bacteria.qza \
    --p-where "complete > 50 AND contamination < 10" \
    --p-no-exclude-ids \
    --p-on mag \
    --o-filtered-mags $data_dir/mags_filtered_bacteria_50.qza \
    --verbose

# --- Archaea ---
! qiime annotate filter-mags \
    --i-mags $data_dir/updog_mags.qza \
    --m-metadata-file $data_dir/busco-results-archaea.qza \
    --p-where "complete > 50 AND contamination < 10" \
    --p-no-exclude-ids \
    --p-on mag \
    --o-filtered-mags $data_dir/mags_filtered_archaea_50.qza \
    --verbose

# --- Fungi ---
! qiime annotate filter-mags \
    --i-mags $data_dir/updog_mags.qza \
    --m-metadata-file $data_dir/busco-results-fungi.qza \
    --p-where "complete > 50 AND contamination < 10" \
    --p-no-exclude-ids \
    --p-on mag \
    --o-filtered-mags $data_dir/mags_filtered_fungi_50.qza \
    --verbose

In [None]:
# Combine the three filtered MAG sets into a single artifact.
# The inputs are MAG artifacts produced by filter-mags, not feature tables,
# so `feature-table merge` (which expects FeatureTable inputs) cannot be used;
# the MAG artifacts are collated instead.
# NOTE(review): assumes the filtered artifacts are SampleData[MAGs] and that no
# MAG passed the thresholds for more than one lineage - confirm how the collate
# action handles a MAG present in several inputs.
! qiime types collate-sample-data-mags \
    --i-mags $data_dir/mags_filtered_bacteria_50.qza \
    --i-mags $data_dir/mags_filtered_archaea_50.qza \
    --i-mags $data_dir/mags_filtered_fungi_50.qza \
    --o-collated-mags $data_dir/mags_filtered_all_50.qza