In [1]:
#Install QIIME2
#!/usr/bin/env python3

"""Set up Qiime 2 on Google colab.

Do not use this on a local machine, especially not as an admin!
"""

import os
import sys
import shutil
from subprocess import Popen, PIPE

# Install `rich` with the interpreter that runs this kernel, so the package
# lands in the environment the import below reads from. A bare `pip` is
# whatever comes first on PATH and may belong to a different Python.
r = Popen([sys.executable, "-m", "pip", "install", "rich"])
if r.wait() != 0:
    # Fail here with a clear message instead of an ImportError on the next line.
    raise RuntimeError(
        "could not install `rich` (pip exited with status %d)" % r.returncode
    )
from rich.console import Console  # noqa
con = Console()

# Miniforge is installed under PREFIX.
PREFIX = "/usr/local/miniforge3/"

# Probe for existing installs. os.popen runs through the shell, so a missing
# executable just produces output without the marker text instead of raising.
has_conda = "conda version" in os.popen(
    os.path.join(PREFIX, "bin", "conda") + " info"
).read()
has_qiime = "QIIME 2 release:" in os.popen("qiime info").read()


MINICONDA_PATH = (
    "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh"
)

DEFAULT_VERSION = "2024.2"


def parse_version(text):
    """Return a dotted release string such as "2024.2" as a tuple of ints.

    Returns None when `text` is not made of at least two dot-separated
    integers, which lets stray command-line arguments be told apart from
    release numbers.
    """
    try:
        release = tuple(int(part) for part in text.split("."))
    except ValueError:
        return None
    return release if len(release) >= 2 else None


def python_tag(release):
    """Return the Python tag ("36", "38", ...) for a release tuple.

    The tag is the `py..` part of the published environment file name.
    NOTE(review): releases after 2024.10 may publish their environment files
    under a different URL scheme -- verify before bumping the default.
    """
    if release < (2021, 4):
        return "36"
    if release < (2024, 5):
        return "38"
    if release < (2024, 10):
        return "39"
    return "310"


# sys.argv[1] is honoured only when it looks like a release number: inside a
# notebook kernel sys.argv holds the kernel launcher's own arguments.
version = DEFAULT_VERSION
if len(sys.argv) == 2 and parse_version(sys.argv[1]) is not None:
    version = sys.argv[1]

release = parse_version(version)
pyver = python_tag(release)

CONDA = "mamba"
CONDA_ARGS = ["-q"]

# Releases before 2024.2 use the "core" distribution URL, later ones "amplicon".
if release < (2024, 2):
    QIIME_YAML_TEMPLATE = (
        "https://data.qiime2.org/distro/core/qiime2-{version}-py{python}-linux-conda.yml"
    )
else:
    QIIME_YAML_TEMPLATE = (
        "https://data.qiime2.org/distro/amplicon/qiime2-amplicon-{version}-py{python}-linux-conda.yml"
    )

QIIME_YAML_URL = QIIME_YAML_TEMPLATE.format(version=version, python=pyver)
# Local file name that wget writes into the current directory.
QIIME_YAML = os.path.basename(QIIME_YAML_URL)


def cleanup():
    """Delete the downloaded installer, the environment file and Colab's sample data.

    Paths that do not exist are skipped. The two downloads are looked up by
    base name relative to the current working directory.
    """
    downloads = (os.path.basename(MINICONDA_PATH), QIIME_YAML)
    for leftover in downloads:
        if os.path.exists(leftover):
            os.remove(leftover)

    sample_dir = "/content/sample_data"
    if os.path.exists(sample_dir):
        shutil.rmtree(sample_dir)

    con.log(":broom: Cleaned up unneeded files.")


def run_and_check(args, check, message, failure, success, console=con):
    """Run a command and check that it worked.

    Parameters
    ----------
    args : list of str
        Command and arguments, passed to `Popen` without a shell.
    check : str
        Text that must appear in the combined stdout + stderr output.
    message : str
        Logged before the command starts.
    failure : str
        Logged in red, followed by the command output or the launch error,
        when the command fails.
    success : str
        Logged in blue when the command succeeds.
    console : rich.console.Console
        Console used for logging.

    A run counts as successful when the exit status is 0 and `check` occurs
    in the output. On any failure, including a command that cannot be
    started, the downloads are cleaned up and the process exits with status 1.
    """
    console.log(message)
    try:
        proc = Popen(args, env=os.environ, stdout=PIPE, stderr=PIPE,
                     universal_newlines=True)
        stdout, stderr = proc.communicate()
    except OSError as err:
        # Raised when the executable is missing or not runnable. Report it
        # like any other failure instead of surfacing a raw traceback.
        console.log("[red]%s[/red]" % failure, str(err))
        cleanup()
        sys.exit(1)

    out = stdout + stderr
    if proc.returncode == 0 and check in out:
        console.log("[blue]%s[/blue]" % success)
        return

    console.log("[red]%s[/red]" % failure, out)
    cleanup()
    sys.exit(1)


if __name__ == "__main__":
    # Step 1: install Miniforge unless a working conda was found under PREFIX.
    if not has_conda:
        run_and_check(
            ["wget", MINICONDA_PATH],
            "saved",
            ":snake: Downloading miniforge...",
            "failed downloading miniforge :sob:",
            ":snake: Done."
        )

        run_and_check(
            ["bash", os.path.basename(MINICONDA_PATH), "-bfp", PREFIX],
            "installation finished.",
            ":snake: Installing miniforge...",
            "could not install miniforge :sob:",
            ":snake: Installed miniforge to `/usr/local`."
        )
    else:
        con.log(":snake: Miniforge is already installed. Skipped.")

    # Step 2: create the QIIME 2 environment in /usr/local and add Empress.
    if not has_qiime:
        run_and_check(
            ["wget", QIIME_YAML_URL],
            "saved",
            ":mag: Downloading Qiime 2 package list...",
            "could not download package list :sob:",
            ":mag: Done."
        )

        run_and_check(
            [PREFIX + "bin/" + CONDA, "env", "create", *CONDA_ARGS, "--prefix", "/usr/local", "--file", QIIME_YAML],
            "Verifying transaction: ...working... done",
            ":mag: Installing Qiime 2. This may take a little bit.\n :clock1:",
            "could not install Qiime 2 :sob:",
            ":mag: Done."
        )

        # Deliberately the `pip` found on PATH, not the kernel's interpreter.
        run_and_check(
            ["pip", "install", "empress"],
            "Successfully installed empress-",
            ":evergreen_tree: Installing Empress...",
            "could not install Empress :sob:",
            ":evergreen_tree: Done."
        )
    else:
        con.log(":mag: Qiime 2 is already installed. Skipped.")

    # Step 3: verify the command line interface.
    run_and_check(
        ["qiime", "info"],
        "QIIME 2 release:",
        ":bar_chart: Checking that Qiime 2 command line works...",
        "Qiime 2 command line does not seem to work :sob:",
        ":bar_chart: Qiime 2 command line looks good :tada:"
    )

    # Step 4: make QIIME 2 importable from this kernel, but only when the
    # kernel runs the Python version the environment was built for.
    # pyver is a tag such as "38": the first character is the major version,
    # the rest is the minor version (so a three-character tag also parses).
    qiime_python = (int(pyver[0]), int(pyver[1:]))
    if sys.version_info[0:2] == qiime_python:
        site_packages = "/usr/local/lib/python3.{}/site-packages".format(qiime_python[1])
        # Re-running the cell must not pile up duplicate sys.path entries.
        if site_packages not in sys.path:
            sys.path.append(site_packages)
        con.log(":mag: Fixed import paths to include Qiime 2.")

        con.log(":bar_chart: Checking if Qiime 2 import works...")
        try:
            import qiime2  # noqa
        except Exception:
            con.log("[red]Qiime 2 can not be imported :sob:[/red]")
            # Show why the import failed rather than discarding the error.
            con.print_exception()
            cleanup()
            sys.exit(1)
        con.log("[blue]:bar_chart: Qiime 2 can be imported :tada:[/blue]")

    cleanup()

    con.log("[green]Everything is A-OK. "
            "You can start using Qiime 2 now :thumbs_up:[/green]")

In [2]:
# Sanity check: print the version of the installed QIIME 2 command line (q2cli).
!qiime --version

q2cli version 2024.2.0
Run `qiime info` for more version details.


In [3]:
# Create the working directory and switch into it.
# Each `!` line runs in its own throw-away subshell, so `!cd` has no lasting
# effect; the `%cd` magic changes the kernel's working directory instead.
# `mkdir -p` keeps the cell re-runnable when the directory already exists.
!pwd
!mkdir -p /home/BT
%cd /home/BT

/content


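Download the sample data from https://drive.google.com/file/d/1UYI72glvlrlNmgUQIlZjUGdoZhulSzRE/view and upload it to /home/BT. The cells below expect `/home/BT/metadata.tsv` and a `/home/BT/SeqData/` directory containing `forward.fastq.gz`, `reverse.fastq.gz` and `barcodes.fastq.gz`.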

In [9]:
# Peek at the inputs: the sample metadata table, then the first 12 lines of
# each gzipped FASTQ file (forward reads, reverse reads, barcodes).
# 12 lines correspond to 3 records, assuming the usual 4-line FASTQ layout.
!cat /home/BT/metadata.tsv
!zcat /home/BT/SeqData/forward.fastq.gz | head -n 12
!zcat /home/BT/SeqData/reverse.fastq.gz | head -n 12
!zcat /home/BT/SeqData/barcodes.fastq.gz | head -n 12

sample-id	barcode-sequence	elevation	extract-concen	amplicon-concentration	extract-group-no	transect-name	site-name	depth	ph	toc	ec	average-soil-relative-humidity	relative-humidity-soil-high	relative-humidity-soil-low	percent-relative-humidity-soil-100	average-soil-temperature	temperature-soil-high	temperature-soil-low	vegetation	percentcover
#q2:types	categorical	numeric	numeric	numeric	categorical	categorical	categorical	numeric	numeric	numeric	numeric	numeric	numeric	numeric	numeric	numeric	numeric	numeric	categorical	numeric
BAQ1370.1.2	GCCCAAGTTCAC	1370	0.019	0.950	B	Baquedano	BAQ1370	2	7.98	525	6.08	16.17	23.97	11.42	0	22.61	35.21	12.46	no	0
BAQ1370.3	GCGCCGAATCTT	1370	0.124	17.460	E	Baquedano	BAQ1370	2		771	6.08	16.17	23.97	11.42	0	22.61	35.21	12.46	no	0
BAQ1370.1.3	ATAAAGAGGAGG	1370	1.200	0.960	J	Baquedano	BAQ1370	3	8.13			16.17	23.97	11.42	0	22.61	35.21	12.46	no	0
BAQ1552.1.1	ATCCCAGCATGC	1552	0.722	18.830	J	Baquedano	BAQ1552	1	7.87			15.75	35.36	11.1	0	22.63	30.65	10.96	

In [20]:
# Create manifest.tsv: a tab-separated header row plus one row that maps
# "sample1" to the forward and reverse read files.
# Both rows are written with printf, because `echo -e` is not understood by
# every shell that `!` may run under.
# NOTE(review): none of the visible later cells read this manifest -- confirm
# whether it is still needed.
output_file="/home/BT/manifest.tsv"
!printf "%s\t%s\t%s\n" "sample-id" "forward-absolute-filepath" "reverse-absolute-filepath" > $output_file
!printf "%s\t%s\t%s\n" "sample1" "/home/BT/SeqData/forward.fastq.gz" "/home/BT/SeqData/reverse.fastq.gz" >> $output_file


In [25]:
# Import to QIIME 2
# Import the directory /home/BT/SeqData as an 'EMPPairedEndSequences' artifact.
# Output: /home/BT/emp-paired-end-sequences.qza
!qiime tools import \
  --type 'EMPPairedEndSequences' \
  --input-path /home/BT/SeqData \
  --output-path /home/BT/emp-paired-end-sequences.qza


[32mImported /home/BT/SeqData as EMPPairedEndDirFmt to /home/BT/emp-paired-end-sequences.qza[0m
[0m

In [26]:
# Demultiplexing
# Split the imported reads per sample, using the barcodes listed in the
# `barcode-sequence` column of metadata.tsv.
# Per the q2-demux documentation, --p-rev-comp-barcodes reverse-complements
# the barcode reads, and --p-rev-comp-mapping-barcodes reverse-complements
# the barcodes from the metadata, before matching.
# Outputs: demux.qza (per-sample sequences) and demux-details.qza
# (error-correction details).
!qiime demux emp-paired \
--i-seqs /home/BT/emp-paired-end-sequences.qza \
--m-barcodes-file /home/BT/metadata.tsv \
--m-barcodes-column barcode-sequence \
--o-per-sample-sequences /home/BT/demux.qza \
--o-error-correction-details /home/BT/demux-details.qza \
--p-rev-comp-barcodes \
--p-rev-comp-mapping-barcodes


[32mSaved SampleData[PairedEndSequencesWithQuality] to: /home/BT/demux.qza[0m
[32mSaved ErrorCorrectionDetails to: /home/BT/demux-details.qza[0m
[0m

In [28]:
# Denoise sequences
# Run DADA2 on the demultiplexed paired-end reads. Outputs: the feature table
# (table.qza), the representative sequences (rep-seqs.qza) and the denoising
# statistics (denoising-stats.qza).
# Both truncation lengths are 0, which per the q2-dada2 documentation disables
# truncation.
# NOTE(review): confirm against the read-quality profile that keeping
# full-length reads is intended.
!qiime dada2 denoise-paired \
--i-demultiplexed-seqs /home/BT/demux.qza \
--p-trunc-len-f 0 \
--p-trunc-len-r 0 \
--o-table /home/BT/table.qza \
--o-representative-sequences /home/BT/rep-seqs.qza \
--o-denoising-stats /home/BT/denoising-stats.qza
# Turn the denoising statistics into a viewable .qzv visualization.
!qiime metadata tabulate \
--m-input-file /home/BT/denoising-stats.qza \
--o-visualization /home/BT/denoising-stats.qzv

[32mSaved FeatureTable[Frequency] to: /home/BT/table.qza[0m
[32mSaved FeatureData[Sequence] to: /home/BT/rep-seqs.qza[0m
[32mSaved SampleData[DADA2Stats] to: /home/BT/denoising-stats.qza[0m
[0m[32mSaved Visualization to: /home/BT/denoising-stats.qzv[0m
[0m

In [32]:
# Summarize the feature table and feature data
# Build a summary visualization of the feature table (table.qzv), using
# metadata.tsv as the sample metadata.
!qiime feature-table summarize \
--i-table /home/BT/table.qza \
--m-sample-metadata-file /home/BT/metadata.tsv \
--o-visualization /home/BT/table.qzv
# Tabulate the representative sequences into a visualization (rep-seqs.qzv).
!qiime feature-table tabulate-seqs \
--i-data /home/BT/rep-seqs.qza \
--o-visualization /home/BT/rep-seqs.qzv


[32mSaved Visualization to: /home/BT/table.qzv[0m
[0m[32mSaved Visualization to: /home/BT/rep-seqs.qzv[0m
[0m