<a href="https://colab.research.google.com/github/jkim1134/repository-bioinformatics/blob/main/Final_flymanure_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3

"""Set up Qiime 2 on Google colab.

Do not use this on a local machine, especially not as an admin!
"""

import os
import sys
import shutil
from subprocess import Popen, PIPE, run

# Install `rich` with pip in a child process and block until pip exits;
# the import below has to run only after that has finished.
r = Popen(["pip", "install", "rich"])
r.wait()
from rich.console import Console  # noqa
# Console object used for the log output of this script.
con = Console()

# Directory the Miniforge installation lives in.
PREFIX = "/usr/local/miniforge3/"


def _command_output(command):
    """Return what a shell command writes to stdout.

    The pipe is closed (and the child process reaped) before returning, so
    no file handle is left open for the garbage collector to clean up.
    """
    with os.popen(command) as pipe:
        return pipe.read()


# Probe the current state once, at import time:
#   has_conda       - `conda info` under PREFIX reports a conda version
#   qiime_installed - the qiime executable exists in the "qiime2" environment
#   qiime_active    - a `qiime` command on PATH answers `qiime info`
has_conda = "conda version" in _command_output("%s/bin/conda info" % PREFIX)
qiime_installed = os.path.exists(os.path.join(PREFIX, "envs", "qiime2", "bin", "qiime"))
qiime_active = "QIIME 2 release:" in _command_output("qiime info")


# Download URL of the Miniforge installer script.
MINICONDA_PATH = (
    "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh"
)

# URL template of the conda environment file ("amplicon" distribution).
# It is swapped for the "core" template further down for older releases.
QIIME_YAML_TEMPLATE = (
    "https://data.qiime2.org/distro/amplicon/qiime2-amplicon-{version}-py{python}-linux-conda.yml"
)

# Release that is installed when no usable version argument is given.
DEFAULT_VERSION = "2025.4"


def _parse_version(text):
    """Return a dotted version such as "2024.10" as a tuple of ints.

    Returns None when any component is not an integer, so callers can tell
    a real version apart from an unrelated command-line argument.
    """
    try:
        return tuple(int(part) for part in text.split("."))
    except ValueError:
        return None


# The version is taken from the command line only when it is the single
# argument AND it parses; anything else falls back to the default instead of
# aborting with a ValueError. The parsed tuple is computed once and reused
# for every comparison below.
version = DEFAULT_VERSION
qiime_ver = _parse_version(DEFAULT_VERSION)
if len(sys.argv) == 2:
    requested = _parse_version(sys.argv[1])
    if requested is None:
        print(
            "Ignoring unrecognized version argument %r, using %s instead."
            % (sys.argv[1], DEFAULT_VERSION),
            file=sys.stderr,
        )
    else:
        version = sys.argv[1]
        qiime_ver = requested

# Python tag that appears in the environment file name for each release range.
if qiime_ver < (2021, 4):
    pyver = "36"
elif qiime_ver < (2024, 5):
    pyver = "38"
elif qiime_ver < (2024, 10):
    pyver = "39"
else:
    pyver = "310"

# Package manager executable (looked up under PREFIX/bin) and its extra flags.
CONDA = "mamba"
CONDA_ARGS = ["-q"] if CONDA == "conda" else ["-y"]

# Releases before 2023.9 use the "core" distribution URL layout.
if qiime_ver < (2023, 9):
    QIIME_YAML_TEMPLATE = (
        "https://data.qiime2.org/distro/core/qiime2-{version}-py{python}-linux-conda.yml"
    )

# Final download URL and the local file name wget will save it under.
QIIME_YAML_URL = QIIME_YAML_TEMPLATE.format(version=version, python=pyver)
QIIME_YAML = os.path.basename(QIIME_YAML_URL)


def cleanup():
    """Delete the downloaded installer, the environment file and sample data.

    Each path is removed only if it currently exists; a log line is written
    once everything has been handled.
    """
    for leftover in (os.path.basename(MINICONDA_PATH), QIIME_YAML):
        if os.path.exists(leftover):
            os.remove(leftover)

    sample_dir = "/content/sample_data"
    if os.path.exists(sample_dir):
        shutil.rmtree(sample_dir)

    con.log(":broom: Cleaned up unneeded files.")


def run_and_check(args, check, message, failure, success, console=con):
    """Run a command and check that it worked.

    The command counts as successful when it exits with status 0 AND the
    text `check` occurs somewhere in its combined stdout + stderr.

    Parameters
    ----------
    args : list of str
        The command and its arguments, passed to `Popen` without a shell.
    check : str
        Text that has to appear in the command output.
    message : str
        Logged before the command starts.
    failure : str
        Logged (in red, followed by the command output) when the check fails.
    success : str
        Logged (in blue) when the check passes.
    console : rich.console.Console
        Console used for logging.

    On failure the output is also written to `logs.txt`, downloaded files
    are cleaned up and the interpreter exits with status 1.
    """
    console.log(message)
    r = Popen(args, env=os.environ, stdout=PIPE, stderr=PIPE,
              universal_newlines=True)
    o, e = r.communicate()
    out = o + e
    if r.returncode == 0 and check in out:
        console.log("[blue]%s[/blue]" % success)
        return

    console.log("[red]%s[/red]" % failure, out)
    # Use a context manager so the log file is flushed and closed before the
    # process exits, instead of relying on garbage collection.
    with open("logs.txt", "w") as log_file:
        log_file.write(out)
    cleanup()
    sys.exit(1)

def run_in_env(cmd, env, console=con):
    """Run a shell command inside a conda environment.

    Sources conda's shell profile under PREFIX, activates `env` and then
    executes `cmd`, all in a single bash invocation. Returns the
    `CompletedProcess` with stdout and stderr captured as text.
    """
    profile_script = os.path.join(PREFIX, "etc", "profile.d", "conda.sh")
    console.log(f":snake: Activating the {env} environment.")

    command_line = ". {} && conda activate {} && {}".format(
        profile_script, env, cmd
    )
    return run(
        command_line,
        shell=True,
        executable="/bin/bash",
        capture_output=True,
        text=True,
    )

def mock_qiime2(console=con):
    """Install a `qiime` wrapper script at /usr/local/bin/qiime.

    The wrapper is a small bash script that sources conda's shell profile,
    activates the "qiime2" environment and forwards all of its arguments to
    the `qiime` command, so `qiime ...` can be called without activating
    the environment first.

    Parameters
    ----------
    console : rich.console.Console
        Console used for the progress messages.
    """
    console.log(":penguin: Setting up the Qiime2 command...")
    conda_profile = os.path.join(PREFIX, "etc", "profile.d", "conda.sh")
    wrapper = "/usr/local/bin/qiime"
    with open(wrapper, "w") as mocky:
        mocky.write("#!/usr/bin/env bash")
        mocky.write(f'\n\n. {conda_profile} && conda activate qiime2 && qiime "$@"\n')
    # Argument list instead of a shell string: no shell is needed for chmod.
    run(["chmod", "+x", wrapper])
    console.log(":penguin: Done.")

if __name__ == "__main__":
    # Step 1: make sure a conda installation exists under PREFIX.
    # The second argument of every run_and_check call is text that must show
    # up in the command output for the step to count as successful.
    if not has_conda:
        run_and_check(
            ["wget", MINICONDA_PATH],
            "saved",
            ":snake: Downloading miniforge...",
            "failed downloading miniforge :sob:",
            ":snake: Done."
        )

        run_and_check(
            ["bash", os.path.basename(MINICONDA_PATH), "-bfp", PREFIX],
            "installation finished.",
            ":snake: Installing miniforge...",
            "could not install miniforge :sob:",
            ":snake: Installed miniforge to `/usr/local`."
        )
    else:
        con.log(":snake: Miniforge is already installed. Skipped.")

    # Step 2: create the "qiime2" environment from the downloaded YAML file.
    if not qiime_installed:
        run_and_check(
            ["wget", QIIME_YAML_URL],
            "saved",
            ":mag: Downloading Qiime 2 package list...",
            "could not download package list :sob:",
            ":mag: Done."
        )

        # Work on a copy so the module-level CONDA_ARGS is left untouched,
        # and add "-y" for mamba only when it is not already present.
        create_args = list(CONDA_ARGS)
        if CONDA == "mamba" and "-y" not in create_args:
            create_args.append("-y")

        run_and_check(
            [os.path.join(PREFIX, "bin", CONDA), "env", "create", *create_args,
             "-n", "qiime2", "--file", QIIME_YAML],
            "Verifying transaction: ...working... done" if CONDA == "conda" else "Transaction finished",
            f":mag: Installing Qiime 2 ({version}). This may take a little bit.\n :clock1:",
            "could not install Qiime 2 :sob:",
            ":mag: Done."
        )

        mock_qiime2()

        # Empress is optional: a failed install is reported but not fatal.
        con.log(":evergreen_tree: Installing empress...")
        rc = run_in_env(
            "pip install --verbose Cython && pip install iow==1.0.7 empress",
            "qiime2"
        )
        if rc.returncode == 0:
            con.log(":evergreen_tree: Done.")
        else:
            con.log("could not install Empress :sob:")
            # Show pip's error output so the failure can be diagnosed;
            # markup is disabled because the text is arbitrary tool output.
            if rc.stderr:
                con.log(rc.stderr, markup=False)
    else:
        con.log(":mag: Qiime 2 is already installed. Skipped.")
        # The environment exists but no working `qiime` command was found
        # on PATH, so (re)create the wrapper script.
        if not qiime_active:
            mock_qiime2()

    # Step 3: verify the `qiime` command end to end.
    run_and_check(
        ["qiime", "info"],
        "QIIME 2 release:",
        ":bar_chart: Checking that Qiime 2 command line works...",
        "Qiime 2 command line does not seem to work :sob:",
        ":bar_chart: Qiime 2 command line looks good :tada:"
    )

    cleanup()

    con.log("[green]Everything is A-OK. "
            "You can start using Qiime 2 now :thumbs_up:[/green]")

In [None]:
# Print the q2cli version through the `qiime` command found on PATH.
!qiime --version

q2cli version 2025.4.0
Run `qiime info` for more version details.


In [None]:
from google.colab import files
import os

print("üìÅ UPLOAD ONLY THE ZIP FILE:")
print("1. Click the üìÅ FOLDER icon on the LEFT")
print("2. Click the üì§ UPLOAD button")
print("3. Select ONLY: manure_amplicon.zip")
print("4. Wait for upload to complete")

# Create project structure
!mkdir -p /content/fly_cow_project/{data,results}

print("Ready for zip file upload!")

📁 UPLOAD ONLY THE ZIP FILE:
1. Click the 📁 FOLDER icon on the LEFT
2. Click the 📤 UPLOAD button
3. Select ONLY: manure_amplicon.zip
4. Wait for upload to complete
✅ Ready for zip file upload!


Step 3: After Upload, Extract and Organize

In [None]:
# Move zip to project folder
!mv /content/manure_amplicon.zip /content/fly_cow_project/data/

# Extract everything
!unzip /content/fly_cow_project/data/manure_amplicon.zip -d /content/fly_cow_project/data/

# Move files from subfolder to main data folder
!mv /content/fly_cow_project/data/manure_amplicon/* /content/fly_cow_project/data/
!rmdir /content/fly_cow_project/data/manure_amplicon/

print("‚úÖ Files extracted and organized!")
!ls -la /content/fly_cow_project/data/


Archive:  /content/fly_cow_project/data/manure_amplicon.zip
   creating: /content/fly_cow_project/data/manure_amplicon/
   creating: /content/fly_cow_project/data/manure_amplicon/classifier/
  inflating: /content/fly_cow_project/data/manure_amplicon/classifier/2024.09.backbone.v4.nb.sklearn-1.4.2.qza  
  inflating: /content/fly_cow_project/data/manure_amplicon/manifest.tsv  
  inflating: /content/fly_cow_project/data/manure_amplicon/metadata.tsv  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781641.fastq  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781671.fastq  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781756.fastq  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781792.fastq  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781827.fastq  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781835.fastq  
  inflating: /content/fly_cow_project/data/manure_amplicon/SRR25781850.fas

Step 4: Run Analysis

In [None]:
print("üöÄ STARTING ANALYSIS...")

# 1. Import data
!qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path /content/fly_cow_project/data/manifest.tsv \
  --output-path /content/fly_cow_project/results/demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

# 2. Quality control
!qiime demux summarize \
  --i-data /content/fly_cow_project/results/demux.qza \
  --o-visualization /content/fly_cow_project/results/demux.qzv

print("STEP 1 COMPLETE!")

🚀 STARTING ANALYSIS...
There was a problem importing /content/fly_cow_project/data/manifest.tsv:

  /content/fly_cow_project/data/manifest.tsv is not a(n) SingleEndFastqManifestPhred33V2 file:

  Filepath on line 1 and column "absolute-filepath" could not be found (/data/SRR25781641.fastq) for sample "SRR25781641".

[?25h[0mUsage: [94mqiime demux summarize[0m [OPTIONS]

  Summarize counts per sample for all samples, and
  generate interactive positional quality plots
  based on `n` randomly selected sequences.

[1mInputs[0m:
  [94m[4m--i-data[0m ARTIFACT [32m[0m
    [32mSampleData[SequencesWithQuality |[0m
    [32mPairedEndSequencesWithQuality |[0m
    [32mJoinedSequencesWithQuality][0m
                         The demultiplexed
                         sequences to be
                         summarized.    [35m[required][0m
[1mParameters[0m:
  [94m--p-n[0m INTEGER          The number of sequences
                         that should be selected


In [None]:
# FIX THE MANIFEST FILE PATHS
print("üîß Fixing manifest file paths...")

# Read the current manifest
with open('/content/fly_cow_project/data/manifest.tsv', 'r') as f:
    content = f.read()

# Update the paths
new_content = content.replace(
    '/data/',
    '/content/fly_cow_project/data/'
)

# Save the fixed manifest
with open('/content/fly_cow_project/data/manifest.tsv', 'w') as f:
    f.write(new_content)

print("Manifest file fixed!")
print("Updated paths:")
!cat /content/fly_cow_project/data/manifest.tsv

🔧 Fixing manifest file paths...
✅ Manifest file fixed!
Updated paths:
sample-id	absolute-filepath	direction
SRR25781641	/content/fly_cow_project/data/SRR25781641.fastq	forward
SRR25781671	/content/fly_cow_project/data/SRR25781671.fastq	forward
SRR25781756	/content/fly_cow_project/data/SRR25781756.fastq	forward
SRR25781792	/content/fly_cow_project/data/SRR25781792.fastq	forward
SRR25781827	/content/fly_cow_project/data/SRR25781827.fastq	forward
SRR25781835	/content/fly_cow_project/data/SRR25781835.fastq	forward
SRR25781850	/content/fly_cow_project/data/SRR25781850.fastq	forward
SRR25781893	/content/fly_cow_project/data/SRR25781893.fastq	forward
SRR25781907	/content/fly_cow_project/data/SRR25781907.fastq	forward
SRR25781923	/content/fly_cow_project/data/SRR25781923.fastq	forward

In [None]:
print("üöÄ RESTARTING ANALYSIS WITH FIXED PATHS...")

# 1. Import data
print("1. Importing FASTQ files...")
!qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path /content/fly_cow_project/data/manifest.tsv \
  --output-path /content/fly_cow_project/results/demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

# 2. Quality control
print("2. Quality control...")
!qiime demux summarize \
  --i-data /content/fly_cow_project/results/demux.qza \
  --o-visualization /content/fly_cow_project/results/demux.qzv

print("IMPORT SUCCESSFUL!")

🚀 RESTARTING ANALYSIS WITH FIXED PATHS...
1. Importing FASTQ files...
Imported /content/fly_cow_project/data/manifest.tsv as SingleEndFastqManifestPhred33V2 to /content/fly_cow_project/results/demux.qza
2. Quality control...
Saved Visualization to: /content/fly_cow_project/results/demux.qzv
✅ IMPORT SUCCESSFUL!


DADA2 DENOISING

In [None]:
print("3. DADA2 Denoising...")
# Denoise the imported single-end reads; writes three artifacts into the
# results folder: representative sequences, feature table, denoising stats.
# NOTE(review): --p-trunc-len 250 and --p-n-threads 6 are hard-coded —
# confirm they match the read lengths and the CPUs available on the runtime.
!qiime dada2 denoise-single \
  --i-demultiplexed-seqs /content/fly_cow_project/results/demux.qza \
  --p-trim-left 0 \
  --p-trunc-len 250 \
  --p-n-threads 6 \
  --o-representative-sequences /content/fly_cow_project/results/rep-seqs.qza \
  --o-table /content/fly_cow_project/results/table.qza \
  --o-denoising-stats /content/fly_cow_project/results/denoising-stats.qza

print("DADA2 complete!")



3. DADA2 Denoising...
Saved FeatureTable[Frequency] to: /content/fly_cow_project/results/table.qza
Saved FeatureData[Sequence] to: /content/fly_cow_project/results/rep-seqs.qza
Saved SampleData[DADA2Stats] to: /content/fly_cow_project/results/denoising-stats.qza
✅ DADA2 complete!


checking current files

In [None]:
# List the results folder to see which artifacts have been written so far.
print("Current files:")
!ls -la /content/fly_cow_project/results/

📁 Current files:
total 213360
drwxr-xr-x 2 root root      4096 Nov 20 04:54 .
drwxr-xr-x 4 root root      4096 Nov 19 22:05 ..
-rw-r--r-- 1 root root 217629063 Nov 19 22:13 demux.qza
-rw-r--r-- 1 root root    322407 Nov 19 22:15 demux.qzv
-rw-r--r-- 1 root root     32347 Nov 20 04:54 denoising-stats.qza
-rw-r--r-- 1 root root    303106 Nov 20 04:54 rep-seqs.qza
-rw-r--r-- 1 root root    173595 Nov 20 04:54 table.qza


Step 4: Taxonomic Classification (5-10 minutes)

In [None]:
print("4. Taxonomic Classification...")
!qiime feature-classifier classify-sklearn \
  --i-classifier /content/fly_cow_project/data/classifier/2024.09.backbone.v4.nb.sklearn-1.4.2.qza \
  --i-reads /content/fly_cow_project/results/rep-seqs.qza \
  --o-classification /content/fly_cow_project/results/taxonomy.qza

print("‚úÖ Taxonomy complete!")

Step 5: Create Taxa Barplot (2-3 minutes)

In [None]:
print("5. Creating Taxa Barplot...")
!qiime taxa barplot \
  --i-table /content/fly_cow_project/results/table.qza \
  --i-taxonomy /content/fly_cow_project/results/taxonomy.qza \
  --m-metadata-file /content/fly_cow_project/data/metadata.tsv \
  --o-visualization /content/fly_cow_project/results/taxa-bar-plots.qzv

print("‚úÖ Barplot created!")

View Your Results

In [None]:
print("üéâ ANALYSIS COMPLETE!")
print("\nüìä YOUR RESULTS:")
!ls -la /content/fly_cow_project/results/

print("\nüåê TO VIEW RESULTS:")
print("1. Download .qzv files from the results folder")
print("2. Go to: https://view.qiime2.org")
print("3. Drag and drop files to view interactive plots")
print("\nüìà Key visualizations:")
print("‚Ä¢ demux.qzv - Quality control")
print("‚Ä¢ taxa-bar-plots.qzv - MAIN RESULTS (microbes by state)")