<a href="https://colab.research.google.com/github/jkim1134/repository-bioinformatics/blob/main/combined_fly_manure_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##  QIIME2 Installation

In [None]:
#!/usr/bin/env python3

"""Set up Qiime 2 on Google colab.

Do not use this on a local machine, especially not as an admin!
"""

import os
import sys
import shutil
from subprocess import Popen, PIPE, run

# Install `rich` into the interpreter that is executing this code. A bare
# `pip` resolves through PATH and may belong to a different Python.
r = Popen([sys.executable, "-m", "pip", "install", "rich"])
r.wait()
from rich.console import Console  # noqa  (only importable after the install)
con = Console()

# Installation prefix of miniforge; the trailing slash is relied upon by the
# string concatenation in the install step.
PREFIX = "/usr/local/miniforge3/"

# Probe the current state so that finished steps can be skipped on re-runs.
has_conda = "conda version" in os.popen(
    os.path.join(PREFIX, "bin", "conda") + " info"
).read()
qiime_installed = os.path.exists(os.path.join(PREFIX, "envs", "qiime2", "bin", "qiime"))
qiime_active = "QIIME 2 release:" in os.popen("qiime info").read()


# Installer for miniforge (always the latest release).
MINICONDA_PATH = (
    "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh"
)

# Environment file of the "amplicon" distribution; filled in further down.
QIIME_YAML_TEMPLATE = (
    "https://data.qiime2.org/distro/amplicon/qiime2-amplicon-{version}-py{python}-linux-conda.yml"
)

DEFAULT_QIIME_VERSION = "2025.4"

# A release may be passed as the only command line argument. Inside a notebook
# kernel sys.argv belongs to the launcher, so a value that is not a dotted
# numeric release falls back to the default instead of crashing the cell.
version = sys.argv[1] if len(sys.argv) == 2 else DEFAULT_QIIME_VERSION
try:
    qiime_ver = tuple(int(v) for v in version.split("."))
except ValueError:
    print(
        f"Ignoring invalid Qiime 2 version {version!r}; "
        f"using {DEFAULT_QIIME_VERSION} instead.",
        file=sys.stderr,
    )
    version = DEFAULT_QIIME_VERSION
    qiime_ver = tuple(int(v) for v in version.split("."))

# Python tag used in the environment file name of each release range.
if qiime_ver < (2021, 4):
    pyver = "36"
elif qiime_ver < (2024, 5):
    pyver = "38"
elif qiime_ver < (2024, 10):
    pyver = "39"
else:
    pyver = "310"

CONDA = "mamba"
CONDA_ARGS = ["-q"] if CONDA == "conda" else ["-y"]

# Releases before 2023.9 use the "core" distribution URL layout.
if qiime_ver < (2023, 9):
    QIIME_YAML_TEMPLATE = (
        "https://data.qiime2.org/distro/core/qiime2-{version}-py{python}-linux-conda.yml"
    )

QIIME_YAML_URL = QIIME_YAML_TEMPLATE.format(version=version, python=pyver)
QIIME_YAML = os.path.basename(QIIME_YAML_URL)


def cleanup():
    """Delete the downloaded installer and package list plus Colab's sample data.

    Paths that do not exist are skipped; the log line is always emitted.
    """
    for leftover in (os.path.basename(MINICONDA_PATH), QIIME_YAML):
        if os.path.exists(leftover):
            os.remove(leftover)

    sample_dir = "/content/sample_data"
    if os.path.exists(sample_dir):
        shutil.rmtree(sample_dir)

    con.log(":broom: Cleaned up unneeded files.")


def run_and_check(args, check, message, failure, success, console=con):
    """Run a command and verify both its exit status and its output.

    Args:
        args: Command and arguments as a list (executed without a shell).
        check: Text that must occur in the combined stdout and stderr.
        message: Logged before the command starts.
        failure: Logged in red, together with the output, on failure.
        success: Logged in blue on success.
        console: Console used for logging.

    On failure the combined output is written to ``logs.txt``, ``cleanup()``
    is called and the interpreter exits with status 1.
    """
    console.log(message)
    r = run(args, env=os.environ, capture_output=True, text=True)
    out = r.stdout + r.stderr
    if r.returncode == 0 and check in out:
        console.log("[blue]%s[/blue]" % success)
        return

    console.log("[red]%s[/red]" % failure, out)
    # Use a context manager so the log file is flushed and closed before exit.
    with open("logs.txt", "w") as log_file:
        log_file.write(out)
    cleanup()
    sys.exit(1)

def run_in_env(cmd, env, console=con):
    """Run a shell command with the given conda environment activated.

    The command line sources conda's profile script, activates ``env`` and
    then executes ``cmd``, all inside one bash process. Returns the
    ``CompletedProcess`` with captured text output.
    """
    console.log(f":snake: Activating the {env} environment.")

    profile_script = os.path.join(PREFIX, "etc", "profile.d", "conda.sh")
    steps = [f". {profile_script}", f"conda activate {env}", f"{cmd}"]
    return run(
        " && ".join(steps),
        shell=True,
        executable="/bin/bash",
        capture_output=True,
        text=True,
    )

def mock_qiime2(console=con):
    """Write a `qiime` launcher script to /usr/local/bin.

    The launcher sources conda's profile script, activates the ``qiime2``
    environment and forwards all arguments to the real ``qiime`` executable.

    Args:
        console: Console used for the progress messages.
    """
    console.log(":penguin: Setting up the Qiime2 command...")
    conda_profile = os.path.join(PREFIX, "etc", "profile.d", "conda.sh")
    launcher = "/usr/local/bin/qiime"
    with open(launcher, "w") as mocky:
        mocky.write("#!/usr/bin/env bash")
        mocky.write(f'\n\n. {conda_profile} && conda activate qiime2 && qiime "$@"\n')
    # Add the execute bits directly instead of spawning a shell for `chmod +x`.
    os.chmod(launcher, os.stat(launcher).st_mode | 0o111)
    console.log(":penguin: Done.")

if __name__ == "__main__":
    # Step 1: download and install miniforge unless conda is already there.
    if not has_conda:
        run_and_check(
            ["wget", MINICONDA_PATH],
            "saved",
            ":snake: Downloading miniforge...",
            "failed downloading miniforge :sob:",
            ":snake: Done."
        )

        run_and_check(
            ["bash", os.path.basename(MINICONDA_PATH), "-bfp", PREFIX],
            "installation finished.",
            ":snake: Installing miniforge...",
            "could not install miniforge :sob:",
            ":snake: Installed miniforge to `/usr/local`."
        )
    else:
        con.log(":snake: Miniforge is already installed. Skipped.")

    # Step 2: create the qiime2 environment from the published package list.
    if not qiime_installed:
        run_and_check(
            ["wget", QIIME_YAML_URL],
            "saved",
            ":mag: Downloading Qiime 2 package list...",
            "could not download package list :sob:",
            ":mag: Done."
        )

        # Work on a copy so the module-level list is not mutated, and only
        # add "-y" for mamba when it is not already present.
        create_args = list(CONDA_ARGS)
        if CONDA == "mamba" and "-y" not in create_args:
            create_args.append("-y")

        run_and_check(
            [PREFIX + "bin/" + CONDA, "env", "create", *create_args, "-n", "qiime2", "--file", QIIME_YAML],
            "Verifying transaction: ...working... done" if CONDA == "conda" else "Transaction finished",
            f":mag: Installing Qiime 2 ({version}). This may take a little bit.\n :clock1:",
            "could not install Qiime 2 :sob:",
            ":mag: Done."
        )

        mock_qiime2()

        # Empress is optional: a failed install is reported but not fatal.
        con.log(":evergreen_tree: Installing empress...")
        rc = run_in_env(
            "pip install --verbose Cython && pip install iow==1.0.7 empress",
            "qiime2"
        )
        if rc.returncode == 0:
            con.log(":evergreen_tree: Done.")
        else:
            # Show the captured error output so the failure can be diagnosed.
            con.log("could not install Empress :sob:", rc.stderr)
    else:
        con.log(":mag: Qiime 2 is already installed. Skipped.")
        # The environment exists but the `qiime` command is not reachable:
        # only the launcher script has to be recreated.
        if not qiime_active:
            mock_qiime2()

    # Step 3: verify the command line end to end.
    run_and_check(
        ["qiime", "info"],
        "QIIME 2 release:",
        ":bar_chart: Checking that Qiime 2 command line works...",
        "Qiime 2 command line does not seem to work :sob:",
        ":bar_chart: Qiime 2 command line looks good :tada:"
    )

    cleanup()

    con.log("[green]Everything is A-OK. "
            "You can start using Qiime 2 now :thumbs_up:[/green]")

In [None]:
# Sanity check: show the q2cli version reported by the `qiime` command.
# NOTE(review): the message is printed before the command runs, so it does not
# by itself prove that the installation worked.
print("‚úÖ QIIME2 installed!")
!qiime --version

‚úÖ QIIME2 installed!
q2cli version 2025.4.0
Run `qiime info` for more version details.


# Project setup (modified for combined data)

In [None]:
# Create the project folders with os.makedirs instead of
# `!mkdir -p .../{data,results}`: IPython treats `{...}` on a shell line as
# Python interpolation and brace expansion depends on the shell in use.
import os

for _subdir in ("data", "results"):
    os.makedirs(f"/content/combined_fly_manure/{_subdir}", exist_ok=True)

# Upload Both Datasets

In [None]:
# Reminder cell: names the two archives that the following cells read from
# /content/.
# NOTE(review): `files` is imported but `files.upload()` is never called here,
# so presumably the upload happens through the Colab file browser -- confirm.
from google.colab import files
print("UPLOAD BOTH:")
print("- manure_amplicon.zip")
print("- fly_amplicon.zip")

UPLOAD BOTH:
- manure_amplicon.zip
- fly_amplicon.zip


CHECK FLY DATA STRUCTURE

In [None]:
# List (without extracting) the first entries of the fly archive to see how
# its files are laid out.
print("üîç CHECKING FLY DATA STRUCTURE:")
!unzip -l /content/fly_amplicon.zip | head -20

# List the manure archive the same way so both layouts can be compared.
print("\nüîç CHECKING MANURE DATA STRUCTURE:")
!unzip -l /content/manure_amplicon.zip | head -20

üîç CHECKING FLY DATA STRUCTURE:
Archive:  /content/fly_amplicon.zip
  Length      Date    Time    Name
---------  ---------- -----   ----
 48988744  2025-11-18 16:02   classifier.qza
      568  2025-11-17 22:39   manifest.tsv
      267  2025-11-18 16:12   metadata.tsv
 84670222  2025-11-17 02:20   SRR25781682.fastq
 68081268  2025-11-17 02:02   SRR25781707.fastq
 62215856  2025-11-16 20:53   SRR25781709.fastq
 40244982  2025-11-17 02:13   SRR25781740.fastq
 61696604  2025-11-17 02:15   SRR25781760.fastq
 64754792  2025-11-17 02:21   SRR25781796.fastq
 76991900  2025-11-17 02:18   SRR25781819.fastq
103137764  2025-11-17 02:16   SRR25781830.fastq
 34282658  2025-11-17 02:36   SRR25781882.fastq
101473852  2025-11-17 02:08   SRR25781890.fastq
---------                     -------
746539477                     13 files

üîç CHECKING MANURE DATA STRUCTURE:
Archive:  /content/manure_amplicon.zip
  Length      Date    Time    Name
---------  ---------- -----   ----
        0  2025-11-17 21:

# **STEP 1: EXTRACT AND ORGANIZE BOTH DATASETS**

In [None]:
# Create combined project
!mkdir -p /content/combined_fly_manure/{data,results}

# Extract both - they have different structures
!unzip -q /content/fly_amplicon.zip -d /content/combined_fly_manure/data/fly/
!unzip -q /content/manure_amplicon.zip -d /content/combined_fly_manure/data/manure/

# Move manure files from subfolder to main manure folder
!mv /content/combined_fly_manure/data/manure/manure_amplicon/* /content/combined_fly_manure/data/manure/
!rmdir /content/combined_fly_manure/data/manure/manure_amplicon/

print("‚úÖ Both datasets organized!")

‚úÖ Both datasets organized!


# **STEP 2: CHECK FILE STRUCTURES**

In [None]:
print("üìÅ FLY DATA:")
!ls -la /content/combined_fly_manure/data/fly/ | head -10

print("\nüìÅ MANURE DATA:")
!ls -la /content/combined_fly_manure/data/manure/ | head -10

print("\nüìä SAMPLE COUNTS:")
print(f"Fly samples: {len([f for f in os.listdir('/content/combined_fly_manure/data/fly/') if f.endswith('.fastq')])}")
print(f"Manure samples: {len([f for f in os.listdir('/content/combined_fly_manure/data/manure/') if f.endswith('.fastq')])}")

üìÅ FLY DATA:
total 729088
drwxr-xr-x 2 root root      4096 Nov 18 17:54 .
drwxr-xr-x 4 root root      4096 Nov 18 17:54 ..
-rw-rw-rw- 1 root root  48988744 Nov 18 16:02 classifier.qza
-rw-rw-rw- 1 root root       568 Nov 17 22:39 manifest.tsv
-rw-rw-rw- 1 root root       267 Nov 18 16:12 metadata.tsv
-rw-rw-rw- 1 root root  84670222 Nov 17 02:20 SRR25781682.fastq
-rw-rw-rw- 1 root root  68081268 Nov 17 02:02 SRR25781707.fastq
-rw-rw-rw- 1 root root  62215856 Nov 16 20:53 SRR25781709.fastq
-rw-rw-rw- 1 root root  40244982 Nov 17 02:13 SRR25781740.fastq

üìÅ MANURE DATA:
total 998464
drwxr-xr-x 3 root root      4096 Nov 18 17:54 .
drwxr-xr-x 4 root root      4096 Nov 18 17:54 ..
drwxrwxrwx 2 root root      4096 Nov 17 21:53 classifier
-rw-rw-rw- 1 root root       487 Nov 14 23:47 manifest.tsv
-rw-rw-rw- 1 root root       326 Nov 17 20:01 metadata.tsv
-rw-rw-rw- 1 root root  99008852 Nov 14 23:16 SRR25781641.fastq
-rw-rw-rw- 1 root root  81976652 Nov 14 23:16 SRR25781671.fastq
-rw-rw-r

# **STEP 3: CREATE COMBINED MANIFEST**

In [None]:
# Read fly manifest
with open('/content/combined_fly_manure/data/fly/manifest.tsv', 'r') as f:
    fly_lines = f.read().split('\n')[1:]  # Skip header

# Read manure manifest
with open('/content/combined_fly_manure/data/manure/manifest.tsv', 'r') as f:
    manure_lines = f.read().split('\n')[1:]  # Skip header

# Create combined manifest
combined_manifest = "sample-id\tabsolute-filepath\tdirection\n"

# Add fly samples (update paths)
for line in fly_lines:
    if line.strip():
        parts = line.split('\t')
        if len(parts) == 3:
            sample_id, old_path, direction = parts
            new_path = f"/content/combined_fly_manure/data/fly/{os.path.basename(old_path)}"
            combined_manifest += f"{sample_id}\t{new_path}\t{direction}\n"

# Add manure samples (update paths)
for line in manure_lines:
    if line.strip():
        parts = line.split('\t')
        if len(parts) == 3:
            sample_id, old_path, direction = parts
            new_path = f"/content/combined_fly_manure/data/manure/{os.path.basename(old_path)}"
            combined_manifest += f"{sample_id}\t{new_path}\t{direction}\n"

# Save combined manifest
with open('/content/combined_fly_manure/data/combined_manifest.tsv', 'w') as f:
    f.write(combined_manifest)

print("‚úÖ Combined manifest created!")
print("First few lines:")
!head -5 /content/combined_fly_manure/data/combined_manifest.tsv

‚úÖ Combined manifest created!
First few lines:
sample-id	absolute-filepath	direction
SRR25781709 	/content/combined_fly_manure/data/fly/C:\SRA\fastq\SRR25781925.fastq	forward
SRR25781707	/content/combined_fly_manure/data/fly/C:\SRA\fastq\SRR25781707.fastq	forward
SRR25781890	/content/combined_fly_manure/data/fly/C:\SRA\fastq\SRR25781890.fastq	forward
SRR25781740	/content/combined_fly_manure/data/fly/C:\SRA\fastq\SRR25781740.fastq	forward


# **STEP 4: CREATE COMBINED METADATA**

In [None]:
# Read fly metadata
with open('/content/combined_fly_manure/data/fly/metadata.tsv', 'r') as f:
    fly_meta = f.read().split('\n')

# Read manure metadata
with open('/content/combined_fly_manure/data/manure/metadata.tsv', 'r') as f:
    manure_meta = f.read().split('\n')

# Create combined metadata
combined_metadata = fly_meta[0] + "\n"  # Header

# Add fly samples (label as 'fly')
for line in fly_meta[1:]:
    if line.strip() and not line.startswith('#'):
        parts = line.split('\t')
        if len(parts) >= 2:
            combined_metadata += f"{parts[0]}\tfly\t{parts[2] if len(parts) > 2 else 'Fly gut sample'}\n"

# Add manure samples (label as 'manure')
for line in manure_meta[1:]:
    if line.strip() and not line.startswith('#'):
        parts = line.split('\t')
        if len(parts) >= 2:
            combined_metadata += f"{parts[0]}\tmanure\t{parts[2] if len(parts) > 2 else 'Cattle manure sample'}\n"

# Save combined metadata
with open('/content/combined_fly_manure/data/combined_metadata.tsv', 'w') as f:
    f.write(combined_metadata)

print("‚úÖ Combined metadata created!")
print("First few lines:")
!head -5 /content/combined_fly_manure/data/combined_metadata.tsv

‚úÖ Combined metadata created!
First few lines:
sample-id	sample-type	State
SRR25781709	fly	Kansas
SRR25781707	fly	Kansas
SRR25781890	fly	Kansas
SRR25781740	fly	Kansas


# **RUN THE COMBINED ANALYSIS PIPELINE**

In [None]:
print("üöÄ STARTING COMBINED FLY + MANURE ANALYSIS (20 SAMPLES)")

# 1. Import combined data
!qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path /content/combined_fly_manure/data/combined_manifest.tsv \
  --output-path /content/combined_fly_manure/results/combined_demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

# 2. Quality control
!qiime demux summarize \
  --i-data /content/combined_fly_manure/results/combined_demux.qza \
  --o-visualization /content/combined_fly_manure/results/combined_demux.qzv

print("‚úÖ Data imported and quality checked!")

üöÄ STARTING COMBINED FLY + MANURE ANALYSIS (20 SAMPLES)
[31m[1mThere was a problem importing /content/combined_fly_manure/data/combined_manifest.tsv:

  /content/combined_fly_manure/data/combined_manifest.tsv is not a(n) SingleEndFastqManifestPhred33V2 file:

  Filepath on line 1 and column "absolute-filepath" could not be found (/content/combined_fly_manure/data/fly/C:\SRA\fastq\SRR25781925.fastq) for sample "SRR25781709".[0m

[0m[?25hUsage: [94mqiime demux summarize[0m [OPTIONS]

  Summarize counts per sample for all samples, and
  generate interactive positional quality plots
  based on `n` randomly selected sequences.

[1mInputs[0m:
  [94m[4m--i-data[0m ARTIFACT [32m[0m
    [32mSampleData[SequencesWithQuality |[0m
    [32mPairedEndSequencesWithQuality |[0m
    [32mJoinedSequencesWithQuality][0m
                         The demultiplexed
                         sequences to be
                         summarized.    [35m[required][0m
[1mParameters[0m:
  [

In [None]:
print("üîß FIXING MANIFEST PATHS...")

# Read the broken manifest
with open('/content/combined_fly_manure/data/combined_manifest.tsv', 'r') as f:
    content = f.read()

# Fix the paths - remove Windows paths and use correct Colab paths
lines = content.split('\n')
fixed_lines = []

for line in lines:
    if line.startswith('sample-id') or not line.strip():
        fixed_lines.append(line)
        continue

    parts = line.split('\t')
    if len(parts) == 3:
        sample_id, filepath, direction = parts

        # Extract just the filename from any path format
        filename = os.path.basename(filepath.replace('C:\\SRA\\fastq\\', '').replace('\\', '/'))

        # Create correct Colab path
        if 'SRR25781682' <= sample_id <= 'SRR25781890':  # Fly samples
            new_path = f"/content/combined_fly_manure/data/fly/{filename}"
        else:  # Manure samples
            new_path = f"/content/combined_fly_manure/data/manure/{filename}"

        fixed_lines.append(f"{sample_id}\t{new_path}\t{direction}")

# Save fixed manifest
with open('/content/combined_fly_manure/data/combined_manifest.tsv', 'w') as f:
    f.write('\n'.join(fixed_lines))

print("‚úÖ MANIFEST PATHS FIXED!")
print("First 5 fixed lines:")
!head -5 /content/combined_fly_manure/data/combined_manifest.tsv

üîß FIXING MANIFEST PATHS...
‚úÖ MANIFEST PATHS FIXED!
First 5 fixed lines:
sample-id	absolute-filepath	direction
SRR25781709 	/content/combined_fly_manure/data/fly/SRR25781925.fastq	forward
SRR25781707	/content/combined_fly_manure/data/fly/SRR25781707.fastq	forward
SRR25781890	/content/combined_fly_manure/data/fly/SRR25781890.fastq	forward
SRR25781740	/content/combined_fly_manure/data/fly/SRR25781740.fastq	forward


# **VERIFY FILES EXIST**

In [None]:
print("üîç VERIFYING FILES EXIST AT FIXED PATHS...")

# Check a few sample paths from the fixed manifest
!ls -la "/content/combined_fly_manure/data/fly/SRR25781709.fastq"
!ls -la "/content/combined_fly_manure/data/manure/SRR25781641.fastq"

print("‚úÖ File verification complete!")

üîç VERIFYING FILES EXIST AT FIXED PATHS...
-rw-rw-rw- 1 root root 62215856 Nov 16 20:53 /content/combined_fly_manure/data/fly/SRR25781709.fastq
-rw-rw-rw- 1 root root 99008852 Nov 14 23:16 /content/combined_fly_manure/data/manure/SRR25781641.fastq
‚úÖ File verification complete!


In [None]:
print("üîç CHECKING ACTUAL FILES IN FOLDERS:")

print("FLY FILES:")
!ls /content/combined_fly_manure/data/fly/*.fastq

print("\nMANURE FILES:")
!ls /content/combined_fly_manure/data/manure/*.fastq

üîç CHECKING ACTUAL FILES IN FOLDERS:
FLY FILES:
/content/combined_fly_manure/data/fly/SRR25781682.fastq
/content/combined_fly_manure/data/fly/SRR25781707.fastq
/content/combined_fly_manure/data/fly/SRR25781709.fastq
/content/combined_fly_manure/data/fly/SRR25781740.fastq
/content/combined_fly_manure/data/fly/SRR25781760.fastq
/content/combined_fly_manure/data/fly/SRR25781796.fastq
/content/combined_fly_manure/data/fly/SRR25781819.fastq
/content/combined_fly_manure/data/fly/SRR25781830.fastq
/content/combined_fly_manure/data/fly/SRR25781882.fastq
/content/combined_fly_manure/data/fly/SRR25781890.fastq

MANURE FILES:
/content/combined_fly_manure/data/manure/SRR25781641.fastq
/content/combined_fly_manure/data/manure/SRR25781671.fastq
/content/combined_fly_manure/data/manure/SRR25781756.fastq
/content/combined_fly_manure/data/manure/SRR25781792.fastq
/content/combined_fly_manure/data/manure/SRR25781827.fastq
/content/combined_fly_manure/data/manure/SRR25781835.fastq
/content/combined_fly




# **CREATING CLEAN MANIFEST FROM ACTUAL FILES.**


In [None]:
print("üìã CREATING CLEAN MANIFEST FROM ACTUAL FILES...")

# Create clean manifest header
manifest_content = "sample-id\tabsolute-filepath\tdirection\n"

# Add fly samples (10 files)
fly_samples = [
    "/content/combined_fly_manure/data/fly/SRR25781682.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781707.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781709.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781740.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781760.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781796.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781819.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781830.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781882.fastq",
    "/content/combined_fly_manure/data/fly/SRR25781890.fastq"
]

# Add manure samples (10 files)
manure_samples = [
    "/content/combined_fly_manure/data/manure/SRR25781641.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781671.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781756.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781792.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781827.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781835.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781850.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781893.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781907.fastq",
    "/content/combined_fly_manure/data/manure/SRR25781923.fastq"
]

# Add all samples to manifest
for file_path in fly_samples + manure_samples:
    sample_id = os.path.basename(file_path).replace('.fastq', '')
    manifest_content += f"{sample_id}\t{file_path}\tforward\n"

# Save the clean manifest
with open('/content/combined_fly_manure/data/combined_manifest.tsv', 'w') as f:
    f.write(manifest_content)

print("‚úÖ CLEAN MANIFEST CREATED!")
print(f"Total samples: {len(fly_samples) + len(manure_samples)}")
print("First 5 lines:")
!head -5 /content/combined_fly_manure/data/combined_manifest.tsv

üìã CREATING CLEAN MANIFEST FROM ACTUAL FILES...
‚úÖ CLEAN MANIFEST CREATED!
Total samples: 20
First 5 lines:
sample-id	absolute-filepath	direction
SRR25781682	/content/combined_fly_manure/data/fly/SRR25781682.fastq	forward
SRR25781707	/content/combined_fly_manure/data/fly/SRR25781707.fastq	forward
SRR25781709	/content/combined_fly_manure/data/fly/SRR25781709.fastq	forward
SRR25781740	/content/combined_fly_manure/data/fly/SRR25781740.fastq	forward


# **VERIFY EVERY FILE EXISTS**

In [None]:
print("üîç VERIFYING ALL FILES EXIST...")

all_files = fly_samples + manure_samples
missing_files = []

for file_path in all_files:
    if os.path.exists(file_path):
        print(f"‚úÖ {os.path.basename(file_path)}")
    else:
        print(f"‚ùå {os.path.basename(file_path)} - MISSING!")
        missing_files.append(file_path)

if not missing_files:
    print("üéâ ALL FILES EXIST - READY FOR ANALYSIS!")
else:
    print(f"‚ùå {len(missing_files)} files missing")

üîç VERIFYING ALL FILES EXIST...
‚úÖ SRR25781682.fastq
‚úÖ SRR25781707.fastq
‚úÖ SRR25781709.fastq
‚úÖ SRR25781740.fastq
‚úÖ SRR25781760.fastq
‚úÖ SRR25781796.fastq
‚úÖ SRR25781819.fastq
‚úÖ SRR25781830.fastq
‚úÖ SRR25781882.fastq
‚úÖ SRR25781890.fastq
‚úÖ SRR25781641.fastq
‚úÖ SRR25781671.fastq
‚úÖ SRR25781756.fastq
‚úÖ SRR25781792.fastq
‚úÖ SRR25781827.fastq
‚úÖ SRR25781835.fastq
‚úÖ SRR25781850.fastq
‚úÖ SRR25781893.fastq
‚úÖ SRR25781907.fastq
‚úÖ SRR25781923.fastq
üéâ ALL FILES EXIST - READY FOR ANALYSIS!


# **Running Import again**

In [None]:
print("üöÄ IMPORTING COMBINED DATA (20 SAMPLES)...")

!qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path /content/combined_fly_manure/data/combined_manifest.tsv \
  --output-path /content/combined_fly_manure/results/combined_demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

print("‚úÖ IMPORT COMPLETE!")

üöÄ IMPORTING COMBINED DATA (20 SAMPLES)...
[32mImported /content/combined_fly_manure/data/combined_manifest.tsv as SingleEndFastqManifestPhred33V2 to /content/combined_fly_manure/results/combined_demux.qza[0m
[0m[?25h‚úÖ IMPORT COMPLETE!


# **CHECK RESULTS**

In [None]:
print("üìä CHECKING IMPORT RESULTS:")
!ls -la /content/combined_fly_manure/results/

üìä CHECKING IMPORT RESULTS:
total 351008
drwxr-xr-x 2 root root      4096 Nov 18 18:19 .
drwxr-xr-x 4 root root      4096 Nov 18 17:33 ..
-rw-r--r-- 1 root root 359420882 Nov 18 18:20 combined_demux.qza


# QUALITY CONTROL





In [None]:
# Summarize the imported reads into the visualization combined_demux.qzv.
print("1. QUALITY CONTROL...")
!qiime demux summarize \
  --i-data /content/combined_fly_manure/results/combined_demux.qza \
  --o-visualization /content/combined_fly_manure/results/combined_demux.qzv

print("Quality control complete!")

1. QUALITY CONTROL...
[32mSaved Visualization to: /content/combined_fly_manure/results/combined_demux.qzv[0m
[0m[?25hQuality control complete!


UPLOAD THE CORRECT CLASSIFIER

In [None]:
print("üìÅ UPLOAD THE CORRECT CLASSIFIER:")
print("File name: silva-138-99-nb-classifier.qza")
print("Use the file uploader on the left")

# First, remove any existing invalid classifier
!rm -f /content/combined_fly_manure/data/silva-138-99-nb-classifier.qza

# After uploading, move the new one to the project
!mv /content/silva-138-99-nb-classifier.qza /content/combined_fly_manure/data/

print("‚úÖ Classifier ready for use!")

üìÅ UPLOAD THE CORRECT CLASSIFIER:
File name: silva-138-99-nb-classifier.qza
Use the file uploader on the left
‚úÖ Classifier ready for use!


# **DADA2 Denoising**

In [None]:
print("üöÄ RESTARTING DADA2 WITH CORRECT SETUP")

!qiime dada2 denoise-single \
  --i-demultiplexed-seqs /content/combined_fly_manure/results/combined_demux.qza \
  --p-trim-left 0 \
  --p-trunc-len 250 \
  --p-n-threads 4 \
  --o-representative-sequences /content/combined_fly_manure/results/combined_rep-seqs.qza \
  --o-table /content/combined_fly_manure/results/combined_table.qza \
  --o-denoising-stats /content/combined_fly_manure/results/combined_denoising-stats.qza

print("‚úÖ DADA2 COMPLETE!")

üöÄ RESTARTING DADA2 WITH CORRECT SETUP
[32mSaved FeatureTable[Frequency] to: /content/combined_fly_manure/results/combined_table.qza[0m
[32mSaved FeatureData[Sequence] to: /content/combined_fly_manure/results/combined_rep-seqs.qza[0m
[32mSaved SampleData[DADA2Stats] to: /content/combined_fly_manure/results/combined_denoising-stats.qza[0m
[0m[?25h‚úÖ DADA2 COMPLETE!


In [None]:
print("üßπ Cleaning up corrupted classifier...")

# Remove the old corrupted classifier
!rm -f "/content/combined_fly_manure/data/silva-138-99-nb-classifier.qza"

# Verify it's gone
!ls -la "/content/combined_fly_manure/data/silva-138-99-nb-classifier.qza" 2>/dev/null || echo "‚úÖ Corrupted classifier successfully removed!"

print("‚úÖ Cleanup complete!")

üßπ Cleaning up corrupted classifier...
‚úÖ Corrupted classifier successfully removed!
‚úÖ Cleanup complete!


In [None]:
print("üîÑ Starting fresh download...")

# Download the proper pre-formatted files
!wget -O "/content/combined_fly_manure/data/silva-138-99-seqs.qza" \
  "https://data.qiime2.org/2023.9/common/silva-138-99-seqs.qza"

!wget -O "/content/combined_fly_manure/data/silva-138-99-tax.qza" \
  "https://data.qiime2.org/2023.9/common/silva-138-99-tax.qza"

print("‚úÖ Fresh downloads complete!")

üîÑ Starting fresh download...
--2025-11-19 02:28:56--  https://data.qiime2.org/2023.9/common/silva-138-99-seqs.qza
Resolving data.qiime2.org (data.qiime2.org)... 54.200.1.12
Connecting to data.qiime2.org (data.qiime2.org)|54.200.1.12|:443... connected.
HTTP request sent, awaiting response... 302 FOUND
Location: https://qiime2-data.s3-us-west-2.amazonaws.com/2023.9/common/silva-138-99-seqs.qza [following]
--2025-11-19 02:28:56--  https://qiime2-data.s3-us-west-2.amazonaws.com/2023.9/common/silva-138-99-seqs.qza
Resolving qiime2-data.s3-us-west-2.amazonaws.com (qiime2-data.s3-us-west-2.amazonaws.com)... 3.5.86.211, 52.92.243.50, 52.92.208.82, ...
Connecting to qiime2-data.s3-us-west-2.amazonaws.com (qiime2-data.s3-us-west-2.amazonaws.com)|3.5.86.211|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 97137296 (93M) [binary/octet-stream]
Saving to: ‚Äò/content/combined_fly_manure/data/silva-138-99-seqs.qza‚Äô


2025-11-19 02:29:00 (28.8 MB/s) - ‚Äò/content/combined

# **TRAIN NEW CLASSIFIER**

In [None]:
# 3. TRAIN NEW CLASSIFIER
# Fit a naive Bayes classifier on the downloaded SILVA reference reads and
# taxonomy; the result is written to custom-silva-classifier.qza.
# NOTE(review): in the recorded run this command ended with "Killed" and no
# classifier was saved -- presumably the runtime ran out of memory; confirm
# and consider a pre-trained classifier or a larger runtime.
print("üîß STEP 3: Training new classifier...")
!qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads /content/combined_fly_manure/data/silva-138-99-seqs.qza \
  --i-reference-taxonomy /content/combined_fly_manure/data/silva-138-99-tax.qza \
  --o-classifier /content/combined_fly_manure/data/custom-silva-classifier.qza

üîß STEP 3: Training new classifier...
/usr/local/bin/qiime: line 3: 136664 Killed                  qiime "$@"


# **Taxonomic Classification**

In [None]:
print("üöÄ STEP 4: Running taxonomic classification...")
!qiime feature-classifier classify-sklearn \
  --i-classifier /content/combined_fly_manure/data/custom-silva-classifier.qza \
  --i-reads /content/combined_fly_manure/results/combined_rep-seqs.qza \
  --o-classification /content/combined_fly_manure/results/combined_taxonomy.qza


üöÄ STEP 4: Running taxonomic classification...
Usage: [94mqiime feature-classifier classify-sklearn[0m 
           [OPTIONS]

  Classify reads by taxon using a fitted
  classifier.

[1mInputs[0m:
  [94m[4m--i-reads[0m ARTIFACT [32mFeatureData[Sequence][0m
                         The feature data to be
                         classified.    [35m[required][0m
  [94m[4m--i-classifier[0m ARTIFACT
    [32mTaxonomicClassifier[0m  The taxonomic classifier
                         for classifying the
                         reads.         [35m[required][0m
[1mParameters[0m:
  [94m--p-reads-per-batch[0m VALUE [32mInt % Range(1,[0m
    [32mNone) | Str % Choices('auto')[0m
                         Number of reads to
                         process in each batch. If
                         "auto", this parameter is
                         autoscaled to min( number
                         of query sequences /
                         [4mn-jobs[0m, 20000).
      

# **Create Main Comparison Visualization**

In [None]:
# Build the taxonomy bar plot from the feature table, the taxonomy assignments
# and the combined metadata (which carries the fly/manure label).
# NOTE(review): needs combined_taxonomy.qza from the classification cell; the
# closing message is printed whether or not the plot was produced.
print("4. CREATING FLY vs MANURE COMPARISON PLOT...")
!qiime taxa barplot \
  --i-table /content/combined_fly_manure/results/combined_table.qza \
  --i-taxonomy /content/combined_fly_manure/results/combined_taxonomy.qza \
  --m-metadata-file /content/combined_fly_manure/data/combined_metadata.tsv \
  --o-visualization /content/combined_fly_manure/results/combined_taxa-bar-plots.qzv

print("üéâ COMBINED ANALYSIS COMPLETE!")