In [2]:
# @title ### **Step 1: Mount Google Drive & Install Tools**
# Attach Google Drive to the Colab filesystem; the cells below read and write
# data through this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


Follow the installation instructions at https://github.com/FelixKrueger/TrimGalore.

Trim Galore is a Perl wrapper around two tools: Cutadapt and FastQC. To use it, ensure that both pieces of software are available and copy the trim_galore script to a location available on the PATH.

In [None]:
# 1. Install dependencies
!pip install cutadapt
!apt-get install perl

# 2. Download and install TrimGalore
!wget https://github.com/FelixKrueger/TrimGalore/archive/refs/tags/0.6.10.tar.gz
!tar -xzf 0.6.10.tar.gz

# 3. Make the script executable and add to PATH
import os
os.environ['PATH'] += os.pathsep + '/content/TrimGalore-0.6.10'

# 4. Verify installation
!ls /content/TrimGalore-0.6.10/trim_galore  # Check if file exists
!/content/TrimGalore-0.6.10/trim_galore --version

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
perl is already the newest version (5.34.0-3ubuntu1.4).
0 upgraded, 0 newly installed, 0 to remove and 31 not upgraded.
--2025-07-15 20:21:34--  https://github.com/FelixKrueger/TrimGalore/archive/refs/tags/0.6.10.tar.gz
Resolving github.com (github.com)... 140.82.116.4
Connecting to github.com (github.com)|140.82.116.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/FelixKrueger/TrimGalore/tar.gz/refs/tags/0.6.10 [following]
--2025-07-15 20:21:35--  https://codeload.github.com/FelixKrueger/TrimGalore/tar.gz/refs/tags/0.6.10
Resolving codeload.github.com (codeload.github.com)... 140.82.116.10
Connecting to codeload.github.com (codeload.github.com)|140.82.116.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: ‘0.6.10.tar.gz’

0.6.10.tar.gz           [     <=>       

In [None]:
! ls -al /content/drive/MyDrive/Lab_share/Lab_data/class_466/raw.small.RNA-Seq/

total 4016603
-rw------- 1 root root 348532629 Jul  9 01:13 YapPool_S01_sub_1.fq.gz
-rw------- 1 root root 352131287 Jul  9 01:16 YapPool_S01_sub_2.fq.gz
-rw------- 1 root root 355127133 Jul  9 01:29 YapPool_S02_sub_1.fq.gz
-rw------- 1 root root 359222563 Jul  9 01:32 YapPool_S02_sub_2.fq.gz
-rw------- 1 root root 304346742 Jul  9 01:45 YapPool_S03_sub_1.fq.gz
-rw------- 1 root root 308548182 Jul  9 01:47 YapPool_S03_sub_2.fq.gz
-rw------- 1 root root 364095003 Jul  9 01:59 YapPool_S09_sub_1.fq.gz
-rw------- 1 root root 367720065 Jul  9 02:02 YapPool_S09_sub_2.fq.gz
-rw------- 1 root root 345373866 Jul  9 02:16 YapPool_S10_sub_1.fq.gz
-rw------- 1 root root 349413817 Jul  9 02:19 YapPool_S10_sub_2.fq.gz
-rw------- 1 root root 327577645 Jul  9 02:31 YapPool_S11_sub_1.fq.gz
-rw------- 1 root root 330909050 Jul  9 02:35 YapPool_S11_sub_2.fq.gz


In [None]:
##### This process is done within 5 minutes
!trim_galore \
--paired \
--quality 20 \
--length 20 \
--gzip \
--output_dir ./ \
--fastqc \
--clip_R1 2 \
--clip_R2 2 \
--three_prime_clip_R1 2 \
--three_prime_clip_R2 2 \
--illumina \
drive/MyDrive/Lab_share/Lab_data/class_466/raw.small.RNA-Seq/YapPool_S01_sub_1.fq.gz \
drive/MyDrive/Lab_share/Lab_data/class_466/raw.small.RNA-Seq/YapPool_S01_sub_2.fq.gz

Multicore support not enabled. Proceeding with single-core trimming.
Path to Cutadapt set as: 'cutadapt' (default)
Cutadapt seems to be working fine (tested command 'cutadapt --version')
Cutadapt version: 5.1
single-core operation.
Proceeding with 'gzip' for decompression
To decrease CPU usage of decompression, please install 'igzip' and run again

No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)

Output will be written into the directory: /content/
Writing report to '/content/YapPool_S01_sub_1.fq.gz_trimming_report.txt'

SUMMARISING RUN PARAMETERS
Input filename: drive/MyDrive/Lab_share/Lab_data/class_466/raw.small.RNA-Seq/YapPool_S01_sub_1.fq.gz
Trimming mode: paired-end
Trim Galore version: 0.6.10
Cutadapt version: 5.1
Number of cores used for trimming: 1
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; user defined)
Maximum trimming er

In [None]:
%%writefile run_trimgalore.sh
#!/bin/bash
#
# Batch-trim every paired-end sample in SAMPLES with TrimGalore.
#
# For each sample <S> this reads   ${SEQ_DIR}/<S>_sub_1.fq.gz and <S>_sub_2.fq.gz
# and writes into ${OUT_DIR}       the trimmed reads, the trimming reports and
#                                  <S>_trim.log (all TrimGalore stdout/stderr).
# Requires trim_galore and cutadapt on the PATH.

# Exit on any failed command, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

echo "Starting TrimGalore process..."

# --- Configuration for inputs and outputs ---
# Absolute paths (Drive is mounted at /content/drive) so the script can be
# launched from any working directory.
SEQ_DIR='/content/drive/MyDrive/Lab_share/Lab_data/class_466/raw.small.RNA-Seq'
OUT_DIR='/content/drive/MyDrive/Lab_share/Lab_data/class_466/clean.trimgalore.RNA-Seq'
# Define samples as a bash array
SAMPLES=("YapPool_S01" "YapPool_S02" "YapPool_S03" "YapPool_S09" "YapPool_S10" "YapPool_S11")

# Create the output directory if it doesn't exist
mkdir -p "${OUT_DIR}"
echo "Ensured output directory exists: ${OUT_DIR}"

# Loop through each sample and run TrimGalore
for SAMPLE in "${SAMPLES[@]}"; do
    echo "Processing sample: ${SAMPLE}"

    READ1="${SEQ_DIR}/${SAMPLE}_sub_1.fq.gz"
    READ2="${SEQ_DIR}/${SAMPLE}_sub_2.fq.gz"
    LOG_FILE="${OUT_DIR}/${SAMPLE}_trim.log"

    # Fail early with a clear message instead of a cryptic entry in the log file.
    for FASTQ in "${READ1}" "${READ2}"; do
        if [[ ! -f "${FASTQ}" ]]; then
            echo "ERROR: input file not found: ${FASTQ}" >&2
            exit 1
        fi
    done

    # All TrimGalore output goes to the per-sample log, so report failures on
    # the console explicitly; otherwise the script would stop without a message.
    # The trailing underscore in --basename is kept as-is: it yields the
    # established output names <sample>_trim__val_1.fq.gz / _val_2.fq.gz.
    if ! trim_galore \
        --paired \
        --quality 20 \
        --length 20 \
        --gzip \
        --output_dir "${OUT_DIR}" \
        --clip_R1 2 \
        --clip_R2 2 \
        --three_prime_clip_R1 2 \
        --three_prime_clip_R2 2 \
        --illumina \
        --basename "${SAMPLE}_trim_" \
        "${READ1}" \
        "${READ2}" \
        > "${LOG_FILE}" 2>&1; then
        echo "ERROR: trim_galore failed for ${SAMPLE}; see ${LOG_FILE}" >&2
        exit 1
    fi

    echo "Finished processing ${SAMPLE}"
done

Writing run_trimgalore.sh


In [None]:
! chmod +x run_trimgalore.sh

In [4]:
# This took 140 minutes to finish
! time ./run_trimgalore.sh

/bin/bash: line 1: ./run_trimgalore.sh: No such file or directory

real	0m0.002s
user	0m0.001s
sys	0m0.001s


In [6]:
! ls -al drive/MyDrive/Lab_share/Lab_data/class_466/clean.trimgalore.RNA-Seq/

total 3761363
-rw------- 1 root root      5244 Jul 15 20:55 YapPool_S01_sub_1.fq.gz_trimming_report.txt
-rw------- 1 root root      5540 Jul 15 21:07 YapPool_S01_sub_2.fq.gz_trimming_report.txt
-rw------- 1 root root     13637 Jul 15 21:08 YapPool_S01_trim.log
-rw------- 1 root root 327476943 Jul 15 21:00 YapPool_S01_trim__val_1.fq.gz
-rw------- 1 root root 328406041 Jul 15 21:00 YapPool_S01_trim__val_2.fq.gz
-rw------- 1 root root      5251 Jul 15 21:13 YapPool_S02_sub_1.fq.gz_trimming_report.txt
-rw------- 1 root root      5535 Jul 15 21:25 YapPool_S02_sub_2.fq.gz_trimming_report.txt
-rw------- 1 root root     13639 Jul 15 21:25 YapPool_S02_trim.log
-rw------- 1 root root 334036068 Jul 15 21:18 YapPool_S02_trim__val_1.fq.gz
-rw------- 1 root root 334476109 Jul 15 21:18 YapPool_S02_trim__val_2.fq.gz
-rw------- 1 root root      5135 Jul 15 21:30 YapPool_S03_sub_1.fq.gz_trimming_report.txt
-rw------- 1 root root      5516 Jul 15 21:41 YapPool_S03_sub_2.fq.gz_trimming_report.txt
-rw-----

In [7]:
! pip install multiqc

Collecting multiqc
  Downloading multiqc-1.30-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.3/46.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3 (from multiqc)
  Downloading boto3-1.39.5-py3-none-any.whl.metadata (6.6 kB)
Collecting kaleido==0.2.1 (from multiqc)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting rich-click (from multiqc)
  Downloading rich_click-1.8.9-py3-none-any.whl.metadata (7.9 kB)
Collecting coloredlogs (from multiqc)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting spectra>=0.0.10 (from multiqc)
  Downloading spectra-0.1.0-py3-none-any.whl.metadata (927 bytes)
Collecting python-dotenv (from multiqc)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting polars-lts-cpu (from multi

In [8]:
!multiqc drive/MyDrive/Lab_share/Lab_data/class_466/clean.trimgalore.RNA-Seq/


[91m///[0m ]8;id=11701;https://multiqc.info\[1mMultiQC[0m]8;;\ 🔍 [2mv1.30[0m

[34m       file_search[0m | Search path: /content/drive/MyDrive/Lab_share/Lab_data/class_466/clean.trimgalore.RNA-Seq
[2K         [34msearching[0m | [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [32m30/30[0m  
[?25h[34m          cutadapt[0m | Found 12 reports
[34m     write_results[0m | Data        : multiqc_data
[34m     write_results[0m | Report      : multiqc_report.html
[34m           multiqc[0m | MultiQC complete


In [11]:
! ls -al

total 4752
drwxr-xr-x 1 root root    4096 Jul 16 01:30 .
drwxr-xr-x 1 root root    4096 Jul 15 23:38 ..
drwxr-xr-x 4 root root    4096 Jul 14 13:37 .config
drwx------ 5 root root    4096 Jul 15 23:40 drive
drwxr-xr-x 2 root root    4096 Jul 16 01:30 multiqc_data
-rw-r--r-- 1 root root 4839639 Jul 16 01:30 multiqc_report.html
drwxr-xr-x 1 root root    4096 Jul 14 13:37 sample_data


In [16]:
import os
from IPython.display import HTML

# Render the MultiQC HTML report inline in the notebook.
html_file_name = '/content/multiqc_report.html' # Replace with your HTML file
# os.path.join returns an absolute second argument unchanged, so this only
# prepends the working directory when a relative file name is given.
html_file_path = os.path.join(os.getcwd(), html_file_name)

if os.path.exists(html_file_path):
    # Read the whole report as UTF-8 text and hand it to the rich HTML display.
    with open(html_file_path, 'r', encoding='utf-8') as report:
        html_content = report.read()

    display(HTML(html_content))
else:
    print(f"Error: {html_file_path} not found.")

Output hidden; open in https://colab.research.google.com to view.