# CTC360 Pre/Post Treatment Pipeline

Complete Colab-ready notebook.

## Mount Google Drive

In [None]:
# Attach Google Drive to the Colab filesystem under /content/drive.
# force_remount=True is passed so the mount call is repeated on every run.
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

## Create folders

In [None]:
# Create one working directory per pipeline stage, then list the tree.
# Paths are spelled out instead of using {a,b,c} brace expansion so the
# command does not depend on the shell or on IPython's {} interpolation.
!mkdir -p /content/CTC360/fastq /content/CTC360/trimmed /content/CTC360/qc /content/CTC360/bam /content/CTC360/variants /content/CTC360/figures
!ls -R /content/CTC360

## Install tools

In [None]:
# Install the command-line tools used by the later cells.
# GATK 4.4.0.0 (downloaded in the next section) requires a Java 17 runtime,
# so one is installed here alongside the bioinformatics tools.
!apt-get update
!apt-get install -y fastqc bwa samtools fastp bcftools openjdk-17-jre-headless

# NOTE(review): confirm the `java` on PATH reports version 17; if an older
# runtime is still selected, switch it with `update-alternatives --config java`.
!java -version

## Download GATK

In [None]:
# Fetch the GATK 4.4.0.0 release archive into /content and unpack it there.
# --continue resumes a partial download; unzip -o overwrites files from a
# previous extraction without prompting.
%cd /content
!wget --continue https://github.com/broadinstitute/gatk/releases/download/4.4.0.0/gatk-4.4.0.0.zip
!unzip -o gatk-4.4.0.0.zip

## Download chr12 reference

In [None]:
# Download the hg38 chr12 sequence and build every index the pipeline needs.
# The file is saved as hg38.fa even though it contains chr12 only; later
# cells refer to it by that name.
%cd /content/CTC360
!wget -c https://hgdownload.soe.ucsc.edu/goldenPath/hg38/chromosomes/chr12.fa.gz
!gunzip chr12.fa.gz
!mv chr12.fa hg38.fa

# BWA index (for alignment) and FASTA index (for samtools/GATK).
!bwa index hg38.fa
!samtools faidx hg38.fa

# Sequence dictionary (hg38.dict) for GATK. The leading `!` is required so
# IPython runs this as a shell command. Any stale dictionary is removed first
# so a re-run of this cell does not stop on an existing output file.
!rm -f hg38.dict
!/content/gatk-4.4.0.0/gatk CreateSequenceDictionary -R hg38.fa

## Download FASTQs

In [None]:
# Download the paired-end FASTQ files from the ENA FTP mirror and save them
# under pipeline-friendly names: SRR13973704 -> "pre", SRR13973878 -> "post".
%cd /content/CTC360/fastq

# Pre-treatment sample (R1 / R2)
!wget https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR139/004/SRR13973704/SRR13973704_1.fastq.gz --output-document=CTC360_pre_R1.fastq.gz
!wget https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR139/004/SRR13973704/SRR13973704_2.fastq.gz --output-document=CTC360_pre_R2.fastq.gz

# Post-treatment sample (R1 / R2)
!wget https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR139/008/SRR13973878/SRR13973878_1.fastq.gz --output-document=CTC360_post_R1.fastq.gz
!wget https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR139/008/SRR13973878/SRR13973878_2.fastq.gz --output-document=CTC360_post_R2.fastq.gz

# Show what was downloaded, with human-readable sizes.
!ls -lh

## FastQC

In [None]:
# Run FastQC on every raw FASTQ and collect the reports in the qc/ folder.
# FastQC writes its output next to each input file unless an output directory
# is given, so changing into qc/ is not enough; --outdir makes it explicit.
%cd /content/CTC360/qc
!fastqc --outdir . ../fastq/*.gz

## fastp trimming

In [None]:
# Trim both samples with fastp, using its default settings.
# Trimmed reads go to trimmed/, and the HTML/JSON reports go to qc/.
%cd /content/CTC360

# Pre-treatment sample
!fastp --in1 fastq/CTC360_pre_R1.fastq.gz --in2 fastq/CTC360_pre_R2.fastq.gz --out1 trimmed/CTC360_pre_R1.trimmed.fastq.gz --out2 trimmed/CTC360_pre_R2.trimmed.fastq.gz --html qc/pre_fastp.html --json qc/pre_fastp.json

# Post-treatment sample
!fastp --in1 fastq/CTC360_post_R1.fastq.gz --in2 fastq/CTC360_post_R2.fastq.gz --out1 trimmed/CTC360_post_R1.trimmed.fastq.gz --out2 trimmed/CTC360_post_R2.trimmed.fastq.gz --html qc/post_fastp.html --json qc/post_fastp.json

## BWA alignment

In [None]:
# Align the trimmed reads to the reference with BWA-MEM, coordinate-sort the
# output with samtools, and index the resulting BAMs.
#
# Each alignment is tagged with a read group (-R). The SM (sample) values PRE
# and POST are the names passed to Mutect2 via `-tumor` later on; GATK needs
# them in the BAM header to identify the sample.
# NOTE(review): PL:ILLUMINA is an assumption about the sequencing platform;
# confirm it against the run metadata.
%cd /content/CTC360

!bwa mem -t 8 -R '@RG\tID:PRE\tSM:PRE\tLB:CTC360_pre\tPL:ILLUMINA' hg38.fa trimmed/CTC360_pre_R1.trimmed.fastq.gz trimmed/CTC360_pre_R2.trimmed.fastq.gz | samtools sort -o bam/CTC360_pre.bam
!bwa mem -t 8 -R '@RG\tID:POST\tSM:POST\tLB:CTC360_post\tPL:ILLUMINA' hg38.fa trimmed/CTC360_post_R1.trimmed.fastq.gz trimmed/CTC360_post_R2.trimmed.fastq.gz | samtools sort -o bam/CTC360_post.bam

!samtools index bam/CTC360_pre.bam
!samtools index bam/CTC360_post.bam

## Mark duplicates

In [None]:
# Remove duplicate reads from both BAMs with GATK MarkDuplicates and index
# the de-duplicated output. Duplication metrics are written next to the BAMs.
# The paths are relative, so the working directory is set explicitly, and the
# GATK lines carry the `!` prefix so IPython runs them as shell commands.
%cd /content/CTC360

!/content/gatk-4.4.0.0/gatk MarkDuplicates -I bam/CTC360_pre.bam -O bam/CTC360_pre.dedup.bam -M bam/CTC360_pre.metrics --REMOVE_DUPLICATES true
!/content/gatk-4.4.0.0/gatk MarkDuplicates -I bam/CTC360_post.bam -O bam/CTC360_post.dedup.bam -M bam/CTC360_post.metrics --REMOVE_DUPLICATES true

!samtools index bam/CTC360_pre.dedup.bam
!samtools index bam/CTC360_post.dedup.bam

## Mutect2

In [None]:
# Call somatic variants in tumor-only mode with Mutect2, once per sample.
# The paths are relative, so the working directory is set explicitly, and the
# GATK lines carry the `!` prefix so IPython runs them as shell commands.
# NOTE(review): the `-tumor` value must equal the SM tag of the read group in
# the input BAM; check the header with `samtools view -H` before running.
%cd /content/CTC360

!/content/gatk-4.4.0.0/gatk Mutect2 -R hg38.fa -I bam/CTC360_pre.dedup.bam -tumor PRE -O variants/CTC360_pre.unfiltered.vcf.gz
!/content/gatk-4.4.0.0/gatk Mutect2 -R hg38.fa -I bam/CTC360_post.dedup.bam -tumor POST -O variants/CTC360_post.unfiltered.vcf.gz

## Filter Mutect Calls

In [None]:
# Run FilterMutectCalls on the raw Mutect2 calls for each sample, writing the
# filtered VCFs that the Python analysis cell reads.
# The paths are relative, so the working directory is set explicitly, and the
# GATK lines carry the `!` prefix so IPython runs them as shell commands.
%cd /content/CTC360

!/content/gatk-4.4.0.0/gatk FilterMutectCalls -R hg38.fa -V variants/CTC360_pre.unfiltered.vcf.gz -O variants/CTC360_pre.vcf.gz
!/content/gatk-4.4.0.0/gatk FilterMutectCalls -R hg38.fa -V variants/CTC360_post.unfiltered.vcf.gz -O variants/CTC360_post.vcf.gz

## Python VAF analysis

In [None]:
import gzip
import pandas as pd


def load_vcf(path):
    """Read a gzip-compressed VCF into a DataFrame of string columns.

    Lines starting with "#" (meta-information and the header line) are
    skipped. Every remaining line is stripped and split on tabs, and the
    pieces are labelled with ten fixed column names, so each record is
    expected to have exactly ten fields (one sample column).

    Parameters
    ----------
    path : str
        Path to a ``.vcf.gz`` file.

    Returns
    -------
    pandas.DataFrame
        One row per variant record; all values are left as strings.
    """
    column_names = [
        "CHROM", "POS", "ID", "REF", "ALT",
        "QUAL", "FILTER", "INFO", "FORMAT", "SAMPLE",
    ]
    with gzip.open(path, "rt") as handle:
        records = [
            line.strip().split("\t")
            for line in handle
            if not line.startswith("#")
        ]
    return pd.DataFrame(records, columns=column_names)


# Load the filtered calls for both time points.
pre = load_vcf("/content/CTC360/variants/CTC360_pre.vcf.gz")
post = load_vcf("/content/CTC360/variants/CTC360_post.vcf.gz")

# Preview the pre-treatment calls.
pre.head()