# Good Donor and Recipient Pipeline

## Install Utilities

In [9]:
!sudo apt-get update -y -qq

In [10]:
!sudo apt-get install -y -qq tree libcurl4-openssl-dev zlib1g-dev samtools

## Install Libraries

In [11]:
!pip install -q biopython pyBigWig pysam

## Import Libraries

In [12]:
from Bio import SeqIO
from Bio.SeqIO.FastaIO import SimpleFastaParser
from Bio.SeqIO.QualityIO import FastqGeneralIterator

## Download Datasets

In [8]:
# Show the contents of the parent directory, one level deep, to document the project layout.
!tree -L 1 ../

../
├── config
├── cromwell-executions
├── cromwell-workflow-logs
├── data
├── diagrams
├── docker
├── inputs
├── notebooks
├── pipelines
├── README.md
└── scripts_and_binaries

10 directories, 1 file


In [None]:
%%bash
# Download data from SRA: fetch the paired-end reads of accession SRR5377828.
#
# The commands in the here-document are sent to the container on stdin, so they
# run inside the dformoso/sratoolkit image instead of on the host (a bare
# `docker run -it IMAGE` followed by more lines would run those lines on the host).
# NOTE(review): this relies on the image's default command being a shell that
# reads stdin and starts in the mounted data directory — the same assumption the
# interactive `-it` session made; confirm against the image.
set -euo pipefail

# Host directory mounted at /tmp/ inside the container; override with $DATA_DIR.
DATA_DIR="${DATA_DIR:-/home/daniel/genomics/pipelines/lgtsearch/data}"

sudo docker run -i --rm -v "${DATA_DIR}:/tmp/" dformoso/sratoolkit <<'EOF'
set -eu
# Write the FASTQ files into SRR5377828/ because the alignment step reads
# SRR5377828/SRR5377828_1.fastq and SRR5377828/SRR5377828_2.fastq.
fasterq-dump --split-files --outdir SRR5377828 SRR5377828
EOF

In [None]:
# Download data from SRA
# NOTE(review): this cell downloads the same accession (SRR5377828) with the same
# image as the previous cell — presumably a leftover copy; confirm whether it
# should be removed or was meant to fetch a different accession.
# NOTE(review): these lines are shell commands; in a Python kernel the cell needs
# `%%bash` (or `!`) to run, and the second line executes on the host, not in the container.
sudo docker run -it --rm -v "/home/daniel/genomics/pipelines/lgtsearch/data/:/tmp/" dformoso/sratoolkit 
fasterq-dump --split-files SRR5377828

In [None]:


%%bash
#####################################
###### ALIGN, SORT AND INDEX ########
#####################################
# Align the SRR5377828 read pairs against each reference genome with BWA-MEM,
# then convert the SAM output to BAM, sort it and index it with samtools.
#
# Each tool runs in its own container. The commands in the here-documents are
# sent to the container on stdin so that they execute inside it, not on the host.
# NOTE(review): this relies on each image's default command being a shell that
# reads stdin and starts in the mounted data directory — the same assumption the
# interactive `-it` sessions made; confirm against the images.
set -euo pipefail

# Host directory mounted at /tmp/ inside the containers; override with $DATA_DIR.
DATA_DIR="${DATA_DIR:-/home/daniel/genomics/pipelines/lgtsearch/data}"
SAMPLE=SRR5377828
THREADS=24

# Run the commands read from stdin inside image "$1" with the data directory mounted.
run_in_container() {
    sudo docker run -i --rm -v "${DATA_DIR}:/tmp/" "$1"
}

# Identical steps for both references, in the original order:
# Pseudomonas aeruginosa PAO1 first, then GRCh38_p13.
for REF in pseudomonas_aeruginosa_pao1 GRCh38_p13; do
    ALN="alignments/${SAMPLE}-${REF}"

    # Use BWA to align reads
    run_in_container dformoso/bwa <<EOF
set -eu
mkdir -p alignments
bwa index ref_genomes/${REF}.fasta
bwa mem -t ${THREADS} \
    ref_genomes/${REF}.fasta \
    ${SAMPLE}/${SAMPLE}_1.fastq \
    ${SAMPLE}/${SAMPLE}_2.fastq > \
    ${ALN}.sam
EOF

    # SAM to BAM conversion and indexing
    run_in_container dformoso/samtools <<EOF
set -eu
samtools view -@ ${THREADS} -b -S ${ALN}.sam > ${ALN}.bam
samtools sort -@ ${THREADS} ${ALN}.bam -o ${ALN}-sorted.bam
samtools index -@ ${THREADS} ${ALN}-sorted.bam
EOF
done


In [None]:
%%bash
#########################
####### Bucketise #######
#########################
# Split the Pseudomonas alignments into four buckets by the mapping status of
# each mate (MM, MU, UM, UU) using SAM flag filters:
#   -f N  keep records that have ALL bits of N set
#   -F N  drop records that have ANY bit of N set
# Flag bits: 1 paired, 2 proper pair, 4 read unmapped, 8 mate unmapped,
#            16 read reverse strand, 32 mate reverse strand,
#            64 first in pair (R1), 128 second in pair (R2),
#            256 secondary, 512 QC fail, 1024 duplicate, 2048 supplementary.
# The -F masks leave the strand bits (16, 32) out on purpose: including them
# would silently drop every pair whose mapped mate aligns to the reverse strand.
# -b makes the BAM output explicit instead of depending on samtools inferring
# the format from the .bam extension.
set -euo pipefail

THREADS=24
IN_BAM=../data/alignments/SRR5377828-pseudomonas_aeruginosa_pao1-sorted.bam
BUCKET=../data/buckets/SRR5377828-pseudomonas_aeruginosa_pao1

mkdir -p ../data/buckets

# Recipient -> Donor (Pseudomonas)
## R1 Mapped - R2 Mapped
samtools view -@ "${THREADS}" -b -f 3 -F 2048 -o "${BUCKET}_MM.bam" "${IN_BAM}"

## R1 Mapped - R2 Unmapped
# R1 records: paired + mate unmapped + first in pair; the read itself must be mapped.
samtools view -@ "${THREADS}" -b -f 73 -F 3974 -o "${BUCKET}_MU_R1.bam" "${IN_BAM}"
# R2 records: paired + read unmapped + second in pair; the mate must be mapped.
samtools view -@ "${THREADS}" -b -f 133 -F 3914 -o "${BUCKET}_MU_R2.bam" "${IN_BAM}"
samtools merge -@ "${THREADS}" -f \
    "${BUCKET}_MU.bam" \
    "${BUCKET}_MU_R1.bam" \
    "${BUCKET}_MU_R2.bam"

## R1 Unmapped - R2 Mapped
# R1 records: paired + read unmapped + first in pair; the mate must be mapped.
samtools view -@ "${THREADS}" -b -f 69 -F 3978 -o "${BUCKET}_UM_R1.bam" "${IN_BAM}"
# R2 records: paired + mate unmapped + second in pair; the read itself must be mapped.
samtools view -@ "${THREADS}" -b -f 137 -F 3910 -o "${BUCKET}_UM_R2.bam" "${IN_BAM}"
samtools merge -@ "${THREADS}" -f \
    "${BUCKET}_UM.bam" \
    "${BUCKET}_UM_R1.bam" \
    "${BUCKET}_UM_R2.bam"

## R1 Unmapped - R2 Unmapped
samtools view -@ "${THREADS}" -b -f 77 -o "${BUCKET}_UU_R1.bam" "${IN_BAM}"
samtools view -@ "${THREADS}" -b -f 141 -o "${BUCKET}_UU_R2.bam" "${IN_BAM}"
samtools merge -@ "${THREADS}" -f \
    "${BUCKET}_UU.bam" \
    "${BUCKET}_UU_R1.bam" \
    "${BUCKET}_UU_R2.bam"

In [None]:
%%bash
# Recipient -> Host (Human)
# Split the GRCh38_p13 alignments into four buckets by the mapping status of
# each mate (MM, MU, UM, UU) using SAM flag filters:
#   -f N  keep records that have ALL bits of N set
#   -F N  drop records that have ANY bit of N set
# Flag bits: 1 paired, 2 proper pair, 4 read unmapped, 8 mate unmapped,
#            16 read reverse strand, 32 mate reverse strand,
#            64 first in pair (R1), 128 second in pair (R2),
#            256 secondary, 512 QC fail, 1024 duplicate, 2048 supplementary.
# The -F masks leave the strand bits (16, 32) out on purpose: including them
# would silently drop every pair whose mapped mate aligns to the reverse strand.
# -b makes the BAM output explicit instead of depending on samtools inferring
# the format from the .bam extension.
set -euo pipefail

THREADS=24
IN_BAM=../data/alignments/SRR5377828-GRCh38_p13-sorted.bam
BUCKET=../data/buckets/SRR5377828-GRCh38_p13

mkdir -p ../data/buckets

## R1 Mapped - R2 Mapped
samtools view -@ "${THREADS}" -b -f 3 -F 2048 -o "${BUCKET}_MM.bam" "${IN_BAM}"

## R1 Mapped - R2 Unmapped
# R1 records: paired + mate unmapped + first in pair; the read itself must be mapped.
samtools view -@ "${THREADS}" -b -f 73 -F 3974 -o "${BUCKET}_MU_R1.bam" "${IN_BAM}"
# R2 records: paired + read unmapped + second in pair; the mate must be mapped.
samtools view -@ "${THREADS}" -b -f 133 -F 3914 -o "${BUCKET}_MU_R2.bam" "${IN_BAM}"
samtools merge -@ "${THREADS}" -f \
    "${BUCKET}_MU.bam" \
    "${BUCKET}_MU_R1.bam" \
    "${BUCKET}_MU_R2.bam"

## R1 Unmapped - R2 Mapped
# R1 records: paired + read unmapped + first in pair; the mate must be mapped.
samtools view -@ "${THREADS}" -b -f 69 -F 3978 -o "${BUCKET}_UM_R1.bam" "${IN_BAM}"
# R2 records: paired + mate unmapped + second in pair; the read itself must be mapped.
samtools view -@ "${THREADS}" -b -f 137 -F 3910 -o "${BUCKET}_UM_R2.bam" "${IN_BAM}"
samtools merge -@ "${THREADS}" -f \
    "${BUCKET}_UM.bam" \
    "${BUCKET}_UM_R1.bam" \
    "${BUCKET}_UM_R2.bam"

## R1 Unmapped - R2 Unmapped
samtools view -@ "${THREADS}" -b -f 77 -o "${BUCKET}_UU_R1.bam" "${IN_BAM}"
samtools view -@ "${THREADS}" -b -f 141 -o "${BUCKET}_UU_R2.bam" "${IN_BAM}"
samtools merge -@ "${THREADS}" -f \
    "${BUCKET}_UU.bam" \
    "${BUCKET}_UU_R1.bam" \
    "${BUCKET}_UU_R2.bam"