- Download Reference Genomes and Annotation files from RefSeq assembly in ftp directory
Necessary module(s): kent/385
- Piliocolobus tephrosceles
scripts/download_Rcolobus.sh
scripts/download_Rcolobus_annotations.sh
- Hepatocystis sp.
scripts/download_hep.sh
scripts/download_hep_annotations.sh
- Concatenate Host-Pathogen Reference Genomes and Annotation files
- Reference Genomes
cd genomes
cat *.fa > combined.fa
- Annotation files
cat Rcolobus.fix.gtf hep.gtf > combined.gtf
- Index concatenated Reference Genome (SBATCH JOB)
Necessary module(s): gcc/10.2.0, star/intel/2.7.6a
To submit sbatch job:
sbatch sbatch/name_of_job.sbatch
sbatch/index_genomes.sbatch
- Trim Colobus reads (ARRAY JOB)
Necessary module(s): trimmomatic/0.36
To submit an sbatch array job:
sbatch --array=1-[number of individuals] sbatch/name_of_job.sbatch
sbatch/trim_reads.sbatch
Necessary module(s): gcc/10.2.0 star/intel/2.7.6a
mkdir edgeR_results
gunzip $SCRATCH/colobus_hep/data/*TRIM_*.fastq.gz
sbatch/edgeR_align_genome.sbatch
Necessary module(s): bamtools/intel/2.5.1, samtools/intel/1.12
# Create index of reference genome
cd /scratch/aet359/colobus_paper/SnakeMake/genomes/
samtools faidx combined.fa
# Create bed file of chromosomes of interest for unique host and pathogen data
awk 'BEGIN {OFS="\t"}; { print $1,1,$2 }' combined.fa.fai | grep -v "NW_022" > to_include.bed
cd ..
# Create necessary results files
mkdir edgeR_results/mapped_reads
cd edgeR_results/mapped_reads
mkdir colobus
cd colobus
mkdir merged_reads
cd ..
mkdir hepatocystis
cd hepatocystis
mkdir merged_reads
cd ../../..
sbatch/edgeR_extract_reads.sbatch
Necessary module(s): r/intel/4.0.4
Necessary R package(s): BiocManager, Rsubread
module load r/intel/4.0.4
R
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Rsubread")
library(Rsubread)
bams = list.files(path = "edgeR_results/mapped_reads/colobus", pattern = "*.bam$", full.names=TRUE)
gtf.file = "genomes/combined.gtf"
colobus.fc = featureCounts(bams, annot.ext=gtf.file,
GTF.featureType="gene",
isGTFAnnotationFile=TRUE,
isPairedEnd=FALSE,
nthreads=8,
allowMultiOverlap=FALSE)
save.image("colobus.fc.Rdata")
- As file is running, create percent_parasitemia table in excel (enter Colobus_Reads_Mapped):
Colobus_Reads_Mapped = "Successfully assigned alignments"
- Do same for "unique" pathogen data
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Rsubread")
library(Rsubread)
bams = list.files(path = "edgeR_results/mapped_reads/hepatocystis", pattern = "*.bam$", full.names=TRUE)
gtf.file = "genomes/combined.gtf"
hepato.fc = featureCounts(bams, annot.ext=gtf.file,
GTF.featureType="gene",
isGTFAnnotationFile=TRUE,
isPairedEnd=FALSE,
nthreads=8,
allowMultiOverlap=FALSE)
save.image("hepato.fc.Rdata")
- As file is running, create percent_parasitemia table in excel (enter Hepatocystis_Reads_Mapped):
Hepatocystis_Reads_Mapped = "Successfully assigned alignments"
- Calculate Total_Reads:
Total_Reads = sum(Colobus_Reads_Mapped, Hepatocystis_Reads_Mapped)
- Calculate Percent Parasitemia:
Percent Hepotocystis = Hepatocystis_Reads_Mapped / Total_Reads
module load r/intel/4.0.4
Rscript scripts/immune_cell_PCR.R
Necessary module(s): r/intel/4.0.4
Necessary R package(s): BiocManager, edgeR, GO.db, gprofiler2
module load r/intel/4.0.4
# Colobus
Rscript scripts/edgeR_find_DE_host_genes.R
# Hepatocystis
Rscript scripts/edgeR_find_DE_pathogen_genes.R
Necessary module(s): gcc/10.2.0, star/intel/2.7.6a
mkdir EMMREML_results
sbatch/EMMREML_align_genome.sbatch
Necessary module(s): bamtools/intel/2.5.1, samtools/intel/1.12
# Create necessary results files
mkdir EMMREML_results/mapped_reads
cd EMMREML_results/mapped_reads
mkdir colobus
cd colobus
mkdir merged_reads
cd ..
mkdir hepatocystis
cd hepatocystis
mkdir merged_reads
cd ../../..
sbatch/EMMREML_extract_reads.sbatch
Necessary module(s): r/intel/4.0.4
Necessary R package(s): BiocManager, Rsubread
module load r/intel/4.0.4
R
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Rsubread")
library(Rsubread)
bams = list.files(path = "EMMREML_results/mapped_reads/colobus", pattern = "*.bam$", full.names=TRUE)
gtf.file = "genomes/combined.gtf"
colobus.EMMREML.fc = featureCounts(bams, annot.ext=gtf.file,
GTF.featureType="gene",
isGTFAnnotationFile=TRUE,
isPairedEnd=FALSE,
nthreads=8,
allowMultiOverlap=FALSE)
save.image("colobus.EMMREML.fc.Rdata")
- As file is running, create percent_parasitemia table in excel (enter Colobus_Reads_Mapped):
Colobus_Reads_Mapped = "Successfully assigned alignments"
- Do same for "unique" pathogen data
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Rsubread")
library(Rsubread)
bams = list.files(path = "EMMREML_results/mapped_reads/hepatocystis", pattern = "*.bam$", full.names=TRUE)
gtf.file = "genomes/combined.gtf"
hepato.EMMREML.fc = featureCounts(bams, annot.ext=gtf.file,
GTF.featureType="gene",
isGTFAnnotationFile=TRUE,
isPairedEnd=FALSE,
nthreads=8,
allowMultiOverlap=FALSE)
save.image("hepato.EMMREML.fc.Rdata")
- As file is running, create percent_parasitemia table in excel (enter Hepatocystis_Reads_Mapped):
Plasmodium_Reads_Mapped = "Successfully assigned alignments"
- Calculate Total_Reads:
Total_Reads = sum(Colobus_Reads_Mapped, Hepatocystis_Reads_Mapped)
- Calculate Percent Parasitemia:
Percent Parasitemia = Hepatocystis_Reads_Mapped / Total_Reads
Necessary module(s): jvarkit/base, picard/2.23.8, gatk/4.2.0.0, bamtools/intel/2.5.1, samtools/intel/1.12, gcc/10.2.0, star/intel/2.7.6a
module load r/intel/4.0.4
Rscript scripts/relatedness.sh
Necessary module(s): r/intel/4.0.4
Necessary R package(s): BiocManager, edgeR, GO.db, gprofiler2
module load r/intel/4.0.4
Rscript scripts/EMMREML_find_DE_host_genes.R
Necessary R package(s): Hmisc
module load r/intel/3.6.0
Rscript script/explore_hepato_life_stage_data.R