-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e26295c
Showing
1 changed file
with
137 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# The following pipeline is written by Dr. Ildem Akerman (IMSR, University of Birmingham) for the analysis of Lexogen 5' Quantseq RNA-seq data generated by the University of Birmingham Genomics Facility | ||
# Quantseq Library kits information : https://www.lexogen.com/quantseq-3mrna-sequencing/ | ||
# Pipeline modified from PMID: 28041957 PMCID: PMC5300904 DOI: 10.1016/j.cmet.2016.11.016 Akerman Et al 2017 | ||
|
||
# please note that the true directory names have been removed and replaced with the words "folder or directory" - please insert your own directory names. | ||
# Any special parameters used for specific pipelines are indicated in the article's materials and methods section. | ||
# Please reach me from University of Birmingham e-mail address for questions or to report a fault; https://www.birmingham.ac.uk/staff/profiles/metabolism-systems/akerman-ildem.aspx | ||
|
||
############################################################################################# | ||
# TRIMMING adapters, low quality and polyA tails READS and fastQC to test if it makes a difference | ||
############################################################################################# | ||
# Quality controls were performed using fastQC and reads are trimmed using Trimmomatic and/or bbduk | ||
#!/bin/bash | ||
for file in *.fastq | ||
do | ||
java -jar ${TMATIC_ROOT}/trimmomatic-0.32.jar SE -trimlog Triming_LOG_"${file}" ${file} tt_"${file}" ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 | ||
done | ||
echo "trimmomatic done" | ||
###### ###################### | ||
#Alternatively bbduk script from BBMAP suite | ||
#cd /bbmap/ | ||
#for i in *.fastq | ||
#do | ||
#cat $i | bbduk.sh in=${i} out=/trimmo_bbdukpA/"${i}" ref=sourcefolderhere/polyA.fa.gz k=13 ktrim=r useshortkmers=t mink=5 qtrim=r trimq=10 minlength=20 | ||
#done | ||
######################## | ||
|
||
|
||
############################################################################################# | ||
# Align to human or mouse genome (example mouse) | ||
############################################################################################# | ||
#!/bin/bash | ||
# Remember to create an index directory for STAR (/genomeDirectory/STARmm10index) | ||
STAR --runThreadN 6 --runMode genomeGenerate --genomeDir genomeDirectory/STARmm10index --genomeFastaFiles /folder/mm10.fa --sjdbGTFfile folder/gencode.vM20.annotation.gtf --sjdbOverhang 74 | ||
|
||
# Now align to the mouse (or human) genome | ||
for file in *.fastq | ||
do | ||
STAR --runThreadN 8 --runMode alignReads --genomeDir /genomeDirectory/STARmm10index --readFilesIn ${file} --outFileNamePrefix mm10_"${file}" --outFilterScoreMinOverLread 0.3 --outFilterMatchNminOverLread 0.3 --outFilterMultimapNmax 20 --outReadsUnmapped Fastx_failed --outSAMtype BAM SortedByCoordinate | ||
done | ||
|
||
############################################################################################# | ||
# HT-seq count | ||
############################################################################################# | ||
# Count the number of reads taht fall on each gene (gene annotation indicated below for mouse) | ||
#!/bin/bash | ||
for file in *.bam | ||
do | ||
htseq-count -m intersection-nonempty -s yes -f bam -r pos ${file} directory/gencode.vM20.annotation.gtf > "${file}"_HTseq | ||
done | ||
|
||
# Please note that -s is used for stranded libraries ONLY. | ||
|
||
|
||
############################################################################################# | ||
# DeSeq2 in R (not bash) Example analysis for Nasteska et al (David Hodson Lab) in R Studio / R | ||
############################################################################################# | ||
|
||
# Get the gene name conversion from source - for MOUSE (similar for human). This makes life easier as one can see gene symbols, rather than gencode names. | ||
#source("https://bioconductor.org/biocLite.R") | ||
#biocLite("EnsDb.Hsapiens.v92") | ||
source("https://bioconductor.org/biocLite.R") | ||
biocLite("ensembldb") | ||
library('biomaRt') | ||
#### convert genes | ||
#get the gene names from gencode mouse file | ||
x <- read.table("U:AkermanFolder/HTseq_COUNTS_GencodevM20_mm10.txt", header = FALSE) | ||
x <- x[,1] | ||
foo <- data.frame(do.call('rbind', strsplit(as.character(x),'.',fixed=TRUE))) | ||
genes <- foo[,1] | ||
############ | ||
mart <- useDataset("mmusculus_gene_ensembl", useMart("ensembl")) | ||
G_list <- getBM(filters= "ensembl_gene_id", attributes= c("ensembl_gene_id","mgi_symbol","entrezgene", "description"),values=genes,mart= mart) | ||
### | ||
x <- read.table("U:AkermanFolder/DH_ttbb_HTseq_COUNTS_GencodevM20_mm10.txt", header = FALSE) | ||
x <- x[,1] | ||
merged <- cbind(x,foo) | ||
colnames(merged) <- c("ENSG.version","ensembl_gene_id","version") | ||
x <- merge(merged,G_list, by = "ensembl_gene_id") | ||
setwd("U:AkermanFolder") | ||
write.table(x, file = "GencodeMousev20_conversion_table.txt", sep = "\t", col.names = T, row.names = F, qmethod = "double", quote=FALSE) | ||
|
||
############################## deseq2 ############################### | ||
library(DESeq2) | ||
|
||
sampleTable <- read.table("U:AkermanFolder/sample_table.txt", header = TRUE) | ||
sampleTable <- data.frame(sampleTable) | ||
directory<-'U:AkermanFolder/HT-seq' | ||
|
||
### create the deseq object | ||
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable=sampleTable, directory=directory, design= ~ individual + condition) | ||
colData(ddsHTSeq)$condition<-factor(colData(ddsHTSeq)$condition, levels=c('M3C','WT')) # CTL vs treatment first | ||
|
||
|
||
#run deseq2 | ||
############## cut off low expression (at least 3 samples 10 or above) | ||
dds<-DESeq(ddsHTSeq) | ||
dds <- estimateSizeFactors(dds) | ||
idx <- rowSums( counts(dds, normalized=TRUE) >= 10 ) >= 3 | ||
#This would say, e.g. filter out genes where there are less than 3 samples with normalized counts greater than or equal to 5. | ||
dds <- dds[idx,] | ||
dds <- DESeq(dds) | ||
res <- results(dds, contrast=c("condition","WT","M3C")) | ||
|
||
#write.table(as.data.frame(res),file="HodsonMelton3_WTvM3C_paired_DEseq2.txt") | ||
## get baseMean for each condition | ||
BM <- sapply( levels(dds$condition), function(lvl) rowMeans( counts(dds,normalized=TRUE)[,dds$condition == lvl] ) ) | ||
BM <- as.data.frame(BM) | ||
BM$ENSG.version <- row.names(BM) | ||
############ with gene name | ||
|
||
res <- as.data.frame(res) | ||
rn <- row.names(res) | ||
res$ENSG.version <- rn | ||
DH <- merge(res, x, by = "ENSG.version", all.x = TRUE) | ||
DH <- merge(DH, BM, by = "ENSG.version", all.x = TRUE) | ||
DH <- DH[,c(1,14,13,2:12)] | ||
write.table(DH, file = "DH4b_WTvM3_1-3-2019_DESeq2cutoff10.txt", sep = "\t", col.names = T, row.names = F, qmethod = "double", quote=FALSE) | ||
|
||
|
||
|
||
|
||
# Please find an example sample table below (text file for DeSeq2) | ||
sampleName fileName condition individual | ||
A1_M3C-3 mm10_4c_50pttpe_A1-Exp-3-L16-M2-M3C_S3.fastqAligned.sortedByCoord.out.bam_HTseq M3C exp3 | ||
B2_WT-1 mm10_4c_50pttpe_B2-Exp1-L14-WT-3_S2.fastqAligned.sortedByCoord.out.bam_HTseq WT exp1 | ||
B2_WT-6 mm10_4c_50pttpe_B2-Exp-6-L24-M3-WT_S9.fastqAligned.sortedByCoord.out.bam_HTseq WT exp6 | ||
C1_WT-3 mm10_4c_50pttpe_C1-Exp-3-L16-M5-WT_S4.fastqAligned.sortedByCoord.out.bam_HTseq WT exp3 | ||
C1_M3C-5 mm10_4c_50pttpe_C1-Exp-5-L22-M1-M3C_S7.fastqAligned.sortedByCoord.out.bam_HTseq M3C exp5 | ||
C2_M3C-6 mm10_4c_50pttpe_C2-Exp-6-L24-M4-M3C_S1.fastqAligned.sortedByCoord.out.bam_HTseq M3C exp6 | ||
D1_WT-5 mm10_4c_50pttpe_D1-Exp-5-L22-M2-WT_S8.fastqAligned.sortedByCoord.out.bam_HTseq WT exp5 | ||
E1_WT-3 mm10_4c_50pttpe_E1-Exp-3-L16-M7-WT_S5.fastqAligned.sortedByCoord.out.bam_HTseq WT exp3 | ||
F1_M3C-1 mm10_4c_50pttpe_F1-Exp-1-L14-M3C-1_S1.fastqAligned.sortedByCoord.out.bam_HTseq M3C exp1 | ||
F1_M3C-3 mm10_4c_50pttpe_F1-Exp-3-L16-M8-M3C_S6.fastqAligned.sortedByCoord.out.bam_HTseq M3C exp3 | ||
|
||
|
||
|