-
Notifications
You must be signed in to change notification settings - Fork 0
/
loess_smooth_600
67 lines (54 loc) · 2 KB
/
loess_smooth_600
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env Rscript
args <- commandArgs(trailingOnly = TRUE)
bgfiles <- unlist(args)
lspan <- 600000
threads <- 10
if(length(bgfiles)==0){
stdi <- unlist(strsplit(readLines(file("stdin"))," "))
}
if(length(bgfiles)==0){
bgfiles <- stdi
}
require(parallel,quietly=T)
removeext <- function( filenames ){
filenames<-as.character(filenames)
for(i in 1:length(filenames)){
namevector<-unlist(strsplit(filenames[i],"\\."))
filenames[i]<-paste(namevector[1:(length(namevector)-1)],collapse=".")
}
filenames
}
options(scipen=9999)
if(lspan<1){
stop("lspan must indicate distance in bp")
}
bgnames <- basename(removeext(bgfiles))
numbgs <- length(bgfiles)
outnames <- paste(bgnames,"_loess",".bg",sep="")
dump <- mclapply(seq_len(numbgs), function(x){
curbg <- as.data.frame( read.table ( bgfiles[x], header=FALSE, stringsAsFactors=FALSE ) )
chroms <- unique(curbg[,1])
numchroms <- length(chroms)
all=split(curbg,curbg[,1])
lscores<-lapply(1:numchroms,function(i){
cur <- all[[i]]
curchrom <- cur[1,1]
chromlspan <- lspan/(max(cur[,3])-min(cur[,2]))
cura <- as.data.frame(lapply(4:ncol(cur), function(k){
smoothed_vals <- tryCatch({
predict(loess(cur[, k] ~ cur[, 2], span = chromlspan), cur[, 2])
}, error = function(err) {
return(NA)
})
cur[, k][!is.na(cur[, k])] <- smoothed_vals[!is.na(cur[, k])]
cur[, k]
}))
cura <- cbind(cur[,1:3],cura)
colnames(cura) <- colnames(cur)
return(cura)
})
smoothbg<-do.call(rbind,lscores)
write.table(smoothbg,file=outnames[x],col.names=FALSE,quote=FALSE, sep="\t", row.names=FALSE)
rm(curbg)
gc()
}, mc.cores=threads, mc.preschedule=FALSE)