Skip to content

Commit

Permalink
moves R scripts to the right resources path
Browse files Browse the repository at this point in the history
  • Loading branch information
CBiasuzzi committed Dec 21, 2017
1 parent 01a1ef1 commit 9ca5414
Show file tree
Hide file tree
Showing 27 changed files with 5,283 additions and 0 deletions.
178 changes: 178 additions & 0 deletions uncertainties-analysis/src/main/resources/R/bluestone/error_box_v1.1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
##############################################################################################
### Copyright (c) 2017, RTE (http://www.rte-france.com)
### This Source Code Form is subject to the terms of the Mozilla Public
### License, v. 2.0. If a copy of the MPL was not distributed with this
### file, You can obtain one at http://mozilla.org/MPL/2.0/.
###
### Program name : error_box.R
### Author : Cedric Faucheux (Bluestone)
### Project : RTE prototype for the iTesla online module
### Function : Compute error box with the error_box function
### Parameters : dataset= the dataset on which the error box is calculated
### the dataset is in the equipment base
### prct= the percentage of observations in the error box
### epsilon= the precision for z, the box main parameter
### Outputs : z= the box parameter
### inf= the lower bounds of the box
### sup= the upper bounds of the box
### loadings= the transition matrix from equipments to principal components
### structure: rows= equipments, columns= principal components
### Version : 1.1
### Modification : "comp.XX" names changed to "DimXX"
### Date : 14/10/2014
### Comments : This file also contains four other functions:
### plot_error_box function plots the error box, given two principal components
### nb_inbounds function calculates which principal components have observations out of the box
### find_z function calculates the box parameter, called by error_box.
### nb_inbox function calculates the number of observations in the box, called by error_box.
##############################################################################################
### Example:
### box=error_box(errors_P,0.95,0.00001)
### data_PC=as.matrix(errors_P)%*%box[[4]]
### PC_inbound=nb_inbounds(data_PC,error_box = box)
### plot_error_box(data_PC,error_box = box,comp1 = PC_inbound[1,1],comp2 = PC_inbound[2,1])
##############################################################################################


#Function which computes the error boxes
error_box<-function(dataset,prct,epsilon){
# dataset: the dataset on which the error box is calculated, the dataset is in the equipment base
# prct: the percentage of observations in the error box
# epsilon: the precision on z

#PCA: error boxes are calculated on PCA axes
acp=princomp(dataset,cor=F)
data_PC=acp$scores
#Compute mu and sigma for each column
mu=apply(data_PC,2,mean)
sigma=apply(data_PC,2,sd)
#Find z
z=find_z(data_PC,prct,epsilon,mu,sigma)
#Lower bound
inf=mu-z*sigma
#Upper bound
sup=mu+z*sigma
res=list(z,inf,sup,acp$loadings)
names(res)=c("z","inf","sup","loadings")
names(res$inf)=paste("Dim",substr(names(res$inf),6,nchar(names(res$inf))),sep="")
names(res$sup)=paste("Dim",substr(names(res$sup),6,nchar(names(res$sup))),sep="")
colnames(res$loadings)=paste("Dim",substr(colnames(res$loadings),6,nchar(colnames(res$loadings))),sep="")

# z: the box parameter
# inf: the lower bound of the box
# sup: the upper bound of the box
# loadings: the matrix to go from equipments to principal components
# loadings structure: rows= equipments, columns= principal components

return(res)
}

#Function which find z
find_z<-function(data_PC,prct,epsilon,mu,sigma){

#find extrema of z
val=matrix(0,nrow=nrow(data_PC),ncol=ncol(data_PC))
for(k in 1:ncol(data_PC)){
if(sigma[k]!=0){
val[,k]=data_PC[,k]/sigma[k]
}else{
val[,k]=0
}
}
zmax<-max(na.omit(val))
zmin=0

#initialisation
zd=(zmax+zmin)/2


#Required number of observations in the box
ndata=prct*nrow(data_PC)

#### Find zd by dichotomy
#Stop iterations when zd change less than Epsilon from one iteration to the other
nbox=nb_inbox(zd,data_PC,mu,sigma)
nb_iter=0
while(nb_iter!=-1){
nb_iter=nb_iter+1
old_zd=zd
if(nbox<ndata){
zmin=zd
zd=(zmax+zmin)/2
}else{
zmax=zd
zd=(zmax+zmin)/2
}
if(abs(zd-old_zd)<epsilon){
z=c(zd,old_zd)
zd=z[which.min(c(abs(nb_inbox(zd,data_PC,mu,sigma)-ndata),abs(
nb_inbox(old_zd,data_PC,mu,sigma)-ndata)))]
nb_iter=-1
}
nbox=nb_inbox(zd,data_PC,mu,sigma)
print(c(zmin,zmax,zd,nbox,nbox/ndata*prct*100,nb_iter))
}
return(zd)
}


##Function which calculates the number of observation in the box
nb_inbox<-function(z,data_PC,mu,sigma){
if (z!=0){
inf=mu-z*sigma
sup=mu+z*sigma
data_inf<-sweep(data_PC,2,inf)
data_sup<-sweep(data_PC,2,sup)
nobs_line=length(which(apply((data_inf>=0)&(data_sup<=0),1,sum)==ncol(data_PC)))
}else {nobs_line=0}
return(nobs_line)
}


##Function which calculates the number of observation in the box for each principal component
nb_inbounds<-function(data_PC,error_box){
inf=error_box[[2]]
sup=error_box[[3]]
nobs_pc<-as.data.frame(matrix(0,ncol(data_PC),0))

for (col in 1:ncol(data_PC)){
nobs_pc[col,1]<-colnames(data_PC)[col]
nobs_pc[col,2]<-length(data_PC[,col][which((data_PC[,col]>=inf[col])&(data_PC[,col]<=sup[col]))])
}
nobs_pc[,3]=nobs_pc[,2]/nrow(data_PC)*100

#Sort according to the number of observations in the box
nobs_pc=nobs_pc[order(nobs_pc[,2]),]
colnames(nobs_pc)<-c("Principal Component","Observations_in_box","%Observations_in_box")
return(nobs_pc)
}


##Function to plot error box
plot_error_box<-function(data_PC,error_box,comp1,comp2){

data=data_PC[,c(comp1,comp2)]

inf=error_box[[2]]
sup=error_box[[3]]

abs1<-as.numeric(inf[comp1])
abs2<-as.numeric(sup[comp1])
ord1<-as.numeric(inf[comp2])
ord2<-as.numeric(sup[comp2])

a1=min(abs1,min(data[,1]))
a2=max(abs2,max(data[,1]))
o1=min(ord1,min(data[,2]))
o2=max(ord2,max(data[,2]))

smoothScatter(data[,1],data[,2],main=paste("Error box for ",colnames(data)[1]," and ",colnames(data)[2],sep=""),xlab=colnames(data)[1],ylab=colnames(data)[2],xlim=c(a1-1,a2+1),ylim= c(o1-1,o2+1))
rect(abs1,ord1,abs2,ord2,border='red',xpd=T)

}






Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
##############################################################################################
### Copyright (c) 2017, RTE (http://www.rte-france.com)
### This Source Code Form is subject to the terms of the Mozilla Public
### License, v. 2.0. If a copy of the MPL was not distributed with this
### file, You can obtain one at http://mozilla.org/MPL/2.0/.
###
### Program name : export_error_box_v0.1.R
### Author : Cedric Faucheux (Bluestone)
### Project : RTE prototype for the iTestla online module
### Function : This file is an example of how to import a dataset, compute error boxes and export results
### Version : 1.0
### Date : 06/10/2014
##############################################################################################


#Dependencies
source("preprocess_data_error_box_v1.0.R")
source("error_box_v1.1.R")
library("data.table")

#Input paths
gen_path = "forecastsDiff_gen.csv"
load_path = "forecastsDiff_load.csv"

#Output paths
export_inf="bornes_inf.csv"
export_sup="bornes_sup.csv"
export_transfer_matrix="matrice.csv"
export_mean_vector="vecteur.csv"

#Parameters
prct_inbox=0.95
precision_z=0.00001


#Import data
data_gen<-as.data.frame(fread(gen_path,sep=",",header=TRUE,stringsAsFactors=F))
data_load<-as.data.frame(fread(load_path,sep=",",header=TRUE,stringsAsFactors=F))

#Preprocess
errors_P=preprocess_error_box_data(data_gen,data_load)

#Compute error boxes
box=error_box(errors_P,prct_inbox,precision_z)


#Export data

#INF
inf=data.frame(PC=names(box[[2]]),inf=box[[2]])
write.table(inf, file = export_inf, append = FALSE, quote = FALSE, sep = ",",
eol = "\n", na = "NA", dec = ".", row.names = FALSE,
col.names = TRUE, )

#SUP
sup=data.frame(PC=names(box[[3]]),sup=box[[3]])
write.table(sup, file = export_sup, append = FALSE, quote = FALSE, sep = ",",
eol = "\n", na = "NA", dec = ".", row.names = FALSE,
col.names = TRUE, )

#Transition matrix
loadings=t(as.matrix(box[[4]]))
loadings=cbind(row.names(loadings),loadings)
write.table(loadings, file = export_transfer_matrix, append = FALSE, quote = TRUE, sep = ",",
eol = "\n", na = "NA", dec = ".", row.names = FALSE,
col.names = TRUE )

#Mean
means=t(apply(errors_P,2,mean))
# bidouille GJ pour etre conforme au fichiers de sortie d'EB
means2=cbind(1, means)
write.table(means2, file = export_mean_vector, append = FALSE, quote = TRUE, sep = ",",
eol = "\n", na = "NA", dec = ".", row.names = FALSE,
col.names = TRUE )

Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
##############################################################################################
### Copyright (c) 2017, RTE (http://www.rte-france.com)
### This Source Code Form is subject to the terms of the Mozilla Public
### License, v. 2.0. If a copy of the MPL was not distributed with this
### file, You can obtain one at http://mozilla.org/MPL/2.0/.
###
### Program name : preprocess_data_error_box.R
### Author : Cedric Faucheux (Bluestone)
### Project : RTE prototype for the iTestla online module
### Function : Preprocess input data for error boxes
### Parameters : data_gen = generator equipment dataset
### data_load = load equipment dataset
### Outputs : errors_P = data frame of forecasting errors for all equipments
### Version : 1.0
### Date : 06/10/2014
##############################################################################################
### Example:
### errors_P=import_error_box_data(gen_path = "Data/forecastsDiffGen2013.csv",load_path = "Data/forecastsDiffLoads2013.csv")
##############################################################################################


preprocess_error_box_data<-function(data_gen,data_load){

data_gen[,!(names(data_gen) %in% c("horizon"))]=as.data.frame(apply(data_gen[,!(names(data_gen) %in% c("horizon"))],MARGIN=2,FUN=as.numeric))
data_load[,!(names(data_load) %in% c("horizon"))]=as.data.frame(apply(data_load[,!(names(data_load) %in% c("horizon"))],MARGIN=2,FUN=as.numeric))


gen_P=data_gen[, (substr(names(data_gen),nchar(names(data_gen))-1,nchar(names(data_gen)))=="_P" | names(data_gen)=="datetime" | names(data_gen)=="horizon")]
load_P=data_load[,substr(names(data_load),nchar(names(data_load))-1,nchar(names(data_load)))=="_P" | names(data_load)=="datetime" | names(data_load)=="horizon"]


data_P=merge(gen_P,load_P,by=c("horizon", "datetime"))

#remove Nas
data_P[is.na(data_P)]=0


#Remove half observations
datetime=data_P$datetime[seq(1,nrow(data_P),2)]

date=as.POSIXct(datetime, origin="1970-01-01")

minute=substr(date,15,16)


demi_pas_datetime=datetime[minute=="00"]

for (i in 1:length(demi_pas_datetime)){

time_stamp=demi_pas_datetime[i]

if ((time_stamp-1800) %in% datetime){

#Replace forecast
data_P[data_P$horizon=="DACF" & data_P$datetime==(time_stamp-1800),!(names(data_P) %in% c("horizon","forecastTime","datetime"))]=(data_P[data_P$horizon=="DACF" & data_P$datetime==(time_stamp-1800),!(names(data_P) %in% c("horizon","forecastTime","datetime"))]+data_P[data_P$horizon=="DACF" & data_P$datetime==time_stamp,!(names(data_P) %in% c("horizon","forecastTime","datetime"))])/2

#Replace measure
data_P[data_P$horizon=="SN" & data_P$datetime==(time_stamp-1800),!(names(data_P) %in% c("horizon","forecastTime","datetime"))]=(data_P[data_P$horizon=="SN" & data_P$datetime==(time_stamp-1800),!(names(data_P) %in% c("horizon","forecastTime","datetime"))]+data_P[data_P$horizon=="SN" & data_P$datetime==time_stamp,!(names(data_P) %in% c("horizon","forecastTime","datetime"))])/2

#Remove data 00
data_P=data_P[data_P$datetime!=time_stamp,]

}


}

#Remove remaining half obs
datetime=data_P$datetime
date=as.POSIXct(datetime, origin="1970-01-01")
minute=substr(date,15,16)
data_P=data_P[minute=="30",]

data_P=data_P[order(data_P$datetime),]

errors_P=data_P[data_P$horizon=="DACF",!names(data_P)%in%c("datetime","horizon")]-data_P[data_P$horizon=="SN",!names(data_P)%in%c("datetime","horizon")]


return(errors_P)
}

0 comments on commit 9ca5414

Please sign in to comment.