# ---- File: DataPartition.R ----
# Patch metadata: diff --git a/DataPartition.R b/DataPartition.R
#                 new file mode 100644, index 0000000..1b9f30e

# Internal helper shared by every do_partitions*() entry point.
#
# Writes the fold files of a repeated k-fold cross-validation (2 x 5 by
# default). For each repetition the rows of `data` are split at random into
# `folds` disjoint parts: the first `folds - 1` parts hold
# floor(nrow(data) / folds) rows each and the last part receives whatever
# remains. Each part is written as a tab-separated file named
# "<name><repetition>_<folds>-<fold>.txt".
#
# Args:
#   data:    data frame to partition.
#   name:    prefix (may include a directory) of the output files.
#   repeats: number of independent repetitions.
#   folds:   number of folds per repetition.
#
# Returns: NULL, invisibly. Called for its side effect of writing files.
.write_cv_partitions <- function(data, name, repeats = 2L, folds = 5L) {
  n_rows <- nrow(data)
  fold_size <- n_rows %/% folds

  for (rep_id in seq_len(repeats)) {
    remaining <- seq_len(n_rows)

    for (fold_id in seq_len(folds)) {
      if (fold_id != folds) {
        # Index through sample.int() rather than sample(remaining, ...):
        # sample() on a length-one numeric vector samples from 1:x instead
        # of from the vector itself.
        picked <- remaining[sample.int(length(remaining), fold_size)]
      } else {
        # The last fold takes every row not assigned yet.
        picked <- remaining
      }

      # setdiff() is a no-op when `picked` is empty. Negative indexing with
      # an empty index vector would instead discard every remaining row.
      remaining <- setdiff(remaining, picked)

      out_file <- paste0(name, rep_id, "_", folds, "-", fold_id, ".txt")
      # drop = FALSE keeps a single-column data set as a data frame.
      write.table(data[picked, , drop = FALSE], out_file, sep = "\t")
    }
  }

  invisible(NULL)
}

# Builds the 2 x 5-fold CV partitions from separate training and test files,
# where the test class labels live in their own file.
#
# Args:
#   train: path of the training file (read with read.table(); the class is
#          taken to be its last column).
#   test:  path of the test file without the class column.
#   testC: path of the file holding the test classes; its first column is
#          renamed to the name of the last training column before binding.
#   name:  prefix of the output files.
do_partitions <- function(train, test, testC, name) {
  train_set <- read.table(train)
  test_set <- read.table(test)
  test_labels <- read.table(testC)

  # Give the label column the training class name so rbind() can align it.
  names(test_labels)[1] <- colnames(train_set)[ncol(train_set)]

  full_set <- rbind(train_set, cbind(test_set, test_labels))
  .write_cv_partitions(full_set, name)
}

# Builds the 2 x 5-fold CV partitions from a training and a test .csv file.
#
# Args:
#   train: path of the training .csv file.
#   test:  path of the test .csv file (same columns as `train`).
#   name:  prefix of the output files.
#   head:  TRUE/FALSE, whether the .csv files have a header row.
do_partitions_csv <- function(train, test, name, head) {
  train_set <- read.csv(train, header = head)
  test_set <- read.csv(test, header = head)

  .write_cv_partitions(rbind(train_set, test_set), name)
}

# Builds the 2 x 5-fold CV partitions from a single data file.
#
# Args:
#   train: path of the data file.
#   name:  prefix of the output files.
#   separ: field separator used in the data file.
do_partitions1 <- function(train, name, separ) {
  .write_cv_partitions(read.table(train, sep = separ), name)
}

# Builds the 2 x 5-fold CV partitions from a data file and a separate file
# with the matching classes.
#
# Args:
#   train:  path of the data file without the class column.
#   trainC: path of the file holding the classes; its first column is
#           renamed to "class" and appended as the last column.
#   name:   prefix of the output files.
do_partitions2 <- function(train, trainC, name) {
  features <- read.table(train)
  labels <- read.table(trainC)
  names(labels)[1] <- "class"

  .write_cv_partitions(cbind(features, labels), name)
}

# ---- File: DeepLearning_Classification.R ----
# Patch metadata: diff --git a/DeepLearning_Classification.R b/DeepLearning_Classification.R
#                 new file mode 100644, index 0000000..dde4bd8

# Classifies a compressed dataset using different classifiers and
# compression models.
# data       -> Selected dataset.
# porcLayers -> Vector with the fraction for each layer to apply; if it is
#               not valid, no compression is applied.
# modelo     -> Model used for the compression; if it is not valid, no
#               compression is applied.
# clasif     -> Classifier to apply; if it is not valid, no results are
#               obtained.
deepLearning_classification <- function(data, porcLayers, modelo, clasif) {
  # Runs a 2 x 5-fold cross-validation over the partition files
  # "<data><rep>_5-<fold>.txt": each fold is used once as the test set and
  # the other four are bound together as the training set. The data are
  # optionally compressed with the selected model, classified with the
  # selected classifier, and a text report with per-run and average
  # timings/accuracy is written to
  # "<data>_<layers>_<modelo>_<clasif>.txt" (dots stripped from the stem).
  #
  # Args:
  #   data:       prefix of the partition files (dataset name).
  #   porcLayers: numeric vector of layer sizes expressed as fractions of
  #               the number of input features; a non-numeric, empty or
  #               non-positive vector disables compression.
  #   modelo:     "H2O_v1", "H2O_v2" or "autoencoders"; any other value
  #               disables compression.
  #   clasif:     "RandomForest", "KNN" or "C4.5"; any other value only logs
  #               that the classifier is not valid.
  #
  # Returns: the value of write(), called for its side effect.
  n_reps <- 2L
  n_folds <- 5L

  timTot <- Sys.time()
  porcLayersC <- paste(porcLayers, collapse = "_")
  contents <- paste("Ejecución para el dataset", data, "con el modelo", modelo,
                    "y el clasificador", clasif, ":\n", sep = " ")
  contents <- paste(contents, "Porcentajes de las capas: ", porcLayersC, "\n",
                    sep = "")
  accTot <- 0.0
  timComTot <- 0.0
  timClasTot <- 0.0

  # Check the type BEFORE sorting/comparing, and guard against an empty
  # vector so the scalar condition below never sees a zero-length value.
  if (is.numeric(porcLayers)) {
    porcLayers <- sort(porcLayers, decreasing = TRUE)
  }
  layers_valid <- is.numeric(porcLayers) && length(porcLayers) > 0 &&
    porcLayers[length(porcLayers)] > 0
  if (!layers_valid) {
    modelo <- "NO_APLICA"
  }

  # Resolve the compression and classification functions once; NULL means
  # "not a valid choice".
  compressor <- switch(modelo,
    H2O_v1 = compress_data_h2ov1,
    H2O_v2 = compress_data_h2ov2,
    autoencoders = compress_data_autoencoder,
    NULL
  )
  classifier <- switch(clasif,
    RandomForest = clasif_randomForest,
    KNN = clasif_KNN,
    C4.5 = clasif_C45,
    NULL
  )

  fold_file <- function(rep_id, fold_id) {
    paste0(data, rep_id, "_", "5-", fold_id, ".txt")
  }

  for (j in seq_len(n_reps)) {
    for (i in seq_len(n_folds)) {
      ej <- paste(j, "-", i, sep = "")
      contents <- paste(contents, "----Ejecución", ej, ":----\n", sep = " ")

      test <- read.table(fold_file(j, i), sep = "\t")
      # Training set = every fold of this repetition except the test fold.
      train_parts <- lapply(setdiff(seq_len(n_folds), i), function(k) {
        read.table(fold_file(j, k), sep = "\t")
      })
      train <- do.call(rbind, train_parts)

      if (is.null(compressor)) {
        contents <- paste(contents, 'Modelo no válido. No se aplica compresión.',
                          "\n", sep = " ")
        y <- colnames(train)[ncol(train)]
        train[, y] <- as.factor(train[, y])
        test[, y] <- as.factor(test[, y])
        train_res <- as.h2o(train)
        test_res <- as.h2o(test)
        data_comp <- c(train_res, test_res)
      } else {
        # Layer sizes are only needed (and only computable) when the layer
        # fractions are valid, i.e. when a compressor was selected.
        layers <- as.integer(porcLayers * (ncol(test) - 1))
        timCom <- Sys.time()
        data_comp <- compressor(train, test, layers)
        timCom <- as.numeric(Sys.time()) - as.numeric(timCom)
        timComTot <- timComTot + timCom
        contents <- paste(contents, 'Tiempo en comprimir dataset:', timCom,
                          "seg.\n", sep = " ")
      }

      if (is.null(classifier)) {
        contents <- paste(contents, 'Clasificador no válido.', "\n", sep = " ")
      } else {
        timCla <- Sys.time()
        result <- classifier(data_comp)
        timCla <- as.numeric(Sys.time()) - as.numeric(timCla)
        timClasTot <- timClasTot + timCla
        contents <- paste(contents, 'Tiempo en clasificar dataset:', timCla,
                          "seg.\n", sep = " ")

        accuracy <- .dl_accuracy(result)
        contents <- paste(contents, 'Precisión para la ejecución:', accuracy,
                          "\n", sep = " ")
        accTot <- accTot + accuracy
      }
    }
  }

  n_runs <- n_reps * n_folds
  accTot <- accTot / n_runs
  timClasTot <- timClasTot / n_runs
  timComTot <- timComTot / n_runs
  timTot <- as.numeric(Sys.time()) - as.numeric(timTot)

  contents <- paste(contents, '----Resultados Finales----\n', sep = " ")
  contents <- paste(contents, 'Tiempo medio en la compresión de datos:',
                    timComTot, "seg.\n", sep = " ")
  contents <- paste(contents, 'Tiempo medio en la clasificación:', timClasTot,
                    "seg.\n", sep = " ")
  contents <- paste(contents, 'Precisión media todas las ejecuciones:', accTot,
                    "\n", sep = " ")
  contents <- paste(contents, 'Tiempo total:', timTot, "seg.\n", sep = " ")

  # Dots are stripped from the file stem (e.g. "0.15" -> "015", "C4.5" ->
  # "C45") before the extension is appended.
  nomArch <- paste(data, "_", porcLayersC, "_", modelo, "_", clasif, sep = "")
  nomArch <- gsub('\\.', '', nomArch)
  nomArch <- paste(nomArch, ".txt", sep = "")
  write(contents, nomArch)
}

# Internal helper: accuracy of one run.
#
# Builds a square confusion table whose rows (predicted) and columns (real)
# share the same class set in the same order, so classes that were never
# predicted (or never present in the test fold) get an all-zero row/column
# instead of producing a non-square or mis-ordered table. The accuracy is
# then read from caret's confusionMatrix().
#
# Args:
#   result: data frame with columns `Real` and `Predicted`.
#
# Returns: the overall accuracy as a numeric scalar.
.dl_accuracy <- function(result) {
  real <- as.character(result$Real)
  predicted <- as.character(result$Predicted)
  classes <- sort(unique(c(real, predicted)))

  confusion <- table(
    factor(predicted, levels = classes),
    factor(real, levels = classes)
  )
  as.numeric(confusionMatrix(confusion)$overall[["Accuracy"]])
}

# ---- File: Ejecuciones.R ----
# Patch metadata: diff --git a/Ejecuciones.R b/Ejecuciones.R
#                 new file mode 100644, index 0000000..727a81e

# Scheduled runs.
# Load the functions used below:
source("~/Documents/RepositoriosGit/Redes-Neuronales/FuncionesDeepLearning.R")
source("~/Documents/RepositoriosGit/Redes-Neuronales/DeepLearning_Classification.R")

# 1- Load the required packages:
# Install (when missing) and attach every package the runs depend on, in the
# same order as before: autoencoder, h2o, RWeka, caret, kknn.
for (pkg in c("autoencoder", "h2o", "RWeka", "caret", "kknn")) {
  if (!is.installed(pkg)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# nthreads = -1 is passed straight to h2o.init(), as in the original script.
h2o.init(nthreads = -1)

# 2- Set the working directory.
# NOTE(review): hard-coded, user-specific path; the partition files are read
# from (and the reports written to) this directory.
setwd("~/Documents/RepositoriosGit/Redes-Neuronales/Dataset")

# 3- Runs
dataset <- c("coil2000", "letter", "MNIST", "madelon", "gisette", "arcene")
layers <- list(
  c(1.5, 0.1), c(1.5, 0.15), c(1.5, 0.2),
  c(1.5, 0.5, 0.1), c(1.5, 0.5, 0.15), c(1.5, 0.5, 0.2),
  c(1.5, 0.5, 0.3, 0.1), c(1.5, 0.5, 0.3, 0.15), c(1.5, 0.5, 0.3, 0.2)
)
model <- c("H2O_v1", "H2O_v2", "autoencoders")
clasif <- c("RandomForest", "KNN", "C4.5")

# Every dataset x layer layout x compression model x classifier combination.
for (dataset_name in dataset) {
  for (layer_fractions in layers) {
    for (model_name in model) {
      for (classifier_name in clasif) {
        deepLearning_classification(dataset_name, layer_fractions,
                                    model_name, classifier_name)
      }
    }
  }
}

# ---- File: FuncionesDeepLearning.R ----
# Patch metadata: diff --git a/FuncionesDeepLearning.R b/FuncionesDeepLearning.R
#                 new file mode 100644, index 0000000..f176d6a

# Checks whether a package is installed.
#
# Only the package names (the row names of installed.packages()) are
# searched. Matching against the whole matrix would also accept any value
# found in its other columns (versions, priorities, "yes"/"no", ...).
#
# Args:
#   paquete: package name(s) as a character vector.
#
# Returns: logical vector, TRUE where the package is installed.
is.installed <- function(paquete) {
  paquete %in% rownames(installed.packages())
}

# Builds a list of autoencoder models, one per layer.
#
# Each model is trained with h2o.deeplearning(autoencoder = TRUE) on the
# features produced by the previous one, so the models must later be applied
# in the same order.
#
# Args:
#   training_data: H2O frame with the input features.
#   layers:        vector with the hidden size of each stacked autoencoder.
#   args:          list of extra arguments for h2o.deeplearning(); entries
#                  override the defaults set here (via modifyList()).
#
# Returns: list of trained models, in layer order.
get_stacked_ae_array <- function(training_data, layers, args) {
  models <- vector("list", length(layers))

  for (i in seq_along(layers)) {
    ae_model <- do.call(
      h2o.deeplearning,
      modifyList(
        list(
          x = names(training_data),
          training_frame = training_data,
          autoencoder = TRUE,
          hidden = layers[i]
        ),
        args
      )
    )
    training_data <- h2o.deepfeatures(ae_model, training_data, layer = 1)

    # Rename the deep-feature columns ("DF" -> "L<i>") so each stage has its
    # own column names.
    names(training_data) <- gsub("DF", paste0("L", i), names(training_data))
    models[[i]] <- ae_model
  }

  models
}

# Returns the dataset compressed by a list of stacked autoencoder models.
#
# Args:
#   data: H2O frame with the input features.
#   ae:   list of models as returned by get_stacked_ae_array().
#
# Returns: H2O frame with the features of the last model.
apply_stacked_ae_array <- function(data, ae) {
  for (i in seq_along(ae)) {
    data <- h2o.deepfeatures(ae[[i]], data, layer = 1)
    names(data) <- gsub("DF", paste0("L", i), names(data))
  }
  data
}

# Compresses the dataset with a single h2o.deeplearning() autoencoder.
#
# Args:
#   train, test: data frames whose last column is the class.
#   layers:      hidden layer sizes of the autoencoder.
#
# Returns: c(train, test) of H2O frames holding the deep features of the last
#   hidden layer plus a `classLabel` column.
compress_data_h2ov1 <- function(train, test, layers) {
  train <- as.h2o(train)
  test <- as.h2o(test)
  y <- colnames(train)[ncol(train)]
  x <- setdiff(names(train), y)

  train[, y] <- as.factor(train[, y])
  test[, y] <- as.factor(test[, y])

  model <- h2o.deeplearning(
    x = x,
    training_frame = train,
    validation_frame = test,
    autoencoder = TRUE,
    hidden = layers,
    input_dropout_ratio = 0.2,
    sparse = TRUE
  )

  # Extract the features of the last hidden layer.
  last_layer <- length(layers)

  training_data <- h2o.deepfeatures(model, train, layer = last_layer)
  training_data$classLabel <- train[, y]
  test_data <- h2o.deepfeatures(model, test, layer = last_layer)
  test_data$classLabel <- test[, y]

  c(training_data, test_data)
}

# Compresses the dataset with a stack of single-layer H2O autoencoders
# (get_stacked_ae_array() / apply_stacked_ae_array()).
#
# Args:
#   train_hex, test_hex: data frames whose last column is the class.
#   layers:              hidden size of each stacked autoencoder.
#
# Returns: c(train, test) of H2O frames holding the compressed features
#   followed by the class column.
compress_data_h2ov2 <- function(train_hex, test_hex, layers) {
  train_hex <- as.h2o(train_hex)
  test_hex <- as.h2o(test_hex)
  response <- ncol(train_hex)

  train <- train_hex[, -response]
  test <- test_hex[, -response]
  train_hex[, response] <- as.factor(train_hex[, response])
  test_hex[, response] <- as.factor(test_hex[, response])

  args <- list(activation = "Tanh", epochs = 1, l1 = 1e-5)
  ae <- get_stacked_ae_array(train, layers, args)

  # Compress the training/testing data with the stacked autoencoder models.
  train_compressed <- apply_stacked_ae_array(train, ae)
  test_compressed <- apply_stacked_ae_array(test, ae)

  # Re-attach the response so a model can be built on the compressed
  # features and evaluated on the compressed test set.
  train_w_resp <- h2o.cbind(train_compressed, train_hex[, response])
  test_w_resp <- h2o.cbind(test_compressed, test_hex[, response])

  c(train_w_resp, test_w_resp)
}

# Compresses the dataset with the `autoencoder` package.
#
# Args:
#   train, test: data frames whose last column is the class.
#   layers:      hidden layer sizes passed as N.hidden to autoencode().
#
# Returns: c(train, test) of H2O frames holding the hidden-layer outputs plus
#   a `classLabel` column.
compress_data_autoencoder <- function(train, test, layers) {
  y <- colnames(train)[ncol(train)]
  x <- setdiff(names(train), y)

  train[, y] <- as.factor(train[, y])
  test[, y] <- as.factor(test[, y])

  # drop = FALSE keeps a single-feature data set as a one-column matrix.
  train_aut <- data.matrix(train[, x, drop = FALSE])
  test_aut <- data.matrix(test[, x, drop = FALSE])
  # Total layer count = hidden layers + input layer + output layer.
  nl1 <- length(layers) + 2

  model1 <- autoencode(
    X.train = train_aut,
    nl = nl1,
    N.hidden = layers,
    unit.type = "tanh",
    lambda = 1e-05,
    beta = 1,
    rho = 0.99,
    epsilon = 1e-08,
    max.iteration = 100,
    rescale.flag = TRUE,
    rescaling.offset = 0.001
  )

  train_comp <- predict(model1, X.input = train_aut, hidden.output = TRUE)
  train_comp <- data.frame(train_comp$X.output)

  test_comp <- predict(model1, X.input = test_aut, hidden.output = TRUE)
  test_comp <- data.frame(test_comp$X.output)

  train_comp$classLabel <- train[, y]
  test_comp$classLabel <- test[, y]

  c(as.h2o(train_comp), as.h2o(test_comp))
}

# Applies the RandomForest classifier and returns the real and predicted
# labels of the test set (the caller builds the confusion matrix from them).
# Trains an H2O random forest on the training frame and predicts the test
# frame.
#
# Args:
#   data_comp: two-element collection; [[1]] is the training H2O frame and
#              [[2]] the test H2O frame. The last column of each is treated
#              as the class and renamed to "classLabel".
#
# Returns: data frame with columns `Real` (test classes) and `Predicted`
#   (predicted class labels).
clasif_randomForest <- function(data_comp) {
  train <- data_comp[[1]]
  test <- data_comp[[2]]

  colnames(train)[ncol(train)] <- "classLabel"
  colnames(test)[ncol(test)] <- "classLabel"
  predictors <- setdiff(names(train), "classLabel")

  model <- h2o.randomForest(
    x = predictors,
    y = "classLabel",
    training_frame = train,
    validation_frame = test
  )
  predictions <- h2o.predict(object = model, newdata = test)

  test_df <- as.data.frame(test)

  # Use the predicted labels directly (first column of the prediction
  # frame). Re-encoding them as a fresh factor and indexing the training
  # levels with its codes mislabels every prediction as soon as some class
  # is never predicted, because the codes then refer to a shorter level set.
  predicted <- as.character(as.data.frame(predictions)[[1]])

  data.frame(
    Real = test_df$classLabel,
    Predicted = predicted
  )
}

# Trains a C4.5 decision tree (RWeka's J48) on the training data and predicts
# the test data.
#
# Args:
#   data_comp: two-element collection; [[1]] is the training frame and [[2]]
#              the test frame. The last column of each is treated as the
#              class and renamed to "classLabel".
#
# Returns: data frame with columns `Real` (test classes) and `Predicted`
#   (predicted class labels).
clasif_C45 <- function(data_comp) {
  train <- as.data.frame(data_comp[[1]])
  test <- as.data.frame(data_comp[[2]])

  colnames(train)[ncol(train)] <- "classLabel"
  colnames(test)[ncol(test)] <- "classLabel"
  # The class must be a factor for classification; no-op if it already is.
  train$classLabel <- as.factor(train$classLabel)
  predictors <- setdiff(names(train), "classLabel")

  model <- J48(classLabel ~ ., data = train)

  # drop = FALSE keeps a single-feature test set as a data frame.
  predictions <- predict(model, test[, predictors, drop = FALSE])

  data.frame(
    Real = test$classLabel,
    Predicted = as.character(predictions)
  )
}

# Classifies the test data with weighted k-nearest neighbours (kknn).
#
# Args:
#   data_comp: two-element collection; [[1]] is the training frame and [[2]]
#              the test frame. The last column of each is treated as the
#              class and renamed to "classLabel".
#
# Returns: data frame with columns `Real` (test classes) and `Predicted`
#   (predicted class labels).
clasif_KNN <- function(data_comp) {
  train <- as.data.frame(data_comp[[1]])
  test <- as.data.frame(data_comp[[2]])

  colnames(train)[ncol(train)] <- "classLabel"
  colnames(test)[ncol(test)] <- "classLabel"
  # The class must be a factor for classification; no-op if it already is.
  train$classLabel <- as.factor(train$classLabel)

  model <- kknn(classLabel ~ ., train, test)

  data.frame(
    Real = test$classLabel,
    Predicted = as.character(model$fitted.values)
  )
}