<a href="https://colab.research.google.com/github/garciasergio94y/TFM/blob/main/Pyramidal_RNN_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Instalación y carga de librerías:

packages <- c("readr", "dplyr", "caret", "tensorflow", "tfdatasets", "keras")

# Función para instalar paquetes si no están ya instalados
install_packages <- function(package) {
  if (!require(package, character.only = TRUE)) {
    install.packages(package)
    }
}

# Aplicar la función para cada uno de los paquetes
lapply(packages, install_packages)

require(readr)
require(dplyr)
require(caret)
require(tensorflow)
require(keras)


Loading required package: readr

Loading required package: dplyr


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: caret

“there is no package called ‘caret’”
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘listenv’, ‘parallelly’, ‘future’, ‘globals’, ‘shape’, ‘future.apply’, ‘numDeriv’, ‘progressr’, ‘SQUAREM’, ‘diagram’, ‘lava’, ‘prodlim’, ‘proxy’, ‘iterators’, ‘Rcpp’, ‘clock’, ‘gower’, ‘hardhat’, ‘ipred’, ‘timeDate’, ‘e1071’, ‘foreach’, ‘ModelMetrics’, ‘plyr’, ‘pROC’, ‘recipes’, ‘reshape2’




In [3]:
tensorflow::tf_gpu_configured()

TensorFlow built with CUDA:  TRUE 
GPU device name:  /device:GPU:0

In [5]:
# Leer archivo dataset desde Github:
url <- "https://raw.githubusercontent.com/garciasergio94y/TFM/pyramidal_RNN/Resultados/dataset?token=GHSAT0AAAAAACCAWAK5TXDVVN4TPHC4WDVKZCODIDA"
dataset <- read.csv(url)
# Guardar una copia local:
write.csv(dataset, file = "dataset", row.names = F, col.names = F) 


“attempt to set 'col.names' ignored”


In [6]:
# Convertir fechas en formato POSIXct:
dataset[,1] %<>% as.POSIXct()

In [7]:
# Establecer puntos de corte para training y validation sets:

time_point_50 <- as.POSIXct(as.Date(quantile(dataset[,1], 0.5)))

time_point_75 <- as.POSIXct(as.Date(quantile(dataset[,1], 0.75)))

time_point_50
time_point_75

             50% 
"2022-11-08 UTC" 

             75% 
"2022-12-23 UTC" 

In [8]:
# Muestra de entrenamiento:
train <- dataset %>%
  subset(dataset[,1] <= time_point_50)

  table(train$CTRL)



    0     1 
53928   926 

In [10]:
# Conversión a factores de las variables categóricas:
# Obtener índices de las columnas numéricas que no son "resultado" o empiezan por "W"
new_train <- train

# Transformar columnas numéricas a factores
new_train$CTRL %<>% as.factor()

# Upsampling del dataset de training de la clase minoritaria
"%ni%" <- Negate("%in%")
train_bal <- caret::upSample(
  x = new_train[,colnames(new_train) %ni% "CTRL"], 
  y = new_train$CTRL)
names(train_bal)[ncol(train_bal)] <- "CTRL"

# Devolver los factores a variable numérica: 

train_bal$CTRL %<>% as.numeric()

new_train <- train_bal

table(new_train$CTRL)

# Muestra de validación:
val <- dataset %>%
  subset(dataset[,1] > time_point_50 &
           dataset[,1] <= time_point_75)

# Muestra de test:
test <- dataset %>%
  subset(dataset[,1] > time_point_75)


    1     2 
53928 53928 

In [None]:
# Función parar crear un array con inputs y targets. Se excluye la columna de tiempos:
input_data_colnames <- names(new_train) %>%
  setdiff(c("TIEMPO_MUESTRA"))

df_to_inputs_and_targets <- function(df) {
  inputs <- df[input_data_colnames] %>%
    as.matrix()
  targets <- as.numeric(as.character(df$CTRL))
  list(inputs, targets)
}

#Función para calcular el sequence_length a partir del promedio de muestras diarias: 
seq_length <- function(df) {
  date <- as.Date(df$TIEMPO_MUESTRA)
  day_obs <- aggregate(x = df$TIEMPO_MUESTRA, by = list(date),
                       FUN = length)
  mean_day_obs <- round(mean(day_obs[, 2]))
  return(mean_day_obs)
}

#Función para crear el dataset:
sampling_rate <- 1    
sequence_length <- seq_length(new_train)
batch_size <- 256
make_dataset <- function(df) {
  c(inputs, targets) %<-% df_to_inputs_and_targets(df)
  timeseries_dataset_from_array(inputs, targets,
                                sampling_rate = sampling_rate,
                                sequence_length = sequence_length,
                                shuffle = FALSE,
                                batch_size = batch_size) 
}

train_ds <- make_dataset(new_train) %>%
  dataset_map(function(x, y) list(x, keras::k_expand_dims(y)))
  
test_ds <- make_dataset(test) %>%
  dataset_map(function(x, y) list(x, keras::k_expand_dims(y)))

val_ds <- make_dataset(val) %>%
  dataset_map(function(x, y) list(x, keras::k_expand_dims(y)))


In [None]:
# Definir la arquitectura de la red neuronal (Long short-term        memory) :
# Número de variables a entrenar:
ncol_input_data <- length(input_data_colnames)
# Capas de entrada y de salida:
inputs <- layer_input(shape = c(sequence_length, ncol_input_data)) 
outputs <- inputs %>%
  layer_conv_1d(filters = 16, kernel_size = 3, 
                activation = "relu") %>%
  layer_max_pooling_1d(pool_size = 2) %>%
  layer_lstm(units = 16) %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 1, activation = "sigmoid") 
model_5 <- keras_model(inputs, outputs) 

# Definir callbacks para guardar el mejor modelo:
callbacks <- list(
  callback_model_checkpoint(file.path(resultsdir, "lstm_5.keras",
                                     save_best_only = TRUE))) 

# Función para calcular F1 como métrica para el modelo:
  f1_score <- function(y_true, y_pred) {
  true_positives <- sum(backend()$round(backend()$clip(y_true * y_pred, 0, 1)))
  possible_positives <- sum(backend()$round(backend()$clip(y_true, 0, 1)))
  predicted_positives <- sum(backend()$round(backend()$clip(y_pred, 0, 1)))
  precision <- true_positives / (predicted_positives + backend()$epsilon())
  recall <- true_positives / (possible_positives + backend()$epsilon())
  f1_score <- 2 * precision * recall / (precision + recall + backend()$epsilon())
  return(f1_score)
}


# Compilar el modelo:
model_5 %>% compile(optimizer = "adam",
                  loss = "binary_crossentropy",
                  metrics = list("binary_accuracy", f1_score)) 
 
# Entrenar el modelo:
history_5 <- model_5 %>%
  fit(train_ds, epochs = 100, validation_data = val_ds,
      callbacks = callbacks)