In [13]:
!pip install --upgrade tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting h5py>=3.10.0 (from tensorflow)
  Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
Collecting ml-dtypes~=0.3.1 (from tensorflow)
  Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m68.8 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.17,>=2.16 (from tensorflow)
  Downloading tensorboard-2.16.2-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[?25

In [2]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, SimpleRNN, LSTM

In [9]:
# Cargamos los datos
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Cargar y normalizar los datos
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Monografía/datos_arbei/data_normalize.csv')
data[['ndvi', 'precipitation', 'real_evapotranspiration', 'interception', 'potential_evapotranspiration', 'soil_moisture', 'recharge']] = MinMaxScaler().fit_transform(
    data[['ndvi', 'precipitation', 'real_evapotranspiration', 'interception', 'potential_evapotranspiration', 'soil_moisture', 'recharge']]
)

# Convertir el DataFrame a un array de numpy, ignorando la columna de fecha
data_values = data[['ndvi', 'precipitation', 'soil_moisture', 'recharge']].values

In [4]:
# Función para crear secuencias
def create_sequences(data, n_past_steps=1, n_forecast_steps=1, n_overlay_steps=1, include_target_as_feature=False):
    x_sequence = []
    y_sequence = []
    start_idx = 0

    while True:
        end_idx = start_idx + n_past_steps
        forecast_end_idx = end_idx + n_forecast_steps

        if forecast_end_idx > len(data):
            break

        if include_target_as_feature:
            x = data[start_idx:end_idx]  # Usar todas las columnas excepto la de fecha
        else:
            x = data[start_idx:end_idx, 1:]  # Ignorar la columna de fecha y la columna objetivo
        y = data[end_idx:forecast_end_idx, 0]  # La columna objetivo es 'ndvi'
        x_sequence.append(x)
        y_sequence.append(y)

        start_idx += n_overlay_steps

    return np.array(x_sequence), np.array(y_sequence)


In [8]:
# Función para crear y entrenar modelos
def create_and_train_model(model_type, input_shape, x_train, y_train, epochs, batch_size):
    model = Sequential()
    if model_type == "CNN":
        model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=input_shape))
        model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
        model.add(MaxPooling1D(pool_size=1))
        model.add(Dropout(0.3))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.3))
    elif model_type == "RNN":
        model.add(SimpleRNN(50, activation='relu', return_sequences=True, input_shape=input_shape))
        model.add(Dropout(0.3))
        model.add(SimpleRNN(50, activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.3))
    elif model_type == "LSTM":
        model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=input_shape))
        model.add(LSTM(50, activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.3))

    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse', 'RootMeanSquaredError'])
    history = model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0)
    return model, history

In [None]:
# Evaluar los modelos
evaluation_results = []
look_back_values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
epochs_values = [100, 200, 300]
batch_size_values = [16, 32]
model_types = ["CNN", "RNN", "LSTM"]

for look_back in look_back_values:
    # Crear las secuencias
    x, y = create_sequences(data_values, n_past_steps=look_back, n_forecast_steps=1, n_overlay_steps=1)

    # Dividir los datos en entrenamiento y prueba
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Asegurar que las dimensiones son correctas
    x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2]))
    x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2]))

    for epochs in epochs_values:
        for batch_size in batch_size_values:
            for model_type in model_types:
                model, history = create_and_train_model(model_type, (x_train.shape[1], x_train.shape[2]), x_train, y_train, epochs, batch_size)
                results = model.evaluate(x_test, y_test, verbose=0)
                y_pred = model.predict(x_test).flatten()
                ss_res = np.sum((y_test.flatten() - y_pred) ** 2)
                ss_tot = np.sum((y_test.flatten() - np.mean(y_test.flatten())) ** 2)
                r2 = 1 - (ss_res / ss_tot)
                evaluation_results.append([model_type, look_back, epochs, batch_size] + results[:3] + [r2])

In [31]:
results_df = pd.DataFrame(evaluation_results, columns=["Model", "Look_Back", "Epochs", "Batch_Size", "MAE", "MSE", "RMSE", "R2"])
results_df.head(5)

Unnamed: 0,Model,Look_Back,Epochs,Batch_Size,MAE,MSE,RMSE,R2
0,CNN,1,100,16,0.017216,0.107429,0.017216,0.654976
1,RNN,1,100,16,0.02363,0.142035,0.02363,0.526442
2,LSTM,1,100,16,0.017647,0.101199,0.017647,0.646341
3,CNN,1,100,32,0.014007,0.091653,0.014007,0.719281
4,RNN,1,100,32,0.022224,0.133055,0.022224,0.554607


In [34]:
# Filtrar los mejores modelos para cada tipo
best_cnn = results_df[results_df["Model"] == "CNN"].sort_values(by="RMSE", ascending=True).iloc[0]
best_rnn = results_df[results_df["Model"] == "RNN"].sort_values(by="RMSE", ascending=True).iloc[0]
best_lstm = results_df[results_df["Model"] == "LSTM"].sort_values(by="RMSE", ascending=True).iloc[0]

# Crear un DataFrame con los mejores modelos
best_models_df = pd.DataFrame([best_cnn, best_rnn, best_lstm])

# Mostrar los mejores modelos
print("Mejores modelos por tipo:")
print(best_models_df)

Mejores modelos por tipo:
    Model  Look_Back  Epochs  Batch_Size       MAE       MSE      RMSE  \
102   CNN          6     300          16  0.003789  0.051282  0.003789   
97    RNN          6     200          16  0.006891  0.071712  0.006891   
107  LSTM          6     300          32  0.005289  0.063358  0.005289   

           R2  
102  0.860355  
97   0.746044  
107  0.805088  


In [30]:
from sklearn.preprocessing import MinMaxScaler

# Normalizar las métricas MSE y R2
scaler = MinMaxScaler()

results_df["MSE_norm"] = scaler.fit_transform(results_df[["MSE"]])
results_df["R2_norm"] = scaler.fit_transform(results_df[["R2"]])

# Invertir la normalización de MSE para que valores más bajos sean mejores
results_df["MSE_norm"] = 1 - results_df["MSE_norm"]

# Calcular la puntuación compuesta como promedio de MSE_norm y R2_norm
results_df["Composite_Score"] = (results_df["MSE_norm"] + results_df["R2_norm"]) / 2

# Filtrar los mejores modelos para cada tipo basados en la puntuación compuesta
best_cnn = results_df[results_df["Model"] == "CNN"].sort_values(by="Composite_Score", ascending=False).iloc[0]
best_rnn = results_df[results_df["Model"] == "RNN"].sort_values(by="Composite_Score", ascending=False).iloc[0]
best_lstm = results_df[results_df["Model"] == "LSTM"].sort_values(by="Composite_Score", ascending=False).iloc[0]

# Crear un DataFrame con los mejores modelos
best_models_df = pd.DataFrame([best_cnn, best_rnn, best_lstm])

# Mostrar los mejores modelos
print("Mejores modelos por tipo basados en MSE y R2:")
print(best_models_df)


Mejores modelos por tipo basados en MSE y R2:
    Model  Look_Back  Epochs  Batch_Size       MAE       MSE      RMSE  \
90    CNN          6     100          16  0.003823  0.049667  0.003823   
31    RNN          2     300          16  0.009758  0.079603  0.009758   
107  LSTM          6     300          32  0.005289  0.063358  0.005289   

           R2  MSE_norm   R2_norm  Composite_Score  
90   0.859117  1.000000  0.999308         0.999654  
31   0.831474  0.819193  0.983868         0.901531  
107  0.805088  0.917308  0.969130         0.943219  
