## RNN Precipitación Cerro Saroche

**PROYECTO:** SISTEMA PARA EL SEGUIMIENTO DE ECOSISTEMAS VENEZOLANOS \
**AUTOR:** Javier Martinez

Directorio de trabajo

In [1]:
import os

print('> Directorio actual: ', os.getcwd())  
os.chdir('../')
print('> Directorio actual: ', os.getcwd()) 

> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics/cerro_saroche
> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics


In [2]:
from utils.MONGO import CONEXION
from datetime import datetime
import pandas as pd
import numpy as np

from tensorflow import keras

from utils.UTILS import *

# Creando Coenxión con Mongo DB

In [3]:
# Creando la conexión con MongoDB
db = CONEXION.conexion()
db.list_collection_names()

['meteorological', 'estimateSSTNino34', 'SSTNino34']

# Descargando la Información Precipitación

In [4]:
# Parque
park = 'cerro_saroche'

# Realizando consulta
meteorological = db.meteorological.find({"park":park})

# Generando pandas dataframe
data_pandas = pd.DataFrame([file for file in meteorological])
data_pandas['periodo'] = data_pandas.time.apply(lambda x: datetime.fromordinal(x))
data_pandas['mes_year'] =  data_pandas['periodo'].dt.strftime('%B-%Y')
data_pandas.index = pd.to_datetime(data_pandas.periodo)
data_pandas.head()

Unnamed: 0_level_0,_id,id_point,park,time,elevacion_maxima,elevacion_media,elevacion_mediana,latitud,longitud,ndvi_maxima,ndvi_media,ndvi_mediana,precipitacion_mm,time_actualizacion,periodo,mes_year
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1970-01-01,633988a2eed0e0231b327c97,1,cerro_saroche,719163,921.0,508.541046,491.0,10.31,-69.83,,,,0.913065,738430,1970-01-01,January-1970
1970-01-01,633988a2eed0e0231b327ca7,2,cerro_saroche,719163,1000.0,625.942932,614.0,10.31,-69.73,,,,0.958915,738430,1970-01-01,January-1970
1970-01-01,633988a2eed0e0231b327cc0,3,cerro_saroche,719163,1025.0,731.954834,731.0,10.31,-69.63,,,,1.026073,738430,1970-01-01,January-1970
1970-01-01,633988a2eed0e0231b327ccc,4,cerro_saroche,719163,1103.0,761.12915,737.0,10.31,-69.53,,,,1.095035,738430,1970-01-01,January-1970
1970-01-01,633988a3eed0e0231b327cdc,5,cerro_saroche,719163,1202.0,726.967285,709.0,10.31,-69.43,,,,1.203287,738430,1970-01-01,January-1970


In [5]:
# Registros
print(data_pandas.shape)

(9435, 16)


In [6]:
# Data disponible
print(data_pandas.index.min())
print(data_pandas.index.max())

1970-01-01 00:00:00
2022-05-01 00:00:00


In [7]:
DIR = './cerro_saroche/'

In [8]:
# Cantidad de registros
park_points = data_pandas[[ 'id_point','latitud', 'longitud','precipitacion_mm']]\
                .groupby(['id_point','latitud', 'longitud'],as_index=False)\
                .count()

# Guardando resumen de experimentos
park_points.to_csv(DIR + 'summary/poinst.csv')
park_points

Unnamed: 0,id_point,latitud,longitud,precipitacion_mm
0,1,10.31,-69.83,629
1,2,10.31,-69.73,629
2,3,10.31,-69.63,629
3,4,10.31,-69.53,629
4,5,10.31,-69.43,629
5,6,10.21,-69.83,629
6,7,10.21,-69.73,629
7,8,10.21,-69.63,629
8,9,10.21,-69.53,629
9,10,10.21,-69.43,629


In [9]:
pd_precipitacion = data_pandas[['id_point', 'latitud', 'longitud',
                                'precipitacion_mm']]
pd_precipitacion.head()

Unnamed: 0_level_0,id_point,latitud,longitud,precipitacion_mm
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1970-01-01,1,10.31,-69.83,0.913065
1970-01-01,2,10.31,-69.73,0.958915
1970-01-01,3,10.31,-69.63,1.026073
1970-01-01,4,10.31,-69.53,1.095035
1970-01-01,5,10.31,-69.43,1.203287


# Cargando data SST

In [10]:
# Realizando consulta
data_sst = db.estimateSSTNino34.find()

# Generando pandas dataframe
pd_sst = pd.DataFrame([file for file in data_sst])[['oni','time']]
pd_sst['periodo'] = pd_sst.time.apply(lambda x: datetime.fromordinal(x))
pd_sst.index = pd.to_datetime(pd_sst.periodo)
pd_sst

pd_sst.head()

Unnamed: 0_level_0,oni,time,periodo
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1950-01-01,-1.47,711858,1950-01-01
1950-02-01,-1.336667,711889,1950-02-01
1950-03-01,-1.163333,711917,1950-03-01
1950-04-01,-1.18,711948,1950-04-01
1950-05-01,-1.07,711978,1950-05-01


In [11]:
oni_max = pd_sst.oni.max()
oni_min = pd_sst.oni.min()

pd_sst['oni'] = pd_sst['oni'].apply(lambda x: (x-oni_min)/(oni_max-oni_min))

# Integrando base de datos

In [12]:
# Entrenamiento
pd_model = pd.merge(pd_precipitacion.reset_index(drop=False),pd_sst[['oni']].reset_index(drop=False),
                    on=['periodo'],
                    how='left'
                    )

# Pronostico
pd_sst_pron = pd_sst[['periodo','oni']][pd_sst.periodo > pd_model.periodo.max()].copy()

# Ajustando modelo RNN

In [13]:
id_point = 1
y_output = 'precip_t'
feature = 'oni'

prediction_order = 12 # rango de prediccion
auto_order = 20*12 # componente autoregresiva

In [14]:
# Data
data_pd = pd_model.query(f'id_point=={id_point}').copy()
data_pd.index = pd.to_datetime(data_pd.periodo)

# Transformacion
transformacion = LogMinimax.create( data_pd.precipitacion_mm.to_numpy() )
data_pd['precip_t'] = transformacion.transformacion()

data_pd = data_pd[[y_output,feature]].sort_index().copy()
data_pd.head()

Unnamed: 0_level_0,precip_t,oni
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1
1970-01-01,0.60906,0.543835
1970-02-01,0.167493,0.506771
1970-03-01,0.464581,0.497505
1970-04-01,0.605545,0.476123
1970-05-01,0.820615,0.444048


Redefiniendo serie temporal

In [15]:
x_data = []
y_data = []

for t in range(auto_order+1, data_pd.shape[0]+1):
    x_data.append( np.array(data_pd[(t-auto_order-1):(t-1)]) )
    y_data.append( np.array( data_pd[(t-auto_order-1):t][[y_output]] )[-1] )

In [16]:
x_data = np.array(x_data)
y_data = np.array(y_data).reshape(x_data.shape[0],1,1)

Entrenamiento y validación

In [17]:
x_train = x_data[:-prediction_order]
x_vasl = x_data[-prediction_order:]

y_train = y_data[:-prediction_order]
y_vasl = y_data[-prediction_order:]

print(x_train.shape)
print(x_vasl.shape)

print(y_train.shape)
print(y_vasl.shape)

(377, 240, 2)
(12, 240, 2)
(377, 1, 1)
(12, 1, 1)


Modelos RNN

In [18]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [19]:
# Metrícas
mae = keras.metrics.MeanAbsoluteError()
rmse = keras.metrics.RootMeanSquaredError()

In [20]:
model = keras.models.Sequential()

rate = 0.2
model.add(keras.layers.LSTM(auto_order, return_sequences=True ))
model.add(keras.layers.Dropout(rate))

model.add(keras.layers.LSTM(auto_order, return_sequences=False ))
model.add(keras.layers.Dropout(rate-0.1))

model.add(keras.layers.Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=[mae,rmse]) 

In [21]:
callback = keras.callbacks.EarlyStopping(
                                            monitor="loss",
                                            min_delta=0,
                                            patience=10,
                                            verbose=0,
                                            mode="min",
                                            baseline=None,
                                            restore_best_weights=False,
                                        )

Entrenamiento

In [22]:
epochs=500

history = model.fit(x=x_train,
                    y=y_train,
                    epochs=epochs,
                    batch_size=1,
                    verbose=0,
                    workers=2,
                    callbacks=[callback])

In [None]:
print(f'Total epocas:{len(history.epoch)}')

Total epocas:81


Evaluación

In [None]:
# make predictions
trainPredict = model.predict(x_train, verbose=0).reshape(-1)
testPredict = model.predict(x_vasl, verbose=0).reshape(-1)

In [None]:
trainPredict.shape

(605,)

In [None]:
# Data de test
trainind_pd = pd.DataFrame(trainPredict,
                            index = data_pd.index[:-prediction_order][-len(trainPredict):],
                            columns=['prediction']
                            )

trainind_pd[y_output] = y_train.reshape(-1)
trainind_pd['type'] = 'training'
trainind_pd['precipitacion_mm'] = trainind_pd[y_output].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )
trainind_pd['prediction_precipitacion_mm'] = trainind_pd['prediction'].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )

trainind_pd.head()

Unnamed: 0_level_0,prediction,precip_t,type,precipitacion_mm,prediction_precipitacion_mm
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1971-01-01,0.505371,0.488734,training,0.47232,0.517388
1971-02-01,0.400332,0.283769,training,0.153676,0.291017
1971-03-01,0.48448,0.515458,training,0.546783,0.46144
1971-04-01,0.652396,0.817589,training,2.861612,1.157717
1971-05-01,0.704987,0.825787,training,2.99305,1.544258


In [None]:
trainig_metrics = metrics(observado=trainind_pd.precipitacion_mm,prediccion=trainind_pd.prediction_precipitacion_mm)
trainig_metrics

{'mape': 61.70487731345017,
 'mae': 0.6114203347532523,
 'mse': 0.9949820567450053,
 'rmse': 0.989989293244521,
 'r2': 0.4718533272717301}

In [None]:
# Data de Validacion
validation_pd = pd.DataFrame(testPredict,
                            index = data_pd.index[-prediction_order:],
                            columns=['prediction']
                            )

validation_pd[y_output] = y_vasl.reshape(-1)
validation_pd['type'] = 'validation'

validation_pd['precipitacion_mm'] = validation_pd[y_output].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )
validation_pd['prediction_precipitacion_mm'] = validation_pd['prediction'].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )
validation_pd.head()

Unnamed: 0_level_0,prediction,precip_t,type,precipitacion_mm,prediction_precipitacion_mm
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-06-01,0.525114,0.763679,validation,2.129873,0.576484
2021-07-01,0.535403,0.566845,validation,0.724552,0.609909
2021-08-01,0.576858,0.660034,validation,1.207184,0.765402
2021-09-01,0.691543,0.582156,validation,0.787944,1.434613
2021-10-01,0.81864,0.717919,validation,1.657627,2.878138


In [None]:
validation_metrics = metrics(observado=validation_pd.precipitacion_mm,prediccion=validation_pd.prediction_precipitacion_mm)
validation_metrics

{'mape': 168.29115161942616,
 'mae': 0.9721708365989242,
 'mse': 1.3836596783621964,
 'rmse': 1.9145141055253767,
 'r2': -1.0495351098286183}

In [None]:
def precipitacion_data_rnn(data_pd,auto_order,y_output):
    """
    Funcion para darle estructura a los datos para modelo rnn
    """

    x_data = []
    y_data = []

    for t in range(auto_order+1, data_pd.shape[0]+1):
        x_data.append( np.array(data_pd[(t-auto_order-1):(t-1)]) )
        y_data.append( np.array( data_pd[(t-auto_order-1):t][[y_output]] )[-1] )

    x_data = np.array(x_data)
    y_data = np.array(y_data).reshape(x_data.shape[0],1,1)

    return x_data, y_data

In [None]:
def one_step_predict(data_for_test,pd_sst_pron,auto_order,y_output,feature):
    """
    Funcion para el pronostico a un paso de la precipitacion
    """

    for t in pd_sst_pron.index:

        x_entrada, y_salida = precipitacion_data_rnn(data_pd=data_for_test[data_for_test.index < t],
                                                    auto_order=auto_order,
                                                    y_output=y_output)

        predict = model.predict(x_entrada, verbose=0).reshape(-1)

        data_forecast = pd.DataFrame({y_output:predict[-1],
                                feature:pd_sst_pron[pd_sst_pron.index == t][feature][0]},
                                index = [t])
                                
        data_for_test = pd.concat([data_for_test, data_forecast]).copy()

    return data_for_test

In [None]:
pd_test = one_step_predict(data_for_test=data_pd[:(data_pd.shape[0] - auto_order)],
                pd_sst_pron=data_pd[-auto_order:],
                auto_order=auto_order,
                y_output=y_output,
                feature=feature)

pd_test = pd_test[-auto_order:].rename(columns={'precip_t':'prediction'})
pd_test['type'] = 'test'
pd_test[y_output] = data_pd[-auto_order:][y_output]


pd_test['precipitacion_mm'] = pd_test[y_output].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )
pd_test['prediction_precipitacion_mm'] = pd_test['prediction'].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )
pd_test.head()

Unnamed: 0,prediction,oni,type,precip_t,precipitacion_mm,prediction_precipitacion_mm
2021-06-01,0.614502,0.354241,test,0.763679,2.129873,0.940694
2021-07-01,0.525114,0.348539,test,0.566845,0.724552,0.576484
2021-08-01,0.534216,0.328582,test,0.660034,1.207184,0.605955
2021-09-01,0.554468,0.28938,test,0.582156,0.787944,0.677054
2021-10-01,0.655893,0.26087,test,0.717919,1.657627,1.18011


In [None]:
test_metrics = metrics(observado=pd_test.precipitacion_mm,prediccion=pd_test.prediction_precipitacion_mm)
test_metrics

{'mape': 454.3043143337145,
 'mae': 1.047542000074278,
 'mse': 1.3533780808292513,
 'rmse': 1.8316322296690675,
 'r2': -0.9608079941360712}

In [None]:
pd_prediction = one_step_predict(data_for_test=data_pd,
                                pd_sst_pron=pd_sst_pron,
                                auto_order=auto_order,
                                y_output=y_output,
                                feature=feature)

In [None]:
pd_prediction = pd_prediction.rename(columns={'precip_t':'prediction'})
pd_prediction['type'] = 'prediction'

pd_prediction[y_output] = np.nan

pd_prediction['precipitacion_mm'] =  np.nan
pd_prediction['prediction_precipitacion_mm'] = pd_prediction['prediction'].apply(lambda x: transformacion.inversa(x) if np.isnan(x)==False else np.nan )

pd_prediction.head()

Unnamed: 0,prediction,oni,type,precip_t,precipitacion_mm,prediction_precipitacion_mm
1970-01-01,0.60906,0.543835,prediction,,,0.913065
1970-02-01,0.167493,0.506771,prediction,,,0.081278
1970-03-01,0.464581,0.497505,prediction,,,0.413783
1970-04-01,0.605545,0.476123,prediction,,,0.895653
1970-05-01,0.820615,0.444048,prediction,,,2.90945


In [None]:
# Resultados del modelo
dict_metrics = {'epocas':[len(history.epoch)],
                'auto_order':[auto_order],
                'id_point':[id_point],
                'training_mse':[history.history["loss"][-1]],
                'training_rmse':[history.history["root_mean_squared_error"][-1]],
                'training_mae':[history.history["mean_absolute_error"][-1]],
                'trainig_mape':[trainig_metrics['mape']],
                'trainig_r':[trainig_metrics['r2']],
                'validation_mse':[validation_metrics["mse"]],
                'validation_rmse':[validation_metrics["rmse"]],
                'validation_mae':[validation_metrics["mae"]],
                'validation_mape':[validation_metrics['mape']],
                'validation_r':[validation_metrics['r2']],
                'test_mse':[test_metrics["mse"]],
                'test_rmse':[test_metrics["rmse"]],
                'test_mae':[test_metrics["mae"]],
                'test_mape':[test_metrics['mape']],
                'test_r':[test_metrics['r2']]
                }

experimento_pd = pd.DataFrame.from_dict(dict_metrics)
experimento_pd

Unnamed: 0,epocas,auto_order,id_point,training_mse,training_rmse,training_mae,trainig_mape,trainig_r,validation_mse,validation_rmse,validation_mae,validation_mape,validation_r,test_mse,test_rmse,test_mae,test_mape,test_r
0,196,12,1,0.015218,0.12336,0.097935,61.704877,0.471853,1.38366,1.914514,0.972171,168.291152,-1.049535,1.353378,1.831632,1.047542,454.304314,-0.960808


In [None]:
columns = [ 'precip_t',
            'prediction',
            #'oni',
            'type',
            'precipitacion_mm',
            'prediction_precipitacion_mm']

In [None]:
# Uniendo informacion
pd_summary = pd.concat([trainind_pd[columns], 
                        pd_test[columns], 
                        pd_prediction[columns]
                        ])
pd_summary['periodo'] = pd.to_datetime(pd_summary.index.values)
pd_summary = pd_summary[ ['periodo']+ columns ]
pd_summary

Unnamed: 0,periodo,precip_t,prediction,type,precipitacion_mm,prediction_precipitacion_mm
1971-01-01,1971-01-01,0.488734,0.469698,training,0.472320,0.425546
1971-02-01,1971-02-01,0.283769,0.417885,training,0.153676,0.320390
1971-03-01,1971-03-01,0.515458,0.506914,training,0.546783,0.521779
1971-04-01,1971-04-01,0.817589,0.717392,training,2.861612,1.652848
1971-05-01,1971-05-01,0.825787,0.759201,training,2.993050,2.078264
...,...,...,...,...,...,...
2024-02-01,2024-02-01,,0.502425,prediction,,0.509105
2024-03-01,2024-03-01,,0.573754,prediction,,0.752498
2024-04-01,2024-04-01,,0.630557,prediction,,1.027175
2024-05-01,2024-05-01,,0.671114,prediction,,1.282727
