## Análisis Precipitación Total Parque Nacional Cerro Saroche

**PROYECTO:** SISTEMA PARA EL SEGUIMIENTO DE ECOSISTEMAS VENEZOLANOS \
**AUTOR:** Javier Martinez

In [None]:
import os

# Move to the project root so that the `utils` package is importable and the
# relative './<park>/...' paths used further down resolve.
# Only step up while still inside the notebook's own folder: re-running this
# cell must not climb one more directory level each time.
print('> Directorio actual: ', os.getcwd())
if os.path.basename(os.getcwd()) == 'cerro_saroche':
    os.chdir('../')
print('> Directorio actual: ', os.getcwd())

> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics/cerro_saroche
> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics


In [None]:
import pandas as pd

from utils.MONGO import CONEXION
from utils.UTILS import *
from datetime import datetime


In [None]:
# Park identifier: used as the `park` filter of the MongoDB query and as the
# folder name in the relative paths for figures, experiments and data.
park = 'cerro_saroche'

## Creando Conexión con MongoDB

In [None]:
# Create the MongoDB connection via the project helper, then display the
# names of the collections it exposes.
db = CONEXION.conexion()
db.list_collection_names()

['meteorological', 'estimateSSTNino34', 'SSTNino34']

## Descargando la Información de Precipitación

In [None]:
# Query every meteorological document stored for the selected park
meteorological = db.meteorological.find({"park": park})

# Materialize the cursor into a DataFrame (one row per document)
data_pandas = pd.DataFrame(list(meteorological))

# `time` is converted with datetime.fromordinal, i.e. it is read as a
# proleptic Gregorian ordinal; derive the datetime and a month-year label.
data_pandas['periodo'] = data_pandas.time.map(datetime.fromordinal)
data_pandas['mes_year'] = data_pandas['periodo'].dt.strftime('%B-%Y')

# Index the frame by period (the `periodo` column is kept as well)
data_pandas.index = pd.to_datetime(data_pandas.periodo)

data_pandas.head()

Unnamed: 0_level_0,_id,id_point,park,time,elevacion_maxima,elevacion_media,elevacion_mediana,latitud,longitud,ndvi_maxima,ndvi_media,ndvi_mediana,precipitacion_mm,time_actualizacion,periodo,mes_year
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1970-01-01,6354801a343c9f2921d883bf,1,cerro_saroche,719163,921.0,508.541046,491.0,10.31,-69.83,,,,0.913065,738450,1970-01-01,January-1970
1970-02-01,6354801a343c9f2921d883d3,1,cerro_saroche,719194,921.0,508.541046,491.0,10.31,-69.83,,,,0.081278,738450,1970-02-01,February-1970
1970-03-01,6354801a343c9f2921d883e0,1,cerro_saroche,719222,921.0,508.541046,491.0,10.31,-69.83,,,,0.413783,738450,1970-03-01,March-1970
1970-04-01,6354801a343c9f2921d883ee,1,cerro_saroche,719253,921.0,508.541046,491.0,10.31,-69.83,,,,0.895653,738450,1970-04-01,April-1970
1970-05-01,6354801b343c9f2921d88407,1,cerro_saroche,719283,921.0,508.541046,491.0,10.31,-69.83,,,,2.90945,738450,1970-05-01,May-1970


Consulta de la data

In [None]:
# Build the precipitation figure with the project helper `precipitacion_graf`
# (presumably provided by the `utils.UTILS` star import), on a 5x3 grid.
fig_pre = precipitacion_graf(data_pandas, rows=5, cols=3, park='Cerro Saroche')

# Save a static copy under the park's figures folder, then render it inline
fig_pre.write_image(f'./{park}/figures/precipitacion_cerro_saroche.png')
fig_pre.show()

In [None]:
# One row per distinct (id_point, latitud, longitud, elevacion_media)
# combination; the per-group counts are discarded, only the keys are kept.
point_columns = ['id_point', 'latitud', 'longitud', 'elevacion_media']
pd_idPoints = data_pandas.groupby(point_columns, as_index=False).count()[point_columns]

pd_idPoints.head(15)

Unnamed: 0,id_point,latitud,longitud,elevacion_media
0,1,10.31,-69.83,508.541046
1,2,10.31,-69.73,625.942932
2,3,10.31,-69.63,731.954834
3,4,10.31,-69.53,761.12915
4,5,10.31,-69.43,726.967285
5,6,10.21,-69.83,604.697083
6,7,10.21,-69.73,652.016235
7,8,10.21,-69.63,850.282715
8,9,10.21,-69.53,769.925049
9,10,10.21,-69.43,919.076721


## Experimentos

In [None]:
# Root folder of the NARX precipitation experiments; it holds one sub-folder
# per experiment (e.g. 'id_point_1').
DIR = f'./{park}/experiments/narx/precipitacion/'

# Keep only visible directories: stray files or hidden folders would make the
# later os.listdir(experiment) / summary loading fail. Sorting makes the load
# order deterministic (os.listdir returns entries in arbitrary order).
experiments = [
    DIR + x
    for x in sorted(os.listdir(DIR))
    if not x.startswith('.') and os.path.isdir(DIR + x)
]

In [None]:
# Display-only example: path of a single experiment folder
DIR+'id_point_1'

'./cerro_saroche/experiments/narx/precipitacion/id_point_1'

In [None]:
# Results: for each experiment folder, stack every CSV whose file name
# contains 'summary'; then stack all experiments into a single frame.
pd_summary = pd.concat([
    pd.concat([
        pd.read_csv(expe + '/' + file_name)
        for file_name in os.listdir(expe)
        if 'summary' in file_name
    ])
    for expe in experiments
])

pd_summary.head()

Unnamed: 0,epocas,prediction_order,auto_order,exog_order,exog_delay,activation,id_point,n_neurons,capas,training_mse,...,validation_mse,validation_rmse,validation_mae,validation_mape,validation_r,test_mse,test_rmse,test_mae,test_mape,test_r
0,45,24,300,300,0,sigmoid,1,[400],1,0.65902,...,1.485337,2.206226,0.901779,109.537519,-0.000819,1.472471,2.16817,0.886656,105.936384,0.016445
0,91,24,300,300,0,sigmoid,10,[400],1,0.592166,...,1.109228,1.230387,0.801822,71.29184,0.072245,1.102538,1.21559,0.788168,69.970404,0.083402
0,100,24,300,300,0,sigmoid,11,[400],1,0.561695,...,1.697089,2.88011,0.932025,79.914884,-0.10676,1.704409,2.90501,0.919154,74.838975,-0.116329
0,100,24,300,300,0,sigmoid,12,[400],1,0.835044,...,1.474119,2.173028,0.847828,80.763725,-0.093834,1.486556,2.209848,0.850171,79.300669,-0.112368
0,83,24,300,300,0,sigmoid,13,[400],1,0.657379,...,1.216241,1.479242,0.83042,94.793286,0.080985,1.221886,1.493005,0.823595,92.313141,0.072435


Selección del mejor modelo

In [None]:
# For every point keep the summary row(s) with the highest `validation_r`;
# ties are all kept.
best_models = []

# `point_id` instead of `id`: a loop variable in a notebook cell is a global and
# would shadow the builtin `id` for the rest of the session.
for point_id in pd_summary.id_point.unique():

    data_id = pd_summary[pd_summary.id_point == point_id]

    best_models.append(data_id[data_id.validation_r == data_id.validation_r.max()])

pd_best_model = pd.concat(best_models)
pd_best_model.head()

Unnamed: 0,epocas,prediction_order,auto_order,exog_order,exog_delay,activation,id_point,n_neurons,capas,training_mse,...,validation_mse,validation_rmse,validation_mae,validation_mape,validation_r,test_mse,test_rmse,test_mae,test_mape,test_r
0,45,24,300,300,0,sigmoid,1,[400],1,0.65902,...,1.485337,2.206226,0.901779,109.537519,-0.000819,1.472471,2.16817,0.886656,105.936384,0.016445
0,91,24,300,300,0,sigmoid,10,[400],1,0.592166,...,1.109228,1.230387,0.801822,71.29184,0.072245,1.102538,1.21559,0.788168,69.970404,0.083402
0,100,24,300,300,0,sigmoid,11,[400],1,0.561695,...,1.697089,2.88011,0.932025,79.914884,-0.10676,1.704409,2.90501,0.919154,74.838975,-0.116329
0,100,24,300,300,0,sigmoid,12,[400],1,0.835044,...,1.474119,2.173028,0.847828,80.763725,-0.093834,1.486556,2.209848,0.850171,79.300669,-0.112368
0,83,24,300,300,0,sigmoid,13,[400],1,0.657379,...,1.216241,1.479242,0.83042,94.793286,0.080985,1.221886,1.493005,0.823595,92.313141,0.072435


Resultados mejores modelos

In [None]:
# Columns kept for every point, in output order
output_columns = ['park', 'periodo', 'id_point', 'latitud', 'longitud', 'type',
                  'precipitacion_mm', 'prediction_precipitacion_mm', 'elevacion_media']

data_precipitacion = []

# `point_id` instead of `id`: avoids shadowing the builtin `id` at notebook scope.
for point_id in pd_best_model.id_point.unique():
    # NOTE(review): pickle files can execute code on load; only read files
    # produced by this project's own experiments.
    data = pd.read_pickle(f'{DIR}id_point_{point_id}/predicciones.pkl').reset_index(drop=False)

    data['id_point'] = point_id
    data['park'] = park
    data['periodo'] = pd.to_datetime(data.periodo)

    # Attach coordinates and mean elevation of the point, then keep the output columns
    data = pd.merge(data, pd_idPoints, on=['id_point'], how='left')[output_columns]

    data_precipitacion.append(data)

precipitacion_pd = pd.concat(data_precipitacion)
precipitacion_pd.head()

Unnamed: 0,park,periodo,id_point,latitud,longitud,type,precipitacion_mm,prediction_precipitacion_mm,elevacion_media
0,cerro_saroche,1995-01-01,1,10.31,-69.83,training,0.072351,0.176529,508.541046
1,cerro_saroche,1995-02-01,1,10.31,-69.83,training,0.340843,0.335624,508.541046
2,cerro_saroche,1995-03-01,1,10.31,-69.83,training,2.29073,1.018381,508.541046
3,cerro_saroche,1995-04-01,1,10.31,-69.83,training,1.064486,1.298513,508.541046
4,cerro_saroche,1995-05-01,1,10.31,-69.83,training,1.11433,1.298577,508.541046


## Gráfico NARX precipitación 

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Grid layout of the figure
rows, cols = 5, 3

# Placeholder figure: the plotting cell further down rebuilds `fig` with the
# real subplot titles before `model_trace` is called.
fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles='test',
    horizontal_spacing=0.1,
    vertical_spacing=0.13,
)

#------------------
def model_trace(precipitacion_pd, row, col):
    """Draw the NARX results of one point in subplot (row, col) of the global ``fig``.

    Adds five series (training prediction and observation, test prediction and
    observation, forecast), dashed vertical lines at the end of the training
    and test periods, and the axis styling. Vertical lines and axis settings
    are applied to the target subplot only.

    Parameters
    ----------
    precipitacion_pd : pandas.DataFrame
        Data to draw; the caller passes the rows of a single point. Columns
        read: ``type`` ('training', 'test', 'prediction'), ``periodo``,
        ``id_point``, ``precipitacion_mm`` and ``prediction_precipitacion_mm``.
    row, col : int
        1-based subplot position inside ``fig``.

    Returns
    -------
    The global ``fig`` object, modified in place.
    """
    data_figure_ajuste = precipitacion_pd.query("type=='training'")
    data_figure_validacion = precipitacion_pd.query("type=='test'")
    data_figure_pronostico = precipitacion_pd.query("type=='prediction'")

    # Legend entries are emitted only by the subplot of point 1; the legend
    # groups link the matching series drawn in the other subplots.
    # An empty training subset yields False instead of raising IndexError.
    showlegend = bool(
        not data_figure_ajuste.empty
        and data_figure_ajuste.id_point.iloc[0] == 1
    )

    marker_line_width = 1.5
    marker_size = 1
    line_width = 2

    def add_series(data, y_column, name, symbol, color, legendgroup, dash=None, **extra):
        """Add one lines+markers series to the target subplot."""
        line = dict(color=color, width=line_width)
        if dash is not None:
            line['dash'] = dash
        fig.add_trace(
            go.Scatter(
                x=data.periodo,
                y=data[y_column],
                mode='lines+markers',
                name=name,
                marker_symbol=symbol,
                marker_line_width=marker_line_width,
                marker_size=marker_size,
                marker_line_color=color,
                marker_color=color,
                line=line,
                legendgroup=legendgroup,
                showlegend=showlegend,
                **extra,
            ),
            row=row,
            col=col,
        )

    # The trace order below defines the legend order.
    add_series(data_figure_ajuste, 'prediction_precipitacion_mm',
               'Pronóstico entrenamiento', 'hexagram', '#009DD8', 'group1')
    add_series(data_figure_ajuste, 'precipitacion_mm',
               'Precipitación entrenamiento', 'x-thin', '#0055D8', 'group2')
    add_series(data_figure_validacion, 'prediction_precipitacion_mm',
               'Pronóstico validación', 'hexagram', '#00B6FF', 'group3')
    add_series(data_figure_validacion, 'precipitacion_mm',
               'Precipitación validación', 'square', '#009BD9', 'group4')
    add_series(data_figure_pronostico, 'prediction_precipitacion_mm',
               'Pronóstico Precipitación', 'star', '#29E8FF', 'group5',
               dash='dot',
               text=data_figure_pronostico['prediction_precipitacion_mm'].apply(lambda x: str(round(x, 2))),
               textposition="top right")

    # Mark the end of the training and test periods in THIS subplot only.
    # Without row/col every call would stack the lines on all subplots.
    for subset in (data_figure_ajuste, data_figure_validacion):
        if not subset.empty:
            fig.add_vline(x=subset.periodo.max(),
                          line_width=2,
                          line_dash="dash",
                          line_color="#580606",
                          row=row,
                          col=col)

    xaxis_style = dict(tickformat="%Y/%m",
                       showline=True,
                       linewidth=1,
                       linecolor='black',
                       gridcolor='#E4E4E4',
                       mirror=True,
                       ticks="outside",
                       tickwidth=2,
                       tickcolor='#5C2B05',
                       ticklen=10,
                       title_text='Mes',
                       title_font=dict(size=12))

    # Visible window: last 4 years of training up to 3 months after the forecast.
    # Skipped when either bound is unavailable, leaving the axis on autorange.
    if not data_figure_ajuste.empty and not data_figure_pronostico.empty:
        xaxis_style['range'] = [
            data_figure_ajuste.periodo.max() - pd.DateOffset(months=4*12),
            data_figure_pronostico.periodo.max() + pd.DateOffset(months=3),
        ]

    fig.update_xaxes(row=row, col=col, **xaxis_style)
    fig.update_yaxes(showline=True,
                     linewidth=1,
                     linecolor='black',
                     gridcolor='#E4E4E4',
                     mirror=True,
                     ticks="outside",
                     tickwidth=2,
                     tickcolor='#5C2B05',
                     ticklen=10,
                     range=[0, 10],
                     title_text='Precipitación (mm)',
                     title_font=dict(size=12),
                     row=row,
                     col=col)

    return fig
#----------------------

In [None]:
rows = 5
cols = 3

from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Sorted point ids: the k-th id (1-based) is drawn in the k-th subplot,
# filling the grid row by row.
point_ids = precipitacion_pd.sort_values('id_point').id_point.unique().tolist()
if len(point_ids) > rows * cols:
    raise ValueError(
        f'{len(point_ids)} points do not fit in a {rows}x{cols} subplot grid'
    )

column_titles = ['Punto id:' + str(int(x)) for x in point_ids]

fig = make_subplots(rows=rows,
                    cols=cols,
                    subplot_titles=column_titles,
                    horizontal_spacing=0.1,
                    vertical_spacing=0.13)

# Ids split in chunks of `cols`: one inner list per grid row. Unlike a
# reshape, this also works when there are fewer points than subplots.
row_order = [point_ids[start:start + cols] for start in range(0, len(point_ids), cols)]

for row, list_row in enumerate(row_order, start=1):
    for col, point_id in enumerate(list_row, start=1):
        model_trace(precipitacion_pd.query(f"id_point=={point_id}"),
                    row=row, col=col)

#------------
# Subplot-title annotations created by make_subplots; they are re-created
# below (larger font) together with the phase labels.
annotations_title = fig['layout']['annotations']

y_loc = 7.5
yshift = 4

# The phase labels use the same x position in every subplot: compute them once.
phase_labels = [
    (precipitacion_pd.query("type=='training'").periodo.max() - pd.DateOffset(months=12*2), 'Entrenamiento'),
    (precipitacion_pd.query("type=='test'").periodo.max() - pd.DateOffset(months=6), 'Validación'),
    (precipitacion_pd.query("type=='prediction'").periodo.max() - pd.DateOffset(months=6), 'Pronóstico'),
]

annotations = []

for position, id_ in enumerate(point_ids, start=1):

    # Axis references follow the subplot position, not the id value, so the
    # labels land in the right subplot even when ids are not exactly 1..N.
    xref = 'x{n}'.format(n=position)
    yref = 'y{n}'.format(n=position)

    title_text = 'Punto id:{id_}'.format(id_=int(id_))
    titulo_annotations = next(a for a in annotations_title if a.text == title_text)

    # Subplot title, same placement as the original annotation
    annotations.append(dict(font=dict(size=16),
                            showarrow=titulo_annotations['showarrow'],
                            text=titulo_annotations['text'],
                            x=titulo_annotations['x'],
                            xanchor=titulo_annotations['xanchor'],
                            xref='paper',
                            y=titulo_annotations['y'],
                            yanchor=titulo_annotations['yanchor'],
                            yref='paper'))

    # Training / validation / forecast labels inside the subplot
    annotations.extend(
        dict(x=label_x,
             y=y_loc,  # annotation point
             xref=xref,
             yref=yref,
             text=label_text,
             showarrow=False,
             yshift=yshift,
             font=dict(size=9))
        for label_x, label_text in phase_labels
    )

fig['layout']['annotations'] = annotations

#------------
fig.update_xaxes(tickformat="%Y/%m", showline=True, linewidth=1, linecolor='black', gridcolor='#E4E4E4', mirror=True,
                 ticks="outside", tickwidth=2, tickcolor='#5C2B05', ticklen=10)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', gridcolor='#E4E4E4', mirror=True,
                 ticks="outside", tickwidth=2, tickcolor='#5C2B05', ticklen=10)

#------------
# Forecast window shown in the figure subtitle
forecast_periods = precipitacion_pd.query("type=='prediction'").periodo

fig.update_layout( height=800,
                   width=1500,
                   font = dict(size = 12),
                   template='plotly_white',
                   title_text="""Precipitación total Cerro Saroche
                        <br><sup>Pronóstico para el periodo {date_init} al {date_fin}</sup>
                        """.format(date_init=forecast_periods.min().strftime('%Y/%m'),
                                   date_fin=forecast_periods.max().strftime('%Y/%m')),
                   legend_title_text='Serie',
                   legend_title = dict( font = dict(size = 14)),
                   uniformtext_minsize=8,
                   uniformtext_mode='hide'
                 )

fig.write_image(f'./{park}/figures/narx_precipitacion.png')
fig.show()

## Data para variación espacio-temporal de la precipitación

Predicciones

In [None]:
# Columns written to disk, in output order
columns = [
    'park', 'periodo', 'year', 'month', 'id_point', 'latitud', 'longitud',
    'type', 'precipitacion_mm', 'prediction_precipitacion_mm', 'elevacion_media',
]

# Calendar year and month of each period, stored as integers
precipitacion_pd['year'] = precipitacion_pd['periodo'].dt.year.astype(int)
precipitacion_pd['month'] = precipitacion_pd['periodo'].dt.month.astype(int)

# Persist the predictions for the spatio-temporal analysis
precipitacion_pd[columns].to_pickle(f'./{park}/data/narx_precipitacion.pkl')