## Análisis de la precipitación total del Parque Nacional Terepaima

**PROYECTO:** SISTEMA PARA EL SEGUIMIENTO DE ECOSISTEMAS VENEZOLANOS \
**AUTOR:** Javier Martinez

In [1]:
import os

# Remember where the kernel started. The original relative chdir('../') was
# applied again on every execution of this cell, so re-running it climbed one
# more directory level each time.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

print('> Directorio actual: ', os.getcwd())
# Always land on the parent of the start directory, no matter how many times
# the cell is executed. On a fresh kernel this is exactly chdir('../').
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))
print('> Directorio actual: ', os.getcwd())

> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics/terepaima
> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics


In [2]:
import pandas as pd
import pickle

from utils.MONGO import CONEXION
from utils.UTILS import *
from datetime import datetime


In [3]:
# Park identifier: used below as the MongoDB query filter and as the folder
# name for the figures, data and experiment files.
park = 'terepaima'

## Creando conexión con MongoDB

In [4]:
# Open the MongoDB connection through the project helper and list the
# collections it exposes (displayed as the cell output).
db = CONEXION.conexion()
db.list_collection_names()

['estimateMeteorological', 'meteorological', 'estimateSSTNino34', 'SSTNino34']

## Descargando la información de precipitación

In [5]:
# Query: every document of the `meteorological` collection for the selected park
meteorological = db.meteorological.find({"park": park})

# Materialise the cursor into a DataFrame and derive the date columns:
#   periodo  -> datetime built from the ordinal day number stored in `time`
#   mes_year -> "<month name>-<year>" label
# The frame is then indexed by `periodo` (the column itself is kept).
data_pandas = pd.DataFrame(list(meteorological))
data_pandas['periodo'] = data_pandas['time'].apply(datetime.fromordinal)
data_pandas['mes_year'] = data_pandas['periodo'].dt.strftime('%B-%Y')
data_pandas.index = pd.to_datetime(data_pandas['periodo'])

data_pandas.head()

Unnamed: 0_level_0,_id,id_point,park,time,elevacion_maxima,elevacion_media,elevacion_mediana,latitud,longitud,ndvi_maxima,ndvi_media,ndvi_mediana,precipitacion_mm,time_actualizacion,periodo,mes_year
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1970-01-01,63548350343c9f2921dbb11e,1,terepaima,719163,1632.0,1001.651794,986.0,9.96,-69.38,,,,1.698088,738450,1970-01-01,January-1970
1970-02-01,63548350343c9f2921dbb12a,1,terepaima,719194,1632.0,1001.651794,986.0,9.96,-69.38,,,,0.460838,738450,1970-02-01,February-1970
1970-03-01,63548350343c9f2921dbb147,1,terepaima,719222,1632.0,1001.651794,986.0,9.96,-69.38,,,,0.559042,738450,1970-03-01,March-1970
1970-04-01,63548350343c9f2921dbb165,1,terepaima,719253,1632.0,1001.651794,986.0,9.96,-69.38,,,,1.815903,738450,1970-04-01,April-1970
1970-05-01,63548350343c9f2921dbb174,1,terepaima,719283,1632.0,1001.651794,986.0,9.96,-69.38,,,,3.538272,738450,1970-05-01,May-1970


Consulta de la data

In [6]:
# Build the precipitation figure with the project helper (rows/cols = 3/3)
fig_pre = precipitacion_graf(data_pandas, rows=3, cols=3, park='Terepaima')

# Save a static copy under the park's figures folder, then render it inline
fig_pre.write_image(f'./{park}/figures/precipitacion_terepaima.png')
fig_pre.show()

In [7]:
# Catalogue of grid points: one row per distinct
# (id_point, latitud, longitud, elevacion_media) combination.
# A plain de-duplication replaces the previous groupby(...).count(), which
# aggregated every other column of the frame only to discard the counts.
point_columns = ['id_point', 'latitud', 'longitud', 'elevacion_media']
pd_idPoints = (
    data_pandas[point_columns]
    .dropna()                      # groupby skips rows with missing keys; keep that
    .drop_duplicates()
    .sort_values(point_columns)    # groupby returns the keys sorted
    .reset_index(drop=True)        # plain 0..n-1 index, as as_index=False produced
)

pd_idPoints.head(15)

Unnamed: 0,id_point,latitud,longitud,elevacion_media
0,1,9.96,-69.38,1001.651794
1,2,9.96,-69.28,896.212402
2,3,9.96,-69.18,470.032227
3,4,9.86,-69.38,1273.258179
4,5,9.86,-69.28,1118.260376
5,6,9.86,-69.18,415.874298
6,7,9.76,-69.38,946.540039
7,8,9.76,-69.28,779.210327
8,9,9.76,-69.18,319.055603


## Resultados de experimentos

In [8]:
# Folder with the NARX precipitation experiments of the park; each entry is
# listed as a directory further down.
DIR = f'./{park}/experiments/narx/precipitacion/'
# os.listdir returns entries in arbitrary order: sort them so every run of the
# notebook walks the experiments (and builds the summaries) in the same order.
experiments = [DIR + name for name in sorted(os.listdir(DIR))]

In [9]:
# Results: collect every file whose name contains "summary" inside the
# experiment folders. Entries that are not directories are skipped.
summary_files = [
    os.path.join(expe, file_name)
    for expe in experiments
    if os.path.isdir(expe)
    for file_name in os.listdir(expe)
    if 'summary' in file_name
]

# One concat over the flat list: the previous nested concat raised
# "No objects to concatenate" as soon as a single experiment folder had no
# summary file, even when the other folders did.
pd_summary = pd.concat([pd.read_csv(path) for path in summary_files])

pd_summary.head()

Unnamed: 0,epocas,prediction_order,auto_order,exog_order,exog_delay,activation,id_point,n_neurons,capas,training_mse,...,validation_mse,validation_rmse,validation_mae,validation_mape,validation_r,test_mse,test_rmse,test_mae,test_mape,test_r
0,136,12,300,12,3,sigmoid,1,[312],1,0.503236,...,1.20339,1.448147,0.826594,92.828018,0.116837,1.299331,1.68826,0.875396,111.157368,-0.029598
0,67,12,300,6,3,sigmoid,1,[204],1,0.905964,...,0.967234,0.935542,0.72824,58.869794,0.429453,0.938787,0.881321,0.731257,64.029039,0.46252
0,224,12,300,7,3,sigmoid,1,[204],1,1.2369,...,1.88344,3.547347,1.487561,138.761769,-1.163375,2.023798,4.095759,1.607576,164.401273,-1.497829
0,101,12,300,6,3,sigmoid,1,"[204, 102]",2,0.675138,...,0.94285,0.888967,0.675058,54.720768,0.457857,0.930896,0.866568,0.683657,60.309027,0.471517
0,292,12,300,6,3,sigmoid,1,"[153, 102, 51]",3,0.340508,...,0.977751,0.955997,0.768393,82.373721,0.416978,1.004662,1.009347,0.77359,96.206662,0.384443


Resultados de los mejores modelos

In [10]:
list_confi = []

# `point_id` rather than `id`: a top-level loop variable stays bound after the
# loop, so `id` would shadow the builtin for the rest of the kernel session.
for point_id in pd_summary.sort_values('id_point').id_point.unique():
    # Configuration stored for this point.
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # files produced by this project's own training runs.
    with open(f'{DIR}id_point_{point_id}/model_confi.pkl', 'rb') as f:
        model_confi = pickle.load(f)

    # One single-row frame with the metrics of the stored model
    list_confi.append(pd.DataFrame(model_confi['metrics'], index=[0]))

# Metrics table for all points, also exported as CSV
resultados = pd.concat(list_confi)
resultados.to_csv(f'./{park}/data/narx_precipitacion_metricas.csv', index=False)
resultados.head(15)

Unnamed: 0,epocas,prediction_order,auto_order,exog_order,exog_delay,activation,id_point,n_neurons,capas,training_mse,...,validation_mse,validation_rmse,validation_mae,validation_mape,validation_r,test_mse,test_rmse,test_mae,test_mape,test_r
0,154,12,300,6,3,sigmoid,1,"[136, 68]",2,0.618685,...,0.89046,0.79292,0.62416,52.570545,0.516432,0.856055,0.73283,0.627661,57.276507,0.553079
0,86,12,300,6,3,sigmoid,2,"[204, 102]",2,0.646996,...,0.860606,0.740643,0.58765,73.771065,0.504247,0.87204,0.760454,0.600131,82.739385,0.490986
0,50,12,300,6,3,sigmoid,3,"[204, 102]",2,0.76868,...,0.756376,0.572104,0.586775,75.221626,0.650407,0.754464,0.569215,0.58906,80.438073,0.652172
0,98,12,300,6,3,sigmoid,4,"[136, 68]",2,1.138662,...,1.135454,1.289256,0.871301,60.142448,0.554768,1.141309,1.302587,0.879486,64.6395,0.550164
0,90,12,300,6,3,sigmoid,5,"[102, 68, 34]",3,1.011181,...,1.012693,1.025547,0.759925,89.097135,0.593936,1.02872,1.058265,0.767663,99.45294,0.580981
0,41,12,300,6,3,sigmoid,6,[204],1,0.979771,...,0.962431,0.926273,0.692862,80.690972,0.660111,0.959097,0.919867,0.689475,84.331606,0.662462
0,73,12,300,6,3,sigmoid,7,"[136, 68]",2,1.431064,...,1.426843,2.035882,1.066772,45.455854,0.666482,1.427769,2.038525,1.067363,46.641814,0.666049
0,190,12,300,6,3,sigmoid,8,"[153, 102, 51]",3,1.076001,...,1.298327,1.685654,0.966634,95.473198,0.612038,1.344002,1.80634,0.980965,104.90964,0.584261
0,42,12,300,6,3,sigmoid,9,"[204, 102]",2,1.88734,...,1.168452,1.365281,0.91071,59.389968,0.679191,1.168707,1.365877,0.909862,60.255154,0.679051


Pronóstico de los mejores modelos

In [11]:
# Number of forecast steps kept per point: the smallest prediction order
# found in the metrics table.
prediction_order = resultados.prediction_order.min()
data_precipitacion = []

# `point_id` rather than `id`: a top-level loop variable stays bound after the
# loop, so `id` would shadow the builtin for the rest of the kernel session.
for point_id in pd_summary.id_point.unique():
    # NOTE(review): read_pickle unpickles the file; only use it on files
    # produced by this project's own training runs.
    data = pd.read_pickle(f'{DIR}id_point_{point_id}/predicciones.pkl').reset_index(drop=False)

    data['id_point'] = point_id
    data['park'] = park
    data['periodo'] = pd.to_datetime(data.periodo)

    # Attach coordinates and mean elevation of the point, keep the analysis columns
    data = pd.merge(data, pd_idPoints, on=['id_point'], how='left')[
        ['park', 'periodo', 'id_point', 'latitud', 'longitud', 'type',
         'precipitacion_mm', 'prediction_precipitacion_mm', 'elevacion_media']
    ]

    # Last period among the first `prediction_order` prediction rows
    max_date = data[data['type'] == 'prediction'].iloc[:prediction_order].periodo.max()

    # Keep training/test/prediction rows up to that date
    keep = (data.periodo <= max_date) & data['type'].isin(['training', 'test', 'prediction'])
    data_precipitacion.append(data[keep])

precipitacion_pd = pd.concat(data_precipitacion)
precipitacion_pd.head()

Unnamed: 0,park,periodo,id_point,latitud,longitud,type,precipitacion_mm,prediction_precipitacion_mm,elevacion_media
0,terepaima,1995-01-01,1,9.96,-69.38,training,0.230639,0.22416,1001.651794
1,terepaima,1995-02-01,1,9.96,-69.38,training,0.216488,0.193438,1001.651794
2,terepaima,1995-03-01,1,9.96,-69.38,training,2.480125,1.698412,1001.651794
3,terepaima,1995-04-01,1,9.96,-69.38,training,2.167091,2.218324,1001.651794
4,terepaima,1995-05-01,1,9.96,-69.38,training,2.611307,2.473603,1001.651794


## Gráfico NARX precipitación 

In [12]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

rows = 3
cols = 3
# Placeholder grid bound to the global `fig` that `model_trace` draws on; the
# plotting cell rebuilds `fig` with the real per-point titles before any trace
# is added.
# `subplot_titles` is omitted on purpose: it expects a list with one title per
# subplot, and the previous bare string 'test' is not such a list.
fig = make_subplots(rows=rows,
                    cols=cols,
                    horizontal_spacing=0.1,
                    vertical_spacing=0.13)

#------------------
def model_trace(precipitacion_pd, row, col):
    """Draw the NARX series of one grid point on a subplot of the global ``fig``.

    Five line+marker traces are added to the subplot at (row, col): fitted and
    observed precipitation for the 'training' rows, fitted and observed
    precipitation for the 'test' rows, and the fitted values of the
    'prediction' rows. Dashed vertical lines mark the last training period and
    the last test period, and the axes of that subplot are styled.

    Parameters
    ----------
    precipitacion_pd : pandas.DataFrame
        Rows to plot. Columns read here: ``type``, ``periodo``, ``id_point``,
        ``precipitacion_mm`` and ``prediction_precipitacion_mm``.
    row, col : int
        Subplot position passed to plotly (1-based).

    Returns
    -------
    The global ``fig``, modified in place.

    Notes
    -----
    Vertical lines and axis settings are restricted to (row, col). Without
    ``row``/``col`` plotly applies them to every subplot, so each call used to
    stack duplicate lines on the panels already drawn and to overwrite their
    x-range with the range of the current point.
    """
    data_figure_ajuste = precipitacion_pd.query("type=='training'")
    data_figure_validacion = precipitacion_pd.query("type=='test'")
    data_figure_pronostico = precipitacion_pd.query("type=='prediction'")

    # Legend entries are emitted only by the panel whose first training id is 1,
    # presumably so each series shows once in the shared legend.
    # An empty training subset simply yields no legend instead of an IndexError.
    training_ids = data_figure_ajuste.id_point.unique()
    showlegend = bool(len(training_ids) > 0 and training_ids[0] == 1)

    marker_line_width = 1.5
    marker_size = 1
    line_width = 2

    def _add_series(data, column, name, symbol, color, legendgroup, dash=None, **scatter_kwargs):
        """Add one line+marker series of `data[column]` to the (row, col) subplot."""
        line = dict(color=color, width=line_width)
        if dash is not None:
            line['dash'] = dash
        fig.add_trace(go.Scatter(x=data.periodo, y=data[column],
                                 mode='lines+markers', name=name,
                                 marker_symbol=symbol,
                                 marker_line_width=marker_line_width,
                                 marker_size=marker_size,
                                 marker_line_color=color,
                                 marker_color=color,
                                 line=line,
                                 legendgroup=legendgroup,
                                 showlegend=showlegend,
                                 **scatter_kwargs),
                      row=row,
                      col=col)

    # Same trace order as before: it determines drawing and legend order.
    _add_series(data_figure_ajuste, 'prediction_precipitacion_mm',
                'Pronóstico entrenamiento', 'hexagram', '#009DD8', 'group1')
    _add_series(data_figure_ajuste, 'precipitacion_mm',
                'Precipitación entrenamiento', 'x-thin', '#0055D8', 'group2')
    _add_series(data_figure_validacion, 'prediction_precipitacion_mm',
                'Pronóstico validación', 'hexagram', '#00B6FF', 'group3')
    _add_series(data_figure_validacion, 'precipitacion_mm',
                'Precipitación validación', 'square', '#009BD9', 'group4')
    _add_series(data_figure_pronostico, 'prediction_precipitacion_mm',
                'Pronóstico Precipitación', 'star', '#29E8FF', 'group5',
                dash='dot',
                text=data_figure_pronostico['prediction_precipitacion_mm'].apply(lambda x: str(round(x, 2))),
                textposition="top right")

    # Split markers: end of the training span and end of the test span
    for split in (data_figure_ajuste, data_figure_validacion):
        if not split.empty:
            fig.add_vline(x=split.periodo.max(), line_width=2, line_dash="dash",
                          line_color="#580606", row=row, col=col)

    xaxis_style = dict(tickformat="%Y/%m",
                       showline=True,
                       linewidth=1,
                       linecolor='black',
                       gridcolor='#E4E4E4',
                       mirror=True,
                       ticks="outside",
                       tickwidth=2,
                       tickcolor='#5C2B05',
                       ticklen=10,
                       title_text='Mes',
                       title_font=dict(size=12))
    # Visible window: last four years of training up to three months past the
    # final forecast. Skipped when either bound is undefined (empty subset).
    if not data_figure_ajuste.empty and not data_figure_pronostico.empty:
        xaxis_style['range'] = [data_figure_ajuste.periodo.max() - pd.DateOffset(months=4*12),
                                data_figure_pronostico.periodo.max() + pd.DateOffset(months=3)]
    fig.update_xaxes(row=row, col=col, **xaxis_style)

    fig.update_yaxes(showline=True,
                     linewidth=1,
                     linecolor='black',
                     gridcolor='#E4E4E4',
                     mirror=True,
                     ticks="outside",
                     tickwidth=2,
                     tickcolor='#5C2B05',
                     ticklen=10,
                     range=[0,10],
                     title_text='Precipitación (mm)',
                     title_font=dict(size=12),
                     row=row,
                     col=col)

    return fig
#----------------------

In [13]:
rows = 3
cols = 3

# Repeated here so the cell can be executed on its own
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Sorted point ids fix the panel order (left to right, top to bottom)
point_ids = precipitacion_pd.sort_values('id_point').id_point.unique()
column_titles = ['Punto id:' + str(int(x)) for x in point_ids.tolist()]

fig = make_subplots(rows=rows,
                    cols=cols,
                    subplot_titles=column_titles,
                    horizontal_spacing=0.1,
                    vertical_spacing=0.13)

# ids arranged as a rows x cols grid; requires exactly rows*cols points
row_order = point_ids.reshape(rows, cols).tolist()

for row_number, list_row in enumerate(row_order, start=1):
    for col_number, point_id in enumerate(list_row, start=1):
        model_trace(precipitacion_pd.query(f"id_point=={point_id}"),
                    row=row_number, col=col_number)

#------------
# Subplot titles generated by make_subplots; they are re-created below with a
# larger font because assigning layout.annotations replaces the whole list.
annotations_title = fig['layout']['annotations']

y_loc = 9
yshift = 4

# Section labels shared by every panel, computed once instead of on every
# iteration: (text, x position).
section_labels = [
    ('Entrenamiento',
     precipitacion_pd.query("type=='training'").periodo.max() - pd.DateOffset(months=12*2)),
    ('Validación',
     precipitacion_pd.query("type=='test'").periodo.max() - pd.DateOffset(months=6)),
    ('Pronóstico',
     precipitacion_pd.query("type=='prediction'").periodo.max() - pd.DateOffset(months=6)),
]

annotations = []

for position, id_ in enumerate(point_ids.tolist(), start=1):

    # Axis references follow the subplot position (1..rows*cols, row-major),
    # not the id value: the two only coincide when the ids are exactly 1..N.
    xref = 'x{n}'.format(n=position)
    yref = 'y{n}'.format(n=position)

    titulo_annotations = list(filter(lambda x: x.text == 'Punto id:{id_}'.format(id_=int(id_)),
                                     annotations_title))[0]

    # Panel title, same placement as generated by make_subplots
    annotations.append(dict(font=dict(size=16),
                            showarrow=titulo_annotations['showarrow'],
                            text=titulo_annotations['text'],
                            x=titulo_annotations['x'],
                            xanchor=titulo_annotations['xanchor'],
                            xref='paper',
                            y=titulo_annotations['y'],
                            yanchor=titulo_annotations['yanchor'],
                            yref='paper'))

    # Section labels placed in data coordinates of this panel
    for label_text, label_x in section_labels:
        annotations.append(dict(x=label_x,
                                y=y_loc,
                                xref=xref,
                                yref=yref,
                                text=label_text,
                                showarrow=False,
                                yshift=yshift,
                                font=dict(size=9)))

fig['layout']['annotations'] = annotations

#------------
fig.update_xaxes(tickformat="%Y/%m", showline=True, linewidth=1, linecolor='black', gridcolor='#E4E4E4', mirror=True,
                 ticks="outside", tickwidth=2, tickcolor='#5C2B05', ticklen=10)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', gridcolor='#E4E4E4', mirror=True,
                 ticks="outside", tickwidth=2, tickcolor='#5C2B05', ticklen=10)

#------------
# Forecast window shown in the subtitle
prediction_periods = precipitacion_pd.query("type=='prediction'").periodo

fig.update_layout( height=800,
                   width=1500,
                   font = dict(size = 12),
                   template='plotly_white',
                   title_text="""Precipitación total Terepaima
                        <br><sup>Pronóstico para el periodo {date_init} al {date_fin}</sup>
                        """.format(date_init=str(prediction_periods.min().strftime('%Y/%m')),
                                   date_fin=str(prediction_periods.max().strftime('%Y/%m'))),
                   legend_title_text='Serie',
                   legend_title = dict( font = dict(size = 14)),
                   uniformtext_minsize=8,
                   uniformtext_mode='hide',
                   legend = dict()
                 )

fig.write_image(f'./{park}/figures/narx_precipitacion_terepaima.png')
fig.show()

## Data para variación espacio-temporal de la precipitación

Predicciones

In [14]:
# Columns written to the export, in order
columns = ['park', 'periodo', 'year', 'month', 'id_point', 'latitud', 'longitud',
           'type', 'precipitacion_mm', 'prediction_precipitacion_mm', 'elevacion_media']

# Calendar parts taken straight from the datetime accessor instead of
# formatting to text and parsing back. astype(int) keeps the integer dtype the
# previous strftime(...).astype(int) produced.
precipitacion_pd['year'] = precipitacion_pd['periodo'].dt.year.astype(int)
precipitacion_pd['month'] = precipitacion_pd['periodo'].dt.month.astype(int)

precipitacion_pd[columns].to_pickle(f'./{park}/data/narx_precipitacion.pkl')