## Variación espacio-temporal precipitación total

**PROYECTO:** SISTEMA PARA EL SEGUIMIENTO DE ECOSISTEMAS VENEZOLANOS \
**AUTOR:** Javier Martinez

In [1]:
import os

# Move from the notebook folder to the repository root so that the project-local
# `utils` package and the relative `./{park}/...` paths used below resolve.
# The move is guarded: an unconditional `os.chdir('../')` climbs one more level
# every time the cell is re-run, silently breaking every later relative path.
# NOTE(review): assumes the `utils` package lives in the repository root and not
# next to the notebook -- confirm.
print('> Directorio actual: ', os.getcwd())
if not os.path.isdir('utils'):
    os.chdir('../')
print('> Directorio actual: ', os.getcwd())

> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics/cerro_saroche
> Directorio actual:  /media/javier/Compartida/doctorado/ssev-analytics


In [41]:
import pandas as pd
import pickle

from utils.MONGO import CONEXION
from utils.UTILS import *
from datetime import datetime

from plotly.subplots import make_subplots
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras

In [3]:
import warnings
# Install a blanket 'ignore' filter: no warning of any category is shown from here on.
# NOTE(review): this also hides any deprecation notices raised by later cells;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [4]:
# Park (study area) identifier; it is interpolated into the data and figure paths below
park = 'cerro_saroche'

Directorio de trabajo

In [39]:
# Root folder of the park, relative to the current working directory
DIR = f'./{park}/'
# Folder where the trained model, its weights and the training history are written
experimento = f'{DIR}experiments/narx/ndvi'

Cargando datos

In [5]:
# Columns kept from the pickled table
columnas_base = ['park',
                 'periodo',
                 'year',
                 'month',
                 'id_point',
                 'latitud',
                 'longitud',
                 'type',
                 'prediction_ann',
                 'ndvi_media']

pd_precipitacion = pd.read_pickle(f'./{park}/data/ann_precipitacion.pkl')[columnas_base]

# Min-max scaling fitted jointly on the precipitation estimate and the observed NDVI;
# the same scaler is reused later to map model outputs back to the original units.
columnas_origen = ['prediction_ann', 'ndvi_media']
columnas_escaladas = ['precipitation_ann_t', 'ndvi_t']

ndvi_transformacion = MinMaxScaler()
ndvi_transformacion.fit(pd_precipitacion[columnas_origen])

pd_precipitacion[columnas_escaladas] = ndvi_transformacion.transform(pd_precipitacion[columnas_origen])
pd_precipitacion.head()

Unnamed: 0,park,periodo,year,month,id_point,latitud,longitud,type,prediction_ann,ndvi_media,precipitation_ann_t,ndvi_t
0,cerro_saroche,1995-01-01,1995,1,1,10.31,-69.83,training,0.207416,,0.008535,
1,cerro_saroche,1995-02-01,1995,2,1,10.31,-69.83,training,0.262076,,0.028418,
2,cerro_saroche,1995-03-01,1995,3,1,10.31,-69.83,training,0.698307,,0.1871,
3,cerro_saroche,1995-04-01,1995,4,1,10.31,-69.83,training,1.442033,,0.457635,
4,cerro_saroche,1995-05-01,1995,5,1,10.31,-69.83,training,1.287615,,0.401464,


In [6]:
# Points with vegetation: ids that still have rows once every record
# containing a missing value has been dropped
list_ids = pd_precipitacion.dropna().id_point.unique()

array([ 1, 10, 12, 13, 14, 15,  2,  6,  7,  8,  9])

Definiendo modelo

In [7]:
# Settings forwarded to `split_data` and `predict_one_stap_narx`.
# NOTE(review): the meanings below are inferred from the names only -- confirm
# against the helpers in utils.UTILS.
exog_order = 1*12  # presumably number of lagged exogenous values (12 monthly steps)
auto_order = 6  # presumably number of autoregressive lags of the target
exog_delay = 1  # presumably delay applied to the exogenous series
prediction_order = 12  # presumably forecast horizon

# Target column: min-max scaled `ndvi_media`
y_output = 'ndvi_t'
# Exogenous column: min-max scaled `prediction_ann`
exogena = 'precipitation_ann_t'

# Activation used by the hidden layers of the network
f_activation = 'sigmoid'

In [8]:
# Size the hidden layer from the input width, using point 1 as the reference series
id_point = 1
serie_punto = pd_precipitacion[pd_precipitacion['id_point'] == id_point]
serie_punto.index = pd.to_datetime(serie_punto['periodo'])
pd_model_id = serie_punto[[y_output, exogena]].dropna().sort_index()

# Training matrices for the reference point
x_data, y_data = split_data(pd_model_id,
                            exog_order,
                            auto_order,
                            exog_delay,
                            prediction_order,
                            exogena,
                            y_output)

# Two thirds of the number of input features, truncated to an integer
n_features = x_data.shape[-1]
total = int(2 * n_features / 3)
n_neurons = [total]
f_activation = 'sigmoid'
activation = [f_activation for _ in n_neurons]

# Weight initialisation
kernel_initializer, bias_initializer = 'lecun_normal', 'zeros'

# Training schedule: epochs per `fit` call and early-stopping patience
epochs, patience = 7, 5

In [9]:
# Set the TF_CPP_MIN_LOG_LEVEL environment variable (TensorFlow native-log verbosity switch).
# NOTE(review): TensorFlow is already imported by the imports cell above; this variable is
# normally read when the library loads, so setting it here may have no effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [10]:
# Metric objects passed to `model.compile` and reported alongside the loss
mae = keras.metrics.MeanAbsoluteError()
rmse = keras.metrics.RootMeanSquaredError()

# Keyword arguments for the network layers, grouped by layer type.
# Every entry is read back with `confi.get(<layer>).get(<argument>)` when the model is built.
confi = dict(
    Input=dict(
        batch_size=None,
        name='input',
        dtype=None,
        sparse=None,
        tensor=None,
        ragged=None,
        type_spec=None,
    ),
    Dense=dict(
        use_bias=True,
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
    ),
)

In [11]:
# Feed-forward network: input -> hidden dense layer(s) -> single linear output unit
model = keras.models.Sequential()

# Layer keyword arguments, read from the shared `confi` dictionary
input_kwargs = {clave: confi.get('Input').get(clave)
                for clave in ('batch_size', 'name', 'dtype', 'sparse',
                              'tensor', 'ragged', 'type_spec')}
dense_kwargs = {clave: confi.get('Dense').get(clave)
                for clave in ('use_bias', 'kernel_regularizer', 'bias_regularizer',
                              'activity_regularizer', 'kernel_constraint', 'bias_constraint')}


def _capa_densa(units, funcion_activacion):
    """Return a hidden Dense layer configured with the shared initialisers and `confi` options."""
    return keras.layers.Dense(units=units,
                              activation=funcion_activacion,
                              kernel_initializer=kernel_initializer,
                              bias_initializer=bias_initializer,
                              **dense_kwargs)


# Input layer: one feature per column of the training matrix
model.add(keras.layers.Input(shape=(x_data.shape[-1],), **input_kwargs))

# First hidden layer, followed by dropout
model.add(_capa_densa(n_neurons[0], activation[0]))
model.add(keras.layers.Dropout(0.1))

# Remaining hidden layers (none when `n_neurons` has a single entry)
for unidades, funcion_activacion in zip(n_neurons[1:], activation[1:]):
    model.add(_capa_densa(unidades, funcion_activacion))

# Output layer
model.add(keras.layers.Dense(units=1,
                             activation='linear',
                             kernel_initializer=kernel_initializer,
                             bias_initializer=bias_initializer))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=[mae, rmse])

# Stop a `fit` call early when the training loss has not improved for `patience` epochs
callback = keras.callbacks.EarlyStopping(monitor="loss",
                                         min_delta=0,
                                         patience=patience,
                                         verbose=0,
                                         mode="min",
                                         baseline=None,
                                         restore_best_weights=False)

Iniciando experimento

In [90]:
# Controls how many passes the training loop makes over the set of points
iteraciones = 50

In [94]:
# Build the training matrices of every point once. They depend only on the input
# table and the lag settings, so recomputing them on every pass was wasted work.
# NOTE(review): assumes `split_data` is deterministic (the prediction cell already
# relies on its rows lining up with the sorted index) -- confirm.
datos_entrenamiento = {}
for id_point in list_ids:

    pd_model_id = pd_precipitacion[pd_precipitacion.id_point==id_point]
    pd_model_id.index = pd.to_datetime(pd_model_id.periodo)
    pd_model_id = pd_model_id[[y_output,exogena]].dropna().sort_index()

    datos_entrenamiento[id_point] = split_data(pd_model_id,exog_order,auto_order,exog_delay,prediction_order,exogena,y_output)

# A single shared model is fitted on each point in turn, `iteraciones` times.
# `range(iteraciones)` runs exactly `iteraciones` passes; the previous
# `range(1, iteraciones)` ran one fewer than configured.
for i in range(iteraciones):
    for id_point, (x_data, y_data) in datos_entrenamiento.items():

        # `history` is overwritten on every call, so after the loop it only
        # describes the last fit (last point of the last pass).
        history = model.fit(x=x_data,
                            y=y_data,
                            epochs=epochs,
                            batch_size=1,
                            verbose=0,
                            workers=2,
                            callbacks=[callback])

In [96]:
# For every point: (1) in-sample predictions on the training inputs, (2) forecast over the
# periods that follow the last observed NDVI, (3) both stacked in one long-format frame.
list_data = []

for id_point in list_ids:

    # Scaled target / exogenous series of the point, indexed by period and without gaps
    pd_model_id = pd_precipitacion[pd_precipitacion.id_point==id_point]
    pd_model_id.index = pd.to_datetime(pd_model_id.periodo)
    pd_model_id = pd_model_id[[y_output,exogena]].dropna().sort_index()

    # Model inputs / targets
    x_data, y_data = split_data(pd_model_id,exog_order,auto_order,exog_delay,prediction_order,exogena,y_output)

    # Predictions on the same inputs the model was trained on
    trainPredict = model.predict(x_data, verbose=0).reshape(-1)

    # In-sample frame, labelled with the last `x_data.shape[0]` periods of the series
    # NOTE(review): assumes the rows returned by `split_data` line up with the tail of the index -- confirm
    trainind_pd = pd.DataFrame(trainPredict,
                                index = pd_model_id[-x_data.shape[0]:].index,
                                columns=['prediction']
                                )

    trainind_pd[y_output] = y_data.reshape(-1)
    trainind_pd['type'] = 'training'
    # The scaler was fitted on two columns, so a NaN placeholder stands in for the
    # precipitation column when only the NDVI column has to be inverted
    trainind_pd['precipitation_ann_t'] = np.nan

    trainind_pd['id_point'] = id_point

    # Back to original NDVI units: first the predictions, then the observed values.
    # 'prediction_ann' only receives the inverted placeholder and is dropped by the selection below.
    trainind_pd[['prediction_ann','ndvi_prediction']] = ndvi_transformacion.inverse_transform(trainind_pd[['precipitation_ann_t','prediction']])
    trainind_pd[['prediction_ann','ndvi_media']] = ndvi_transformacion.inverse_transform(trainind_pd[['precipitation_ann_t',y_output]])

    trainind_pd = trainind_pd.reset_index(drop=False)[['id_point', 'periodo','type','ndvi_prediction','ndvi_media']]


    # Forecast
    data_predict = pd_model_id[[y_output,exogena]]

    # Exogenous values of this point for the periods after the last one used for fitting;
    # the target column is left as NaN for those periods
    data_exogena = pd_precipitacion[(pd_precipitacion.periodo > data_predict.index.max()) & (pd_precipitacion.id_point==id_point)][[exogena,'periodo']]
    data_exogena.index = pd.to_datetime(data_exogena.periodo)
    data_exogena[y_output] = np.nan
    data_exogena = data_exogena.sort_index()[[exogena,y_output]]

    # NOTE(review): `predict_one_stap_narx` is a project helper; the code below expects it to return
    # a frame indexed by `periodo` holding the forecast in `y_output` plus a 'precipitation_ann_t' column -- confirm
    pd_prediction = predict_one_stap_narx(model,data_predict,data_exogena,exog_order,auto_order,exog_delay,prediction_order, exogena, y_output)
    pd_prediction = pd_prediction.rename(columns={y_output:'prediction'})
    pd_prediction['type'] = 'prediction'
    pd_prediction['id_point'] = id_point


    # Forecast back to original NDVI units; there is no observed NDVI for these periods
    pd_prediction[['prediction_ann','ndvi_prediction']] = ndvi_transformacion.inverse_transform(pd_prediction[['precipitation_ann_t','prediction']])
    pd_prediction['ndvi_media'] = np.nan

    pd_prediction = pd_prediction.reset_index(drop=False)[['id_point', 'periodo','type','ndvi_prediction','ndvi_media']]


    # Stack in-sample rows and forecast rows with the same column order
    pd_summary = pd.concat([trainind_pd[list(pd_prediction)], 
                            pd_prediction[list(pd_prediction)]
                            ])

    list_data.append(pd_summary)

# Results for all points in a single frame
pd_analisis_ndvi = pd.concat(list_data)

# In-sample rows only: forecast rows carry no observed NDVI to compare against
pd_data_test = pd_analisis_ndvi.query("type=='training'")

# One row of goodness-of-fit metrics per point, in ascending id order.
# The loop variable is not called `id`: at notebook scope that name would keep
# shadowing the builtin for every later cell. Rows are selected with a boolean
# mask and keep their original order (sorting a single-id frame by id is a no-op).
# NOTE(review): in the recorded output the `mse` column equals the square root of the
# `rmse` column, which suggests the two labels are swapped inside `metrics` -- verify.
list_metricas = []
for punto in pd_data_test.sort_values('id_point').id_point.unique():
    pd_data = pd_data_test[pd_data_test.id_point == punto]
    pd_metricas = pd.DataFrame(metrics(observado=pd_data.ndvi_media,
                                       prediccion=pd_data.ndvi_prediction)
                                ,index=[0])

    pd_metricas['id_point'] = punto

    list_metricas.append(pd_metricas)

pd_metricas_ndvi = pd.concat(list_metricas)
pd_metricas_ndvi

Unnamed: 0,mape,mae,mse,rmse,r2,id_point
0,10.779396,0.046806,0.063315,0.004009,0.684453,1
0,10.48424,0.044028,0.059043,0.003486,0.698865,2
0,9.308357,0.037736,0.049426,0.002443,0.748174,6
0,9.93919,0.032812,0.041182,0.001696,0.761156,7
0,10.814055,0.035399,0.045227,0.002045,0.710739,8
0,10.424338,0.031924,0.042019,0.001766,0.699383,9
0,10.766601,0.038369,0.048512,0.002353,0.640421,10
0,11.349111,0.035009,0.042276,0.001787,0.705737,12
0,9.587341,0.031583,0.040415,0.001633,0.736814,13
0,9.517295,0.031043,0.039808,0.001585,0.668641,14


In [108]:
# Create the experiment folder if needed: saving fails on a fresh checkout otherwise
os.makedirs(experimento, exist_ok=True)

# Full model (architecture + weights + optimizer state)
model.save(f'{experimento}/model.h5')

# Weights only
model.save_weights(f'{experimento}/weights.h5')

# Training history. `history` is reassigned by every `fit` call of the training
# loop, so this file only describes the last of those calls.
with open(f'{experimento}/history.pkl', 'wb') as file_pi:
    pickle.dump(history.history, file_pi)

# Per-point metrics
pd_metricas_ndvi.to_csv(f'{DIR}data/narx_ndvi_metricas.csv')

# In-sample predictions and forecast for every point
pd_analisis_ndvi.to_csv(f'{DIR}data/narx_ndvi.csv')

Gráfico

In [98]:
# Placeholder frame for points without data: the periods and types of point 1,
# with both NDVI columns blanked out
pd_periodo_null = (
    pd_analisis_ndvi
    .query("id_point==1")
    .assign(ndvi_prediction=np.nan, ndvi_media=np.nan)
)

def ndvi_model_trace(data,row, col, pd_periodo_null):
    """
    Draw the NDVI series of one point on subplot (row, col) of the module-level `fig`.

    Three traces are added (in-sample prediction, observed NDVI, forecast) together
    with a dashed vertical line at the last training period, and the axes of that
    subplot are styled.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single point with the columns `periodo`, `type`
        ('training' / 'prediction'), `ndvi_prediction`, `ndvi_media` and `id_point`.
        May be empty.
    row, col : int
        1-based position of the target subplot.
    pd_periodo_null : pandas.DataFrame
        Frame with the same columns, plotted instead of `data` when `data` is empty.

    Returns
    -------
    The module-level `fig` (it is modified in place).
    """
    # Points without data fall back to the placeholder frame
    origen = pd_periodo_null if data.shape[0]==0 else data
    data_figure_ajuste = origen[origen.type=='training'].sort_values('periodo')
    data_figure_pronostico = origen[origen.type=='prediction'].sort_values('periodo')

    # Legend entries are emitted once, by point 1. The check is written so that an empty
    # frame yields a plain False instead of testing the truth value of an empty array
    # (deprecated in NumPy, an error in recent releases).
    showlegend = bool(data.shape[0] > 0 and (data.id_point == 1).all())

    marker_line_width=1.5
    marker_size=1
    line_width=2

    # In-sample prediction
    fig.add_trace(go.Scatter(x=data_figure_ajuste['periodo'], y=data_figure_ajuste['ndvi_prediction'],
                             mode='lines+markers',name='Pronóstico entrenamiento',
                             marker_symbol='hexagram',
                             marker_line_width=marker_line_width,
                             marker_size=marker_size,
                             marker_line_color='#00D470',
                             marker_color='#00D470',
                             line=dict(color='#00D470', width=line_width),
                             legendgroup='group1',
                             showlegend=showlegend
                            ),
            row=row,
            col=col)

    # Observed NDVI
    fig.add_trace(go.Scatter(x=data_figure_ajuste['periodo'], y=data_figure_ajuste['ndvi_media'],
                             mode='lines+markers',name='NDVI entrenamiento',
                             marker_symbol='x-thin',
                             marker_line_width=marker_line_width,
                             marker_size=marker_size,
                             marker_line_color='#004A27',
                             marker_color='#004A27',
                             line=dict(color='#004A27', width=line_width),
                             legendgroup='group2',
                             showlegend=showlegend
                            ),
            row=row,
            col=col)

    # Forecast
    fig.add_trace(go.Scatter(x=data_figure_pronostico['periodo'], y=data_figure_pronostico['ndvi_prediction'],
                             text=data_figure_pronostico['ndvi_prediction'].apply(lambda x: str(round(x,2)) ),
                             textposition="top right",
                             marker_symbol='star',
                             marker_line_width=marker_line_width,
                             marker_size=marker_size,
                             marker_line_color='#49FF00',
                             marker_color='#49FF00',
                             mode='lines+markers',name='Pronóstico NDVI',
                             line=dict(color='#49FF00', width=line_width,dash='dot'),
                            legendgroup='group5',
                            showlegend=showlegend
                            ),
            row=row,
            col=col)

    # Boundary between training and forecast. It is restricted to this subplot: without
    # row/col the line is added to every populated subplot on each call and piles up.
    fig.add_vline(x=data_figure_ajuste['periodo'].max(), line_width=2, line_dash="dash", line_color="#580606",
                  row=row, col=col)

    # Axis styling is also restricted to this subplot, and the range is taken from the frame
    # actually plotted; otherwise the last call (possibly an empty point, i.e. a NaT range)
    # overwrites the x range of every subplot.
    fig.update_xaxes(tickformat="%Y/%m",
                     showline=True,
                     linewidth=1,
                     linecolor='black',
                     gridcolor='#E4E4E4',
                     mirror=True,
                     ticks="outside",
                     tickwidth=2,
                     tickcolor='#5C2B05',
                     ticklen=10,
                     range=[origen.periodo.min(),
                            origen.periodo.max() + pd.DateOffset(months=12)],
                     title_text='Mes',
                     title_font=dict(size=12),
                     row=row,
                     col=col
                    )
    fig.update_yaxes(showline=True,
                     linewidth=1,
                     linecolor='black',
                     gridcolor='#E4E4E4',
                     mirror=True,
                     ticks="outside",
                     tickwidth=2,
                     tickcolor='#5C2B05',
                     ticklen=10,
                     range=[0,1],
                     title_text='NDVI',
                     title_font=dict(size=12),
                     row=row,
                     col=col)

    return fig

In [99]:
# Grid of subplots, one per point id (1..15), filled row by row
rows=5
cols=3

list_id = list(range(1,16))

column_titles = [f'Punto id:{int(punto)}' for punto in list_id]

fig = make_subplots(rows=rows,
                    cols=cols,
                    subplot_titles=column_titles,
                    horizontal_spacing=0.1,
                    vertical_spacing=0.13)

# Point ids laid out as a rows x cols nested list
row_order = np.array(list_id).reshape(rows,cols).tolist()

for fila, ids_fila in enumerate(row_order, start=1):
    for columna, punto in enumerate(ids_fila, start=1):
        data_fig = (pd_analisis_ndvi
                    .query(f"id_point=={punto}")
                    .sort_values('periodo',ascending=True))

        ndvi_model_trace(data_fig, row=fila, col=columna, pd_periodo_null=pd_periodo_null)


# Title position and top margin
fig['layout']['title']['y']=0.98
fig['layout']['margin']['t']=100

# Rebuild the annotation list: each subplot title is re-emitted with a larger font and a
# 'Pronóstico' label is placed on every subplot, 12 months after the last training period
annotations_title = fig['layout']['annotations']

y_loc=0.8
yshift=4

# First title annotation found for each title text
titulos_por_texto = {}
for anotacion in annotations_title:
    titulos_por_texto.setdefault(anotacion.text, anotacion)

x_etiqueta = pd_analisis_ndvi.query("type=='training'").periodo.max() + pd.DateOffset(months=12)

annotations = []
for id_ in list_id:

    titulo_annotations = titulos_por_texto['Punto id:{id_}'.format(id_=int(id_))]

    annotations.append(dict(font=dict(size=16),
                            showarrow=titulo_annotations['showarrow'],
                            text=titulo_annotations['text'],
                            x=titulo_annotations['x'],
                            xanchor=titulo_annotations['xanchor'],
                            xref='paper',
                            y=titulo_annotations['y'],
                            yanchor=titulo_annotations['yanchor'],
                            yref='paper'))

    annotations.append(dict(x=x_etiqueta,
                            y=y_loc,
                            xref=f'x{int(id_)}',
                            yref=f'y{int(id_)}',
                            text='Pronóstico',
                            showarrow=False,
                            yshift=yshift,
                            font=dict(size=9)))

fig['layout']['annotations'] = annotations


# Common axis styling for every subplot
estilo_ejes = dict(showline=True, linewidth=1, linecolor='black', gridcolor='#E4E4E4', mirror=True,
                   ticks="outside", tickwidth=2, tickcolor='#5C2B05', ticklen=10)
fig.update_xaxes(tickformat="%Y/%m", **estilo_ejes)
fig.update_yaxes(**estilo_ejes)

# Overall layout; the subtitle shows the first and last forecast period
data_figure_pronostico = pd_analisis_ndvi.query("type=='prediction'").copy()
inicio_pronostico = data_figure_pronostico['periodo'].min().strftime('%Y/%m')
fin_pronostico = data_figure_pronostico['periodo'].max().strftime('%Y/%m')

fig.update_layout( height=800,
                   width=1500,
                   font = dict(size = 12),
                   template='plotly_white',
                   title_text="""NDVI Cerro Saroche
                        <br><sup>Pronóstico para el periodo {date_init} al {date_fin}</sup>
                        """.format(date_init=str(inicio_pronostico),
                                   date_fin=str(fin_pronostico)),
                  legend_title_text='Serie',
                  legend_title = dict( font = dict(size = 14)),
                   uniformtext_minsize=8,
                   uniformtext_mode='hide',
                   legend = dict(font_size=14)
                 )

fig.write_image(f'./{park}/figures/narx_ndvi.png')

fig.show()