## Pronóstico del ONI

**PROYECTO:** SISTEMA PARA EL SEGUIMIENTO DE ECOSISTEMAS VENEZOLANOS \
**AUTOR:** Javier Martinez

In [20]:
from MONGO import CONEXION
from datetime import datetime
import pandas as pd
import numpy as np

In [2]:
import locale

# Switch the LC_TIME locale to Spanish so that date formatting (e.g. strftime('%B'))
# produces Spanish month names. Requires the es_ES.UTF-8 locale to be available on the host.
locale.setlocale(locale.LC_TIME, 'es_ES.UTF-8')

'es_ES.UTF-8'

# Creando la conexión con MongoDB

In [65]:
# Open the MongoDB connection through the project's CONEXION helper.
# NOTE(review): presumably returns a pymongo Database handle — confirm in the MONGO module.
db = CONEXION.conexion()
# List the collections of the database (doubles as a quick connectivity check)
db.list_collection_names()

['estimateSSTNino34', 'polygons', 'meteorological', 'SSTNino34']

# Descargando la Información SST NIÑO3.4

In [8]:
# Today's date as a proleptic Gregorian ordinal, to compare against the "time" field
time = datetime.today().toordinal()

# Fetch every SST Niño 3.4 document whose "time" is not later than today
sst_data = db.SSTNino34.find({"time": {"$lte": time}})

# Materialise the cursor into a DataFrame and derive the date helper columns
data_pandas = pd.DataFrame(list(sst_data))
data_pandas['periodo'] = data_pandas['time'].apply(datetime.fromordinal)
data_pandas['mes_year'] = data_pandas['periodo'].dt.strftime('%B-%Y')

# Index the table by period (the 'periodo' column itself is kept)
data_pandas.index = pd.to_datetime(data_pandas['periodo'])
data_pandas.head()

Unnamed: 0_level_0,_id,year,month,nino34_mean,climatologica,anomalias,mes,time,time_actualizacion,periodo,mes_year
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-06-01,62e6d1d870bebdf1983a27a9,2022,6,26.96,27.73,-0.77,junio,738307,738367,2022-06-01,junio-2022
2022-05-01,62e6d1d870bebdf1983a27a8,2022,5,26.82,27.94,-1.12,mayo,738276,738367,2022-05-01,mayo-2022
2022-04-01,62e6d1d870bebdf1983a27a7,2022,4,26.71,27.83,-1.12,abril,738246,738367,2022-04-01,abril-2022
2022-03-01,62e6d1d870bebdf1983a27a6,2022,3,26.32,27.29,-0.98,marzo,738215,738367,2022-03-01,marzo-2022
2022-02-01,62e6d1d870bebdf1983a27a5,2022,2,25.87,26.76,-0.9,febrero,738187,738367,2022-02-01,febrero-2022


Función para el cálculo de los periodos base

In [11]:
# Base periods used for the climatology: single source of truth shared by both
# functions below (previously the same table was pasted into each of them).
# Each row is (anomalia_periodo, date_init, date_end, range_init, range_end):
#   * years date_init..date_end (inclusive) are assigned to the period identified
#     by anomalia_periodo;
#   * range_init..range_end is the 30-year window carried along with that period.
_PERIODOS_BASE = (
    (1, 1950, 1955, 1936, 1965),
    (2, 1956, 1960, 1941, 1970),
    (3, 1961, 1965, 1946, 1975),
    (4, 1966, 1970, 1951, 1980),
    (5, 1971, 1975, 1956, 1985),
    (6, 1976, 1980, 1961, 1990),
    (7, 1981, 1985, 1966, 1995),
    (8, 1986, 1990, 1971, 2000),
    (9, 1991, 1995, 1976, 2005),
    (10, 1996, 2000, 1981, 2010),
    (11, 2001, 2005, 1986, 2015),
    (12, 2006, 2010, 1991, 2020),
    (13, 2011, 2015, 1996, 2025),
    (14, 2016, 2020, 2001, 2030),
    (15, 2021, 2025, 2006, 2035),
    (16, 2026, 2030, 2011, 2040),
    (17, 2031, 2035, 2016, 2045),
    (18, 2036, 2040, 2021, 2050),
    (19, 2041, 2045, 2026, 2055),
    (20, 2046, 2050, 2031, 2060),
    (21, 2051, 2055, 2036, 2065),
)

# Column names matching the layout of each _PERIODOS_BASE row
_COLUMNAS_PERIODOS_BASE = ['anomalia_periodo', 'date_init', 'date_end', 'range_init', 'range_end']


def anomalia_periodo(year = 2020):
    """Return the identifier of the base period that contains ``year``.

    Parameters
    ----------
    year : int
        Calendar year to classify.

    Returns
    -------
    int
        Identifier (1-21) of the row of ``_PERIODOS_BASE`` whose
        ``date_init..date_end`` interval (inclusive) contains ``year``,
        or 0 when the year falls outside the table (before 1950 or after 2055).
    """
    for periodo_id, date_init, date_end, _range_init, _range_end in _PERIODOS_BASE:
        if date_init <= year <= date_end:
            return periodo_id
    return 0


def periodo_anomalias_climaticas(data_pandas):
    """Build the monthly climatology table for every base period present in the data.

    Parameters
    ----------
    data_pandas : pandas.DataFrame
        Must contain the columns ``year``, ``month`` and ``climatologica``.
        Side effect: an ``anomalia_periodo`` column is added to (or overwritten
        in) this frame in place.

    Returns
    -------
    pandas.DataFrame
        One row per (base period, month) found in the data, with the columns
        ``date_init``, ``date_end``, ``anomalia_periodo``, ``month``,
        ``climatologica`` (mean over the group), ``range_init`` and ``range_end``.
        Rows whose year maps to period 0 are dropped by the inner join.
    """
    # Tag every observation with its base period (written on the caller's frame)
    data_pandas['anomalia_periodo'] = data_pandas['year'].apply(anomalia_periodo)

    # Mean climatology per calendar month and base period
    pd_climatologica = (
        data_pandas
        .groupby(['month', 'anomalia_periodo'], as_index=False)
        .agg({'climatologica': 'mean'})
    )

    # Base-period metadata as a frame, built from the shared table
    pd_periodo_anomalias = pd.DataFrame(list(_PERIODOS_BASE), columns=_COLUMNAS_PERIODOS_BASE)

    # Inner join: only periods that actually appear in the data survive
    pd_climatologia_final = pd.merge(
        pd_periodo_anomalias,
        pd_climatologica,
        on=['anomalia_periodo'],
        how='inner',
    )
    return pd_climatologia_final[
        ['date_init', 'date_end', 'anomalia_periodo', 'month', 'climatologica', 'range_init', 'range_end']
    ]

In [12]:
# Monthly climatology per base period, derived from the observed SST records.
# Note: the call also adds an 'anomalia_periodo' column to data_pandas.
pd_perioodo_anomalias = periodo_anomalias_climaticas(data_pandas)
pd_perioodo_anomalias.head()

Unnamed: 0,date_init,date_end,anomalia_periodo,month,climatologica,range_init,range_end
0,1950,1955,1,1,26.18,1936,1965
1,1950,1955,1,2,26.39,1936,1965
2,1950,1955,1,3,26.95,1936,1965
3,1950,1955,1,4,27.39,1936,1965
4,1950,1955,1,5,27.56,1936,1965


# Datos pronóstico SST

In [74]:
# SST forecast table stored as a pickle.
# NOTE(review): read_pickle can execute arbitrary code; only load pickles produced by this project.
pd_summary = pd.read_pickle('./data/nino34/forecast_narx_sst_nino34.pkl')

# Month name of each period (formatted with the active LC_TIME locale)
pd_summary['mes'] = pd_summary['periodo'].dt.strftime('%B')

# Use the observed mean where it exists, otherwise fall back to the predicted mean
observado = pd_summary['nino34_mean']
pd_summary['nino34_mean'] = np.where(
    np.isnan(observado),
    pd_summary['prediction_nino34_mean'],
    observado,
)

# Calendar parts taken from the datetime index, and the base period of each year
pd_summary['year'] = pd_summary.index.year.astype(int)
pd_summary['month'] = pd_summary.index.month.astype(int)
pd_summary['anomalia_periodo'] = (
    pd_summary['year']
    .apply(lambda anio: anomalia_periodo(int(anio)))
    .astype(int)
)

# Ordinal encodings: the period itself and the date of this update
pd_summary['time'] = pd_summary['periodo'].apply(datetime.toordinal)
pd_summary['time_actualizacion'] = datetime.today().toordinal()

pd_summary.head()

Unnamed: 0_level_0,periodo,sst,sst_exogena,prediction,type,nino34_mean,prediction_nino34_mean,mes,year,month,anomalia_periodo,time,time_actualizacion
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1950-01-01,1950-01-01,0.065844,0.065844,,training,24.56,,enero,1950,1,1,711858,738382
1950-02-01,1950-02-01,0.17238,0.17238,,training,25.07,,febrero,1950,2,1,711889,738382
1950-03-01,1950-03-01,0.33721,0.33721,,training,25.88,,marzo,1950,3,1,711917,738382
1950-04-01,1950-04-01,0.418685,0.418685,,training,26.29,,abril,1950,4,1,711948,738382
1950-05-01,1950-05-01,0.398931,0.398931,,training,26.19,,mayo,1950,5,1,711978,738382


Integrando bases de datos

In [75]:
# Attach the base-period climatology to every forecast row
# (left join: forecast rows without a matching climatology are kept)
pd_oni = pd_summary.merge(
    pd_perioodo_anomalias,
    how='left',
    on=['month', 'anomalia_periodo'],
)

# Keep only the columns needed to compute and store the ONI
pd_oni = pd_oni[
    ['periodo', 'year', 'month', 'nino34_mean', 'climatologica', 'mes', 'time', 'type', 'time_actualizacion']
]

pd_oni.head()

Unnamed: 0,periodo,year,month,nino34_mean,climatologica,mes,time,type,time_actualizacion
0,1950-01-01,1950,1,24.56,26.18,enero,711858,training,738382
1,1950-02-01,1950,2,25.07,26.39,febrero,711889,training,738382
2,1950-03-01,1950,3,25.88,26.95,marzo,711917,training,738382
3,1950-04-01,1950,4,26.29,27.39,abril,711948,training,738382
4,1950-05-01,1950,5,26.19,27.56,mayo,711978,training,738382


In [81]:
# SST anomaly: Niño 3.4 mean minus the climatology of its base period
pd_oni['anomalias'] = pd_oni['nino34_mean'].sub(pd_oni['climatologica'])

# ONI: centred 3-row rolling mean of the anomalies
# (min_periods=1 lets the first and last rows use the neighbours that exist)
ventana = pd_oni['anomalias'].rolling(window=3, center=True, min_periods=1)
pd_oni['oni'] = ventana.mean()

# Index the table by period (the 'periodo' column itself is kept)
pd_oni.index = pd.to_datetime(pd_oni['periodo'])
pd_oni.head()

Unnamed: 0_level_0,periodo,year,month,nino34_mean,climatologica,mes,time,type,time_actualizacion,anomalias,oni
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1950-01-01,1950-01-01,1950,1,24.56,26.18,enero,711858,training,738382,-1.62,-1.47
1950-02-01,1950-02-01,1950,2,25.07,26.39,febrero,711889,training,738382,-1.32,-1.336667
1950-03-01,1950-03-01,1950,3,25.88,26.95,marzo,711917,training,738382,-1.07,-1.163333
1950-04-01,1950-04-01,1950,4,26.29,27.39,abril,711948,training,738382,-1.1,-1.18
1950-05-01,1950-05-01,1950,5,26.19,27.56,mayo,711978,training,738382,-1.37,-1.07


In [77]:
# Inspect the rows after 2022
pd_oni.loc[pd_oni['year'] > 2022]

Unnamed: 0_level_0,periodo,year,month,nino34_mean,climatologica,mes,time,type,time_actualizacion,anomalias,oni
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-01,2023-01-01,2023,1,28.514276,26.55,enero,738521,self_prediction,738382,1.964276,1.874889
2023-02-01,2023-02-01,2023,2,28.546273,26.76,febrero,738552,self_prediction,738382,1.786273,1.689331
2023-03-01,2023-03-01,2023,3,28.607445,27.29,marzo,738580,self_prediction,738382,1.317445,1.291263
2023-04-01,2023-04-01,2023,4,28.60007,27.83,abril,738611,self_prediction,738382,0.77007,0.882335
2023-05-01,2023-05-01,2023,5,28.49949,27.94,mayo,738641,self_prediction,738382,0.55949,0.639598
2023-06-01,2023-06-01,2023,6,28.319234,27.73,junio,738672,self_prediction,738382,0.589234,0.574362


Gráfico de ONI

In [85]:
import plotly.graph_objects as go
from plotly.graph_objects import Layout

# Plot data: every row except the one at the newest index value
data_fig = pd_oni[pd_oni.index < pd_oni.index.max()].copy()
# Sign flag of the ONI value: 0 when negative, 1 otherwise
data_fig['color'] = data_fig['oni'].apply(lambda valor: 0 if valor < 0 else 1)

ultimo_periodo = data_fig.index.max()
# x position of the threshold labels, 5 months past the last plotted period
max_date = ultimo_periodo + pd.DateOffset(months=5)
# Rows produced by the model's self-prediction
pronostico = data_fig[data_fig['type'] == 'self_prediction']

fig = go.Figure(layout=Layout(plot_bgcolor='rgba(0,0,0,0)'))

# Zero baseline; the ONI curve added next is filled down to this trace
fechas = data_fig.index.tolist()
fig.add_trace(go.Scatter(
    x=fechas,
    y=[0] * len(fechas),
    mode='lines',
    name='NIÑO3.4 NARX entrenamiento',
    line={'color': '#B0ACAC', 'width': 2},
    fill='tozeroy',
    fillcolor='#F5FF8D',
    showlegend=False,
))

# Full ONI series
fig.add_trace(go.Scatter(
    x=data_fig.index,
    y=data_fig['oni'],
    mode='lines+markers',
    marker_symbol='x-thin',
    marker_line_width=2,
    marker_size=3,
    marker_line_color='#003CAF',
    marker_color='#003CAF',
    name='NIÑO3.4 NARX entrenamiento',
    line={'color': '#0057FF', 'width': 3},
    fill='tonexty',
    fillcolor='#8FB5FE',
    showlegend=False,
))

# Forecast segment, labelled with the ONI value rounded to two decimals
fig.add_trace(go.Scatter(
    x=pronostico.index,
    y=pronostico['oni'],
    text=pronostico['oni'].apply(lambda valor: str(round(valor, 2))),
    textposition="bottom right",
    marker_symbol='star',
    marker_line_width=3,
    marker_size=3,
    marker_line_color='#0057FF',
    marker_color='#0057FF',
    mode='lines+markers+text',
    name='NIÑO3.4 NARX entrenamiento',
    line={'color': '#EF02F3', 'width': 3},
    showlegend=False,
))

# Intensity thresholds: (y level, label or None, line width, colour).
# Labels sit 0.2 above positive levels and 0.35 below negative ones;
# the +/-2.5 lines are drawn without a label.
umbrales = [
    (0.5, "débil", 0.75, '#FF6C6C'),
    (1, "moderado", 1, '#FF3F3F'),
    (1.5, "fuerte", 1.25, '#FF0000'),
    (2, "muy fuerte", 1.50, '#D70000'),
    (2.5, None, 1.75, '#AD0000'),
    (-0.5, "débil", 0.75, '#69A6FF'),
    (-1, "moderado", 1, '#6979FF'),
    (-1.5, "fuerte", 1.25, '#3F53FF'),
    (-2, "muy fuerte", 1.5, '#001BFF'),
    (-2.5, None, 1.75, '#00059A'),
]
for nivel, etiqueta, grosor, tono in umbrales:
    if etiqueta is not None:
        y_etiqueta = nivel + 0.2 if nivel > 0 else nivel - 0.35
        fig.add_annotation(x=max_date, y=y_etiqueta, text=etiqueta, showarrow=False,
                           yshift=10, font=dict(color=tono))
    fig.add_hline(y=nivel, line_width=grosor, line_dash="dash", line_color=tono)

# "El Niño" / "La Niña" captions, placed four years before the last plotted period
x_leyenda = ultimo_periodo - pd.DateOffset(months=4*12)
fig.add_annotation(x=x_leyenda, y=2+0.2, text="El Niño", showarrow=False, yshift=15,
                   font=dict(color='#D70000'))
fig.add_annotation(x=x_leyenda, y=-2-0.35, text="La Niña", showarrow=False, yshift=15,
                   font=dict(color='#001BFF'))

# Vertical marker at the first forecast period
fig.add_vline(x=pronostico.index.min(), line_width=3, line_dash="dash", line_color="#580606")

# Axis styling shared by both axes
estilo_ejes = dict(showline=True, linewidth=1, linecolor='black', gridcolor='#E4E4E4',
                   mirror=True, ticks="outside", tickwidth=2, tickcolor='#5C2B05', ticklen=10)
fig.update_xaxes(tickformat="%Y/%m", **estilo_ejes)
fig.update_yaxes(**estilo_ejes)

fig.update_traces(textfont_size=14)

# Title quoting the first and last forecast dates
titulo = """
                        Índice Niño Oceánico (ONI) pronóstico periodo {date_init} al {date_fin}
                        <br><sup>Promedio de 3-meses para las anomalías SST en la región Niño 3.4 (variación periodos base de 30-años)
                        </sup>
                        """.format(date_init=pronostico.index.min().strftime('%Y-%m-%d'),
                                   date_fin=pronostico.index.max().strftime('%Y-%m-%d'))

fig.update_layout(
    title=titulo,
    xaxis_title='Mes',
    yaxis_title='Promedio 3-Meses anomalías SST (°C)',
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    height=800,
    width=1500,
    font=dict(size=22),
    # Visible window: last five years plus room on the right for the threshold labels
    xaxis_range=[ultimo_periodo - pd.DateOffset(months=5*12), max_date + pd.DateOffset(months=5)],
)

fig.write_image('./figure/nino34/pronostico_oni.png')
fig.show()

# Guardando data en MONGODB

In [58]:
# Preview the ONI table before it is stored
pd_oni.head()

Unnamed: 0_level_0,periodo,year,month,nino34_mean,climatologica,mes,time,type,time_actualizacion,anomalias,oni
periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1950-01-01,1950-01-01,1950,1,24.56,26.18,enero,711858,training,738382,-1.62,-1.47
1950-02-01,1950-02-01,1950,2,25.07,26.39,febrero,711889,training,738382,-1.32,-1.336667
1950-03-01,1950-03-01,1950,3,25.88,26.95,marzo,711917,training,738382,-1.07,-1.163333
1950-04-01,1950-04-01,1950,4,26.29,27.39,abril,711948,training,738382,-1.1,-1.18
1950-05-01,1950-05-01,1950,5,26.19,27.56,mayo,711978,training,738382,-1.37,-1.07


In [87]:
# Most recent period in the ONI table (forecast rows included)
pd_oni.index.max()

Timestamp('2023-06-01 00:00:00')

In [88]:
import json

# Serialise the ONI table into plain JSON-compatible records, one dict per row.
# The 'periodo' timestamp column is left out of the stored documents.
oni_sin_periodo = pd_oni.drop(columns=['periodo'])
documentos = json.loads(oni_sin_periodo.to_json(orient="records"))

# Last record, shown as a sanity check
documentos[-1]


{'year': 2023,
 'month': 6,
 'nino34_mean': 28.3192340737,
 'climatologica': 27.73,
 'mes': 'junio',
 'time': 738672,
 'type': 'self_prediction',
 'time_actualizacion': 738382,
 'anomalias': 0.5892340737,
 'oni': 0.5743618351}

In [67]:
# Handle to the collection that receives the ONI estimates (nothing is written here)
coleccion = db['estimateSSTNino34']

In [68]:
# Upsert one document per period, keyed on the period ordinal "time".
# The previous filter compared "time_actualizacion" (the date of the update) with the
# document's period ordinal, so it practically never matched: each run inserted a fresh
# copy of every period, or overwrote an unrelated document when the values happened to
# coincide. Matching on "time" refreshes the existing document of each period instead.
# NOTE(review): duplicates already created with the old filter are not removed here.
for doc in documentos:
    # doc["time"] (not .get) so a record without a period fails loudly instead of
    # matching on {"time": None}
    coleccion.update_one({"time": doc["time"]}, {"$set": doc}, upsert=True)