# Predicciones temporales con modelos de IA

Dentro de este cuaderno de Jupyter se estudiarán distintos modelos de IA para llevar a cabo predicciones sobre series temporales. Los distintos apartados que se presentarán en el cuaderno son:

 1. Carga de librerías, modelos y herramientas a utilizar
 2. Análisis y preprocesamiento de datos
 3. Creación y estudio de predicciones con LSTM
 4. Creación y estudio de predicciones con Prophet
 5. Creación y estudio de predicciones con TimeGPT
 6. Análisis de resultados y modelo a usar

## 1. Carga de librerías, modelos y herramientas

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from utilsforecast.preprocessing import fill_gaps

Para este primer acercamiento se tomarán como referencia los datos del sensor Suntracer, localizado en el Edificio Centro de Investigaciones Tecnológicas de la Información y las Comunicaciones.

In [None]:
# Read the raw Suntracer temperature export.
data = pd.read_csv('data/wot-data-db.interactions-datas_suntracer_temperature.csv')

# Parse the timestamps, round them to the nearest minute and drop the timezone.
parsed_timestamps = pd.to_datetime(data['timestamp'])
data['timestamp'] = parsed_timestamps.dt.round("min").dt.tz_localize(None)

# Give the sensor reading column a descriptive name.
data = data.rename(columns={'data.value': 'temperature (Cº)'})

# Keep only the two columns used in the rest of the notebook.
df = data[['timestamp', 'temperature (Cº)']]

df.head(15)

In [None]:
# Draw the complete raw temperature series against time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['timestamp'], df['temperature (Cº)'])
ax.set_title('Suntracer temperature')
ax.set_ylabel('Temperature')
plt.show()

Los datos completos del Suntracer no son favorables a ser tratados directamente dentro del modelo debido a distintos problemas:
- Datos extremos y, probablemente, erróneos como temperaturas de -40º o 70º
- Ausencia de datos por largos periodos temporales, como lo son del 2023-05-16 al 2024-01-09

### Datos extremos

##### Temperaturas mínimas de -40º y máximas de 70º

In [None]:
# Summary statistics of the raw data, one row per column.
df.describe().T

### Ausencia de datos por largos periodos

In [None]:
# Auxiliary frame, indexed by timestamp, used only to inspect the missing periods.
# set_index returns a new frame, so `df` itself is left untouched.
test_missing_data = df.set_index('timestamp')

##### 16/5/2023 - 8/1/2024

In [None]:
# Rows recorded between 2023-05-16 and 2024-01-08 (label-based slice on the timestamp index).
missing_6_months = test_missing_data.loc['2023-05-16':'2024-01-08']
missing_6_months.head()

##### 27/2/2024 - 13/5/2024

In [None]:
# Rows recorded between 2024-02-27 and 2024-05-13 (label-based slice on the timestamp index).
missing_3_months = test_missing_data.loc['2024-02-27':'2024-05-13']
missing_3_months.head()

##### 19/6/2024 - 30/6/2024

In [None]:
# Rows recorded between 2024-06-19 and 2024-06-30, to check for missing or
# irregularly sampled values in that period.
missing_actual = test_missing_data.loc['2024-06-19':'2024-06-30']
missing_actual.head()

Debido a todos estos problemas, se ha optado por tomar únicamente el tramo entre marzo y mayo de 2023

In [None]:
# Keep only the rows whose timestamp lies in [2023-03-01, 2023-05-15] (both bounds inclusive).
in_study_window = df['timestamp'].between('2023-03-01', '2023-05-15')
reduced_df = df[in_study_window]
reduced_df.describe().transpose()

In [None]:
# Draw the temperature series restricted to the selected time window.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(reduced_df['timestamp'], reduced_df['temperature (Cº)'])
ax.set_title('Suntracer temperature')
ax.set_ylabel('Temperature')
plt.show()

In [None]:
# Boxplot of the temperatures in the selected window (several values still pending study).
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(reduced_df['temperature (Cº)'])
ax.set_ylabel('Temperature')
plt.show()


Antes de continuar con la elaboración y análisis de los distintos modelos, es necesario completar un preprocesamiento de los datos. Esto supone tratar los valores duplicados para obtener un único valor a partir de su media, además de añadir mediante interpolación los valores faltantes (hay minutos que no se encuentran disponibles). Estudiaremos las consecuencias de la interpolación en completed_df.

### Registros duplicados

In [None]:
# Count the rows that share a timestamp with an earlier row.
duplicates_before = reduced_df.duplicated(subset='timestamp').sum()
print('Número de registros duplicados:', duplicates_before)

# Collapse every group of equal timestamps into a single row holding the mean value.
reduced_df = reduced_df.groupby('timestamp', as_index=False).mean()

# Count again on the collapsed frame.
duplicates_after = reduced_df.duplicated(subset='timestamp').sum()
print('Número de registros duplicados:', duplicates_after)

reduced_df.describe().transpose()

In [None]:
# Show the first 10 rows of the de-duplicated frame.
reduced_df.head(10)

In [None]:
# Draw the de-duplicated temperature series against time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(reduced_df['timestamp'], reduced_df['temperature (Cº)'])
ax.set_title('Temperature over Time')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Temperature')
plt.show()

### Interpolación de valores faltantes

In [None]:
print("Original data: ", len(reduced_df), "rows")

# Adapt the dataset to the layout used with fill_gaps: columns `unique_id`, `ds`, `y`.
# rename()/assign() return new frames, so nothing is written in place on a slice
# of reduced_df (the original in-place edits triggered SettingWithCopyWarning).
gaps = (
    reduced_df[['timestamp', 'temperature (Cº)']]
    .rename(columns={'timestamp': 'ds', 'temperature (Cº)': 'y'})
    .assign(
        ds=lambda frame: pd.to_datetime(frame['ds']),
        unique_id='suntracer_temperature',
    )
)[['unique_id', 'ds', 'y']]

# Fill the gaps at minute frequency.
completed_df = fill_gaps(gaps, freq='min')
print("Filled data: ", len(completed_df), "rows")

# Fill missing `y` values by linear interpolation, in both directions.
completed_df['y'] = completed_df['y'].interpolate(method='linear', limit_direction='both')
completed_df.describe().transpose()

In [None]:
# Draw the gap-filled, interpolated series against time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(completed_df['ds'], completed_df['y'])
ax.set_title('Temperature over Time')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Temperature')
plt.show()

Testing with TimeGPT without treating the data

In [None]:
import os

from nixtla import NixtlaClient

# The API key is a secret and must never be committed with the notebook.
# It is read from the NIXTLA_API_KEY environment variable instead.
# NOTE(review): the key that was previously hard-coded here should be revoked,
# since it is already part of the file history.
nixtla_api_key = os.environ.get('NIXTLA_API_KEY')
if not nixtla_api_key:
    raise RuntimeError(
        'Set the NIXTLA_API_KEY environment variable before running this cell.'
    )

nixtla_client = NixtlaClient(api_key=nixtla_api_key)
nixtla_client.validate_api_key()

In [None]:
# Plot the reduced series with the Nixtla client, naming its time and target columns.
nixtla_client.plot(reduced_df,time_col='timestamp', target_col='temperature (Cº)')

In [None]:
# Hold out the last 15 rows of each frame as the test set and train on
# everything before them. Row slicing with iloc replaces the column-wise
# apply(lambda x: x.iloc[:-15]) and yields the same rows.
test = reduced_df.tail(15)
train = reduced_df.iloc[:-15].reset_index(drop=True)

test_filled = completed_df.tail(15)
train_filled = completed_df.iloc[:-15].reset_index(drop=True)

train.tail()

In [None]:
# Show the first rows of the held-out test set.
test.head()

In [None]:
# TimeGPT forecast on the untreated training data: 15 steps at minute
# frequency, fine-tuned for 10 steps with an MAE loss.
forecast = nixtla_client.forecast(
    df=train,
    time_col='timestamp',
    target_col='temperature (Cº)',
    freq='min',
    h=15,
    finetune_steps=10,
    finetune_loss='mae',
)

In [None]:
# Show the first rows of the forecast on the untreated data.
forecast.head()


In [None]:
# Plot the whole reduced series together with the forecast.
nixtla_client.plot(reduced_df, forecast, time_col='timestamp', target_col='temperature (Cº)')

In [None]:
# Plot the training data together with the forecast.
# NOTE(review): max_insample_length=60 presumably limits the plotted history to the last 60 observations — confirm.
nixtla_client.plot(train, forecast, time_col='timestamp', target_col='temperature (Cº)', max_insample_length=60)

In [None]:
# Plot the held-out test rows together with the forecast for a visual comparison.
nixtla_client.plot(test, forecast, time_col='timestamp', target_col='temperature (Cº)')

Detección de anomalías dentro de TimeGPT

In [None]:
# Run TimeGPT anomaly detection on the training series at minute frequency.
anomalies_df = nixtla_client.detect_anomalies(train, time_col='timestamp', target_col='temperature (Cº)', freq='min')

# The target column must be named exactly as in `train`: 'temperature (Cº)'.
# The previous value 'temperature' does not exist in the frame.
nixtla_client.plot(train, anomalies_df, time_col='timestamp', target_col='temperature (Cº)')

In [None]:
#forecast['timestamp'] = pd.to_datetime(forecast['timestamp'])


#test = pd.merge(test, forecast, on='timestamp', how='left')

### Forecast with treated Data

In [None]:
# TimeGPT forecast on the gap-filled training data: 15 steps, fine-tuned for
# 10 steps with an MAE loss.
forecast_without_gaps = nixtla_client.forecast(
    df=train_filled,
    h=15,
    # Stated explicitly (the series was filled at minute frequency) so this call
    # matches the forecast on the untreated data instead of relying on inference.
    freq='min',
    finetune_steps=10,
    finetune_loss='mae',
    time_col='ds',
    target_col='y',
    # model='timegpt-1-long-horizon'
)

In [None]:
# Show the first rows of the forecast on the gap-filled data.
forecast_without_gaps.head()

In [None]:
# Plot the gap-filled training data together with its forecast.
# NOTE(review): max_insample_length=60 presumably limits the plotted history to the last 60 observations — confirm.
nixtla_client.plot(train_filled, forecast_without_gaps, time_col='ds', target_col='y', max_insample_length=60)

In [None]:
# Plot the held-out gap-filled rows together with the forecast for a visual comparison.
nixtla_client.plot(test_filled, forecast_without_gaps, time_col='ds', target_col='y', max_insample_length=60)

In [None]:
# Evaluate the model
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mae, mse, rmse

In [None]:
# Make sure the forecast timestamps are datetimes so they can be joined on.
forecast_without_gaps['ds'] = pd.to_datetime(forecast_without_gaps['ds'])

# Attach the forecast columns to the gap-filled series. This is a left join, so
# every row of completed_df is kept and rows without a forecast hold NaN.
results_without_gaps = pd.merge(
    completed_df,
    forecast_without_gaps,
    how='left',
    on=['ds', 'unique_id'],
)
results_without_gaps.tail()

In [None]:
# Show the last rows of the forecast on the untreated data.
forecast.tail()

In [None]:
# Make sure the forecast timestamps are datetimes so they can be joined on.
forecast['timestamp'] = pd.to_datetime(forecast['timestamp'])

# Left-join the forecast onto the untreated series (rows without a forecast get
# NaN), then rename to ds/y and add a constant unique_id column.
results = (
    pd.merge(reduced_df, forecast, how='left', on=['timestamp'])
    .rename(columns={'timestamp': 'ds', 'temperature (Cº)': 'y'})
    .assign(unique_id='suntracer_temperature')
)
results.tail(20)

In [None]:
# Compute MAE, MSE and RMSE for the forecast on the gap-filled data.
# NOTE(review): results_without_gaps comes from a left merge, so rows without a
# forecast hold NaN — confirm evaluate() ignores them as intended.
evaluate(results_without_gaps, metrics=[mae, mse, rmse])

In [None]:
# Compute MAE, MSE and RMSE for the forecast on the untreated data.
# NOTE(review): results comes from a left merge on timestamp, so only rows whose
# timestamp matches a forecast step carry a prediction — confirm how evaluate() treats the NaN rows.
evaluate(results, metrics=[mae, mse, rmse])

## Prophet

In [None]:
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly

In [None]:
# Create a Prophet model with default settings and fit it on the gap-filled training data.
# NOTE(review): train_filled also carries a `unique_id` column; presumably Prophet only uses `ds` and `y` — confirm.
m = Prophet()
m.fit(train_filled)

In [None]:
# Show the last rows of the gap-filled training data.
train_filled.tail()

In [None]:
# Build the frame of timestamps to predict on, with 15 extra periods at minute frequency.
future = m.make_future_dataframe(periods=15, freq='min')
# Show its last 15 rows.
future.tail(15)

In [None]:
# Predict with the fitted Prophet model over the `future` frame.
forecast_prophet = m.predict(future)
# Show the prediction and its lower/upper bounds for the last rows.
forecast_prophet[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

In [None]:
# Plot the Prophet forecast.
fig1 = m.plot(forecast_prophet)

In [None]:
# Plot the components of the Prophet forecast.
fig2 = m.plot_components(forecast_prophet)

In [None]:
#https://facebook.github.io/prophet/docs/additional_topics.html#saving-models
from neuralprophet import NeuralProphet

In [None]:
# NeuralProphet input: only the `ds` and `y` columns of the gap-filled series.
# The copy keeps completed_df independent of later changes to this frame.
# (The earlier mid-cell completed_df.head() displayed nothing and was dropped.)
neuralProphet_df = completed_df[['ds', 'y']].copy()
neuralProphet_df.head()

In [None]:
# Create a NeuralProphet model with default settings.
n = NeuralProphet()
# Uncomment this line if you're on a Jupyter notebook
#n.set_plotting_backend('plotly-static')
# Fit on the whole gap-filled series and keep whatever fit() returns in n_metrics.
n_metrics = n.fit(neuralProphet_df)

In [None]:
# Predict over the same frame the model was fitted on (in-sample predictions).
forecast_neural = n.predict(neuralProphet_df)

In [None]:
# Plot the NeuralProphet predictions.
n.plot(forecast_neural)

In [None]:
# Plot the parameters of the fitted NeuralProphet model.
n.plot_parameters()

In [None]:
# Improve the model with autoregression (n_lags=10) and uncertainty
# estimates (5% and 95% quantiles).
# This rebinds `n`, `n_metrics` and `forecast_neural` from the earlier cells.
n = NeuralProphet(n_lags=10, quantiles=[0.05, 0.95])
n_metrics = n.fit(neuralProphet_df)
forecast_neural = n.predict(neuralProphet_df)

In [None]:
# Plot the predictions, highlighting the 1st step ahead of each forecast.
n.highlight_nth_step_ahead_of_each_forecast(1).plot(forecast_neural)

In [None]:
# Train/validation split: hold out the last 15 rows for validation and train
# on everything before them. Row slicing with iloc replaces the column-wise
# apply(lambda x: x.iloc[:-15]) and yields the same rows.
test_prophet = neuralProphet_df.tail(15)
train_prophet = neuralProphet_df.iloc[:-15].reset_index(drop=True)
train_prophet.tail()

In [None]:
# Show all 15 held-out validation rows.
test_prophet.head(15)

In [None]:
# Fit a fresh default NeuralProphet model on the training split, passing the
# held-out rows as validation data.
n_validator = NeuralProphet()
n_metrics = n_validator.fit(train_prophet, validation_df=test_prophet)
# Display what fit() returned.
n_metrics

In [None]:
# Make predictions into the future: extend the full series by 15 periods and predict.
# NOTE(review): if the cells ran in order, `n` is the model built with n_lags=10
# and the default n_forecasts; an autoregressive NeuralProphet model may only
# forecast n_forecasts steps ahead, so periods=15 might not yield 15 future rows — confirm.
future = n.make_future_dataframe(neuralProphet_df, periods=15)
forecast_future = n.predict(future)
forecast_future.tail()

### LSTM