
# Series de tiempo

In [0]:
%pip install pmdarima prophet xgboost

In [0]:
import mlflow
import pandas as pd
import numpy as np

from pmdarima import auto_arima

import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing

from prophet import Prophet
from xgboost import XGBRegressor

from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [0]:
# Load the raw dataset; later cells rely on its 'Month' and '#Passengers' columns.
air_passengers = pd.read_csv(filepath_or_buffer="AirPassengers.csv")

In [0]:
# First look at the raw series: passenger counts against the row index.
sns.lineplot(
    data=air_passengers,
    x=air_passengers.index,
    y='#Passengers',
)
plt.show()


## Descomposición de series de tiempo

In [0]:
# Split the series into trend, seasonal and residual components using an
# additive model with a 12-observation seasonal period.
# The model name is spelled 'additive' as documented by statsmodels; the
# previous value 'aditive' was a typo (NOTE(review): it presumably only worked
# because statsmodels does not strictly validate this string).
decomposition = seasonal_decompose(
    air_passengers['#Passengers'],
    model='additive',
    period=12,
)

In [0]:
# Draw the panels produced by the decomposition result and render them.
decomposition_figure = decomposition.plot()
plt.show()


## MLflow

In [0]:
# Point the MLflow model registry at the "databricks" URI.
mlflow.set_registry_uri("databricks")

# Workspace path of the experiment that collects every run of this notebook.
EXPERIMENT_PATH = "/Users/miguel.arquez12@gmail.com/Series de Tiempo - DSRP Agosto 6 2025"

# create_experiment raises if the experiment already exists, which made this
# cell fail on every re-run of the notebook. Only create it when it is missing
# so that "Restart & Run All" is safe.
if mlflow.get_experiment_by_name(EXPERIMENT_PATH) is None:
    mlflow.create_experiment(EXPERIMENT_PATH)

In [0]:
# Make this experiment the active one so the runs started below are logged to it.
mlflow.set_experiment(
    experiment_name="/Users/miguel.arquez12@gmail.com/Series de Tiempo - DSRP Agosto 6 2025"
)

In [0]:
# Chronological hold-out: the first 80% of the rows are used for training and
# the remaining rows for testing (no shuffling, order is preserved).
split_position = int(len(air_passengers) * 0.8)
train_air_passengers = air_passengers.iloc[:split_position]
test_air_passengers = air_passengers.iloc[split_position:]

In [0]:
# Fit auto-ARIMA on the training split, evaluate it on the hold-out split and
# log metrics plus a forecast plot to MLflow.
with mlflow.start_run(run_name="AUTOARIMA v1") as run:
    # m=12 sets the seasonal period, consistent with period=12 /
    # seasonal_periods=12 used elsewhere in this notebook. Without `m`,
    # pmdarima uses m=1, which disables the seasonal terms even when
    # seasonal=True is passed.
    model = auto_arima(train_air_passengers['#Passengers'], seasonal=True, m=12)
    predictions = model.predict(n_periods=len(test_air_passengers))

    # Ground truth for the forecast horizon, extracted once and reused.
    y_true = test_air_passengers['#Passengers'].values
    mae = mean_absolute_error(y_true, predictions)
    mse = mean_squared_error(y_true, predictions)
    rmse = mse ** 0.5
    mape = mean_absolute_percentage_error(y_true, predictions)

    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAPE", mape)

    # Train / test / forecast on the same axes; the figure is stored as a run artifact.
    sns.lineplot(x=train_air_passengers.index, y='#Passengers', data=train_air_passengers, label='Train')
    sns.lineplot(x=test_air_passengers.index, y='#Passengers', data=test_air_passengers, label='Test')
    sns.lineplot(x=test_air_passengers.index, y=predictions, label='Predictions')
    mlflow.log_figure(plt.gcf(), "predictions.png")

# Summary of the selected ARIMA model. It must come after the fit: it used to
# live in a cell placed before `model` was defined, which raised NameError on
# a fresh kernel.
model.arima_res_.summary()


In [0]:
# Holt-Winters exponential smoothing (additive trend and seasonality) fitted on
# the training split, scored on the hold-out split and logged to MLflow.
with mlflow.start_run(run_name="HOTLWINTERS ETS") as run:
    train_series = train_air_passengers['#Passengers']
    model = ExponentialSmoothing(
        train_series,
        trend='add',
        seasonal='add',
        seasonal_periods=12,
    ).fit()

    # Forecast exactly as many steps as there are hold-out rows.
    horizon = len(test_air_passengers)
    predictions = model.forecast(horizon)

    actuals = test_air_passengers['#Passengers'].values
    mae = mean_absolute_error(actuals, predictions)
    mse = mean_squared_error(actuals, predictions)
    rmse = mse ** 0.5
    mape = mean_absolute_percentage_error(actuals, predictions)

    logged_metrics = (("MAE", mae), ("MSE", mse), ("RMSE", rmse), ("MAPE", mape))
    for metric_name, metric_value in logged_metrics:
        mlflow.log_metric(metric_name, metric_value)

    # Observed train and test curves first, then the forecast on top.
    for frame, legend in ((train_air_passengers, 'Train'), (test_air_passengers, 'Test')):
        sns.lineplot(x=frame.index, y='#Passengers', data=frame, label=legend)
    sns.lineplot(x=test_air_passengers.index, y=predictions, label='Predictions')
    mlflow.log_figure(plt.gcf(), "predictions.png")


In [0]:
# Prophet fitted on the training split, scored on the hold-out split and
# logged to MLflow.
with mlflow.start_run(run_name="PROPHET") as run:
    # Prophet is fed a frame with a 'ds' date column and a 'y' target column.
    prophet_columns = {'Month': 'ds', '#Passengers': 'y'}
    TRAIN_PROPHET_DF = (
        train_air_passengers
        .rename(columns=prophet_columns)
        .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    )
    TEST_PROPHET_DF = (
        test_air_passengers
        .rename(columns=prophet_columns)
        .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    )

    model = Prophet(yearly_seasonality=True, daily_seasonality=False)
    model.fit(TRAIN_PROPHET_DF)

    # Predict on the hold-out dates and keep only the point forecast column.
    predictions = model.predict(TEST_PROPHET_DF)
    y_pred = predictions['yhat'].values

    actuals = TEST_PROPHET_DF['y'].values
    mae = mean_absolute_error(actuals, y_pred)
    mse = mean_squared_error(actuals, y_pred)
    rmse = mse ** 0.5
    mape = mean_absolute_percentage_error(actuals, y_pred)

    logged_metrics = (("MAE", mae), ("MSE", mse), ("RMSE", rmse), ("MAPE", mape))
    for metric_name, metric_value in logged_metrics:
        mlflow.log_metric(metric_name, metric_value)

    for frame, legend in ((train_air_passengers, 'Train'), (test_air_passengers, 'Test')):
        sns.lineplot(x=frame.index, y='#Passengers', data=frame, label=legend)
    sns.lineplot(x=test_air_passengers.index, y=y_pred, label='Predictions')
    mlflow.log_figure(plt.gcf(), "predictions.png")

In [0]:
# XGBoost regressor on lagged values of the series, scored on the last 20% of
# the rows and logged to MLflow.
with mlflow.start_run(run_name="XGBOOST v2") as run:
    xgboost_df = air_passengers.rename(columns={'Month': 'ds', '#Passengers': 'y'})

    # Parse the dates from this frame's own column. The previous code read them
    # from TRAIN_PROPHET_DF, a variable created in another cell that only
    # covers the training rows (hidden state, and NaT for every test row).
    xgboost_df["ds"] = pd.to_datetime(xgboost_df['ds'])

    # Features: the previous 1..4 observations of the target. The first rows
    # have NaN lags; they are passed through to XGBoost unchanged, as before.
    for lag in range(1, 5):
        xgboost_df[f"LAG{lag}"] = xgboost_df['y'].shift(lag)

    X, y = xgboost_df.drop(['y', "ds"], axis=1), xgboost_df['y']

    # shuffle=False keeps the chronological order; random_state is omitted
    # because it has no effect without shuffling.
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

    xgb = XGBRegressor().fit(X_train, y_train)
    # NOTE(review): the test lags are built from observed values, so these are
    # one-step-ahead predictions and are not directly comparable with the
    # multi-step forecasts logged by the other runs.
    y_pred = xgb.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = mse ** 0.5
    mape = mean_absolute_percentage_error(y_test, y_pred)

    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAPE", mape)

    sns.lineplot(x=X_train.index, y=y_train, label='Train')
    sns.lineplot(x=X_test.index, y=y_test, label='Test')
    sns.lineplot(x=X_test.index, y=y_pred, label='Predictions')
    mlflow.log_figure(plt.gcf(), "predictions.png")


## Ensamble

In [0]:
# Stacked ensemble: ARIMA, Holt-Winters, Prophet and XGBoost are fitted on the
# training split and a linear regression combines their outputs. The combined
# forecast is scored on the hold-out split and logged to MLflow.
with mlflow.start_run(run_name="ENSAMBLE v1") as run:
    column_map = {'Month': 'ds', '#Passengers': 'y'}
    ensemble_df = train_air_passengers.rename(columns=column_map)
    test_ensemble_df = test_air_passengers.rename(columns=column_map)

    # Build the lag features on the full series and split afterwards, so the
    # first test rows see the last training observations. Computing the lags
    # inside the test slice left its first four rows with NaN lags.
    lagged_df = air_passengers.rename(columns=column_map)
    for lag in range(1, 5):
        lagged_df[f"LAG{lag}"] = lagged_df['y'].shift(lag)
    xgboost_ensemble_df = lagged_df.loc[ensemble_df.index]
    test_xgboost_ensemble_df = lagged_df.loc[test_ensemble_df.index]

    X_xgboost, y_xgboost = xgboost_ensemble_df.drop(['y', "ds"], axis=1), xgboost_ensemble_df['y']
    X_test_xgboost = test_xgboost_ensemble_df.drop(["ds", "y"], axis=1)

    # Base models. m=12 gives auto_arima the seasonal period; with the default
    # m=1 the seasonal terms are disabled even when seasonal=True is passed.
    arima = auto_arima(ensemble_df['y'], seasonal=True, m=12)
    ets = ExponentialSmoothing(ensemble_df['y'], trend='add', seasonal='add', seasonal_periods=12).fit()
    prophet = Prophet(yearly_seasonality=True, daily_seasonality=False)
    prophet.fit(ensemble_df)
    xgb = XGBRegressor().fit(X_xgboost, y_xgboost)

    # Seeded generator so the random "exogenous" column, and therefore the
    # logged metrics, are reproducible across runs.
    rng = np.random.default_rng(42)

    # Meta-learner training set: in-sample output of each base model.
    # NOTE(review): in-sample fitted values favour base models that overfit the
    # training data; out-of-fold predictions would give fairer weights.
    final_ensemble_df = pd.DataFrame(
        {
            "arima": arima.predict_in_sample(),
            "ets": ets.fittedvalues,
            "prophet": prophet.predict(ensemble_df)["yhat"].values,
            "xgboost": xgb.predict(X_xgboost),
            "exogenous": rng.normal(size=len(ensemble_df)),
            "y": ensemble_df["y"],
        }
    )
    lr = LinearRegression().fit(final_ensemble_df.drop(["y"], axis=1), final_ensemble_df["y"])

    # Same feature columns, in the same order, for the hold-out horizon.
    test_final_ensemble_df = pd.DataFrame(
        {
            "arima": arima.predict(n_periods=len(test_air_passengers)),
            "ets": ets.forecast(steps=len(test_air_passengers)),
            "prophet": prophet.predict(test_ensemble_df)["yhat"].values,
            "xgboost": xgb.predict(X_test_xgboost),
            "exogenous": rng.normal(size=len(test_ensemble_df)),
        }
    )

    y_pred = lr.predict(test_final_ensemble_df)

    mae = mean_absolute_error(test_ensemble_df['y'], y_pred)
    mse = mean_squared_error(test_ensemble_df['y'], y_pred)
    rmse = mse ** 0.5
    mape = mean_absolute_percentage_error(test_ensemble_df['y'], y_pred)

    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MAPE", mape)

    # Unweighted mean of the four base forecasts, plotted as a baseline next
    # to the stacked prediction.
    average_prediction = test_final_ensemble_df[["arima", "ets", "prophet", "xgboost"]].mean(axis=1)

    sns.lineplot(x=ensemble_df.index, y=ensemble_df['y'], label='Train')
    sns.lineplot(x=test_final_ensemble_df.index, y=test_ensemble_df['y'], label='Test')
    # Reuse y_pred instead of calling lr.predict a second time.
    sns.lineplot(x=test_final_ensemble_df.index, y=y_pred, label='Predictions (ensemble)')
    sns.lineplot(x=test_final_ensemble_df.index, y=average_prediction, label='avg prediction')

    mlflow.log_figure(plt.gcf(), "predictions.png")