# Test Greykite

### Imports

In [1]:
import pandas as pd
from greykite.algo.changepoint.adalasso.changepoint_detector import ChangepointDetector
from greykite.algo.forecast.silverkite.constants.silverkite_holiday import SilverkiteHoliday
from greykite.algo.forecast.silverkite.constants.silverkite_seasonality import SilverkiteSeasonalityEnum
from greykite.algo.forecast.silverkite.forecast_simple_silverkite_helper import cols_interact
from greykite.common import constants as cst
from greykite.common.features.timeseries_features import build_time_features_df
from greykite.common.features.timeseries_features import convert_date_to_continuous_time
from greykite.framework.benchmark.data_loader_ts import DataLoaderTS
from greykite.framework.templates.autogen.forecast_config import EvaluationPeriodParam
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from greykite.framework.input.univariate_time_series import UnivariateTimeSeries
from greykite.detection.detector.greykite import GreykiteDetector
from greykite.detection.detector.config import ADConfig
from greykite.detection.detector.data import DetectorData

import plotly

import warnings
# Silence every Python warning for the rest of the session. This is a blanket
# filter, so it also hides warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

In [11]:
# Read the raw sales export (the file without 2025 data) and turn the
# `fecha_venta` column into datetimes so it can be used as a time axis.
retail_csv = pd.read_csv('./data_productos_ventas_3_sin_2025.csv')
df = pd.DataFrame(retail_csv)
df['fecha_venta'] = df['fecha_venta'].pipe(pd.to_datetime)
df

Unnamed: 0,fecha_venta,sucursal,codigo_articulo,producto,precio_venta,cantidad
0,2022-03-15,BARQUISIMETO,1973,MARGARINA 500GR MAVESA.,8.86,136
1,2022-03-15,CAGUA,1973,MARGARINA 500GR MAVESA.,8.86,160
2,2022-03-16,BARQUISIMETO,386,HUEVOS A GRANEL,19.60,109
3,2022-03-16,GUARENAS,386,HUEVOS A GRANEL,19.60,331
4,2022-03-16,IPSFA,386,HUEVOS A GRANEL,19.60,694
...,...,...,...,...,...,...
32441,2024-12-31,GUARENAS,386,HUEVOS A GRANEL,270.04,346
32442,2024-12-31,LA CASCADA,386,HUEVOS A GRANEL,270.04,266
32443,2024-12-31,MAÑONGO - NAGUANAGUA,386,HUEVOS A GRANEL,259.63,56
32444,2024-12-31,SANTA CECILIA - VALENCIA,386,HUEVOS A GRANEL,270.04,17


In [12]:
# Keep only article 386, add up the units sold per sale date (all branches
# together), and rename the columns to the `ts` / `y` names that the Greykite
# calls further down expect.
df = (
    df[df['codigo_articulo'] == 386]
    .groupby(['fecha_venta'])
    .agg({'cantidad': 'sum'})
    .reset_index()
    .rename(columns={'fecha_venta': 'ts', 'cantidad': 'y'})
)

df

Unnamed: 0,ts,y
0,2022-03-15,1909
1,2022-03-16,1860
2,2022-03-17,1940
3,2022-03-18,2098
4,2022-03-19,2387
...,...,...
1014,2024-12-27,4246
1015,2024-12-28,4844
1016,2024-12-29,4893
1017,2024-12-30,6767


In [13]:
# Column names and sampling frequency shared by the time-series loader and the
# forecast metadata: `ts` is the time column, `y` the value column, and "D"
# means daily data ("H" would be hourly, "W" weekly, etc.).
series_spec = dict(time_col="ts", value_col="y", freq="D")

# `ts` is not a DataFrame, although it looks like one: it is a Greykite
# UnivariateTimeSeries loaded from `df`. The optional `anomaly_info` and
# `regressor_cols` arguments are intentionally not passed here.
ts = UnivariateTimeSeries()
ts.load_data(df=df, **series_spec)

metadata = MetadataParam(**series_spec)

In [14]:
# NOTE(review): this default-constructed detector is not referenced by any
# later cell shown here; the configured `detector` below is the one that is
# fitted. Confirm whether it can be removed.
anomaly_detector = GreykiteDetector()

# Forecast settings used inside the detector: the AUTO model template and a
# 7-step horizon. `coverage` stays None because the confidence interval is
# tuned by the anomaly-detection model.
forecast_config = ForecastConfig(
    metadata_param=metadata,
    model_template=ModelTemplateEnum.AUTO.name,
    forecast_horizon=7,
    coverage=None,
)

# Anomaly-detection settings left at their defaults.
ad_config = ADConfig()

detector = GreykiteDetector(
    reward=None,
    ad_config=ad_config,
    forecast_config=forecast_config,
)

df

Unnamed: 0,ts,y
0,2022-03-15,1909
1,2022-03-16,1860
2,2022-03-17,1940
3,2022-03-18,2098
4,2022-03-19,2387
...,...,...
1014,2024-12-27,4246
1015,2024-12-28,4844
1016,2024-12-29,4893
1017,2024-12-30,6767


In [15]:
# Wrap the daily series in the detector's input container, then fit the
# configured detector on it.
train_data = DetectorData(df=df)
detector.fit(data=train_data)

Fitting 3 folds for each of 1 candidates, totalling 3 fits


In [16]:
# Show the per-timestamp fit results produced by the detector (actual value,
# forecast, interval bounds and the predicted anomaly flag).
print(detector.fitted_df)

# Plot the training-phase fit. The title names the series that is actually
# being modelled (article 386, "HUEVOS A GRANEL") instead of the
# "Peyton Manning" label left over from the Greykite tutorial.
fig = detector.plot(
    phase="train",
    title="Greykite Detector HUEVOS A GRANEL (386) - fit phase")
plotly.io.show(fig)

             ts  actual     forecast  forecast_lower  forecast_upper  \
0    2022-03-15  1909.0  2394.440052     2329.268576     2459.611529   
1    2022-03-16  1860.0  2418.129234     2352.957757     2483.300711   
2    2022-03-17  1940.0  2391.084262     2325.912785     2456.255739   
3    2022-03-18  2098.0  2304.600444     2239.428968     2369.771921   
4    2022-03-19  2387.0  2583.472526     2518.301049     2648.644003   
...         ...     ...          ...             ...             ...   
1018 2024-12-27  4246.0  5078.973076     5013.801599     5144.144552   
1019 2024-12-28  4844.0  6278.439021     6213.267545     6343.610498   
1020 2024-12-29  4893.0  5484.636107     5419.464631     5549.807584   
1021 2024-12-30  6767.0  6418.958222     6353.786746     6484.129699   
1022 2024-12-31  5450.0  5420.704903     5355.533426     5485.876379   

      is_anomaly_predicted   z_score is_anomaly  
0                     True -0.467081       None  
1                     True -0.53702

In [17]:
forecaster = Forecaster()

# NOTE(review): the forecaster is trained on the detector's fitted `forecast`
# column (renamed to `y`), not on the `actual` column - confirm this is the
# intended input. This also replaces the `df` built in the earlier cells.
df = detector.fitted_df.rename(columns={"forecast": "y"})

# Silverkite template, 72 steps ahead, prediction-interval coverage of 0.95.
# No custom model components or evaluation period are supplied.
silverkite_config = ForecastConfig(
    model_template=ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=72,
    coverage=0.95,
    metadata_param=metadata,
)

result = forecaster.run_forecast_config(df=df, config=silverkite_config)

Fitting 3 folds for each of 1 candidates, totalling 3 fits


In [18]:
# Plot the forecast produced by the Silverkite run above.
forecast_result = result.forecast
forecast_result.plot()

In [46]:
# Test-period rows of the forecast result.
frecast_wo_2025_df = result.forecast.df_test

# Total over every row dated 2025-01-01 or later.
# NOTE(review): this sums the `forecast_lower` column - confirm that the lower
# bound, rather than another forecast column, is what should be totalled.
is_2025_or_later = frecast_wo_2025_df['ts'] >= '2025-01-01'
frecast_wo_2025_df.loc[is_2025_or_later, 'forecast_lower'].sum()

351578.0099160804

In [47]:
# Lay the backtest test-set metrics out as one row labelled "Value", then
# transpose so each metric is printed on its own line.
backtest_metrics = pd.DataFrame(result.backtest.test_evaluation, index=["Value"])
print(backtest_metrics.T)

                                                           Value
CORR                                                    0.847947
R2                                                      0.622691
MSE                                                 225255.84206
RMSE                                                  474.611254
MAE                                                   401.954584
MedAE                                                 354.605279
MAPE                                                    9.495051
MedAPE                                                  8.910729
sMAPE                                                    4.57345
Q80                                                   137.648364
Q95                                                     105.9839
Q99                                                    97.540043
OutsideTolerance1p                                      0.930556
OutsideTolerance2p                                         0.875
OutsideTolerance3p       

In [23]:
# Repeat the load / filter / aggregate steps on './data_productos_ventas_3.csv'.
# NOTE(review): presumably this file includes the 2025 rows (the next cell
# filters on ts >= 2025-01-01) - confirm. This replaces `retail_csv` and `df`.
retail_csv = pd.read_csv('./data_productos_ventas_3.csv')
df = pd.DataFrame(retail_csv)
df['fecha_venta'] = df['fecha_venta'].pipe(pd.to_datetime)

# Article 386 only, units summed per sale date, columns renamed to `ts` / `y`.
df = (
    df[df['codigo_articulo'] == 386]
    .groupby(['fecha_venta'])
    .agg({'cantidad': 'sum'})
    .reset_index()
    .rename(columns={'fecha_venta': 'ts', 'cantidad': 'y'})
)

# Load the daily series into a Greykite UnivariateTimeSeries and plot it. The
# optional `anomaly_info` and `regressor_cols` arguments are not passed.
ts2025 = UnivariateTimeSeries()
ts2025.load_data(df=df, time_col="ts", value_col="y", freq="D")

ts2025.plot()

In [None]:
# Total of `y` over every row dated 2025-01-01 or later in the reloaded series.
df.loc[df['ts'] >= '2025-01-01', 'y'].sum()

290965