In [None]:
import coiled

# Coiled cluster handle; it is passed to the tuning call below as the
# execution engine, so this cell must run before the tuning cell.
cluster = coiled.Cluster(
    name="han-wang-1e3d51eb-8",
    software="fugue-env",
)

In [None]:
import numpy as np
import pandas as pd
import datetime

from typing import Dict, Any, Tuple
from copy import deepcopy

# greykite configs
from greykite.algo.changepoint.adalasso.changepoint_detector import ChangepointDetector
from greykite.algo.forecast.silverkite.constants.silverkite_holiday import SilverkiteHoliday
from greykite.algo.forecast.silverkite.constants.silverkite_seasonality import SilverkiteSeasonalityEnum
from greykite.algo.forecast.silverkite.forecast_simple_silverkite_helper import cols_interact
from greykite.common import constants as cst
from greykite.common.features.timeseries_features import build_time_features_df
from greykite.common.features.timeseries_features import convert_date_to_continuous_time
from greykite.framework.benchmark.data_loader_ts import DataLoaderTS
from greykite.framework.templates.autogen.forecast_config import EvaluationPeriodParam
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results

# Define Objective Function

In [None]:
def objective(**gkparams) -> float:
    """Fit a Silverkite forecast on the Peyton Manning data and return its MAPE.

    The function loads the Peyton Manning time series, extends it with 365
    future periods, adds an ``is_football_season`` regressor, masks two
    anomaly intervals, and runs a forecast with the ``SILVERKITE`` template.

    Args:
        **gkparams: Keyword arguments forwarded unchanged to
            ``ModelComponentsParam`` (e.g. ``seasonality``, ``changepoints``,
            ``events``, ``custom``, ``regressors``, ``uncertainty``).

    Returns:
        The ``"mean_test_MAPE"`` value of the first record of the grid-search
        summary, or the penalty value ``10000`` if fitting or summarizing the
        forecast raises an ``Exception``.
    """
    # Imported locally so the function is self-contained wherever it is run.
    import logging

    dl = DataLoaderTS()
    ts = dl.load_peyton_manning_ts()
    df_full = ts.make_future_dataframe(periods=365)
    df_features = build_time_features_df(
        dt=df_full["ts"],
        conti_year_origin=convert_date_to_continuous_time(df_full["ts"][0])
    )
    # Football season is flagged as week-of-year <= 6 or >= 36.
    is_football_season = (df_features["woy"] <= 6) | (df_features["woy"] >= 36)
    # .tolist() assigns positionally, independent of the two frames' indexes.
    df_full["is_football_season"] = is_football_season.astype(int).tolist()
    df_full.reset_index(drop=True, inplace=True)

    anomaly_df = pd.DataFrame({
        # start and end date are inclusive
        # each row is an anomaly interval
        cst.START_DATE_COL: ["2010-06-05", "2012-03-01"],  # inclusive
        cst.END_DATE_COL: ["2010-06-20", "2012-03-20"],  # inclusive
        cst.ADJUSTMENT_DELTA_COL: [np.nan, np.nan],  # mask as NA
    })

    # Creates anomaly_info dictionary.
    # This will be fed into the template.
    anomaly_info = {
        "value_col": "y",
        "anomaly_df": anomaly_df,
        "adjustment_delta_col": cst.ADJUSTMENT_DELTA_COL,
    }

    # Specifies dataset information
    metadata = MetadataParam(
        time_col="ts",  # name of the time column
        value_col="y",  # name of the value column
        freq="D",  # "H" for hourly, "D" for daily, "W" for weekly, etc.
        anomaly_info=anomaly_info,  # this is the anomaly information we defined above
        train_end_date=datetime.datetime(2016, 1, 20)
    )

    # Defines the cross-validation config
    evaluation_period = EvaluationPeriodParam(
        test_horizon=365,             # leaves 365 days as testing data
        cv_horizon=365,               # each cv test size is 365 days (same as forecast horizon)
        cv_max_splits=3,              # 3 folds cv
        cv_min_train_periods=365 * 4  # uses at least 4 years for training because we have 8 years data
    )

    model_components = ModelComponentsParam(**gkparams)

    try:
        forecaster = Forecaster()
        # Runs the forecast
        run = forecaster.run_forecast_config(
            df=df_full,
            config=ForecastConfig(
                model_template=ModelTemplateEnum.SILVERKITE.name,
                model_components_param=model_components,
                forecast_horizon=365,  # forecasts 365 steps ahead
                coverage=0.95,  # 95% prediction intervals
                metadata_param=metadata,
                evaluation_period_param=evaluation_period
            )
        )

        result = summarize_grid_search_results(
            grid_search=run.grid_search,
            column_order=["mean_test"]
        ).to_dict("records")[0]

        return result["mean_test_MAPE"]
    except Exception:
        # A failing parameter combination is scored with a large penalty
        # instead of aborting the whole search. Only ``Exception`` is caught,
        # so KeyboardInterrupt/SystemExit still propagate, and the traceback
        # is logged so failures are not silently hidden behind the penalty.
        logging.getLogger("objective").exception(
            "Forecast failed; returning penalty value 10000"
        )
        return 10000

# Define a Hybrid Search Space

In [None]:
from tune import Space, Grid, Rand, RandInt, Choice, TransitionChoice, FuncParam

weekly_seasonality = RandInt(0, 8)
# The same RandInt object is used below for both the yearly seasonality order
# and the changepoint detector's yearly_seasonality_order -- presumably so both
# receive the same sampled value; confirm against tune's handling of shared
# expression objects.
yearly_seasonality = RandInt(0, 50)

# Fixed seed so the random part of the space is reproducible across
# Restart-and-Run-All; change it to draw a different set of samples.
SAMPLE_SEED = 0

# Dates of the custom "superbowl" daily event.
superbowl_dates = [
    "2008-02-03", "2009-02-01", "2010-02-07", "2011-02-06",
    "2012-02-05", "2013-02-03", "2014-02-02", "2015-02-01", "2016-02-07",
]

# Hybrid search space: Grid(...) entries are enumerated, while the random
# expressions (Rand/RandInt/TransitionChoice) are drawn by .sample(4, ...).
space = Space(
    seasonality={
        "yearly_seasonality": yearly_seasonality,
        "quarterly_seasonality": RandInt(0, 10),
        "monthly_seasonality": RandInt(0, 10),
        "weekly_seasonality": weekly_seasonality,
    },
    changepoints={
        "changepoints_dict": dict(
            method="auto",
            yearly_seasonality_order=yearly_seasonality,
            regularization_strength=Rand(0.6, 0.8),
            resample_freq=TransitionChoice(*[f"{x}D" for x in range(1, 32)]),
            potential_changepoint_n=RandInt(10, 200),
            no_changepoint_distance_from_end=TransitionChoice(*[f"{x}D" for x in range(91, 365)])
        )
    },
    events={
        "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,
        "holiday_lookup_countries": ["UnitedStates"],
        "holiday_pre_num_days": Grid(0, 1, 2, 3, 4, 5),
        "holiday_post_num_days": Grid(0, 1, 2, 3, 4, 5),
        "daily_event_df_dict": {
            "superbowl": pd.DataFrame({
                "date": superbowl_dates,
                # One label per date; derived from the list so the two columns
                # cannot drift out of sync.
                "event_name": ["event"] * len(superbowl_dates),
            })
        }
    },
    custom={
        "fit_algorithm_dict": Grid(
            {"fit_algorithm": "ridge"},
            {"fit_algorithm": "linear", "fit_algorithm_params": dict(missing="drop")}
        ),
    },
    regressors={
        "regressor_cols": ["is_football_season"]
    },
    uncertainty={
        "uncertainty_dict": "auto",
    },
).sample(4, seed=SAMPLE_SEED)

# Tuning Flow

In [None]:
from tune import suggest_for_noniterative_objective

# Evaluate `objective` over every configuration in `space`, using the Coiled
# cluster as the execution engine and the "wandb:GreyKite_tuning" logger spec.
result = suggest_for_noniterative_objective(
    objective,
    space,
    execution_engine=cluster,
    logger="wandb:GreyKite_tuning",
)

In [None]:
# Display the first entry of the tuning results -- presumably the best trial;
# verify against the return ordering of suggest_for_noniterative_objective.
result[0]