## XGBoost

In [None]:
import pandas as pd
# Download the Our World in Data COVID-19 table and parse its "date" column.
owid_covid = pd.read_csv(
    "https://covid.ourworldindata.org/data/owid-covid-data.csv"
)
owid_covid["date"] = pd.to_datetime(owid_covid["date"])

# Keep the rows for France, index them by date on a daily grid, fill the gaps
# by linear interpolation, then turn "date" back into an ordinary column.
df = (
    owid_covid[owid_covid["location"] == "France"]
    .set_index("date", drop=True)
    .resample("D")
    .interpolate(method="linear")
    .reset_index()
)


In [None]:
from sklearn.base import TransformerMixin, BaseEstimator
class DateFeatures(TransformerMixin, BaseEstimator):
    """Expand datetime columns into calendar-component columns.

    For every column of the input frame and every attribute listed in
    ``features``, the attribute is read from the column's ``.dt`` accessor
    and emitted as a new column named ``"<column>_<attribute>"``.
    """

    # Attribute names looked up on the ``.dt`` accessor of each input column.
    features = [
        "hour",
        "year",
        "day",
        "weekday",
        "month",
        "quarter",
    ]

    def __init__(self):
        super().__init__()

    def fit(self, df: pd.DataFrame, y=None, **fit_params):
        """Return ``self`` unchanged; the transformer learns nothing."""
        return self

    def transform(self, df: pd.DataFrame):
        """Return a frame with one derived column per (column, attribute) pair.

        Columns are ordered by input column first, then by the order of
        ``features``.
        """
        derived = [
            getattr(df[column].dt, attribute).rename(f"{column}_{attribute}")
            for column in df.columns
            for attribute in self.features
        ]
        return pd.concat(derived, axis=1)


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
# Feature preprocessing: the "date" column is routed through a two-step
# pipeline (DateFeatures, then an inner ColumnTransformer that applies
# CyclicalFeatures to "date_day", "date_weekday" and "date_month" and passes
# the remaining derived columns through). Any other input column is passed
# through by the outer ColumnTransformer.
# NOTE(review): CyclicalFeatures is not imported anywhere in the visible
# source -- presumably feature_engine.creation.CyclicalFeatures; confirm and
# import it before running this cell.
preprocessor = ColumnTransformer(
    transformers=[(
        "date",
        make_pipeline(
            DateFeatures(),
            ColumnTransformer(transformers=[
                ("cyclical", CyclicalFeatures(),
                  ["date_day", "date_weekday", "date_month"]
                )
            ], remainder="passthrough")
        ), ["date"],
  ),], remainder="passthrough"
)


In [None]:
from xgboost import XGBRegressor
# Full model: the date preprocessing step followed by a gradient-boosted
# regressor with 1000 trees and a squared-error objective.
pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        ("xgb", XGBRegressor(n_estimators=1000, objective="reg:squarederror")),
    ]
)


In [None]:
# Number of leading rows of `df` used for training (90% of the frame).
TRAIN_SIZE = int(len(df) * 0.9)
# Row offset used both to shift the target and to trim the train/test slices.
HORIZON = 1
# Name of the column being forecast.
TARGET_COL = "new_cases"


In [None]:
# Feature rows: the training window runs from row HORIZON up to (but not
# including) TRAIN_SIZE; the evaluation window starts HORIZON rows after it.
X_train = df.iloc[HORIZON:TRAIN_SIZE]
X_test = df.iloc[TRAIN_SIZE + HORIZON:]

# Targets: TARGET_COL moved down by HORIZON rows and cut to the same windows,
# so each feature row is paired with the value recorded HORIZON rows earlier.
y_train = df[TARGET_COL].shift(periods=HORIZON).iloc[HORIZON:TRAIN_SIZE]
y_test = df[TARGET_COL].shift(periods=HORIZON).iloc[TRAIN_SIZE + HORIZON:]


In [None]:
# Only the "date" column is given to the pipeline; every model input is
# derived from it by the preprocessing step.
FEATURE_COLS = ["date"]
pipeline.fit(X_train[FEATURE_COLS], y_train)


In [None]:
# Number of extra days to predict past the last row of `df`.
MAX_HORIZON = 90

# One row per day from the first date in `df` through MAX_HORIZON days beyond
# its length, as a frame with a positional "index" column and a "date" column.
X_test_horizon = (
    pd.date_range(start=df.date.min(), periods=len(df) + MAX_HORIZON, name="date")
    .to_frame(index=False)
    .reset_index()
)


In [None]:
# Predictions for every date in X_test_horizon, stored next to that date.
forecasted = pd.DataFrame(
    {
        TARGET_COL: pipeline.predict(X_test_horizon[FEATURE_COLS]),
        "date": X_test_horizon["date"],
    }
)


In [None]:
# Observed target values alongside their dates, in the same column layout as
# `forecasted` so the two frames can be plotted and merged together.
actual = df[[TARGET_COL, "date"]].copy()


In [None]:
# Draw the forecast (dashed) and the observed series (dash-dot) on one axis.
# The legend labels follow the plotting order: forecast first, then actual.
# NOTE(review): `plt` is not imported anywhere in the visible source --
# presumably matplotlib.pyplot; confirm it is imported in an earlier cell.
fig, ax = plt.subplots(figsize=(12, 6))
forecasted.set_index("date").plot(linestyle='--', ax=ax)
actual.set_index("date").plot(linestyle='-.', ax=ax)
plt.legend(["forecast", "actual"])


In [None]:
from sklearn.metrics import mean_squared_error
# Pair every observed value with the prediction made for the same date.
# NOTE(review): `actual` holds every row of `df` and `forecasted` starts at
# df.date.min(), so this merge also contains the training period -- restrict
# it to the test dates if a pure out-of-sample score is wanted.
test_data = actual.merge(forecasted, on="date", suffixes=("_actual", "_predicted"))

# Take the square root of the MSE explicitly. The `squared=False` shortcut of
# mean_squared_error is deprecated and absent from recent scikit-learn
# releases, whereas this form works on every version.
# `mse` keeps its original name for later cells but holds the RMSE.
mse = mean_squared_error(
    test_data[f"{TARGET_COL}_actual"],
    test_data[f"{TARGET_COL}_predicted"],
) ** 0.5
print(f"The root mean squared error (RMSE) on test set: {mse:.2f}")


## SkTime

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.compose import make_reduction
# Turn a 1-nearest-neighbour regressor into a recursive forecaster that
# predicts from a sliding window of the 15 most recent observations.
regressor = KNeighborsRegressor(n_neighbors=1)
forecaster = make_reduction(regressor, window_length=15, strategy="recursive")
forecaster.fit(y_train)
# NOTE(review): `fh` (the forecasting horizon) is not defined anywhere in the
# visible source; define it before running this line.
y_pred = forecaster.predict(fh)


## LightGBM

In [None]:
import pandas as pd
from darts.timeseries import TimeSeries
from darts.models.forecasting.lgbm import LightGBMModel
# Load the series and split it into the part before 1954-01-01 (training)
# and the remainder (validation).
df = pd.read_csv('AirPassengers.csv', delimiter=",")
series = TimeSeries.from_dataframe(df, 'Month', ['#Passengers'])
train, val = series.split_before(pd.Timestamp('19540101'))

# Use the class that is actually imported above (LightGBMModel); the last 15
# values of the series are the model's lag features.
model = LightGBMModel(lags=15)
model.fit(train)
# darts models forecast through predict(n); ask for as many steps as the
# validation series has so the forecast lines up with `val`.
forecast = model.predict(len(val))


In [None]:
# Scaler is referenced below but was never imported; bring it in here so the
# cell runs on its own.
from darts.dataprocessing.transformers import Scaler

# Encoder configuration in the format of darts' `add_encoders` model argument.
# It is only defined here; no model in the visible source receives it.
add_encoders = {
    'cyclic': {'future': ['month']},
    'datetime_attribute': {'future': ['hour', 'dayofweek']},
    'position': {'past': ['relative'], 'future': ['relative']},
    'transformer': Scaler(),
}


## CatBoost

In [None]:
from sktime.forecasting.compose import make_reduction
import lightgbm as lgb
# Wrap a default LightGBM regressor as a recursive forecaster over a window
# of 10 observations, treating it as a tabular regressor.
# NOTE(review): this cell sits under the "CatBoost" heading but builds a
# LightGBM regressor -- confirm which model was intended.
regressor = lgb.LGBMRegressor()
forecaster = make_reduction(
    regressor,
    window_length=10,
    strategy="recursive",
    scitype="tabular-regressor"
)


## Ensembles with Kats

In [None]:
conda install -c conda-forge fbprophet 

In [None]:
pip install kats

In [None]:
# Rebuild the France frame (the name `df` was reused for another dataset in
# between): daily grid indexed by date, gaps filled by linear interpolation,
# then "date" restored as an ordinary column.
df = (
    owid_covid[owid_covid["location"] == "France"]
    .set_index("date", drop=True)
    .resample("D")
    .interpolate(method="linear")
    .reset_index()
)

In [None]:
from kats.models.ensemble.ensemble import EnsembleParams, BaseModelParams
from kats.models.ensemble.kats_ensemble import KatsEnsemble
from kats.models import linear_model, quadratic_model
# Candidate models for the ensemble: one linear and one quadratic model, each
# paired with a default-constructed parameter object.
model_params = EnsembleParams(
    [
        BaseModelParams(model_name, params)
        for model_name, params in (
            ("linear", linear_model.LinearModelParams()),
            ("quadratic", quadratic_model.QuadraticModelParams()),
        )
    ]
)


In [None]:
# Settings handed to KatsEnsemble as its `params` argument: the candidate
# models defined above plus the aggregation, seasonality-length and
# decomposition options.
KatsEnsembleParam = {
    "models": model_params,
    "aggregation": "weightedavg",
    "seasonality_length": 30,
    "decomposition_method": "additive",
}


In [None]:
from kats.consts import TimeSeriesData
# Column of `df` to forecast.
TARGET_COL = "new_cases"
# Wrap the target values and their dates in the time-series container that
# is passed to KatsEnsemble as `data`.
df_ts = TimeSeriesData(
    value=df[TARGET_COL], time=df["date"]
)


In [None]:
# Build the ensemble from the series and the settings dict, fit it, and keep
# whatever fit() returns as `m` (the later cells call predict/aggregate/plot
# on it).
m = KatsEnsemble(
    data=df_ts, 
    params=KatsEnsembleParam
).fit()


In [None]:
# Forecast 90 steps ahead, combine the individual model forecasts, and plot
# the result with the target column name as the y-axis label.
# NOTE(review): `plt` is not imported anywhere in the visible source --
# presumably matplotlib.pyplot; confirm it is imported in an earlier cell.
m.predict(steps=90)
m.aggregate()
m.plot()
plt.ylabel(TARGET_COL)
