# 2 - Composition Operators 

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

from helicast.column_filters import (
    AllSelector,
    DTypeRemover,
    DTypeSelector,
    NameRemover,
    NameSelector,
    RegexRemover,
    RegexSelector,
)
from helicast.sklearn import (
    Pipeline,
    TransformedTargetRegressor,
)

from sklearn.gaussian_process import GaussianProcessRegressor

In [None]:
def read_data(path: str = "../data/victoria-daily-electricity.csv") -> pd.DataFrame:
    """Load the daily electricity CSV and prepare it for modelling.

    Args:
        path: Location of the CSV file. Defaults to the relative path this
            notebook originally hard-coded, so ``read_data()`` is unchanged.

    Returns:
        The loaded table after ``convert_dtypes()``, with the ``school_day``
        column cast to ``category`` and missing values forward-filled.
    """
    df = pd.read_csv(path)
    df = df.convert_dtypes()
    df["school_day"] = df["school_day"].astype("category")
    # Forward-fill copies the previous row's value into each gap; a gap in the
    # very first row has nothing before it and therefore stays missing.
    return df.ffill()


# Load the dataset once; the modelling cells below build X and y from `df`.
df = read_data()
# `display` is not imported in this file — presumably the IPython/Jupyter
# built-in. It shows the per-column dtypes before the frame itself.
display(df.dtypes)
# Bare expression as the last statement of the cell, so the notebook renders
# the DataFrame as the cell output.
df

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# Min-max scale the features, then fit an AdaBoost ensemble of five
# Gaussian-process regressors on the scaled features.
model = Pipeline(
    [
        ("scaler", MinMaxScaler()),
        ("reg", AdaBoostRegressor(GaussianProcessRegressor(), n_estimators=5)),
    ]
)

# Features: two column filters combined with `&` (presumably "numeric columns
# that do not match 'demand'" — confirm against the helicast docs).
X = (RegexRemover(patterns="demand") & DTypeSelector(dtypes="number")).fit_transform(df)
# Target: the "demand" column.
y = NameSelector(names="demand").fit_transform(df)
model.fit(X, y)

# Compare the last 200 observations with the model output. The predictions are
# in-sample: `X` is the same data the model was just fitted on.
plt.figure(figsize=(15, 5))
# Label the observed series too, otherwise the legend lists only "prediction".
plt.plot(y[-200:], label="demand")
plt.plot(model.predict(X)[-200:], "--", label="prediction")
plt.legend()

In [None]:
# NOTE(review): this rebinds `Pipeline`, which the first cell imported from
# `helicast.sklearn`. After this cell runs, re-running an earlier cell will
# silently pick up scikit-learn's class instead — confirm this is intended.
from sklearn.pipeline import Pipeline

# Same scaler + regressor idea as before, but the target is also min-max scaled
# through `TransformedTargetRegressor` (passed as `transformer`).
model = TransformedTargetRegressor(
    regressor=Pipeline(
        [
            ("scaler", MinMaxScaler()),
            ("reg", GaussianProcessRegressor()),
        ]
    ),
    transformer=MinMaxScaler(),
)


# Features: two column filters combined with `&` (presumably "numeric columns
# that do not match 'demand'" — confirm against the helicast docs).
X = (RegexRemover(patterns="demand") & DTypeSelector(dtypes="number")).fit_transform(df)
# Target: the "demand" column.
y = NameSelector(names="demand").fit_transform(df)
model.fit(X, y)

# Score on the training data itself, so this is an in-sample figure.
print(model.score(X, y))


# Compare the last 200 observations with the in-sample model output.
plt.figure(figsize=(15, 5))
# Label the observed series too, otherwise the legend lists only "prediction".
plt.plot(y[-200:], label="demand")
plt.plot(model.predict(X)[-200:], "--", label="prediction")
plt.legend()