# Predicting Ahead
This notebook offsets the data by 24 hours: each row of features is paired with the `SlrW_Avg` value recorded 24 hours later, which is used as the prediction target.


In [3]:
from capymoa.regressor import KNNRegressor, AdaptiveRandomForestRegressor
from capymoa.stream._stream import NumpyStream
from capymoa.evaluation import prequential_evaluation
from capymoa.evaluation.visualization import plot_windowed_results
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint  # Import pprint module

# Forecast horizon. One row is 1 minute, so 24 hours ahead is 24 * 60 = 1440 rows.
HORIZON_ROWS = 24 * 60

path_to_csv = Path("../power_plant/MHPA-docs-data.csv")

# Load the data, replace missing values with 0, and drop the columns that must
# not be used as features (RecNum, TmStamp, SlrMJ_Tot, in_bytes_str).
data = (
    pd.read_csv(path_to_csv)
    .fillna(0)
    .drop(columns=["RecNum", "TmStamp", "SlrMJ_Tot", "in_bytes_str"])
)

# Shifting by the horizon removes HORIZON_ROWS rows; with fewer rows than that
# the stream would silently be empty, so fail loudly instead.
if len(data) <= HORIZON_ROWS:
    raise ValueError(
        f"Need more than {HORIZON_ROWS} rows to predict {HORIZON_ROWS} rows ahead, "
        f"but {path_to_csv} only has {len(data)}"
    )

# The target is the SlrW_Avg column offset by the horizon (we are predicting the
# future): the features at row i are paired with SlrW_Avg at row i + HORIZON_ROWS.
# The current SlrW_Avg value stays in the feature matrix.
targets = data["SlrW_Avg"].iloc[HORIZON_ROWS:].to_numpy()
x_features = data.iloc[:-HORIZON_ROWS].to_numpy()

# Every feature row must have exactly one target.
assert len(x_features) == len(targets)

stream = NumpyStream(
    x_features,
    targets,
    target_type="numeric", # "numeric" or "categorical", we are doing regression
    # NOTE(review): these names are assumed to match the remaining CSV columns
    # in order - confirm against data.columns.
    feature_names=["batt_volt", "mean_wind_speed", "mean_wind_direction", "std_wind_dir", "Max_Gust_Min", "Max_Gust_Hr","Rain_mm","Barametric_Avg","Air_Temp_Avg","RH_Avg","SlrW_avg","Dew_Point_Avg"],
)
schema = stream.get_schema()

In [9]:
# Define the models to test
import regressors
from capymoa.regressor import SOKNLBT, SGDRegressor, PassiveAggressiveRegressor, ORTO, ARFFIMTDD, AdaptiveRandomForestRegressor, FIMTDD

# Learners to compare, all built from the same stream schema.
# First the ones from the local `regressors` module ...
local_models = [
    regressors.GaussianRandomRegressor(schema=schema),
    regressors.LastGuessRegressor(schema=schema),
]
# ... then the ones shipped with capymoa.
capymoa_models = [
    PassiveAggressiveRegressor(schema),
    AdaptiveRandomForestRegressor(schema),
    FIMTDD(schema),
]

# Order matters: later cells pair `models` with `results` by position.
models = [*local_models, *capymoa_models]

In [10]:
import seaborn as sns
import matplotlib.pyplot as plt

# Run a prequential evaluation for every model on the same stream and plot the
# windowed mean absolute error of each model on one shared, labelled axis.

results = []  # one evaluation result per entry of `models`, in the same order

fig, ax = plt.subplots()

for learner in models:
    print(f"Running {learner}")
    result = prequential_evaluation(stream=stream, learner=learner, window_size=5000)
    print(f"Done in {result['cpu_time']} secs")
    sns.lineplot(
        result['windowed'].metrics_per_window()["mean absolute error"],
        label=str(learner),
        ax=ax,
    )
    results.append(result)

ax.set(
    title="Windowed mean absolute error, predicting SlrW_Avg 24 hours ahead",
    xlabel="Evaluation window",
    ylabel="mean absolute error",
)
plt.show()


Running GaussianRandomRegressor
Done in 21.841640799999993 secs
Running LastGuessRegressor
Done in 20.696883100000008 secs
Running PassiveAggressiveRegressor




In [None]:
# Plot the windowed relative error metrics, one figure per metric with one line
# per model. `results` is paired with `models` by position.
#
# The second metric is "relative root mean squared error": the regression
# evaluator reports a relative *root* mean squared error, and there is no
# "relative mean squared error" key, so that lookup would raise a KeyError.
# NOTE(review): confirm against result['windowed'].metrics_header().
for metric in ("relative mean absolute error", "relative root mean squared error"):
    fig, ax = plt.subplots()

    for learner, result in zip(models, results):
        sns.lineplot(
            result['windowed'].metrics_per_window()[metric],
            label=str(learner),
            ax=ax,
        )

    ax.set(
        title=f"Windowed {metric}",
        xlabel="Evaluation window",
        ylabel=metric,
    )
    plt.show()