In [1]:
from influxdb import InfluxDBClient
import pandas as pd
import numpy as np
from skimpy import skim
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import PolynomialFeatures
import ewtpy

In [2]:
import sys
sys.path.append('..')
import fx

In [3]:
# Load the dataset through the project helper module `fx`. What pull_data()
# reads and the shape of what it returns are defined there, not in this notebook.
data = fx.pull_data()

In [4]:
# Split into train/test features and targets. Mind the unpacking order used
# here: (X_train, y_train, X_test, y_test).
# NOTE(review): n_splits=5 is forwarded to fx.data_splitting; how the splits are
# turned into a single train/test pair is decided inside fx — confirm there.
X_train, y_train, X_test, y_test = fx.data_splitting(data, n_splits=5)

In [5]:
def model_testing(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on EWT-transformed training data and score it on the test set.

    The estimator is placed behind the same column transformer used by the
    other pipelines in this notebook: ``Speed`` is passed through unchanged,
    ``Direction`` goes through ``fx.WindDirectionMapper`` and every other
    column is dropped.  Train and test features are both run through
    ``fx.EWT_transformation(..., level=3, log=True)`` so the pipeline is
    fitted and evaluated in the same feature space.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn style estimator used as the final pipeline
        step.  It is fitted as part of the pipeline.
    X_train, X_test
        Feature sets; after the EWT transformation they must expose the
        ``Speed`` and ``Direction`` columns.
    y_train, y_test
        Targets matching ``X_train`` and ``X_test``.

    Returns
    -------
    The pipeline's predictions for the transformed ``X_test``.  The score
    computed by ``fx.RMSE`` is printed as a side effect.
    """
    # NOTE(review): level=3 / log=True mirror the test-side transformation;
    # confirm these are the intended settings for the training features too.
    ewt_options = {"level": 3, "log": True}
    X_train_ewt = fx.EWT_transformation(X_train, **ewt_options)

    feature_selector = ColumnTransformer(
        transformers=[
            ("Speed", "passthrough", ["Speed"]),
            ("Direction", fx.WindDirectionMapper(), ["Direction"]),
        ],
        remainder="drop",
    )
    # Use the estimator supplied by the caller rather than a fixed model.
    pip = Pipeline(steps=[
        ("col_transformer", feature_selector),
        ("model", model),
    ])
    pip.fit(X_train_ewt, y_train)

    predict = pip.predict(fx.EWT_transformation(X_test, **ewt_options))
    # Argument order (truth, prediction) matches the other fx.RMSE calls
    # in this notebook.
    print("Predicted Score: ", fx.RMSE(y_test, predict))
    return predict

In [6]:
# Baseline model: linear regression on the raw wind speed plus the direction
# feature produced by fx.WindDirectionMapper.
LinearPipeline = Pipeline(
    [
        (
            "col_transformer",
            ColumnTransformer(
                [
                    ("Speed", "passthrough", ["Speed"]),
                    ("Direction", fx.WindDirectionMapper(), ["Direction"]),
                ],
                remainder="drop",  # every column other than Speed/Direction is discarded
            ),
        ),
        ("LinRegModel", LinearRegression()),
    ]
)

In [7]:
# Train the baseline on the training split, then predict the held-out split.
# Pipeline.fit returns the fitted pipeline, so the two calls can be chained.
predict = LinearPipeline.fit(X_train, y_train).predict(X_test)

In [8]:
# Timestamped baseline predictions. The "actual" column is removed here and
# the prediction column gets a model-specific name so the per-model frames
# can later be placed side by side.
linplot_df = (
    fx.create_timestamps(predict, X_test, y_test)
    .drop(columns=["actual"])
    .rename(columns={"predict": "predict_lin"})
)
linplot_df

Unnamed: 0,predict_lin
2023-01-31 18:00:00+00:00,36.231339
2023-01-31 21:00:00+00:00,39.137733
2023-02-01 00:00:00+00:00,44.223922
2023-02-01 03:00:00+00:00,42.695288
2023-02-01 06:00:00+00:00,34.702704
...,...
2023-02-15 00:00:00+00:00,23.454317
2023-02-15 03:00:00+00:00,23.454317
2023-02-15 06:00:00+00:00,22.001120
2023-02-15 09:00:00+00:00,23.454317


In [9]:
# Test-set score of the baseline: with cells run in order, `predict` holds the
# LinearPipeline predictions at this point.
# NOTE(review): fx.RMSE is presumably root-mean-squared error — see the fx module.
fx.RMSE(y_test, predict)

8.080737592175543

In [10]:
# Polynomial expansion of Speed (PolynomialFeatures with its default settings)
# combined with the direction feature from fx.WindDirectionRadianMapper.
# NOTE(review): despite the "Log" in the variable name, no log transform is
# applied anywhere in this pipeline — confirm whether one was intended.
LinLogRadPipeline = Pipeline(
    [
        (
            "col_transformer",
            ColumnTransformer(
                [
                    ("Speed", PolynomialFeatures(), ["Speed"]),
                    ("Direction", fx.WindDirectionRadianMapper(), ["Direction"]),
                ],
                remainder="drop",  # every column other than Speed/Direction is discarded
            ),
        ),
        ("LinRegModel", LinearRegression()),
    ]
)

In [11]:
# Fit on the training split and predict the held-out split in one chained
# expression (Pipeline.fit returns the fitted pipeline).
# Note: this overwrites the `predict` array from the previous model.
predict = LinLogRadPipeline.fit(X_train, y_train).predict(X_test)

In [12]:
# Test-set score of LinLogRadPipeline: with cells run in order, `predict` now
# holds that pipeline's predictions.
fx.RMSE(y_test, predict)

5.960819308483332

In [13]:
# Timestamped predictions of LinLogRadPipeline. Built as a non-mutating chain
# (no inplace=True) so the cell is idempotent and the frame returned by
# fx.create_timestamps is never modified behind the helper's back.
# "actual" is dropped because the combined plot frame takes it from logplot_df.
linlogradplot_df = (
    fx.create_timestamps(predict, X_test, y_test)
    .drop(columns=["actual"])
    .rename(columns={"predict": "predict_linlograd"})
)
linlogradplot_df

Unnamed: 0,predict_linlograd
2023-01-31 18:00:00+00:00,28.578554
2023-01-31 21:00:00+00:00,27.515914
2023-02-01 00:00:00+00:00,24.015380
2023-02-01 03:00:00+00:00,25.174672
2023-02-01 06:00:00+00:00,28.800181
...,...
2023-02-15 00:00:00+00:00,25.137315
2023-02-15 03:00:00+00:00,25.137315
2023-02-15 06:00:00+00:00,23.878547
2023-02-15 09:00:00+00:00,25.137315


In [14]:
# Polynomial expansion of Speed (PolynomialFeatures with its default settings)
# combined with the direction feature from fx.WindDirectionMapper.
# NOTE(review): the name says "LogTransformed" but no log transform is applied;
# the only difference from LinLogRadPipeline is the direction mapper, and the
# recorded test scores of the two pipelines agree to ~1e-14.
LinearLogTransformedPipeline = Pipeline(
    [
        (
            "col_transformer",
            ColumnTransformer(
                [
                    ("Speed", PolynomialFeatures(), ["Speed"]),
                    ("Direction", fx.WindDirectionMapper(), ["Direction"]),
                ],
                remainder="drop",  # every column other than Speed/Direction is discarded
            ),
        ),
        ("LinRegModel", LinearRegression()),
    ]
)

In [15]:
# Fit on the training split. As the last expression of the cell, the fitted
# pipeline returned by fit() is also what the notebook displays.
LinearLogTransformedPipeline.fit(X_train, y_train)

In [16]:
# Predict the held-out split (overwriting `predict` from the previous model)
# and show the resulting test-set score from fx.RMSE.
predict = LinearLogTransformedPipeline.predict(X_test)
fx.RMSE(y_test, predict)

5.960819308483328

In [17]:
# Timestamped predictions of LinearLogTransformedPipeline. Unlike the other
# per-model frames, "actual" is kept here so the combined plot frame contains
# the observed values exactly once. The rename is done without inplace=True so
# re-running the cell never mutates an existing frame.
logplot_df = fx.create_timestamps(predict, X_test, y_test).rename(
    columns={"predict": "predict_log"}
)

In [18]:
# combining all plot dataframes into one
plot_df = pd.concat([linplot_df, linlogradplot_df, logplot_df], axis=1)
plot_df


Unnamed: 0,predict_lin,predict_linlograd,predict_log,actual
2023-01-31 18:00:00+00:00,36.231339,28.578554,28.578554,18.617446
2023-01-31 21:00:00+00:00,39.137733,27.515914,27.515914,14.899595
2023-02-01 00:00:00+00:00,44.223922,24.015380,24.015380,7.563296
2023-02-01 03:00:00+00:00,42.695288,25.174672,25.174672,11.086492
2023-02-01 06:00:00+00:00,34.702704,28.800181,28.800181,15.854655
...,...,...,...,...
2023-02-15 00:00:00+00:00,23.454317,25.137315,25.137315,21.172519
2023-02-15 03:00:00+00:00,23.454317,25.137315,25.137315,22.722648
2023-02-15 06:00:00+00:00,22.001120,23.878547,23.878547,24.276798
2023-02-15 09:00:00+00:00,23.454317,25.137315,25.137315,23.977367


In [19]:
# No x/y is given, so plotly express reads the frame in wide form: the index
# goes on the x axis and each column becomes its own line.
px.line(
    data_frame=plot_df,
    title="Predicted vs Actual Wind Generation",
)