In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell execution, so edits to project code on disk are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import lazypredict
import pandas as pd
import datetime
from pytz import utc


In [3]:
# Period and turbine identifiers; `start`/`stop` and `location` are used below
# to build the name of the raw data file.
start = datetime.datetime(2022, 11, 1, tzinfo=utc)
stop = datetime.datetime(2023, 3, 1, tzinfo=utc)
location = 'nw2d01'
name_location = 'NW2_D01'

# Single root for every data path below: pointing the notebook at another
# checkout only requires changing this one value.
home_folder = "../../"
# Data Paths
data_file_name = '_'.join([location, start.strftime("%Y%m%d"), stop.strftime("%Y%m%d")])
data_path = home_folder + "data/nw2/raw/" + data_file_name + ".parquet"
mvbc_path = home_folder + "data/nw2/mvbc_data.parquet"
tracked_frequencies_path = home_folder + "data/nw2/tracked_modes/" + location + ".parquet"
simulations_data_path = home_folder + "data/nw2/simulations/" + location + "/"

# Get all the data
data = pd.read_parquet(data_path)
mvbc_data = pd.read_parquet(mvbc_path)
tracked_frequencies = pd.read_parquet(tracked_frequencies_path)
# Simulation inputs are currently disabled; `read_simulations_csv_files` is not
# defined or imported in the visible cells, so re-enabling these lines needs
# that helper first.
#simulation_data = read_simulations_csv_files(simulations_data_path + "eigen_frequencies/")
#simulation_shifts = read_simulations_csv_files(simulations_data_path + "mean_shifts/")
#simulation_errors = pd.read_csv(simulations_data_path + "errors/Errors_No_scour.csv", index_col=0)

In [4]:
def get_data_subset(
    data: pd.DataFrame,
    list_of_parameters: list[str]
    ) -> pd.DataFrame:
    """Keep the columns of ``data`` whose name matches any given parameter.

    The entries of ``list_of_parameters`` are joined with ``|`` into one
    regular expression, so each entry is treated as a regex alternative and a
    column is kept as soon as one of them is found anywhere in its name.

    Parameters
    ----------
    data : pd.DataFrame
        Frame to select columns from.
    list_of_parameters : list[str]
        Name fragments (interpreted as regular expressions) to look for.
        Note that an empty list yields an empty pattern, which matches
        every column.

    Returns
    -------
    pd.DataFrame
        The matching columns, in their original order.
    """
    pattern = '|'.join(list_of_parameters)
    matching_columns = data.filter(regex=pattern)
    return matching_columns

def get_weather_subset(
    data: pd.DataFrame,
    weater_parameters: list[str] = [
        'Wave_height',
        'Sea_water_temperature',
        'Tide_TAW',
        'Air_pressure',
        'Air_temperature',
        # 'Relative_humidity' is deliberately left out: TOO MANY NANs
    ],
) -> pd.DataFrame:
    """Select the weather columns of ``data``.

    Thin wrapper around ``get_data_subset`` with a default list of weather
    quantities.

    Parameters
    ----------
    data : pd.DataFrame
        Frame holding the weather measurements.
    weater_parameters : list[str]
        Column-name fragments to keep; each one is matched as a regular
        expression anywhere in the column name. The (misspelled) parameter
        name is kept as-is because it is part of the calling interface.

    Returns
    -------
    pd.DataFrame
        The columns of ``data`` matching at least one requested parameter.
    """
    weather_columns = get_data_subset(data, weater_parameters)
    return weather_columns

def get_scada_subset(
    data: pd.DataFrame,
    scada_parameters: list[str] = [
        'rpm',
        'yaw',
        'pitch',
        'power',
        # Tower acceleration columns are deliberately left out: TOO MANY NANs
        #   'std_NW2_D01_towerxacc', 'std_NW2_D01_toweryacc',
        #   'mean_NW2_D01_towerxacc', 'mean_NW2_D01_toweryacc'
        'windspeed',
        'winddirection',
    ],
    statistics: list[str] = ['mean'],
) -> pd.DataFrame:
    """Select the SCADA columns of ``data`` for the requested statistics.

    The selection is done in two passes with ``get_data_subset``: first only
    the columns matching one of ``statistics`` are kept, then, among those,
    only the columns matching one of ``scada_parameters``.

    Parameters
    ----------
    data : pd.DataFrame
        Frame holding the SCADA signals.
    scada_parameters : list[str]
        Signal-name fragments to keep (matched as regular expressions
        anywhere in the column name).
    statistics : list[str]
        Statistic-name fragments to keep, matched the same way; defaults to
        the ``'mean'`` columns only.

    Returns
    -------
    pd.DataFrame
        Columns matching both a statistic and a SCADA parameter.
    """
    columns_for_statistics = get_data_subset(data, statistics)
    selected = get_data_subset(columns_for_statistics, scada_parameters)
    return selected

def get_mpe_data(
    data:pd.DataFrame,
    mode_direction:str,
    name_location:str = 'NW2_D01'
    ) -> pd.DataFrame:
    """Flatten the MPE records of one direction into a table.

    Reads the column ``mpe_<name_location>_<mode_direction>`` of ``data``,
    expands every list-like cell into one row per element, drops the missing
    entries and builds a frame from the remaining records. Rows coming from
    the same cell share that cell's index label, so the returned index may
    contain repeated labels.

    Parameters
    ----------
    data : pd.DataFrame
        Frame containing the ``mpe_*`` column. Each cell is presumably a
        list of dict-like records (one per identified mode) -- TODO confirm
        against the code that produces the raw data.
    mode_direction : str
        Either ``'FA'`` or ``'SS'``.
    name_location : str
        Location tag used in the column name.

    Returns
    -------
    pd.DataFrame
        One row per record, indexed by the label of the originating row.

    Raises
    ------
    ValueError
        If ``mode_direction`` is neither ``'FA'`` nor ``'SS'``.
    KeyError
        If the expected ``mpe_*`` column is not present in ``data``.
    """
    # Validate the argument before touching the data so a bad call fails fast.
    if mode_direction not in ['FA', 'SS']:
        raise ValueError('mode_direction must be FA or SS')
    column = '_'.join(['mpe', name_location, mode_direction])
    # Explode/dropna once and reuse the result for both the values and the
    # index, instead of running the same pipeline twice.
    records = data[column].explode().dropna()
    mpe_direction = pd.DataFrame.from_records(records.tolist()).set_index(records.index)
    return mpe_direction


In [5]:
# Model inputs: weather columns (from the MVBC frame) placed side by side with
# the mean SCADA columns; rows are aligned on the index of both frames.
weather_inputs = get_weather_subset(mvbc_data)
scada_inputs = get_scada_subset(data)
inputs = pd.concat([weather_inputs, scada_inputs], axis=1)

# Regression targets: the tracked frequencies loaded earlier.
prediction_params = tracked_frequencies

In [6]:
# Mode whose tracked frequency is used as regression target.
mode = 'SS1'

# Target samples for that mode, without the missing values.
target = prediction_params[mode].dropna()
# Keep only target labels that also exist in `inputs`: `.loc` with a list of
# labels raises a KeyError as soon as one label is missing, which would
# otherwise abort the cell for a single unmatched timestamp.
target = target[target.index.isin(inputs.index)]

# Features on the target's labels, dropping every row with a missing input,
# then restrict the target to the rows that survived.
X = inputs.loc[target.index].dropna()
y = target.loc[X.index]

In [7]:
import scipy

In [8]:
from lazypredict.Supervised import LazyRegressor
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np
from sklearn.model_selection import train_test_split

# `mode` is intentionally not re-assigned here: X and y were already built for
# the mode selected in the earlier cell, and resetting it would let `mode`
# disagree with the data being fitted if that cell is ever changed.

# Share of samples held out for scoring, and the seed that makes the random
# split reproducible.
TEST_SIZE = 0.2
RANDOM_STATE = 64

# NOTE(review): the samples appear to be time-indexed; a shuffled split can
# place neighbouring timestamps in both sets -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Fit lazypredict's collection of regressors on the training set and score
# them on the held-out set; verbose=1 prints one metrics dict per model.
reg = LazyRegressor(verbose=1, ignore_warnings=False, custom_metric=None)
models, pred = reg.fit(X_train, X_test, y_train, y_test)

  2%|▏         | 1/42 [00:01<01:18,  1.92s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.21247488362008027, 'Adjusted R-Squared': 0.2098003617676013, 'RMSE': 0.002503247249260273, 'Time taken': 1.922532558441162}


  5%|▍         | 2/42 [00:04<01:41,  2.55s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.19027409756134672, 'Adjusted R-Squared': 0.18752417939931354, 'RMSE': 0.0025382860147463654, 'Time taken': 2.980142831802368}
{'Model': 'BayesianRidge', 'R-Squared': 0.23964425243823784, 'Adjusted R-Squared': 0.2370620007484634, 'RMSE': 0.0024596876321066083, 'Time taken': 0.07480955123901367}


 17%|█▋        | 7/42 [00:05<00:16,  2.09it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': -0.4636220956489632, 'Adjusted R-Squared': -0.4685927171531741, 'RMSE': 0.003412604973968617, 'Time taken': 0.21725869178771973}
{'Model': 'DummyRegressor', 'R-Squared': -2.3207151732318465e-05, 'Adjusted R-Squared': -0.0034193958762365195, 'RMSE': 0.002820827832985447, 'Time taken': 0.011232614517211914}
{'Model': 'ElasticNet', 'R-Squared': -2.3207151732318465e-05, 'Adjusted R-Squared': -0.0034193958762365195, 'RMSE': 0.002820827832985447, 'Time taken': 0.015764951705932617}
{'Model': 'ElasticNetCV', 'R-Squared': 0.23951610373816634, 'Adjusted R-Squared': 0.23693341684132152, 'RMSE': 0.002459894898569539, 'Time taken': 0.1539607048034668}
{'Model': 'ExtraTreeRegressor', 'R-Squared': -0.45741073346447725, 'Adjusted R-Squared': -0.4623602605000159, 'RMSE': 0.0034053560186445083, 'Time taken': 0.05700278282165527}


 21%|██▏       | 9/42 [00:11<00:47,  1.44s/it]

{'Model': 'ExtraTreesRegressor', 'R-Squared': 0.2029903346681503, 'Adjusted R-Squared': 0.20028360224497943, 'RMSE': 0.002518276048466514, 'Time taken': 6.361960411071777}
{'Model': 'GammaRegressor', 'R-Squared': 0.0840978568687204, 'Adjusted R-Squared': 0.08098735252341505, 'RMSE': 0.002699579232082795, 'Time taken': 0.02999591827392578}


 26%|██▌       | 11/42 [01:06<05:13, 10.10s/it]

{'Model': 'GaussianProcessRegressor', 'R-Squared': -33.06048859477828, 'Adjusted R-Squared': -33.17616175765033, 'RMSE': 0.016462545095486603, 'Time taken': 54.49226450920105}


 29%|██▊       | 12/42 [01:10<04:30,  9.02s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.2816574759126246, 'Adjusted R-Squared': 0.2792179057474621, 'RMSE': 0.002390767420639533, 'Time taken': 4.499237537384033}


 33%|███▎      | 14/42 [01:11<02:33,  5.47s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.2815966239130824, 'Adjusted R-Squared': 0.27915684708784116, 'RMSE': 0.0023908686814532896, 'Time taken': 0.20273494720458984}
{'Model': 'HuberRegressor', 'R-Squared': 0.23909549715631717, 'Adjusted R-Squared': 0.23651138183329135, 'RMSE': 0.0024605750608776965, 'Time taken': 0.13912320137023926}


 36%|███▌      | 15/42 [01:11<01:51,  4.14s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': 0.16674818122261825, 'Adjusted R-Squared': 0.16391836646295432, 'RMSE': 0.002574895901515569, 'Time taken': 0.19302845001220703}


 45%|████▌     | 19/42 [01:27<01:17,  3.38s/it]

{'Model': 'KernelRidge', 'R-Squared': -6048.928260842869, 'Adjusted R-Squared': -6069.4744821671275, 'RMSE': 0.21940506710274002, 'Time taken': 15.604030847549438}
{'Model': 'Lars', 'R-Squared': 0.07773375328886567, 'Adjusted R-Squared': 0.07460163574832146, 'RMSE': 0.0027089419467106364, 'Time taken': 0.0178530216217041}
{'Model': 'LarsCV', 'R-Squared': 0.18225176608760718, 'Adjusted R-Squared': 0.17947460320615105, 'RMSE': 0.0025508290145909865, 'Time taken': 0.06443929672241211}
{'Model': 'Lasso', 'R-Squared': -2.3207151732318465e-05, 'Adjusted R-Squared': -0.0034193958762365195, 'RMSE': 0.002820827832985447, 'Time taken': 0.02093029022216797}


 55%|█████▍    | 23/42 [01:27<00:29,  1.55s/it]

{'Model': 'LassoCV', 'R-Squared': 0.23951644077752532, 'Adjusted R-Squared': 0.23693375502530323, 'RMSE': 0.0024598943534682584, 'Time taken': 0.1733846664428711}
{'Model': 'LassoLars', 'R-Squared': -2.3207151732318465e-05, 'Adjusted R-Squared': -0.0034193958762365195, 'RMSE': 0.002820827832985447, 'Time taken': 0.016962528228759766}
{'Model': 'LassoLarsCV', 'R-Squared': 0.2396625774668616, 'Adjusted R-Squared': 0.23708038801089848, 'RMSE': 0.0024596579919559363, 'Time taken': 0.06304931640625}
{'Model': 'LassoLarsIC', 'R-Squared': 0.2396625774668616, 'Adjusted R-Squared': 0.23708038801089848, 'RMSE': 0.0024596579919559363, 'Time taken': 0.03678703308105469}
{'Model': 'LinearRegression', 'R-Squared': 0.2396625774668616, 'Adjusted R-Squared': 0.23708038801089848, 'RMSE': 0.0024596579919559363, 'Time taken': 0.021997690200805664}


 60%|█████▉    | 25/42 [01:28<00:20,  1.21s/it]

{'Model': 'LinearSVR', 'R-Squared': -0.23198697066518115, 'Adjusted R-Squared': -0.23617093382582244, 'RMSE': 0.0031309389414910386, 'Time taken': 0.8871674537658691}
{'Model': 'MLPRegressor', 'R-Squared': -84.03889119266908, 'Adjusted R-Squared': -84.32769261382357, 'RMSE': 0.026012394705562894, 'Time taken': 0.8038537502288818}


 64%|██████▍   | 27/42 [02:54<03:34, 14.31s/it]

{'Model': 'NuSVR', 'R-Squared': 0.21298776625185734, 'Adjusted R-Squared': 0.2103149862051671, 'RMSE': 0.0025024319856503862, 'Time taken': 85.73093223571777}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.08952994050521401, 'Adjusted R-Squared': 0.08643788411298103, 'RMSE': 0.002691561919304539, 'Time taken': 0.01295614242553711}
{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.22821194788462063, 'Adjusted R-Squared': 0.2255908708320522, 'RMSE': 0.0024781099209343006, 'Time taken': 0.02993154525756836}
{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -506.07457543995304, 'Adjusted R-Squared': -507.7966564309501, 'RMSE': 0.063519556725475, 'Time taken': 0.025424718856811523}
{'Model': 'PoissonRegressor', 'R-Squared': 0.039569855526176534, 'Adjusted R-Squared': 0.03630812919421844, 'RMSE': 0.0027644225828468917, 'Time taken': 0.018133163452148438}


In [None]:
# Display `models`, the first of the two objects returned by `reg.fit`
# (presumably the per-model score table -- confirm against the lazypredict docs).
models