In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
#IMPORTS
import os
import pandas as pd


In [4]:
loc = 'nw2'
turbine = 'c02'

In [5]:
# GET THE DATA
package_folder = os.path.dirname(os.path.dirname(os.getcwd()))
data_folder = os.path.join(package_folder, 'data')
ss2_selected = pd.read_csv(os.path.join(data_folder, 'processed','nw2', turbine+'_ss2_selected_data_large.csv'))
SS1_dbscan = pd.read_parquet(os.path.join(data_folder, 'interim',loc,'tracked_modes', 'dbscan_based', loc+turbine+'_SS1_mode.parquet'))
SS2_dbscan = pd.read_parquet(os.path.join(data_folder, 'interim',loc,'tracked_modes', 'dbscan_based', loc+turbine+'_SS2_mode.parquet'))
FA1_dbscan = pd.read_parquet(os.path.join(data_folder, 'interim',loc,'tracked_modes', 'dbscan_based', loc+turbine+'_FA1_mode.parquet'))
FA2_dbscan = pd.read_parquet(os.path.join(data_folder, 'interim',loc,'tracked_modes', 'dbscan_based', loc+turbine+'_FA2_mode.parquet'))

In [6]:
from src.data.utils import get_input_data
from src.data.harmonics import *

data = get_input_data(data_folder, loc, turbine)
harmonics_data = harmonics(data['turbine_data'].filter(regex='rpm'), [1,3,6, 9])

test_data_path = '_'.join([loc, turbine, 'SS2', 'test_data.parquet'])
test_turbine_data = pd.read_parquet(os.path.join(data_folder, 'raw', loc, test_data_path))
#remove duplicated index
duplicated_indices = test_turbine_data.index.duplicated(keep='first')
test_turbine_data = test_turbine_data[~duplicated_indices]
test_turbine_data.index = pd.to_datetime(test_turbine_data.index, utc=True)

wandelaar_weather_station_data = pd.read_parquet(os.path.join(data_folder,'external', loc, 'test_wandelaar_weather_data.parquet'))
westhinder_weather_station_data = pd.read_parquet(os.path.join(data_folder,'external', loc, 'test_westhinder_weather_data.parquet'))

mvbc_test_data = wandelaar_weather_station_data.copy()
mvbc_test_data[mvbc_test_data.filter(regex='Tide').columns] = westhinder_weather_station_data.filter(regex='Tide').values
#mvbc_test_data.rename(columns={mvbc_test_data.filter(regex='Tide').columns[0] :westhinder_weather_station_data.filter(regex='Tide').columns[0]}, inplace=True)
mvbc_test_data.isna().sum()
mvbc_test_data.index = pd.to_datetime(mvbc_test_data.index, utc=True)

In [7]:
scada_data = data['turbine_data'].filter(regex='mean_').filter(regex='rpm|yaw|pitch|power|windspeed|winddirection')
1 - scada_data.isna().sum()/len(scada_data)

mean_NW2_C02_rpm              0.972362
mean_NW2_C02_yaw              0.972362
mean_NW2_C02_pitch            0.972362
mean_NW2_C02_power            0.972362
mean_NW2_C02_windspeed        0.972362
mean_NW2_C02_winddirection    0.972362
dtype: float64

In [8]:
mvbc_data = pd.concat([data['mvbc_data'], mvbc_test_data], axis=0)
mvbc_data = mvbc_data[~mvbc_data.index.duplicated(keep='first')]
1 - mvbc_data.isna().sum()/len(mvbc_data)

mvbc_WandelaarBuoy_10%_highest_waves                                    1.000000
mvbc_WandelaarBuoy_Wave_height                                          1.000000
mvbc_WandelaarBuoy_Average_wave_period                                  1.000000
mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s                      1.000000
mvbc_WandelaarBuoy_Sea_water_temperature                                0.937501
mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)    0.999967
mvbc_WandelaarMeasuringpile_Average_wind_direction                      1.000000
mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)         0.999967
mvbc_WandelaarMeasuringpile_Tide_TAW                                    0.992893
mvbc_WandelaarMeasuringpile_Relative_humidity                           0.783976
mvbc_WandelaarMeasuringpile_Air_pressure                                0.999609
mvbc_WandelaarMeasuringpile_Air_temperature                             1.000000
mvbc_WandelaarBuoy_1%_wave_h

In [9]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = SS2_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=13)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", selected_features)
print("Feature ranking:", ranking)

Selected features: [ True  True  True False  True False False False  True  True  True  True
  True  True  True  True  True]
Feature ranking: [1 1 1 4 1 5 2 3 1 1 1 1 1 1 1 1 1]


In [12]:
len(SS1_dbscan['mean_frequency'])


53844

In [13]:
y_unique = y[~y.index.duplicated(keep='first')]
print(len(y_unique))
print('Availability: ', len(y_unique)/len(X_))

50768
Availability:  0.827581710000815


In [16]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = SS2_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_unique = y_[~y_.index.duplicated(keep='first')].dropna()
print('Availability: ', len(y_unique)/len(X_))

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=12)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", X.columns[selected_features])
print("Removed features: ", X.columns[~selected_features])
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

Availability:  0.917466786209145
Selected features: Index(['mvbc_WandelaarBuoy_10%_highest_waves',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarMeasuringpile_Tide_TAW',
       'mvbc_WandelaarMeasuringpile_Air_temperature', 'mean_NW2_C02_rpm',
       'mean_NW2_C02_yaw', 'mean_NW2_C02_pitch', 'mean_NW2_C02_power',
       'mean_NW2_C02_windspeed', 'mean_NW2_C02_winddirection'],
      dtype='object')
Removed features:  Index(['mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Air_pressure'],
      dtype='object')
Feature ranking: [1 1 1 5 1 6 3 4 1 2 1 1 1 1 1 1 1]
ordered features:  Index(['mvbc_WandelaarBuoy_10%_highest_wav

In [17]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = SS1_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_unique = y_[~y_.index.duplicated(keep='first')].dropna()
print('Availability: ', len(y_unique)/len(X_))

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=12)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", X.columns[selected_features])
print("Removed features: ", X.columns[~selected_features])
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

Availability:  0.8705680984595322
Selected features: Index(['mvbc_WandelaarBuoy_10%_highest_waves',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarMeasuringpile_Tide_TAW',
       'mvbc_WandelaarMeasuringpile_Air_temperature', 'mean_NW2_C02_rpm',
       'mean_NW2_C02_yaw', 'mean_NW2_C02_pitch', 'mean_NW2_C02_power',
       'mean_NW2_C02_windspeed', 'mean_NW2_C02_winddirection'],
      dtype='object')
Removed features:  Index(['mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Air_pressure'],
      dtype='object')
Feature ranking: [1 1 1 4 1 5 3 6 1 2 1 1 1 1 1 1 1]
ordered features:  Index(['mvbc_WandelaarBuoy_10%_highest_wa

In [20]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = SS1_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_unique = y_[~y_.index.duplicated(keep='first')].dropna()
print('Availability: ', len(y_unique)/len(X_))

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=12)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", X.columns[selected_features])
print("Removed features: ", X.columns[~selected_features])
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

selected_data = X_[X.columns[selected_features]]
selected_data.to_parquet(os.path.join(data_folder, 'interim', loc, 'rfe_selected_data', loc+turbine+'_rfe_selected_data.parquet'))

Availability:  0.8705680984595322
Selected features: Index(['mvbc_WandelaarBuoy_10%_highest_waves',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarMeasuringpile_Tide_TAW',
       'mvbc_WandelaarMeasuringpile_Air_temperature', 'mean_NW2_C02_rpm',
       'mean_NW2_C02_yaw', 'mean_NW2_C02_pitch', 'mean_NW2_C02_power',
       'mean_NW2_C02_windspeed', 'mean_NW2_C02_winddirection'],
      dtype='object')
Removed features:  Index(['mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Air_pressure'],
      dtype='object')
Feature ranking: [1 1 1 4 1 5 3 6 1 2 1 1 1 1 1 1 1]
ordered features:  Index(['mvbc_WandelaarBuoy_10%_highest_wa

In [18]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = SS1_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_unique = y_[~y_.index.duplicated(keep='first')].dropna()
print('Availability: ', len(y_unique)/len(X_))

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=12)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", X.columns[selected_features])
print("Removed features: ", X.columns[~selected_features])
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

selected_data = X_[X.columns[selected_features]]
selected_data.to_parquet(os.path.join(data_folder, 'interim', loc, 'rfe_selected_data', loc+turbine+'_rfe_selected_data.parquet'))

Availability:  0.8705680984595322
Selected features: Index(['mvbc_WandelaarBuoy_10%_highest_waves',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarMeasuringpile_Tide_TAW',
       'mvbc_WandelaarMeasuringpile_Air_temperature', 'mean_NW2_C02_rpm',
       'mean_NW2_C02_yaw', 'mean_NW2_C02_pitch', 'mean_NW2_C02_power',
       'mean_NW2_C02_windspeed', 'mean_NW2_C02_winddirection'],
      dtype='object')
Removed features:  Index(['mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Air_pressure'],
      dtype='object')
Feature ranking: [1 1 1 4 1 5 3 6 1 2 1 1 1 1 1 1 1]
ordered features:  Index(['mvbc_WandelaarBuoy_10%_highest_wa

In [35]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = SS2_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_unique = y_[~y_.index.duplicated(keep='first')].dropna()
low_rpm_indices = X[X[X.filter(regex='rpm').columns[0]] <= 0].index
common_indices = y_unique.index.intersection(low_rpm_indices)
print('Availability: ', len(common_indices)/len(X_))

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=13)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", X.columns[selected_features])
print("Removed features: ", X.columns[~selected_features])
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

Availability:  0.040427092672589454
Selected features: Index(['mvbc_WandelaarBuoy_10%_highest_waves',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarMeasuringpile_Tide_TAW',
       'mvbc_WandelaarMeasuringpile_Air_pressure',
       'mvbc_WandelaarMeasuringpile_Air_temperature', 'mean_NW2_C02_rpm',
       'mean_NW2_C02_yaw', 'mean_NW2_C02_pitch', 'mean_NW2_C02_power',
       'mean_NW2_C02_windspeed', 'mean_NW2_C02_winddirection'],
      dtype='object')
Removed features:  Index(['mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)'],
      dtype='object')
Feature ranking: [1 1 1 4 1 5 2 3 1 1 1 1 1 1 1 1 1]
ordered features:  Index(['mvbc_WandelaarBuoy_10%_highest_

In [15]:
from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = FA1_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_unique = y_[~y_.index.duplicated(keep='first')].dropna()
print('Availability: ', len(y_unique)/len(X_))

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=5)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", selected_features)
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

Availability:  0.40684652375906755
Selected features: [False False False False False False False False False False False  True
  True  True False  True  True]
Feature ranking: [13 12  9  8  7 10 11  6  5  4  2  1  1  1  3  1  1]
ordered features:  Index(['mean_NW2_C02_winddirection', 'mean_NW2_C02_pitch', 'mean_NW2_C02_yaw',
       'mean_NW2_C02_rpm', 'mean_NW2_C02_windspeed',
       'mvbc_WandelaarMeasuringpile_Air_temperature', 'mean_NW2_C02_power',
       'mvbc_WandelaarMeasuringpile_Air_pressure',
       'mvbc_WandelaarMeasuringpile_Tide_TAW',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_10%

In [16]:

from sklearn.model_selection import train_test_split
X_ = pd.concat([mvbc_data, scada_data], axis=1)
#only keep columns of X with 90% or more non-nan values
X_ = X_.loc[:, X_.isna().sum()/len(X_) < 0.1]

y_ = FA2_dbscan['mean_frequency']
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Xy = pd.DataFrame(y_)
for col in X_.columns:
    Xy[col] = X_[col]
Xy = Xy.dropna()
y = Xy[y_.name]
X = Xy[X_.columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
regr_model = XGBRegressor()
selector = RFE(regr_model, n_features_to_select=5)  # Adjust the number of features
selector = selector.fit(X, y)

selected_features = selector.support_
ranking = selector.ranking_

print("Selected features:", selected_features)
print("Feature ranking:", ranking)
print('ordered features: ', X.columns[ranking.argsort()])

Selected features: [False False False False False False False False  True False False  True
 False  True  True  True False]
Feature ranking: [ 3  2 10 12  7 11  9 13  1  8  6  1  5  1  1  1  4]
ordered features:  Index(['mvbc_WandelaarMeasuringpile_Tide_TAW', 'mean_NW2_C02_power',
       'mean_NW2_C02_pitch', 'mean_NW2_C02_rpm', 'mean_NW2_C02_windspeed',
       'mvbc_WandelaarBuoy_Wave_height',
       'mvbc_WandelaarBuoy_10%_highest_waves', 'mean_NW2_C02_winddirection',
       'mean_NW2_C02_yaw', 'mvbc_WandelaarMeasuringpile_Air_temperature',
       'mvbc_WandelaarBuoy_Sea_water_temperature',
       'mvbc_WandelaarMeasuringpile_Air_pressure',
       'mvbc_WandelaarMeasuringpile_Average_wind_direction',
       'mvbc_WandelaarBuoy_Average_wave_period',
       'mvbc_WandelaarMeasuringpile_Max_3-seconds_wind_gust_(at_10_m_height)',
       'mvbc_WandelaarBuoy_Height_waves_with_period_>_10_s',
       'mvbc_WandelaarMeasuringpile_Average_wind_speed_(at_10_m_height)'],
      dtype='object')


In [17]:
#order the features based on ranking
X_transformed = X[:,ranking.argsort()]

InvalidIndexError: (slice(None, None, None), array([ 8, 14, 13, 11, 15,  1,  0, 16, 12, 10,  4,  9,  6,  2,  5,  3,  7],
      dtype=int64))