In [6]:
from clusterer import Clusterer, ClusterType
from model import Model
import pandas as pd 
import numpy as np
import scipy.stats as stats
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, balanced_accuracy_score


# clusterer = Clusterer(n_components=0.8)
# model = Model(spot_dif = False)
model = Model(n_components=3,cluster_model=ClusterType.gaussian_mixture)
clusterer = Clusterer(n_components=0.8)

len(clusterer.X_train) #== len(clusterer.y_train) # True

10587

In [12]:
clusterer.y_train.head()

Unnamed: 0_level_0,spot_id_delta
DELIVERY_START,Unnamed: 1_level_1
2022-01-01 19:00:00+00:00,0
2022-01-01 20:00:00+00:00,0
2022-01-01 21:00:00+00:00,0
2022-01-01 22:00:00+00:00,0
2022-01-01 23:00:00+00:00,0


In [7]:
classifier_dict = {
    'LogisticRegression': LogisticRegression(),
    'DecisionTreeClassifier': DecisionTreeClassifier(),
    'RandomForestClassifier': RandomForestClassifier()    
}

for classifier_name, classifier in classifier_dict.items():
    y_pred_test_list = []
    for c in range(model.n_cluster):
        X_train = model.X_train_list[c]
        X_test = model.X_test_list[c]
        y_train = model.y_train[model.y_train.index.isin(X_train.index)]
        y_test = model.y_test[model.y_test.index.isin(X_test.index)]
        
        classifier.fit(X_train, y_train)
        
        y_pred = classifier.predict(X_train)
        y_pred_test = classifier.predict(X_test)
        y_pred_test_list.append(y_pred_test)

    y_pred_test_all = np.concatenate(y_pred_test_list)

    print(f'{classifier_name}, balanced accuracy : {balanced_accuracy_score(y_pred_test_all, model.y_test)}')

LogisticRegression, balanced accuracy : 0.5177135970305305
DecisionTreeClassifier, balanced accuracy : 0.5010750774724324
RandomForestClassifier, balanced accuracy : 0.5176908774131908


In [2]:
model.X_train

Unnamed: 0_level_0,load_forecast,coal_power_available,gas_power_available,nuclear_power_available,wind_power_forecasts_average,solar_power_forecasts_average,wind_power_forecasts_std,solar_power_forecasts_std,Jan,Feb,...,spot_id_delta_lag_10,spot_id_delta_lag_11,spot_id_delta_lag_12,spot_id_delta_lag_13,spot_id_delta_lag_14,spot_id_delta_lag_15,spot_id_delta_lag_16,spot_id_delta_lag_17,spot_id_delta_lag_18,Cluster
DELIVERY_START,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-01 19:00:00+00:00,0.420998,1.418625,0.287611,1.285499,0.146087,-0.740530,0.512179,-0.588654,1,0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1
2022-01-01 20:00:00+00:00,0.249349,1.418625,0.287611,1.285499,0.188259,-0.740530,0.536946,-0.588654,1,0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1
2022-01-01 21:00:00+00:00,0.321520,1.418625,0.287611,1.285499,0.243114,-0.740530,0.881371,-0.588654,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1
2022-01-01 22:00:00+00:00,0.383194,1.418625,0.287611,1.285499,0.316676,-0.740530,1.406182,-0.588654,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1
2022-01-01 23:00:00+00:00,0.298485,-0.911772,0.287611,1.146007,0.414654,-0.740530,-0.213277,-0.588654,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-29 17:00:00+00:00,0.123492,1.418625,1.235345,0.356159,1.030106,-0.531416,0.823400,-0.434474,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2
2023-03-29 18:00:00+00:00,0.106216,1.418625,1.235345,0.356159,1.279331,-0.705517,0.172648,-0.538703,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2
2023-03-29 19:00:00+00:00,-0.119213,1.418625,1.235345,0.356159,1.525702,-0.740530,-0.032709,-0.588654,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2
2023-03-29 20:00:00+00:00,-0.140947,1.418625,1.235345,0.356159,1.690901,-0.740530,0.178857,-0.588654,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2


In [3]:
model.y_train

Unnamed: 0_level_0,spot_id_delta
DELIVERY_START,Unnamed: 1_level_1
2022-01-01 19:00:00+00:00,0
2022-01-01 20:00:00+00:00,0
2022-01-01 21:00:00+00:00,0
2022-01-01 22:00:00+00:00,0
2022-01-01 23:00:00+00:00,0
...,...
2023-03-29 17:00:00+00:00,1
2023-03-29 18:00:00+00:00,1
2023-03-29 19:00:00+00:00,1
2023-03-29 20:00:00+00:00,1
