In [1]:
from sklearnex import patch_sklearn

patch_sklearn()

import sys
sys.path.insert(0,'..')
import module as mod

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer, MaxAbsScaler, Normalizer, PowerTransformer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, HistGradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.metrics import fbeta_score, accuracy_score

import warnings

# we remove warnings during imports
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    from lightgbm import LGBMClassifier
    from xgboost import XGBClassifier
    from catboost import CatBoostClassifier

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
house1_power_blk2 = mod.load_dataset('house1_power_blk2_labels.zip')
house1_power_blk2.head()

Unnamed: 0_level_0,mains,activity,hour
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-03-06 00:00:00,473.0,0,0.0
2016-03-06 00:00:01,474.0,0,0.0
2016-03-06 00:00:02,476.0,0,0.0
2016-03-06 00:00:03,476.0,0,0.0
2016-03-06 00:00:04,475.0,0,0.0


In [3]:
house2_power_blk1 = mod.load_dataset('house2_power_blk1_labels.zip')
house2_power_blk1.head()

Unnamed: 0_level_0,mains,activity,hour
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-09-12 23:00:00,103.0,0,23.0
2017-09-12 23:00:01,101.0,0,23.0
2017-09-12 23:00:02,104.0,0,23.0
2017-09-12 23:00:03,104.0,0,23.0
2017-09-12 23:00:04,105.0,0,23.0


In [4]:
windows = ['1h', '10min']
features_col = ['mains', 'hour']

house1_power_blk2, _ = mod.generate_features(house1_power_blk2, window=windows)
house2_power_blk1, cols = mod.generate_features(house2_power_blk1, window=windows)
features_col += cols

In [5]:
def score_classif(X_train, X_test, y_train, y_test, para_classifier, para_scaler):

    scaler = para_scaler
    
    X_train_scaled = scaler.fit_transform(X_train)

    X_test_scaled = scaler.fit_transform(X_test)
    
    clf = para_classifier
    
    clf.fit(X_train_scaled, y_train.ravel())
    
    y_pred = clf.predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    fscore = fbeta_score(y_test, y_pred, beta=0.5)
    
    return acc, fscore

# Train house 1 Test house 2

In [8]:
# taux = 1
# seed = 8

# house1_power_blk2 = house1_power_blk2.sample(n=int(round(house1_power_blk2.shape[0] * taux,0)), random_state=seed)
# house2_power_blk1 = house2_power_blk1.sample(n=int(round(house2_power_blk1.shape[0] * taux,0)), random_state=seed)

# y_train = house1_power_blk2.iloc[:,-1]
# X_train = house1_power_blk2.iloc[:,:-1]

# y_test = house2_power_blk1.iloc[:,-1]
# X_test = house2_power_blk1.iloc[:,:-1]

sample_idx = mod.pick_random_indexes(house2_power_blk1, 0.3)
X_train = house1_power_blk2[features_col].values
X_test = house2_power_blk1.loc[sample_idx, features_col].values

y_train = house1_power_blk2['activity'].values
y_test = house2_power_blk1.loc[sample_idx, 'activity'].values

In [14]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), Normalizer(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "Normalizer", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [HistGradientBoostingClassifier(), LGBMClassifier(), RandomForestClassifier()]
liste_classifier_name = ["HistGradientBoostingClassifier", "LGBMClassifier", "RandomForestClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [15]:
liste_acc

{'MinMaxScaler et HistGradientBoostingClassifier': 0.8440951797385621,
 'MinMaxScaler et LGBMClassifier': 0.8514916938997822,
 'MinMaxScaler et RandomForestClassifier': 0.8547998366013072,
 'StandardScaler et HistGradientBoostingClassifier': 0.7050708061002179,
 'StandardScaler et LGBMClassifier': 0.6989501633986928,
 'StandardScaler et RandomForestClassifier': 0.6507339324618736,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7540482026143791,
 'QuantileTransformer et LGBMClassifier': 0.7553438180827887,
 'QuantileTransformer et RandomForestClassifier': 0.7367395152505447,
 'RobustScaler et HistGradientBoostingClassifier': 0.7648011982570806,
 'RobustScaler et LGBMClassifier': 0.763244825708061,
 'RobustScaler et RandomForestClassifier': 0.7703921568627451,
 'Normalizer et HistGradientBoostingClassifier': 0.6291040305010893,
 'Normalizer et LGBMClassifier': 0.7088289760348584,
 'Normalizer et RandomForestClassifier': 0.7087819989106754,
 'MaxAbsScaler et HistGradientBoost

In [16]:
liste_fscore

{'MinMaxScaler et HistGradientBoostingClassifier': 0.8121401801582453,
 'MinMaxScaler et LGBMClassifier': 0.8219016948447215,
 'MinMaxScaler et RandomForestClassifier': 0.8340407662895164,
 'StandardScaler et HistGradientBoostingClassifier': 0.6361273347925196,
 'StandardScaler et LGBMClassifier': 0.631026298429711,
 'StandardScaler et RandomForestClassifier': 0.5931941849733807,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.6861654513049217,
 'QuantileTransformer et LGBMClassifier': 0.6865894154071295,
 'QuantileTransformer et RandomForestClassifier': 0.6673272963623043,
 'RobustScaler et HistGradientBoostingClassifier': 0.695508938774974,
 'RobustScaler et LGBMClassifier': 0.6934989154341106,
 'RobustScaler et RandomForestClassifier': 0.6991586656310622,
 'Normalizer et HistGradientBoostingClassifier': 0.524556858050394,
 'Normalizer et LGBMClassifier': 0.6696972670554823,
 'Normalizer et RandomForestClassifier': 0.6693587306799059,
 'MaxAbsScaler et HistGradientBoostin

In [10]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), Normalizer(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "Normalizer", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [XGBClassifier(), CatBoostClassifier(silent=True, task_type="GPU")]
liste_classifier_name = ["XGBClassifier", "CatBoostClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore


0
1
2
3
4
5
6
7
8
9
10
11
12
13


In [11]:
liste_acc

{'MinMaxScaler et XGBClassifier': 0.8134749455337691,
 'MinMaxScaler et CatBoostClassifier': 0.7811335784313725,
 'StandardScaler et XGBClassifier': 0.7144042755991286,
 'StandardScaler et CatBoostClassifier': 0.6760940904139433,
 'QuantileTransformer et XGBClassifier': 0.7820390795206972,
 'QuantileTransformer et CatBoostClassifier': 0.7715550108932462,
 'RobustScaler et XGBClassifier': 0.7912956154684095,
 'RobustScaler et CatBoostClassifier': 0.787420343137255,
 'Normalizer et XGBClassifier': 0.6113630174291939,
 'Normalizer et CatBoostClassifier': 0.6268668300653595,
 'MaxAbsScaler et XGBClassifier': 0.7335348583877995,
 'MaxAbsScaler et CatBoostClassifier': 0.7493266612200435,
 'PowerTransformer et XGBClassifier': 0.7894226579520697,
 'PowerTransformer et CatBoostClassifier': 0.7784477124183007}

In [12]:
liste_fscore

{'MinMaxScaler et XGBClassifier': 0.835233281090945,
 'MinMaxScaler et CatBoostClassifier': 0.8180089600549149,
 'StandardScaler et XGBClassifier': 0.6840008655138086,
 'StandardScaler et CatBoostClassifier': 0.653311708644315,
 'QuantileTransformer et XGBClassifier': 0.7622208583065048,
 'QuantileTransformer et CatBoostClassifier': 0.7545398658663537,
 'RobustScaler et XGBClassifier': 0.7708506231401319,
 'RobustScaler et CatBoostClassifier': 0.7713279761344636,
 'Normalizer et XGBClassifier': 0.5624340260634351,
 'Normalizer et CatBoostClassifier': 0.5922106496303932,
 'MaxAbsScaler et XGBClassifier': 0.7668953881922085,
 'MaxAbsScaler et CatBoostClassifier': 0.8021692136160818,
 'PowerTransformer et XGBClassifier': 0.7647236786722291,
 'PowerTransformer et CatBoostClassifier': 0.75110451088703}

# Train house 2 Test house 1

In [13]:
sample_idx = mod.pick_random_indexes(house1_power_blk2, 0.3)
X_train = house2_power_blk1[features_col].values
X_test = house1_power_blk2.loc[sample_idx, features_col].values

y_train = house2_power_blk1['activity'].values
y_test = house1_power_blk2.loc[sample_idx, 'activity'].values

In [19]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), Normalizer(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "Normalizer", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [HistGradientBoostingClassifier(), LGBMClassifier(), RandomForestClassifier()]
liste_classifier_name = ["HistGradientBoostingClassifier", "LGBMClassifier", "RandomForestClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [20]:
liste_acc

{'MinMaxScaler et HistGradientBoostingClassifier': 0.7063895318930041,
 'MinMaxScaler et LGBMClassifier': 0.7069290123456791,
 'MinMaxScaler et RandomForestClassifier': 0.7077289094650205,
 'StandardScaler et HistGradientBoostingClassifier': 0.719363425925926,
 'StandardScaler et LGBMClassifier': 0.7191261574074074,
 'StandardScaler et RandomForestClassifier': 0.720270061728395,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7312345679012345,
 'QuantileTransformer et LGBMClassifier': 0.7384284979423869,
 'QuantileTransformer et RandomForestClassifier': 0.7325675154320987,
 'RobustScaler et HistGradientBoostingClassifier': 0.7314859825102881,
 'RobustScaler et LGBMClassifier': 0.7311541923868313,
 'RobustScaler et RandomForestClassifier': 0.7317071759259259,
 'Normalizer et HistGradientBoostingClassifier': 0.5456661522633744,
 'Normalizer et LGBMClassifier': 0.5461889146090535,
 'Normalizer et RandomForestClassifier': 0.506289866255144,
 'MaxAbsScaler et HistGradientBoostin

In [21]:
liste_fscore

{'MinMaxScaler et HistGradientBoostingClassifier': 0.6809289293296069,
 'MinMaxScaler et LGBMClassifier': 0.6815086476842543,
 'MinMaxScaler et RandomForestClassifier': 0.6818349069578855,
 'StandardScaler et HistGradientBoostingClassifier': 0.7247316997671822,
 'StandardScaler et LGBMClassifier': 0.724339953347861,
 'StandardScaler et RandomForestClassifier': 0.7213179470649934,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7342395948574117,
 'QuantileTransformer et LGBMClassifier': 0.742693394790157,
 'QuantileTransformer et RandomForestClassifier': 0.7371689816317258,
 'RobustScaler et HistGradientBoostingClassifier': 0.7289455561856923,
 'RobustScaler et LGBMClassifier': 0.7302692497633648,
 'RobustScaler et RandomForestClassifier': 0.7337013958757831,
 'Normalizer et HistGradientBoostingClassifier': 0.5781651011363234,
 'Normalizer et LGBMClassifier': 0.5784150332299832,
 'Normalizer et RandomForestClassifier': 0.5577797547009105,
 'MaxAbsScaler et HistGradientBoosti

In [14]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), Normalizer(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "Normalizer", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [XGBClassifier(), CatBoostClassifier(silent=True, task_type="GPU")]
liste_classifier_name = ["XGBClassifier", "CatBoostClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore


0
1
2
3
4
5
6
7
8
9
10
11
12
13


In [15]:
liste_acc

{'MinMaxScaler et XGBClassifier': 0.7234709362139917,
 'MinMaxScaler et CatBoostClassifier': 0.7374871399176954,
 'StandardScaler et XGBClassifier': 0.720318287037037,
 'StandardScaler et CatBoostClassifier': 0.7281597222222222,
 'QuantileTransformer et XGBClassifier': 0.7246965020576132,
 'QuantileTransformer et CatBoostClassifier': 0.7290560699588478,
 'RobustScaler et XGBClassifier': 0.7061779835390947,
 'RobustScaler et CatBoostClassifier': 0.7424942129629629,
 'Normalizer et XGBClassifier': 0.5339705504115226,
 'Normalizer et CatBoostClassifier': 0.5233416923868313,
 'MaxAbsScaler et XGBClassifier': 0.5116583076131688,
 'MaxAbsScaler et CatBoostClassifier': 0.5316460905349795,
 'PowerTransformer et XGBClassifier': 0.734295267489712,
 'PowerTransformer et CatBoostClassifier': 0.7448984053497942}

In [16]:
liste_fscore

{'MinMaxScaler et XGBClassifier': 0.6997200278848313,
 'MinMaxScaler et CatBoostClassifier': 0.7108581589430142,
 'StandardScaler et XGBClassifier': 0.731282222435983,
 'StandardScaler et CatBoostClassifier': 0.7446749949539854,
 'QuantileTransformer et XGBClassifier': 0.7368801758771993,
 'QuantileTransformer et CatBoostClassifier': 0.744541295852371,
 'RobustScaler et XGBClassifier': 0.7064063472617004,
 'RobustScaler et CatBoostClassifier': 0.748751391655514,
 'Normalizer et XGBClassifier': 0.5774966679478688,
 'Normalizer et CatBoostClassifier': 0.5720275609646628,
 'MaxAbsScaler et XGBClassifier': 0.5662402656125217,
 'MaxAbsScaler et CatBoostClassifier': 0.5765105394791121,
 'PowerTransformer et XGBClassifier': 0.7405407233754469,
 'PowerTransformer et CatBoostClassifier': 0.7549722633863585}

# Train house 2 Test house 1 - la nuit

In [6]:
#sample_idx = mod.pick_random_indexes(house1_power_blk2, 0.3)
X_train = house2_power_blk1[features_col].between_time('22:00', '5:59:59').values
X_test = house1_power_blk2[features_col].between_time('22:00', '5:59:59').values

y_train = house2_power_blk1['activity'].between_time('22:00', '5:59:59').values
y_test = house1_power_blk2['activity'].between_time('22:00', '5:59:59').values

In [19]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [HistGradientBoostingClassifier(), LGBMClassifier(), CatBoostClassifier(silent=True, task_type="GPU"), RandomForestClassifier()]
liste_classifier_name = ["HistGradientBoostingClassifier", "LGBMClassifier", "CatBoostClassifier", "RandomForestClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


In [20]:
liste_acc

{'MinMaxScaler et HistGradientBoostingClassifier': 0.7755682571239231,
 'MinMaxScaler et LGBMClassifier': 0.7785431853324497,
 'MinMaxScaler et CatBoostClassifier': 0.7792732493925337,
 'MinMaxScaler et RandomForestClassifier': 0.7650276121051469,
 'StandardScaler et HistGradientBoostingClassifier': 0.8466263529931521,
 'StandardScaler et LGBMClassifier': 0.8508554230174509,
 'StandardScaler et CatBoostClassifier': 0.8513193063839187,
 'StandardScaler et RandomForestClassifier': 0.8416478904351667,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.8374988955157942,
 'QuantileTransformer et LGBMClassifier': 0.8394908327810913,
 'QuantileTransformer et CatBoostClassifier': 0.8472542522641926,
 'QuantileTransformer et RandomForestClassifier': 0.845187210072896,
 'RobustScaler et HistGradientBoostingClassifier': 0.8371068036227082,
 'RobustScaler et LGBMClassifier': 0.838478573006406,
 'RobustScaler et CatBoostClassifier': 0.8419864148442677,
 'RobustScaler et RandomForestClassif

In [21]:
liste_fscore

{'MinMaxScaler et HistGradientBoostingClassifier': 0.46157576546113954,
 'MinMaxScaler et LGBMClassifier': 0.46413560695897566,
 'MinMaxScaler et CatBoostClassifier': 0.46467071736573895,
 'MinMaxScaler et RandomForestClassifier': 0.4562849415998258,
 'StandardScaler et HistGradientBoostingClassifier': 0.5408439879829525,
 'StandardScaler et LGBMClassifier': 0.5527071077277022,
 'StandardScaler et CatBoostClassifier': 0.5533851346864055,
 'StandardScaler et RandomForestClassifier': 0.5236542038057653,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.4723677308082771,
 'QuantileTransformer et LGBMClassifier': 0.480555744674413,
 'QuantileTransformer et CatBoostClassifier': 0.5253379408993228,
 'QuantileTransformer et RandomForestClassifier': 0.5040513867878875,
 'RobustScaler et HistGradientBoostingClassifier': 0.5197237478017493,
 'RobustScaler et LGBMClassifier': 0.5247265283439255,
 'RobustScaler et CatBoostClassifier': 0.5358798892184559,
 'RobustScaler et RandomForestCla

In [7]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [XGBClassifier()]
liste_classifier_name = ["XGBClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [8]:
liste_acc

{'MinMaxScaler et XGBClassifier': 0.7757052131654517,
 'StandardScaler et XGBClassifier': 0.8385448420587586,
 'QuantileTransformer et XGBClassifier': 0.8355914512922465,
 'RobustScaler et XGBClassifier': 0.8399972387894853,
 'MaxAbsScaler et XGBClassifier': 0.6235266180693616,
 'PowerTransformer et XGBClassifier': 0.8593698917605478}

In [9]:
liste_fscore

{'MinMaxScaler et XGBClassifier': 0.4604027020780053,
 'StandardScaler et XGBClassifier': 0.5163917759636159,
 'QuantileTransformer et XGBClassifier': 0.46674696602167154,
 'RobustScaler et XGBClassifier': 0.5172457009284219,
 'MaxAbsScaler et XGBClassifier': 0.3529603154281402,
 'PowerTransformer et XGBClassifier': 0.5720439089421016}

# Train house 2 Test house 1 - le jour

In [13]:
X_train = house2_power_blk1[features_col].between_time('6:00:00', '21:59:59').values
X_test = house1_power_blk2[features_col].between_time('6:00:00', '21:59:59').values

y_train = house2_power_blk1['activity'].between_time('6:00:00', '21:59:59').values
y_test = house1_power_blk2['activity'].between_time('6:00:00', '21:59:59').values

In [23]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [HistGradientBoostingClassifier(), LGBMClassifier(), CatBoostClassifier(silent=True, task_type="GPU"), RandomForestClassifier()]
liste_classifier_name = ["HistGradientBoostingClassifier", "LGBMClassifier", "CatBoostClassifier", "RandomForestClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


In [24]:
liste_acc

{'MinMaxScaler et HistGradientBoostingClassifier': 0.7429351300705468,
 'MinMaxScaler et LGBMClassifier': 0.7449178791887126,
 'MinMaxScaler et CatBoostClassifier': 0.7482123567019401,
 'MinMaxScaler et RandomForestClassifier': 0.7370141644620811,
 'StandardScaler et HistGradientBoostingClassifier': 0.6849366181657849,
 'StandardScaler et LGBMClassifier': 0.6883074294532627,
 'StandardScaler et CatBoostClassifier': 0.6947233245149912,
 'StandardScaler et RandomForestClassifier': 0.6935918209876544,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7195342813051147,
 'QuantileTransformer et LGBMClassifier': 0.7234884810405644,
 'QuantileTransformer et CatBoostClassifier': 0.7274765762786596,
 'QuantileTransformer et RandomForestClassifier': 0.7245623897707231,
 'RobustScaler et HistGradientBoostingClassifier': 0.7268802358906525,
 'RobustScaler et LGBMClassifier': 0.7225496031746032,
 'RobustScaler et CatBoostClassifier': 0.7174922839506173,
 'RobustScaler et RandomForestClass

In [25]:
liste_fscore

{'MinMaxScaler et HistGradientBoostingClassifier': 0.7625104978537917,
 'MinMaxScaler et LGBMClassifier': 0.7638646880132409,
 'MinMaxScaler et CatBoostClassifier': 0.7654954366372029,
 'MinMaxScaler et RandomForestClassifier': 0.756357956292054,
 'StandardScaler et HistGradientBoostingClassifier': 0.7666021272726102,
 'StandardScaler et LGBMClassifier': 0.7699521884323878,
 'StandardScaler et CatBoostClassifier': 0.7725092324993698,
 'StandardScaler et RandomForestClassifier': 0.7704929933067929,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7796074739737792,
 'QuantileTransformer et LGBMClassifier': 0.7813326718195938,
 'QuantileTransformer et CatBoostClassifier': 0.7828741454799342,
 'QuantileTransformer et RandomForestClassifier': 0.7820485470497428,
 'RobustScaler et HistGradientBoostingClassifier': 0.7808990046915345,
 'RobustScaler et LGBMClassifier': 0.7791779536500574,
 'RobustScaler et CatBoostClassifier': 0.77647965162433,
 'RobustScaler et RandomForestClassifi

In [14]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [XGBClassifier()]
liste_classifier_name = ["XGBClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [15]:
liste_acc

{'MinMaxScaler et XGBClassifier': 0.7306024029982363,
 'StandardScaler et XGBClassifier': 0.6949112654320988,
 'QuantileTransformer et XGBClassifier': 0.7214169973544974,
 'RobustScaler et XGBClassifier': 0.7241785163139329,
 'MaxAbsScaler et XGBClassifier': 0.6421717923280423,
 'PowerTransformer et XGBClassifier': 0.7222996582892416}

In [16]:
liste_fscore

{'MinMaxScaler et XGBClassifier': 0.7534933851908397,
 'StandardScaler et XGBClassifier': 0.7713087231355154,
 'QuantileTransformer et XGBClassifier': 0.7780869147621464,
 'RobustScaler et XGBClassifier': 0.7689091889471087,
 'MaxAbsScaler et XGBClassifier': 0.6880087327440668,
 'PowerTransformer et XGBClassifier': 0.7783243224160586}

# Train house 1 Test house 2 - la nuit

In [17]:
X_train = house1_power_blk2[features_col].between_time('22:00', '5:59:59').values
X_test = house2_power_blk1[features_col].between_time('22:00', '5:59:59').values

y_train = house1_power_blk2['activity'].between_time('22:00', '5:59:59').values
y_test = house2_power_blk1['activity'].between_time('22:00', '5:59:59').values

In [27]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [HistGradientBoostingClassifier(), LGBMClassifier(), CatBoostClassifier(silent=True, task_type="GPU"), RandomForestClassifier()]
liste_classifier_name = ["HistGradientBoostingClassifier", "LGBMClassifier", "CatBoostClassifier", "RandomForestClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


In [28]:
liste_acc

{'MinMaxScaler et HistGradientBoostingClassifier': 0.9166061780596664,
 'MinMaxScaler et LGBMClassifier': 0.9252249236551562,
 'MinMaxScaler et CatBoostClassifier': 0.9053993422598073,
 'MinMaxScaler et RandomForestClassifier': 0.9196200375851539,
 'StandardScaler et HistGradientBoostingClassifier': 0.816869274136716,
 'StandardScaler et LGBMClassifier': 0.8411392999765093,
 'StandardScaler et CatBoostClassifier': 0.8365744655860935,
 'StandardScaler et RandomForestClassifier': 0.8671728917077755,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.8683168898285177,
 'QuantileTransformer et LGBMClassifier': 0.8639323467230444,
 'QuantileTransformer et CatBoostClassifier': 0.8681424712238666,
 'QuantileTransformer et RandomForestClassifier': 0.878231148696265,
 'RobustScaler et HistGradientBoostingClassifier': 0.7583732675593141,
 'RobustScaler et LGBMClassifier': 0.7754833215879727,
 'RobustScaler et CatBoostClassifier': 0.8121535118628141,
 'RobustScaler et RandomForestClassif

In [29]:
liste_fscore

{'MinMaxScaler et HistGradientBoostingClassifier': 0.7708107112067547,
 'MinMaxScaler et LGBMClassifier': 0.8028095652058582,
 'MinMaxScaler et CatBoostClassifier': 0.7182742430404081,
 'MinMaxScaler et RandomForestClassifier': 0.7853189924737722,
 'StandardScaler et HistGradientBoostingClassifier': 0.4572987104880201,
 'StandardScaler et LGBMClassifier': 0.5019724217504724,
 'StandardScaler et CatBoostClassifier': 0.4940709436661346,
 'StandardScaler et RandomForestClassifier': 0.5593614749622756,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.5461864854235248,
 'QuantileTransformer et LGBMClassifier': 0.5301416972059387,
 'QuantileTransformer et CatBoostClassifier': 0.5464745895069975,
 'QuantileTransformer et RandomForestClassifier': 0.5781594501779438,
 'RobustScaler et HistGradientBoostingClassifier': 0.17964744692989848,
 'RobustScaler et LGBMClassifier': 0.18668869652027906,
 'RobustScaler et CatBoostClassifier': 0.34434496364808626,
 'RobustScaler et RandomForestCl

In [18]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [XGBClassifier()]
liste_classifier_name = ["XGBClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [19]:
liste_acc

{'MinMaxScaler et XGBClassifier': 0.9176755931407095,
 'StandardScaler et XGBClassifier': 0.8749113225276016,
 'QuantileTransformer et XGBClassifier': 0.8737726098191214,
 'RobustScaler et XGBClassifier': 0.7907634484378671,
 'MaxAbsScaler et XGBClassifier': 0.8894373972280949,
 'PowerTransformer et XGBClassifier': 0.8781688982851773}

In [20]:
liste_fscore

{'MinMaxScaler et XGBClassifier': 0.7717192328095166,
 'StandardScaler et XGBClassifier': 0.580775688971354,
 'QuantileTransformer et XGBClassifier': 0.5669723982072904,
 'RobustScaler et XGBClassifier': 0.24598513127204774,
 'MaxAbsScaler et XGBClassifier': 0.6155761168527685,
 'PowerTransformer et XGBClassifier': 0.5880605423874302}

# Train house 1 Test house 2 - le jour

In [21]:
X_train = house1_power_blk2[features_col].between_time('6:00:00', '21:59:59').values
X_test = house2_power_blk1[features_col].between_time('6:00:00', '21:59:59').values

y_train = house1_power_blk2['activity'].between_time('6:00:00', '21:59:59').values
y_test = house2_power_blk1['activity'].between_time('6:00:00', '21:59:59').values

In [31]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [HistGradientBoostingClassifier(), LGBMClassifier(), CatBoostClassifier(silent=True, task_type="GPU"), RandomForestClassifier()]
liste_classifier_name = ["HistGradientBoostingClassifier", "LGBMClassifier", "CatBoostClassifier", "RandomForestClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


In [32]:
liste_acc

{'MinMaxScaler et HistGradientBoostingClassifier': 0.764891713747646,
 'MinMaxScaler et LGBMClassifier': 0.7713503413370998,
 'MinMaxScaler et CatBoostClassifier': 0.7681126412429379,
 'MinMaxScaler et RandomForestClassifier': 0.749752824858757,
 'StandardScaler et HistGradientBoostingClassifier': 0.6346739642184558,
 'StandardScaler et LGBMClassifier': 0.6299585098870056,
 'StandardScaler et CatBoostClassifier': 0.6127954331450094,
 'StandardScaler et RandomForestClassifier': 0.6231323564030132,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7132950800376647,
 'QuantileTransformer et LGBMClassifier': 0.7170759769303201,
 'QuantileTransformer et CatBoostClassifier': 0.7104940560263654,
 'QuantileTransformer et RandomForestClassifier': 0.7038297434086629,
 'RobustScaler et HistGradientBoostingClassifier': 0.7207582980225988,
 'RobustScaler et LGBMClassifier': 0.7296304143126177,
 'RobustScaler et CatBoostClassifier': 0.716761711393597,
 'RobustScaler et RandomForestClassifi

In [33]:
liste_fscore

{'MinMaxScaler et HistGradientBoostingClassifier': 0.8085443186716468,
 'MinMaxScaler et LGBMClassifier': 0.8196958564335493,
 'MinMaxScaler et CatBoostClassifier': 0.8140438895915348,
 'MinMaxScaler et RandomForestClassifier': 0.7845511731902404,
 'StandardScaler et HistGradientBoostingClassifier': 0.6708001540999592,
 'StandardScaler et LGBMClassifier': 0.6677195595116115,
 'StandardScaler et CatBoostClassifier': 0.656883049091942,
 'StandardScaler et RandomForestClassifier': 0.6631337201258547,
 'QuantileTransformer et HistGradientBoostingClassifier': 0.7403134433463067,
 'QuantileTransformer et LGBMClassifier': 0.7434585143560952,
 'QuantileTransformer et CatBoostClassifier': 0.736172583773913,
 'QuantileTransformer et RandomForestClassifier': 0.7284937711863506,
 'RobustScaler et HistGradientBoostingClassifier': 0.7384730049169776,
 'RobustScaler et LGBMClassifier': 0.7477767725739103,
 'RobustScaler et CatBoostClassifier': 0.7359075882964827,
 'RobustScaler et RandomForestClassif

In [22]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

liste_classifier = [XGBClassifier()]
liste_classifier_name = ["XGBClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [23]:
liste_acc

{'MinMaxScaler et XGBClassifier': 0.7513159133709981,
 'StandardScaler et XGBClassifier': 0.630129472693032,
 'QuantileTransformer et XGBClassifier': 0.7069859345574387,
 'RobustScaler et XGBClassifier': 0.7169367937853107,
 'MaxAbsScaler et XGBClassifier': 0.7165510240112994,
 'PowerTransformer et XGBClassifier': 0.7146074623352165}

In [24]:
liste_fscore

{'MinMaxScaler et XGBClassifier': 0.7892024133048149,
 'StandardScaler et XGBClassifier': 0.6684986785313761,
 'QuantileTransformer et XGBClassifier': 0.7348442063728245,
 'RobustScaler et XGBClassifier': 0.7365813937707809,
 'MaxAbsScaler et XGBClassifier': 0.7899286319463854,
 'PowerTransformer et XGBClassifier': 0.7378816727729717}

# voting Train house 1 Test house 2

In [34]:
sample_idx = mod.pick_random_indexes(house2_power_blk1, 0.5)
X_train = house1_power_blk2[features_col].values
X_test = house2_power_blk1.loc[sample_idx, features_col].values

y_train = house1_power_blk2['activity'].values
y_test = house2_power_blk1.loc[sample_idx, 'activity'].values

In [35]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), Normalizer(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "Normalizer", "MaxAbsScaler", "PowerTransformer"]

clf1 = XGBClassifier()
clf2 = HistGradientBoostingClassifier()
clf3 = LGBMClassifier()
clf4 = CatBoostClassifier(silent=True, task_type="GPU")
clf5 = RandomForestClassifier()

liste_classifier = [VotingClassifier(estimators=[('XGB', clf1), ('HGB', clf2), ('LGBM', clf3), ('CBC', clf4), ('RFC', clf5)], voting='soft')] # weights=[0.1, 0.3, 0.3, 0.3]
liste_classifier_name = ["VotingClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore


0
1
2
3
4
5
6


In [36]:
liste_acc

{'MinMaxScaler et VotingClassifier': 0.830294540229885,
 'StandardScaler et VotingClassifier': 0.6933030012771392,
 'QuantileTransformer et VotingClassifier': 0.7570518039591315,
 'RobustScaler et VotingClassifier': 0.7756042464878672,
 'Normalizer et VotingClassifier': 0.6652282886334611,
 'MaxAbsScaler et VotingClassifier': 0.7975566730523627,
 'PowerTransformer et VotingClassifier': 0.7631142241379311}

In [37]:
liste_fscore

{'MinMaxScaler et VotingClassifier': 0.8184569944074453,
 'StandardScaler et VotingClassifier': 0.6420492878912878,
 'QuantileTransformer et VotingClassifier': 0.7079036915230363,
 'RobustScaler et VotingClassifier': 0.7261750158620066,
 'Normalizer et VotingClassifier': 0.6056305390504423,
 'MaxAbsScaler et VotingClassifier': 0.8197215837147419,
 'PowerTransformer et VotingClassifier': 0.7104368654259119}

# voting Train house 2 Test house 1

In [38]:
sample_idx = mod.pick_random_indexes(house1_power_blk2, 0.5)
X_train = house2_power_blk1[features_col].values
X_test = house1_power_blk2.loc[sample_idx, features_col].values

y_train = house2_power_blk1['activity'].values
y_test = house1_power_blk2.loc[sample_idx, 'activity'].values

In [39]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), Normalizer(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "Normalizer", "MaxAbsScaler", "PowerTransformer"]

clf1 = XGBClassifier()
clf2 = HistGradientBoostingClassifier()
clf3 = LGBMClassifier()
clf4 = CatBoostClassifier(silent=True, task_type="GPU")
clf5 = RandomForestClassifier()

liste_classifier = [VotingClassifier(estimators=[('XGB', clf1), ('HGB', clf2), ('LGBM', clf3), ('CBC', clf4), ('RFC', clf5)], voting='soft')]
liste_classifier_name = ["VotingClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5
6


In [40]:
liste_acc

{'MinMaxScaler et VotingClassifier': 0.6889859617682198,
 'StandardScaler et VotingClassifier': 0.7258445340501792,
 'QuantileTransformer et VotingClassifier': 0.7386783154121864,
 'RobustScaler et VotingClassifier': 0.7412335722819594,
 'Normalizer et VotingClassifier': 0.4745852001194743,
 'MaxAbsScaler et VotingClassifier': 0.46290770609319,
 'PowerTransformer et VotingClassifier': 0.739195788530466}

In [41]:
liste_fscore

{'MinMaxScaler et VotingClassifier': 0.6511639717947486,
 'StandardScaler et VotingClassifier': 0.7059071646870434,
 'QuantileTransformer et VotingClassifier': 0.7169307223766564,
 'RobustScaler et VotingClassifier': 0.7133822388807313,
 'Normalizer et VotingClassifier': 0.5237477426055831,
 'MaxAbsScaler et VotingClassifier': 0.5186120829689651,
 'PowerTransformer et VotingClassifier': 0.7152614954050218}

# voting Train house 2 Test house 1 - la nuit

In [26]:
X_train = house2_power_blk1[features_col].between_time('22:00', '5:59:59').values
X_test = house1_power_blk2[features_col].between_time('22:00', '5:59:59').values

y_train = house2_power_blk1['activity'].between_time('22:00', '5:59:59').values
y_test = house1_power_blk2['activity'].between_time('22:00', '5:59:59').values

In [28]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

clf1 = XGBClassifier()
clf2 = HistGradientBoostingClassifier()
clf3 = LGBMClassifier()
clf4 = CatBoostClassifier(silent=True, task_type="GPU")
clf5 = RandomForestClassifier()

liste_classifier = [VotingClassifier(estimators=[('XGB', clf1), ('HGB', clf2), ('LGBM', clf3), ('CBC', clf4), ('RFC', clf5)], voting='soft')]
liste_classifier_name = ["VotingClassifier"]

liste_acc = dict()
liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [29]:
liste_acc

{'MinMaxScaler et VotingClassifier': 0.7763971725204329,
 'StandardScaler et VotingClassifier': 0.8479649878506738,
 'QuantileTransformer et VotingClassifier': 0.8454793461453501,
 'RobustScaler et VotingClassifier': 0.8487911420366688,
 'MaxAbsScaler et VotingClassifier': 0.5299447757897062,
 'PowerTransformer et VotingClassifier': 0.8595195493704441}

In [30]:
liste_fscore

{'MinMaxScaler et VotingClassifier': 0.4620765804738672,
 'StandardScaler et VotingClassifier': 0.5435950607303583,
 'QuantileTransformer et VotingClassifier': 0.5074754746065835,
 'RobustScaler et VotingClassifier': 0.5506125648396387,
 'MaxAbsScaler et VotingClassifier': 0.30865734264595485,
 'PowerTransformer et VotingClassifier': 0.5771114614701092}

# voting Train house 2 Test house 1 - le jour

In [31]:
X_train = house2_power_blk1[features_col].between_time('6:00:00', '21:59:59').values
X_test = house1_power_blk2[features_col].between_time('6:00:00', '21:59:59').values

y_train = house2_power_blk1['activity'].between_time('6:00:00', '21:59:59').values
y_test = house1_power_blk2['activity'].between_time('6:00:00', '21:59:59').values

In [32]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

clf1 = XGBClassifier()
clf2 = HistGradientBoostingClassifier()
clf3 = LGBMClassifier()
clf4 = CatBoostClassifier(silent=True, task_type="GPU")
clf5 = RandomForestClassifier()

liste_classifier = [VotingClassifier(estimators=[('XGB', clf1), ('HGB', clf2), ('LGBM', clf3), ('CBC', clf4), ('RFC', clf5)], voting='soft')]
liste_classifier_name = ["VotingClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [35]:
liste_acc

{'MinMaxScaler et VotingClassifier': 0.7451496362433863,
 'StandardScaler et VotingClassifier': 0.6927515983245149,
 'QuantileTransformer et VotingClassifier': 0.7275256283068783,
 'RobustScaler et VotingClassifier': 0.727405753968254,
 'MaxAbsScaler et VotingClassifier': 0.6397395833333334,
 'PowerTransformer et VotingClassifier': 0.726189098324515}

In [36]:
liste_fscore

{'MinMaxScaler et VotingClassifier': 0.7636921243699937,
 'StandardScaler et VotingClassifier': 0.7714560151262629,
 'QuantileTransformer et VotingClassifier': 0.7834539249432791,
 'RobustScaler et VotingClassifier': 0.7802066647848728,
 'MaxAbsScaler et VotingClassifier': 0.6861547937766947,
 'PowerTransformer et VotingClassifier': 0.7821157143742284}

# voting Train house 1 Test house 2 - la nuit

In [37]:
X_train = house1_power_blk2[features_col].between_time('22:00', '5:59:59').values
X_test = house2_power_blk1[features_col].between_time('22:00', '5:59:59').values

y_train = house1_power_blk2['activity'].between_time('22:00', '5:59:59').values
y_test = house2_power_blk1['activity'].between_time('22:00', '5:59:59').values

In [38]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

clf1 = XGBClassifier()
clf2 = HistGradientBoostingClassifier()
clf3 = LGBMClassifier()
clf4 = CatBoostClassifier(silent=True, task_type="GPU")
clf5 = RandomForestClassifier()

liste_classifier = [VotingClassifier(estimators=[('XGB', clf1), ('HGB', clf2), ('LGBM', clf3), ('CBC', clf4), ('RFC', clf5)], voting='soft')]
liste_classifier_name = ["VotingClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [41]:
liste_acc

{'MinMaxScaler et VotingClassifier': 0.9185682405449848,
 'StandardScaler et VotingClassifier': 0.8645865633074935,
 'QuantileTransformer et VotingClassifier': 0.8746570354709889,
 'RobustScaler et VotingClassifier': 0.7863195912614517,
 'MaxAbsScaler et VotingClassifier': 0.8797304439746301,
 'PowerTransformer et VotingClassifier': 0.8795730561428236}

In [42]:
liste_fscore

{'MinMaxScaler et VotingClassifier': 0.7815742540554963,
 'StandardScaler et VotingClassifier': 0.5546468184063456,
 'QuantileTransformer et VotingClassifier': 0.567841030479163,
 'RobustScaler et VotingClassifier': 0.21252512636365406,
 'MaxAbsScaler et VotingClassifier': 0.5217937175652159,
 'PowerTransformer et VotingClassifier': 0.5925568020954977}

# voting Train house 1 Test house 2 - le jour

In [43]:
X_train = house1_power_blk2[features_col].between_time('6:00:00', '21:59:59').values
X_test = house2_power_blk1[features_col].between_time('6:00:00', '21:59:59').values

y_train = house1_power_blk2['activity'].between_time('6:00:00', '21:59:59').values
y_test = house2_power_blk1['activity'].between_time('6:00:00', '21:59:59').values

In [44]:
liste_scaler = [MinMaxScaler(), StandardScaler(), QuantileTransformer(), RobustScaler(), MaxAbsScaler(), PowerTransformer()]
liste_scaler_name = ["MinMaxScaler", "StandardScaler", "QuantileTransformer", "RobustScaler", "MaxAbsScaler", "PowerTransformer"]

clf1 = XGBClassifier()
clf2 = HistGradientBoostingClassifier()
clf3 = LGBMClassifier()
clf4 = CatBoostClassifier(silent=True, task_type="GPU")
clf5 = RandomForestClassifier()

liste_classifier = [VotingClassifier(estimators=[('XGB', clf1), ('HGB', clf2), ('LGBM', clf3), ('CBC', clf4), ('RFC', clf5)], voting='soft')]
liste_classifier_name = ["VotingClassifier"]

liste_acc = dict()

liste_fscore = dict()

n=0
    
for i in range(len(liste_scaler_name)):
    for j in range(len(liste_classifier_name)):
        print(n)
        n+=1
        acc, fscore = score_classif(X_train, X_test, y_train, y_test, liste_classifier[j], liste_scaler[i])
        
        liste_acc[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = acc
        liste_fscore[liste_scaler_name[i]+" et "+ liste_classifier_name[j]] = fscore

0
1
2
3
4
5


In [45]:
liste_acc

{'MinMaxScaler et VotingClassifier': 0.7712611817325801,
 'StandardScaler et VotingClassifier': 0.6233998352165725,
 'QuantileTransformer et VotingClassifier': 0.7104625706214689,
 'RobustScaler et VotingClassifier': 0.723065266007533,
 'MaxAbsScaler et VotingClassifier': 0.7345789194915254,
 'PowerTransformer et VotingClassifier': 0.7093888300376647}

In [46]:
liste_fscore

{'MinMaxScaler et VotingClassifier': 0.8136049628603329,
 'StandardScaler et VotingClassifier': 0.6633497395650044,
 'QuantileTransformer et VotingClassifier': 0.7363358496993448,
 'RobustScaler et VotingClassifier': 0.7403103494269255,
 'MaxAbsScaler et VotingClassifier': 0.82162510054178,
 'PowerTransformer et VotingClassifier': 0.7310306479929558}