# Random Forests for waiting times prediction

In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
from collections import Counter
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn import metrics
from sklearn.metrics import classification_report
from tqdm import tqdm
from statistics import mean
import math
from statistics import mean
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import recall_score, accuracy_score

from sklearn.ensemble import RandomForestClassifier

In [2]:
hp_oHe = pd.read_csv('HP_OHE_3class.csv')
hp_oHe.drop(hp_oHe.tail(17).index,inplace=True) 
hp_oHe = hp_oHe.drop('Unnamed: 0',axis=1)

hp_ME = pd.read_csv("harryPotterClean.csv")
hp_ME.drop(hp_ME.tail(17).index,inplace=True) 
hp_ME = hp_ME.drop('Unnamed: 0',axis=1)

hp_OE = pd.read_csv("harryPotterCleanOE.csv")
hp_OE.drop(hp_OE.tail(17).index,inplace=True) 
hp_OE = hp_OE.drop('Unnamed: 0',axis=1)

In [3]:
def getXandY(df):
    df.drop(df.tail(20).index,inplace=True) 
    x = df.drop(['HP_Forbidden_clean'],axis=1)
    y = df.HP_Forbidden_clean
    return(x,y)

def trainTest(x,y):
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30, shuffle=True)
    return(X_train, X_test, y_train, y_test)

#Function to perform oversampling
def overSampling(X_train, y_train, y_test, method):
    X_train_os, y_train_os= method.fit_resample(X_train, y_train)
    # Check the number of records after over sampling
    #print(sorted(Counter(y_train_os).items())) 
    return(X_train_os, y_train_os)

smote = SMOTE(random_state=42)


def testModel(df,var_order,n_vars,n_loops,method):
    highest = 0
    for j in tqdm(range(1,n_vars)):
        #split our dataframe into X and Y
        x,y=getXandY(df)
        #create the lists to store metrics
        acc = []
        rec = []
        preci = []
        f1 = []
        for i in range(n_loops):
            #split the dataFrame into test and train
            X_train, X_test, y_train, y_test = trainTest(x,y)
            #Oversample the train dataset with SMOTE
            X_train_os, y_train_os=overSampling(X_train, y_train, y_test, smote)
            #define the variables order 
            X_train_os_r = X_train_os[var_order]
            X_test_r = X_test[var_order]
            df1= X_train_os_r.iloc[:, 0:j] #use only part of the variables
            
            #create and train decision trees
            rnd_clf = RandomForestClassifier(n_jobs=-1)
            rnd_clf.fit(df1, y_train_os)
        
            y_pred=rnd_clf.predict(X_test_r.iloc[:, 0:j])
            ac=metrics.accuracy_score(y_test, y_pred)
            acc.append(ac)
            p=metrics.precision_score(y_test, y_pred,average='macro')
            preci.append(p)
            r=metrics.recall_score(y_test, y_pred,average='macro')
            rec.append(r)
            f=metrics.f1_score(y_test, y_pred, average='macro')
            f1.append(f)
        print(df1.columns)
        print("For {} features: \n Accuracy: {} \n Precision: {} \n Recall: {} \n F1 score: {}".format(
        j,mean(acc),mean(preci),mean(rec),mean(f1)))
        
        if mean(acc)>highest:
            highest = mean(acc)
            best = "best accuracy = {}, with {} features, with {}".format(mean(acc),j,method)
        print(best)
        #print(classification_report(y_test, y_pred))
    print(best)
        
def analizeDF(df,order,n_vars,n_loops):
    for i in range(len(order)):
        print('------------------------- Analyzing method {} -------------------------'.format(method[i]))
        print('The variable order is: \n {}'.format(order[i]))
        testModel(df,order[i],n_vars,n_loops,method[i])
        print('\n \n')

## One hot encoding

In [4]:
#Variable order in one hot encoding:
pear_corrO = ['temperature',	'holiday',	'day',	'month',	'pressure',	'dayOfTheWeek',	'shower rain',	'broken clouds',	'fog',	'overcast clouds',	'heavy intensity rain',	'minute',	'haze',	'thunderstorm with light rain',	'scattered clouds',	'clear sky',	'mist',	'light intensity drizzle',	'few clouds',	'thunderstorm',	'very heavy rain',	'moderate rain',	'thunderstorm with rain',	'year',	'light rain',	'humidity',	'hour',	'Pandemic']
kend_corrO = ['temperature',	'holiday',	'month',	'day',	'pressure',	'dayOfTheWeek',	'shower rain',	'broken clouds',	'heavy intensity rain',	'fog',	'year',	'overcast clouds',	'minute',	'haze',	'thunderstorm with light rain',	'mist',	'light intensity drizzle',	'scattered clouds',	'clear sky',	'few clouds',	'thunderstorm',	'very heavy rain',	'moderate rain',	'thunderstorm with rain',	'light rain',	'humidity',	'hour',	'Pandemic']
mutInf_classO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'moderate rain',	'mist',	'overcast clouds',	'clear sky',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',	'very heavy rain',	'fog',	'haze',	'thunderstorm with light rain',	'light intensity drizzle']
mutInf_regO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'moderate rain',	'mist',	'overcast clouds',	'clear sky',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',	'very heavy rain',	'fog',	'haze',	'thunderstorm with light rain',	'light intensity drizzle']
mutInf_class2O = ['day',	'temperature',	'month',	'humidity',	'hour',	'pressure','dayOfTheWeek',	'year',	'holiday',	'shower rain',	'light rain',	'thunderstorm',	'fog',	'broken clouds',	'Pandemic',	'thunderstorm with rain',	'light intensity drizzle',	'thunderstorm with light rain',	'heavy intensity rain',	'mist',	'scattered clouds',	'very heavy rain',	'overcast clouds',	'moderate rain',	'minute',	'haze',	'few clouds',	'clear sky']
varThreO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',]
mrmrO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',	'heavy intensity rain',	'mist',	'scattered clouds',	'very heavy rain',	'overcast clouds',	'moderate rain',	'minute',	'haze',	'few clouds']

orderOHE=[pear_corrO,kend_corrO,mutInf_classO,mutInf_regO,mutInf_class2O,varThreO,mrmrO]

method = ['Pearson_correlation','Kendalls_correlation','mutualInformation_classification',
          'mutualInformation_reggression','mutualInformation_classification2',
          'varianceThreshold','MRMR']

In [None]:
analizeDF(hp_oHe,orderOHE,28,n_loops=20)

  0%|          | 0/27 [00:00<?, ?it/s]

------------------------- Analyzing method Pearson_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek', 'shower rain', 'broken clouds', 'fog', 'overcast clouds', 'heavy intensity rain', 'minute', 'haze', 'thunderstorm with light rain', 'scattered clouds', 'clear sky', 'mist', 'light intensity drizzle', 'few clouds', 'thunderstorm', 'very heavy rain', 'moderate rain', 'thunderstorm with rain', 'year', 'light rain', 'humidity', 'hour', 'Pandemic']


  4%|▎         | 1/27 [00:16<07:07, 16.43s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4388303708580206 
 Precision: 0.3640619531022423 
 Recall: 0.3630652794614665 
 F1 score: 0.362626551943934
best accuracy = 0.4388303708580206, with 1 features, with Pearson_correlation


  7%|▋         | 2/27 [00:34<07:12, 17.29s/it]

Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.4575368050977807 
 Precision: 0.39853452463197914 
 Recall: 0.3954045549765509 
 F1 score: 0.3964295897636806
best accuracy = 0.4575368050977807, with 2 features, with Pearson_correlation


 11%|█         | 3/27 [00:49<06:32, 16.34s/it]

Index(['temperature', 'holiday', 'day'], dtype='object')
For 3 features: 
 Accuracy: 0.5966776677667767 
 Precision: 0.5594042823734405 
 Recall: 0.576564687224732 
 F1 score: 0.5653319929374269
best accuracy = 0.5966776677667767, with 3 features, with Pearson_correlation


 15%|█▍        | 4/27 [01:04<06:07, 15.97s/it]

Index(['temperature', 'holiday', 'day', 'month'], dtype='object')
For 4 features: 
 Accuracy: 0.7641330689579202 
 Precision: 0.7309177155826768 
 Recall: 0.7416733905897509 
 F1 score: 0.7357340902321512
best accuracy = 0.7641330689579202, with 4 features, with Pearson_correlation


 19%|█▊        | 5/27 [01:20<05:44, 15.65s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure'], dtype='object')
For 5 features: 
 Accuracy: 0.8512133245091551 
 Precision: 0.8296906073273915 
 Recall: 0.8340669557440286 
 F1 score: 0.8317640973292955
best accuracy = 0.8512133245091551, with 5 features, with Pearson_correlation


 22%|██▏       | 6/27 [01:34<05:19, 15.20s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.8539761431411531 
 Precision: 0.8314111216531482 
 Recall: 0.83624241608572 
 F1 score: 0.8336356497887573
best accuracy = 0.8539761431411531, with 6 features, with Pearson_correlation


 26%|██▌       | 7/27 [01:48<04:55, 14.76s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'shower rain'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8552643220526432 
 Precision: 0.8342444525782784 
 Recall: 0.8383590504095191 
 F1 score: 0.8361806673622841
best accuracy = 0.8552643220526432, with 7 features, with Pearson_correlation


 30%|██▉       | 8/27 [02:01<04:32, 14.34s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'shower rain', 'broken clouds'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8599557032115172 
 Precision: 0.8392611967142376 
 Recall: 0.8435270073621295 
 F1 score: 0.8412507145268631
best accuracy = 0.8599557032115172, with 8 features, with Pearson_correlation


## Manual Encoding

In [4]:
#Replace times ending in 5 and also compacting the rest to have only 6 classes
b=hp_ME.Harry_Potter_and_the_Forbidden.replace([
5, 10, 11, 15, 20, 25, 30, 35, 40, 45,50.0,55.0,60.0,65.0,70.0,75.0,80.0,85.0,90.0,95.0,100.0,105.0,110.0,115.0,120.0,125.0,130.0,135.0,145.0,150.0,180.0], 
[1,1,  1,  1,  1,  1,  1,  2,  2,  2, 2,   2,   2,   3,   3,   3,   3,   3,   3,   3,   3,    3,    3,    3,     3,    3,   3,    3,     3,    3,    3])

df3=pd.DataFrame(b)
df3.rename(columns = {'Harry_Potter_and_the_Forbidden':'HP_Forbidden_clean'}, inplace = True)
hp_bis3=pd.concat([hp_ME, df3], axis=1)
hp3 = hp_bis3.drop('Harry_Potter_and_the_Forbidden',axis=1)
hp3.rename(columns = {'day.1':'dayOfTheWeek'}, inplace = True)
hp3 = hp3[hp3.HP_Forbidden_clean != 0] #delete rows with 0 min
hp3.HP_Forbidden_clean.unique()

array([2., 1., 3.])

In [5]:
HP_ME = hp3

pear_corr = ['temperature','holiday','day','month','pressure','dayOfTheWeek',
            'report','minute','year','humidity','hour',	'Pandemic']
kend_corr = ['temperature','holiday','month','day','pressure','dayOfTheWeek','report','year',
             'minute','humidity','hour','Pandemic']
mutInf_class = ['month','day','year','hour','minute','holiday','dayOfTheWeek','Pandemic',
                'temperature','humidity','pressure','report']
mutInf_reg = ['month','day','year','hour','minute','holiday','dayOfTheWeek','Pandemic',
                'temperature','humidity','pressure','report']
mutInf_class2 = ['day','temperature','month','humidity','hour','dayOfTheWeek','pressure',
                 'holiday','year','report','minute','Pandemic']
varThre = ['month','day','year','hour','minute','holiday','dayOfTheWeek',
           'temperature','humidity','pressure','report']
mrmr = ['temperature','dayOfTheWeek','hour','holiday','humidity','day',
        'year','month','pressure','minute','report']
method = ['Pearson_correlation','Kendalls_correlation','mutualInformation_classification',
          'mutualInformation_reggression','mutualInformation_classification2',
          'variableThreshold','MRMR']

orderME=[pear_corr,kend_corr,mutInf_class,mutInf_reg,mutInf_class2,varThre,mrmr]

In [6]:
analizeDF(HP_ME,orderME,13,n_loops=20)

------------------------- Analyzing method Pearson_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek', 'report', 'minute', 'year', 'humidity', 'hour', 'Pandemic']


  8%|▊         | 1/12 [00:23<04:15, 23.18s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4362116402116402 
 Precision: 0.3647895344464025 
 Recall: 0.3635805862106963 
 F1 score: 0.3627222224029266
best accuracy = 0.4362116402116402, with 1 features, with Pearson_correlation


 17%|█▋        | 2/12 [00:34<02:44, 16.42s/it]

Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.456198347107438 
 Precision: 0.4007016661013192 
 Recall: 0.39683896265442575 
 F1 score: 0.39810173228815715
best accuracy = 0.456198347107438, with 2 features, with Pearson_correlation


 25%|██▌       | 3/12 [00:45<02:02, 13.60s/it]

Index(['temperature', 'holiday', 'day'], dtype='object')
For 3 features: 
 Accuracy: 0.5905368130702313 
 Precision: 0.5560893631935223 
 Recall: 0.5746379593888786 
 F1 score: 0.5618372324149514
best accuracy = 0.5905368130702313, with 3 features, with Pearson_correlation


 33%|███▎      | 4/12 [00:55<01:37, 12.17s/it]

Index(['temperature', 'holiday', 'day', 'month'], dtype='object')
For 4 features: 
 Accuracy: 0.7640747822392182 
 Precision: 0.7319146211285656 
 Recall: 0.7449747027093084 
 F1 score: 0.7376534559505511
best accuracy = 0.7640747822392182, with 4 features, with Pearson_correlation


 42%|████▏     | 5/12 [01:04<01:19, 11.35s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure'], dtype='object')
For 5 features: 
 Accuracy: 0.8502659008721548 
 Precision: 0.8295901712801702 
 Recall: 0.8341566618643858 
 F1 score: 0.8317420317883103
best accuracy = 0.8502659008721548, with 5 features, with Pearson_correlation


 50%|█████     | 6/12 [01:14<01:04, 10.76s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.8512566560170394 
 Precision: 0.8301710234593029 
 Recall: 0.8346838136695317 
 F1 score: 0.8323048987745234
best accuracy = 0.8512566560170394, with 6 features, with Pearson_correlation


 58%|█████▊    | 7/12 [01:24<00:52, 10.45s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report'],
      dtype='object')
For 7 features: 
 Accuracy: 0.860172744721689 
 Precision: 0.8422108479593629 
 Recall: 0.8449604162126378 
 F1 score: 0.8434909505100145
best accuracy = 0.860172744721689, with 7 features, with Pearson_correlation


 67%|██████▋   | 8/12 [01:34<00:41, 10.38s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8350736707238949 
 Precision: 0.8134664785588152 
 Recall: 0.8158723757109266 
 F1 score: 0.8145879805853375
best accuracy = 0.860172744721689, with 7 features, with Pearson_correlation


 75%|███████▌  | 9/12 [01:45<00:31, 10.59s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8416185589052811 
 Precision: 0.8203645167299995 
 Recall: 0.822744583960983 
 F1 score: 0.8214524235368983
best accuracy = 0.860172744721689, with 7 features, with Pearson_correlation


 83%|████████▎ | 10/12 [01:56<00:21, 10.56s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8529008777563691 
 Precision: 0.8338851434114747 
 Recall: 0.8362243541230666 
 F1 score: 0.8349565872720718
best accuracy = 0.860172744721689, with 7 features, with Pearson_correlation


 92%|█████████▏| 11/12 [02:06<00:10, 10.54s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year', 'humidity', 'hour'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8770953912111469 
 Precision: 0.8606775587912583 
 Recall: 0.8623085594331228 
 F1 score: 0.8614435699735153
best accuracy = 0.8770953912111469, with 11 features, with Pearson_correlation


100%|██████████| 12/12 [02:17<00:00, 11.45s/it]


Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year', 'humidity', 'hour', 'Pandemic'],
      dtype='object')
For 12 features: 
 Accuracy: 0.874179008370895 
 Precision: 0.8584591127590333 
 Recall: 0.8583827991794623 
 F1 score: 0.8583448017122329
best accuracy = 0.8770953912111469, with 11 features, with Pearson_correlation
best accuracy = 0.8770953912111469, with 11 features, with Pearson_correlation

 

------------------------- Analyzing method Kendalls_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek', 'report', 'year', 'minute', 'humidity', 'hour', 'Pandemic']


  8%|▊         | 1/12 [00:16<03:01, 16.52s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.44017837954008165 
 Precision: 0.36616788185362464 
 Recall: 0.36472193939525216 
 F1 score: 0.3639228153674561
best accuracy = 0.44017837954008165, with 1 features, with Kendalls_correlation


 17%|█▋        | 2/12 [00:29<02:26, 14.69s/it]

Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.4576393372068001 
 Precision: 0.4010283102201637 
 Recall: 0.39765492080644466 
 F1 score: 0.3988360836529458
best accuracy = 0.4576393372068001, with 2 features, with Kendalls_correlation


 25%|██▌       | 3/12 [00:41<02:00, 13.40s/it]

Index(['temperature', 'holiday', 'month'], dtype='object')
For 3 features: 
 Accuracy: 0.5226567550096962 
 Precision: 0.48490304038858484 
 Recall: 0.4954328809717427 
 F1 score: 0.48535915248897593
best accuracy = 0.5226567550096962, with 3 features, with Kendalls_correlation


 33%|███▎      | 4/12 [00:52<01:38, 12.34s/it]

Index(['temperature', 'holiday', 'month', 'day'], dtype='object')
For 4 features: 
 Accuracy: 0.7657605177993527 
 Precision: 0.7320279224257934 
 Recall: 0.7449638778613592 
 F1 score: 0.7377263629365236
best accuracy = 0.7657605177993527, with 4 features, with Kendalls_correlation


 42%|████▏     | 5/12 [01:03<01:23, 11.95s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure'], dtype='object')
For 5 features: 
 Accuracy: 0.8478613091380428 
 Precision: 0.8256645524367078 
 Recall: 0.8307648554117419 
 F1 score: 0.8280604153580549
best accuracy = 0.8478613091380428, with 5 features, with Kendalls_correlation


 50%|█████     | 6/12 [01:14<01:09, 11.60s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.8508976854856154 
 Precision: 0.8294501928913852 
 Recall: 0.8358544210710892 
 F1 score: 0.8324351212964711
best accuracy = 0.8508976854856154, with 6 features, with Kendalls_correlation


 58%|█████▊    | 7/12 [01:23<00:53, 10.75s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report'],
      dtype='object')
For 7 features: 
 Accuracy: 0.860006497725796 
 Precision: 0.8403916210766053 
 Recall: 0.8443407062503939 
 F1 score: 0.8422491724059435
best accuracy = 0.860006497725796, with 7 features, with Kendalls_correlation


 67%|██████▋   | 8/12 [01:32<00:39,  9.99s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8584471914985903 
 Precision: 0.8392736356468735 
 Recall: 0.842881512484535 
 F1 score: 0.8409426395247344
best accuracy = 0.860006497725796, with 7 features, with Kendalls_correlation


 75%|███████▌  | 9/12 [01:41<00:29,  9.75s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8428881650380022 
 Precision: 0.8210044354506048 
 Recall: 0.823520153722156 
 F1 score: 0.8221715730110396
best accuracy = 0.860006497725796, with 7 features, with Kendalls_correlation


 83%|████████▎ | 10/12 [01:50<00:19,  9.69s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8556968906283975 
 Precision: 0.8369179050641924 
 Recall: 0.8394807702050143 
 F1 score: 0.8380919600346198
best accuracy = 0.860006497725796, with 7 features, with Kendalls_correlation


 92%|█████████▏| 11/12 [02:00<00:09,  9.64s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute', 'humidity', 'hour'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8770955802307859 
 Precision: 0.8608190559010273 
 Recall: 0.8624398957917254 
 F1 score: 0.86157967365172
best accuracy = 0.8770955802307859, with 11 features, with Kendalls_correlation


100%|██████████| 12/12 [02:09<00:00, 10.81s/it]


Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute', 'humidity', 'hour', 'Pandemic'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8748419446261173 
 Precision: 0.8567696224351091 
 Recall: 0.858524523131193 
 F1 score: 0.8575452038456086
best accuracy = 0.8770955802307859, with 11 features, with Kendalls_correlation
best accuracy = 0.8770955802307859, with 11 features, with Kendalls_correlation

 

------------------------- Analyzing method mutualInformation_classification -------------------------
The variable order is: 
 ['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek', 'Pandemic', 'temperature', 'humidity', 'pressure', 'report']


  8%|▊         | 1/12 [00:06<01:09,  6.28s/it]

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.4722549661645929 
 Precision: 0.4741068588390163 
 Recall: 0.4640265448430852 
 F1 score: 0.44164863843451985
best accuracy = 0.4722549661645929, with 1 features, with mutualInformation_classification


 17%|█▋        | 2/12 [00:13<01:07,  6.78s/it]

Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.668327868852459 
 Precision: 0.6352224510549804 
 Recall: 0.6712545450909863 
 F1 score: 0.6421345789358508
best accuracy = 0.668327868852459, with 2 features, with mutualInformation_classification


 25%|██▌       | 3/12 [00:20<01:02,  6.91s/it]

Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.671514554607135 
 Precision: 0.6376740989572507 
 Recall: 0.6735260560033544 
 F1 score: 0.6453308841183524
best accuracy = 0.671514554607135, with 3 features, with mutualInformation_classification


 33%|███▎      | 4/12 [00:28<00:59,  7.38s/it]

Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.842329607714223 
 Precision: 0.817860663182973 
 Recall: 0.8215015992681135 
 F1 score: 0.8195799263470479
best accuracy = 0.842329607714223, with 4 features, with mutualInformation_classification


 42%|████▏     | 5/12 [00:36<00:54,  7.73s/it]

Index(['month', 'day', 'year', 'hour', 'minute'], dtype='object')
For 5 features: 
 Accuracy: 0.8124204520517885 
 Precision: 0.7850282861331601 
 Recall: 0.7816415225727406 
 F1 score: 0.7831445015788694
best accuracy = 0.842329607714223, with 4 features, with mutualInformation_classification


 50%|█████     | 6/12 [00:45<00:47,  7.90s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday'], dtype='object')
For 6 features: 
 Accuracy: 0.8018347615908592 
 Precision: 0.7724124060833366 
 Recall: 0.7702638178039914 
 F1 score: 0.7712217058963082
best accuracy = 0.842329607714223, with 4 features, with mutualInformation_classification


 58%|█████▊    | 7/12 [00:53<00:39,  8.00s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek'], dtype='object')
For 7 features: 
 Accuracy: 0.8383168316831683 
 Precision: 0.8131983178677716 
 Recall: 0.8118250878155274 
 F1 score: 0.8124249311482282
best accuracy = 0.842329607714223, with 4 features, with mutualInformation_classification


 67%|██████▋   | 8/12 [01:01<00:32,  8.00s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8315598149372109 
 Precision: 0.8041587224658853 
 Recall: 0.8040083635276509 
 F1 score: 0.8040237549312272
best accuracy = 0.842329607714223, with 4 features, with mutualInformation_classification


 75%|███████▌  | 9/12 [01:10<00:24,  8.30s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8671299360247077 
 Precision: 0.8452693139137192 
 Recall: 0.8482166655228028 
 F1 score: 0.8466440964894597
best accuracy = 0.8671299360247077, with 9 features, with mutualInformation_classification


 83%|████████▎ | 10/12 [01:19<00:17,  8.63s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8718356527501657 
 Precision: 0.8526155664152666 
 Recall: 0.8537669023390875 
 F1 score: 0.8531054459679914
best accuracy = 0.8718356527501657, with 10 features, with mutualInformation_classification


 92%|█████████▏| 11/12 [01:29<00:08,  8.94s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8773501437735014 
 Precision: 0.8591808247766077 
 Recall: 0.8594966126521033 
 F1 score: 0.8592693313795661
best accuracy = 0.8773501437735014, with 11 features, with mutualInformation_classification


100%|██████████| 12/12 [01:38<00:00,  8.21s/it]


Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8754152823920266 
 Precision: 0.8587975335481222 
 Recall: 0.8588996344227863 
 F1 score: 0.85876617497936
best accuracy = 0.8773501437735014, with 11 features, with mutualInformation_classification
best accuracy = 0.8773501437735014, with 11 features, with mutualInformation_classification

 

------------------------- Analyzing method mutualInformation_reggression -------------------------
The variable order is: 
 ['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek', 'Pandemic', 'temperature', 'humidity', 'pressure', 'report']


  8%|▊         | 1/12 [00:06<01:09,  6.28s/it]

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.4691173209137281 
 Precision: 0.47801941173086293 
 Recall: 0.4683858378553644 
 F1 score: 0.4408259558414792
best accuracy = 0.4691173209137281, with 1 features, with mutualInformation_reggression


 17%|█▋        | 2/12 [00:13<01:07,  6.73s/it]

Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6727626027093049 
 Precision: 0.6383680002343189 
 Recall: 0.6757917430671854 
 F1 score: 0.6460678197985429
best accuracy = 0.6727626027093049, with 2 features, with mutualInformation_reggression


 25%|██▌       | 3/12 [00:20<01:01,  6.83s/it]

Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6746164109406271 
 Precision: 0.6392606959784364 
 Recall: 0.674994779085178 
 F1 score: 0.6474180594930781
best accuracy = 0.6746164109406271, with 3 features, with mutualInformation_reggression


 33%|███▎      | 4/12 [00:28<00:58,  7.28s/it]

Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.8432197728790916 
 Precision: 0.8178750166035947 
 Recall: 0.8211012894688976 
 F1 score: 0.8193570038868551
best accuracy = 0.8432197728790916, with 4 features, with mutualInformation_reggression


 42%|████▏     | 5/12 [00:36<00:53,  7.63s/it]

Index(['month', 'day', 'year', 'hour', 'minute'], dtype='object')
For 5 features: 
 Accuracy: 0.811438127090301 
 Precision: 0.7832243101804546 
 Recall: 0.7812933488732068 
 F1 score: 0.7821112072533746
best accuracy = 0.8432197728790916, with 4 features, with mutualInformation_reggression


 50%|█████     | 6/12 [00:44<00:46,  7.77s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday'], dtype='object')
For 6 features: 
 Accuracy: 0.803203840142889 
 Precision: 0.7735065261113394 
 Recall: 0.7701921945235435 
 F1 score: 0.7717297247951995
best accuracy = 0.8432197728790916, with 4 features, with mutualInformation_reggression


 58%|█████▊    | 7/12 [00:52<00:39,  7.81s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek'], dtype='object')
For 7 features: 
 Accuracy: 0.8386765034652359 
 Precision: 0.8128563696054384 
 Recall: 0.8118390443194762 
 F1 score: 0.8122455017799062
best accuracy = 0.8432197728790916, with 4 features, with mutualInformation_reggression


 67%|██████▋   | 8/12 [01:00<00:31,  7.81s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8349675397358406 
 Precision: 0.8081632671950894 
 Recall: 0.8073884823280691 
 F1 score: 0.8077074861414062
best accuracy = 0.8432197728790916, with 4 features, with mutualInformation_reggression


 75%|███████▌  | 9/12 [01:09<00:24,  8.12s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8664649181797803 
 Precision: 0.8443252272367844 
 Recall: 0.845840902644793 
 F1 score: 0.8449941208913878
best accuracy = 0.8664649181797803, with 9 features, with mutualInformation_reggression


 83%|████████▎ | 10/12 [01:17<00:16,  8.38s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8723905723905724 
 Precision: 0.8534490866045129 
 Recall: 0.8539566435220691 
 F1 score: 0.853649194177533
best accuracy = 0.8723905723905724, with 10 features, with mutualInformation_reggression


 92%|█████████▏| 11/12 [01:27<00:08,  8.60s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8786356484603282 
 Precision: 0.8608931143264064 
 Recall: 0.8624554327194899 
 F1 score: 0.861565070324639
best accuracy = 0.8786356484603282, with 11 features, with mutualInformation_reggression


100%|██████████| 12/12 [01:36<00:00,  8.00s/it]


Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8793157776277291 
 Precision: 0.8608009485587178 
 Recall: 0.8629370213723562 
 F1 score: 0.8618082037589517
best accuracy = 0.8793157776277291, with 12 features, with mutualInformation_reggression
best accuracy = 0.8793157776277291, with 12 features, with mutualInformation_reggression

 

------------------------- Analyzing method mutualInformation_classification2 -------------------------
The variable order is: 
 ['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek', 'pressure', 'holiday', 'year', 'report', 'minute', 'Pandemic']


  8%|▊         | 1/12 [00:06<01:13,  6.71s/it]

Index(['day'], dtype='object')
For 1 features: 
 Accuracy: 0.45265945458643225 
 Precision: 0.42661359569724905 
 Recall: 0.43858374671188405 
 F1 score: 0.4178991797038426
best accuracy = 0.45265945458643225, with 1 features, with mutualInformation_classification2


 17%|█▋        | 2/12 [00:15<01:19,  7.93s/it]

Index(['day', 'temperature'], dtype='object')
For 2 features: 
 Accuracy: 0.5653012863913338 
 Precision: 0.5254351612749998 
 Recall: 0.5422471720070094 
 F1 score: 0.5295540470258239
best accuracy = 0.5653012863913338, with 2 features, with mutualInformation_classification2


 25%|██▌       | 3/12 [00:23<01:12,  8.03s/it]

Index(['day', 'temperature', 'month'], dtype='object')
For 3 features: 
 Accuracy: 0.7660677966101695 
 Precision: 0.7302202275911792 
 Recall: 0.7418215712806102 
 F1 score: 0.7353674120694214
best accuracy = 0.7660677966101695, with 3 features, with mutualInformation_classification2


 33%|███▎      | 4/12 [00:32<01:07,  8.43s/it]

Index(['day', 'temperature', 'month', 'humidity'], dtype='object')
For 4 features: 
 Accuracy: 0.8386625933469111 
 Precision: 0.814766369651991 
 Recall: 0.8199974130427292 
 F1 score: 0.8172186552870413
best accuracy = 0.8386625933469111, with 4 features, with mutualInformation_classification2


 42%|████▏     | 5/12 [00:41<01:00,  8.71s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour'], dtype='object')
For 5 features: 
 Accuracy: 0.871017448447768 
 Precision: 0.8521032657676102 
 Recall: 0.8525946166742542 
 F1 score: 0.8522515441904003
best accuracy = 0.871017448447768, with 5 features, with mutualInformation_classification2


 50%|█████     | 6/12 [00:50<00:52,  8.68s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.8758679373723621 
 Precision: 0.8569842812760938 
 Recall: 0.858381856820559 
 F1 score: 0.8576136203856102
best accuracy = 0.8758679373723621, with 6 features, with mutualInformation_classification2


 58%|█████▊    | 7/12 [00:59<00:43,  8.69s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8780390820268121 
 Precision: 0.8599687367658605 
 Recall: 0.8611834052365147 
 F1 score: 0.8605345029564359
best accuracy = 0.8780390820268121, with 7 features, with mutualInformation_classification2


 67%|██████▋   | 8/12 [01:07<00:34,  8.60s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8800341296928328 
 Precision: 0.8626841058743256 
 Recall: 0.8631607076106174 
 F1 score: 0.8628541985294568
best accuracy = 0.8800341296928328, with 8 features, with mutualInformation_classification2


 75%|███████▌  | 9/12 [01:16<00:26,  8.76s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8791410344041923 
 Precision: 0.8611250643177925 
 Recall: 0.8627079654531595 
 F1 score: 0.8618491253433602
best accuracy = 0.8800341296928328, with 8 features, with mutualInformation_classification2


 83%|████████▎ | 10/12 [01:25<00:17,  8.86s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year', 'report'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8765343372119553 
 Precision: 0.8583573647650766 
 Recall: 0.8590399570770996 
 F1 score: 0.8586300432618675
best accuracy = 0.8800341296928328, with 8 features, with mutualInformation_classification2


 92%|█████████▏| 11/12 [01:35<00:08,  8.99s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year', 'report', 'minute'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8767077907242403 
 Precision: 0.8577645770858086 
 Recall: 0.8584445052907145 
 F1 score: 0.8580618721931572
best accuracy = 0.8800341296928328, with 8 features, with mutualInformation_classification2


100%|██████████| 12/12 [01:44<00:00,  8.68s/it]


Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year', 'report', 'minute', 'Pandemic'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8773049645390071 
 Precision: 0.8581861118867491 
 Recall: 0.8595070633812316 
 F1 score: 0.8587582381584113
best accuracy = 0.8800341296928328, with 8 features, with mutualInformation_classification2
best accuracy = 0.8800341296928328, with 8 features, with mutualInformation_classification2

 

------------------------- Analyzing method variableThreshold -------------------------
The variable order is: 
 ['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek', 'temperature', 'humidity', 'pressure', 'report']


  8%|▊         | 1/12 [00:05<01:05,  6.00s/it]

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.4655441008018327 
 Precision: 0.45749590035991716 
 Recall: 0.4495307835353533 
 F1 score: 0.42726788422363204
best accuracy = 0.4655441008018327, with 1 features, with variableThreshold


 17%|█▋        | 2/12 [00:12<01:05,  6.52s/it]

Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6737095664143152 
 Precision: 0.6365617619363053 
 Recall: 0.6712263330436564 
 F1 score: 0.6442443149751899
best accuracy = 0.6737095664143152, with 2 features, with variableThreshold


 25%|██▌       | 3/12 [00:19<00:59,  6.61s/it]

Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6720652423615897 
 Precision: 0.6361483905835766 
 Recall: 0.6712585566670268 
 F1 score: 0.6435346868737124
best accuracy = 0.6737095664143152, with 2 features, with variableThreshold


 33%|███▎      | 4/12 [00:27<00:56,  7.07s/it]

Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.841407867494824 
 Precision: 0.8157557352228005 
 Recall: 0.8177715698850149 
 F1 score: 0.8166917567762011
best accuracy = 0.841407867494824, with 4 features, with variableThreshold


 42%|████▏     | 5/12 [00:35<00:52,  7.47s/it]

Index(['month', 'day', 'year', 'hour', 'minute'], dtype='object')
For 5 features: 
 Accuracy: 0.8115526376410965 
 Precision: 0.779814142165891 
 Recall: 0.7782960314509224 
 F1 score: 0.7789624470941722
best accuracy = 0.841407867494824, with 4 features, with variableThreshold


 50%|█████     | 6/12 [00:43<00:45,  7.63s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday'], dtype='object')
For 6 features: 
 Accuracy: 0.8022145328719723 
 Precision: 0.7704986043009482 
 Recall: 0.767472442943236 
 F1 score: 0.7688683165196979
best accuracy = 0.841407867494824, with 4 features, with variableThreshold


 58%|█████▊    | 7/12 [00:51<00:38,  7.65s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek'], dtype='object')
For 7 features: 
 Accuracy: 0.8391545391545392 
 Precision: 0.811980754933275 
 Recall: 0.8108978320013945 
 F1 score: 0.8113454573539911
best accuracy = 0.841407867494824, with 4 features, with variableThreshold


 67%|██████▋   | 8/12 [00:59<00:31,  7.82s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8599930603747398 
 Precision: 0.8354975208415475 
 Recall: 0.8373000000124748 
 F1 score: 0.8362887375863874
best accuracy = 0.8599930603747398, with 8 features, with variableThreshold


 75%|███████▌  | 9/12 [01:08<00:24,  8.19s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8749594625897614 
 Precision: 0.8529141327246793 
 Recall: 0.8558606144054574 
 F1 score: 0.8543055873993382
best accuracy = 0.8749594625897614, with 9 features, with variableThreshold


 83%|████████▎ | 10/12 [01:17<00:16,  8.48s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity', 'pressure'],
      dtype='object')
For 10 features: 
 Accuracy: 0.878369287868244 
 Precision: 0.8587419840492558 
 Recall: 0.8596165213654128 
 F1 score: 0.8590833623869352
best accuracy = 0.878369287868244, with 10 features, with variableThreshold


 92%|█████████▏| 11/12 [01:26<00:08,  8.66s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8780255516840882 
 Precision: 0.8590397171085119 
 Recall: 0.8593660746153663 
 F1 score: 0.8591040856696648
best accuracy = 0.878369287868244, with 10 features, with variableThreshold


100%|██████████| 12/12 [01:35<00:00,  7.97s/it]


Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8793905559432427 
 Precision: 0.861257398485809 
 Recall: 0.8603972096717732 
 F1 score: 0.8607058943303031
best accuracy = 0.8793905559432427, with 12 features, with variableThreshold
best accuracy = 0.8793905559432427, with 12 features, with variableThreshold

 

------------------------- Analyzing method MRMR -------------------------
The variable order is: 
 ['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day', 'year', 'month', 'pressure', 'minute', 'report']


  8%|▊         | 1/12 [00:10<01:51, 10.16s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4460750058234335 
 Precision: 0.3669483767461173 
 Recall: 0.36494869294095234 
 F1 score: 0.3645418720278105
best accuracy = 0.4460750058234335, with 1 features, with MRMR


 17%|█▋        | 2/12 [00:19<01:36,  9.65s/it]

Index(['temperature', 'dayOfTheWeek'], dtype='object')
For 2 features: 
 Accuracy: 0.49539304875204104 
 Precision: 0.45784266504743626 
 Recall: 0.4695178372329754 
 F1 score: 0.45847613608565396
best accuracy = 0.49539304875204104, with 2 features, with MRMR


 25%|██▌       | 3/12 [00:27<01:22,  9.14s/it]

Index(['temperature', 'dayOfTheWeek', 'hour'], dtype='object')
For 3 features: 
 Accuracy: 0.6776337304368139 
 Precision: 0.6334758018505445 
 Recall: 0.6388146280456918 
 F1 score: 0.6358584627415965
best accuracy = 0.6776337304368139, with 3 features, with MRMR


 33%|███▎      | 4/12 [00:36<01:12,  9.05s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday'], dtype='object')
For 4 features: 
 Accuracy: 0.7088538011695906 
 Precision: 0.6696176341875797 
 Recall: 0.6761438597980082 
 F1 score: 0.672595232276545
best accuracy = 0.7088538011695906, with 4 features, with MRMR


 42%|████▏     | 5/12 [00:45<01:03,  9.05s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity'], dtype='object')
For 5 features: 
 Accuracy: 0.8356640899508081 
 Precision: 0.8121522003187149 
 Recall: 0.8126868230830048 
 F1 score: 0.8122805261058658
best accuracy = 0.8356640899508081, with 5 features, with MRMR


 50%|█████     | 6/12 [00:54<00:53,  8.97s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day'], dtype='object')
For 6 features: 
 Accuracy: 0.8763664086324184 
 Precision: 0.857413122948885 
 Recall: 0.8575888645299251 
 F1 score: 0.8573843629643415
best accuracy = 0.8763664086324184, with 6 features, with MRMR


 58%|█████▊    | 7/12 [01:03<00:43,  8.76s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8769673478975805 
 Precision: 0.8575649944239152 
 Recall: 0.8573554236580623 
 F1 score: 0.8573684425931224
best accuracy = 0.8769673478975805, with 7 features, with MRMR


 67%|██████▋   | 8/12 [01:11<00:34,  8.54s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8765114090802164 
 Precision: 0.8566861998357675 
 Recall: 0.8579983282160644 
 F1 score: 0.8571985487518197
best accuracy = 0.8769673478975805, with 7 features, with MRMR


 75%|███████▌  | 9/12 [01:20<00:26,  8.75s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8802355712603063 
 Precision: 0.8610510792975012 
 Recall: 0.8627048864334202 
 F1 score: 0.8617655465457952
best accuracy = 0.8802355712603063, with 9 features, with MRMR


 83%|████████▎ | 10/12 [01:29<00:17,  8.84s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure', 'minute'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8783085633404105 
 Precision: 0.8578847254133398 
 Recall: 0.8590234332455967 
 F1 score: 0.8583651599261591
best accuracy = 0.8802355712603063, with 9 features, with MRMR


 92%|█████████▏| 11/12 [01:38<00:08,  8.95s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure', 'minute', 'report'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8788684148358138 
 Precision: 0.8600673581578482 
 Recall: 0.8596033611382311 
 F1 score: 0.8597286655505955
best accuracy = 0.8802355712603063, with 9 features, with MRMR


100%|██████████| 12/12 [01:47<00:00,  8.97s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure', 'minute', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.880281523539153 
 Precision: 0.8618164221937163 
 Recall: 0.862273152163532 
 F1 score: 0.861990059918926
best accuracy = 0.880281523539153, with 12 features, with MRMR
best accuracy = 0.880281523539153, with 12 features, with MRMR

 






## Ordinal Encoding

In [7]:
hp_OE.rename(columns = {'day.1':'dayOfTheWeek'}, inplace = True)

#Replace times ending in 5 and also compacting the rest to have only 6 classes
a=hp_OE.Harry_Potter_and_the_Forbidden.replace([
5, 10, 11, 15, 20, 25, 30, 35, 40, 45,50.0,55.0,60.0,65.0,70.0,75.0,80.0,85.0,90.0,95.0,100.0,105.0,110.0,115.0,120.0,125.0,130.0,135.0,145.0,150.0,180.0], 
[1,1,  1,  1,  1,  1,  1,  2,  2,  2, 2,   2,   2,   3,   3,   3,   3,   3,   3,   3,   3,    3,    3,    3,     3,    3,   3,    3,     3,    3,    3])

df1=pd.DataFrame(a)
df1.rename(columns = {'Harry_Potter_and_the_Forbidden':'HP_Forbidden_clean'}, inplace = True)
hp_bis=pd.concat([hp_OE, df1], axis=1)
hp2 = hp_bis.drop('Harry_Potter_and_the_Forbidden',axis=1)
hp2 = hp2[hp2.HP_Forbidden_clean != 0] #delete rows with 0 min
hp2.HP_Forbidden_clean.unique()

#Variable order in ordinal encoding:
pear_corrOE = ['temperature',	'holiday',	'day',	'month',	'pressure',	'dayOfTheWeek',	'report',	'minute',	'year',	'humidity',	'hour',	'Pandemic']
kend_corrOE = ['temperature',	'holiday',	'month',	'day',	'pressure',	'dayOfTheWeek',	'report',	'year',	'minute',	'humidity',	'hour',	'Pandemic',]
mutInf_classOE = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'report',]
mutInf_regOE = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'report',]
mutInf_class2OE = ['day',	'temperature',	'month',	'humidity',	'hour',	'dayOfTheWeek',	'pressure',	'holiday',	'year',	'report',	'minute',	'Pandemic',]
varThreOE = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	'pressure',	'report']
mrmrOE = ['temperature',	'dayOfTheWeek',	'hour',	'holiday',	'humidity',	'day',	'year',	'month',	'pressure',	'minute',	'report']

orderOE=[pear_corrOE,kend_corrOE,mutInf_classOE,mutInf_regOE,mutInf_class2OE,varThreOE,mrmrOE]

In [8]:
analizeDF(hp2,orderOE,13,n_loops=20)

------------------------- Analyzing method Pearson_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek', 'report', 'minute', 'year', 'humidity', 'hour', 'Pandemic']


  8%|▊         | 1/12 [00:11<02:02, 11.10s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4384021164021164 
 Precision: 0.366707419951909 
 Recall: 0.36534282248213307 
 F1 score: 0.3644074969353454
best accuracy = 0.4384021164021164, with 1 features, with Pearson_correlation


 17%|█▋        | 2/12 [00:21<01:49, 10.96s/it]

Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.4570989616444162 
 Precision: 0.40138446872133343 
 Recall: 0.3972324314597244 
 F1 score: 0.3985130593488181
best accuracy = 0.4570989616444162, with 2 features, with Pearson_correlation


 25%|██▌       | 3/12 [00:31<01:34, 10.47s/it]

Index(['temperature', 'holiday', 'day'], dtype='object')
For 3 features: 
 Accuracy: 0.5896138340759601 
 Precision: 0.5545737253067448 
 Recall: 0.5723618697284514 
 F1 score: 0.5601872499408876
best accuracy = 0.5896138340759601, with 3 features, with Pearson_correlation


 33%|███▎      | 4/12 [00:41<01:20, 10.02s/it]

Index(['temperature', 'holiday', 'day', 'month'], dtype='object')
For 4 features: 
 Accuracy: 0.7646271510516253 
 Precision: 0.7327869951265089 
 Recall: 0.7452012163429425 
 F1 score: 0.7382473434668206
best accuracy = 0.7646271510516253, with 4 features, with Pearson_correlation


 42%|████▏     | 5/12 [00:50<01:07,  9.69s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure'], dtype='object')
For 5 features: 
 Accuracy: 0.847755796639013 
 Precision: 0.8249123433115422 
 Recall: 0.8317164614987282 
 F1 score: 0.8280848873149497
best accuracy = 0.847755796639013, with 5 features, with Pearson_correlation


 50%|█████     | 6/12 [00:59<00:56,  9.42s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.850468583599574 
 Precision: 0.8300160753179292 
 Recall: 0.8344402333263197 
 F1 score: 0.8320888027615002
best accuracy = 0.850468583599574, with 6 features, with Pearson_correlation


 58%|█████▊    | 7/12 [01:08<00:46,  9.35s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8585092770313499 
 Precision: 0.8398294450965746 
 Recall: 0.8421685040280226 
 F1 score: 0.8408995662973192
best accuracy = 0.8585092770313499, with 7 features, with Pearson_correlation


 67%|██████▋   | 8/12 [01:17<00:37,  9.43s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8358424087123638 
 Precision: 0.8139176241488488 
 Recall: 0.8153734874118368 
 F1 score: 0.8145657745089667
best accuracy = 0.8585092770313499, with 7 features, with Pearson_correlation


 75%|███████▌  | 9/12 [01:27<00:28,  9.57s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8430831729741287 
 Precision: 0.8218101756422107 
 Recall: 0.8234732262128145 
 F1 score: 0.8225544169045665
best accuracy = 0.8585092770313499, with 7 features, with Pearson_correlation


 83%|████████▎ | 10/12 [01:37<00:19,  9.71s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.85417469492614 
 Precision: 0.8348922223795022 
 Recall: 0.8381610829319326 
 F1 score: 0.8364360799234026
best accuracy = 0.8585092770313499, with 7 features, with Pearson_correlation


 92%|█████████▏| 11/12 [01:48<00:09,  9.88s/it]

Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year', 'humidity', 'hour'],
      dtype='object')
For 11 features: 
 Accuracy: 0.877502679528403 
 Precision: 0.8614157972769926 
 Recall: 0.8620526179059408 
 F1 score: 0.861670492691601
best accuracy = 0.877502679528403, with 11 features, with Pearson_correlation


100%|██████████| 12/12 [01:58<00:00,  9.85s/it]


Index(['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek',
       'report', 'minute', 'year', 'humidity', 'hour', 'Pandemic'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8764756385490449 
 Precision: 0.860088989286244 
 Recall: 0.8618263851384946 
 F1 score: 0.8608789463058522
best accuracy = 0.877502679528403, with 11 features, with Pearson_correlation
best accuracy = 0.877502679528403, with 11 features, with Pearson_correlation

 

------------------------- Analyzing method Kendalls_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek', 'report', 'year', 'minute', 'humidity', 'hour', 'Pandemic']


  8%|▊         | 1/12 [00:11<02:04, 11.33s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4438964109176875 
 Precision: 0.3694891395624837 
 Recall: 0.3673929158293332 
 F1 score: 0.3665365451483783
best accuracy = 0.4438964109176875, with 1 features, with Kendalls_correlation


 17%|█▋        | 2/12 [00:21<01:49, 10.92s/it]

Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.455229180116204 
 Precision: 0.4005279155225943 
 Recall: 0.39658993573036994 
 F1 score: 0.39788838463974036
best accuracy = 0.455229180116204, with 2 features, with Kendalls_correlation


 25%|██▌       | 3/12 [00:31<01:32, 10.30s/it]

Index(['temperature', 'holiday', 'month'], dtype='object')
For 3 features: 
 Accuracy: 0.519823313940961 
 Precision: 0.4809484693206845 
 Recall: 0.4914188150069513 
 F1 score: 0.4813961479013373
best accuracy = 0.519823313940961, with 3 features, with Kendalls_correlation


 33%|███▎      | 4/12 [00:41<01:20, 10.10s/it]

Index(['temperature', 'holiday', 'month', 'day'], dtype='object')
For 4 features: 
 Accuracy: 0.7651024811218986 
 Precision: 0.7321831463876082 
 Recall: 0.7449504559317865 
 F1 score: 0.737827213980178
best accuracy = 0.7651024811218986, with 4 features, with Kendalls_correlation


 42%|████▏     | 5/12 [00:50<01:07,  9.68s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure'], dtype='object')
For 5 features: 
 Accuracy: 0.8481421473320372 
 Precision: 0.8270135638860997 
 Recall: 0.8309362411576297 
 F1 score: 0.8288746052827033
best accuracy = 0.8481421473320372, with 5 features, with Kendalls_correlation


 50%|█████     | 6/12 [01:00<00:58,  9.75s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.850302833657798 
 Precision: 0.8296043154869857 
 Recall: 0.8324460025127537 
 F1 score: 0.8309232299675084
best accuracy = 0.850302833657798, with 6 features, with Kendalls_correlation


 58%|█████▊    | 7/12 [01:09<00:47,  9.57s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8598440545808967 
 Precision: 0.8411614030926269 
 Recall: 0.8438740447855609 
 F1 score: 0.8424246059969529
best accuracy = 0.8598440545808967, with 7 features, with Kendalls_correlation


 67%|██████▋   | 8/12 [01:18<00:38,  9.53s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8578182606809803 
 Precision: 0.8384471450309362 
 Recall: 0.8415104038626667 
 F1 score: 0.8398941593948968
best accuracy = 0.8598440545808967, with 7 features, with Kendalls_correlation


 75%|███████▌  | 9/12 [01:29<00:29,  9.84s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8436916395222585 
 Precision: 0.8219687841389185 
 Recall: 0.8245749185161946 
 F1 score: 0.823169832544065
best accuracy = 0.8598440545808967, with 7 features, with Kendalls_correlation


 83%|████████▎ | 10/12 [01:39<00:20, 10.06s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8550228310502282 
 Precision: 0.8350783010614103 
 Recall: 0.8367150626548167 
 F1 score: 0.8358290063107062
best accuracy = 0.8598440545808967, with 7 features, with Kendalls_correlation


 92%|█████████▏| 11/12 [01:50<00:10, 10.22s/it]

Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute', 'humidity', 'hour'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8791639451338994 
 Precision: 0.8622249062829617 
 Recall: 0.8626708197609667 
 F1 score: 0.8623923207512894
best accuracy = 0.8791639451338994, with 11 features, with Kendalls_correlation


100%|██████████| 12/12 [02:01<00:00, 10.13s/it]


Index(['temperature', 'holiday', 'month', 'day', 'pressure', 'dayOfTheWeek',
       'report', 'year', 'minute', 'humidity', 'hour', 'Pandemic'],
      dtype='object')
For 12 features: 
 Accuracy: 0.876106387617179 
 Precision: 0.8596654918185919 
 Recall: 0.860136313056493 
 F1 score: 0.8597780406232429
best accuracy = 0.8791639451338994, with 11 features, with Kendalls_correlation
best accuracy = 0.8791639451338994, with 11 features, with Kendalls_correlation

 

------------------------- Analyzing method mutualInformation_classification -------------------------
The variable order is: 
 ['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek', 'Pandemic', 'temperature', 'humidity', 'pressure', 'report']


  8%|▊         | 1/12 [00:07<01:21,  7.41s/it]

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.4722222222222222 
 Precision: 0.4754214154640678 
 Recall: 0.46433478853559934 
 F1 score: 0.44143808872085755
best accuracy = 0.4722222222222222, with 1 features, with mutualInformation_classification


 17%|█▋        | 2/12 [00:15<01:18,  7.84s/it]

Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6700546448087431 
 Precision: 0.6360880441322261 
 Recall: 0.6704112211429648 
 F1 score: 0.6436673954126382
best accuracy = 0.6700546448087431, with 2 features, with mutualInformation_classification


 25%|██▌       | 3/12 [00:23<01:09,  7.73s/it]

Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6705187130663165 
 Precision: 0.6377989437724835 
 Recall: 0.6718118005777505 
 F1 score: 0.6454796799525373
best accuracy = 0.6705187130663165, with 3 features, with mutualInformation_classification


 33%|███▎      | 4/12 [00:32<01:05,  8.19s/it]

Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.8418803418803419 
 Precision: 0.8178316150933499 
 Recall: 0.8213867758868205 
 F1 score: 0.8195138356222011
best accuracy = 0.8418803418803419, with 4 features, with mutualInformation_classification


 42%|████▏     | 5/12 [00:40<00:58,  8.40s/it]

Index(['month', 'day', 'year', 'hour', 'minute'], dtype='object')
For 5 features: 
 Accuracy: 0.8113232389730085 
 Precision: 0.7831956699712225 
 Recall: 0.7814440601429011 
 F1 score: 0.7821851553203926
best accuracy = 0.8418803418803419, with 4 features, with mutualInformation_classification


 50%|█████     | 6/12 [00:49<00:50,  8.41s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday'], dtype='object')
For 6 features: 
 Accuracy: 0.8005273566249176 
 Precision: 0.7715499841157778 
 Recall: 0.7682821870320471 
 F1 score: 0.7698039250652392
best accuracy = 0.8418803418803419, with 4 features, with mutualInformation_classification


 58%|█████▊    | 7/12 [00:57<00:41,  8.34s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek'], dtype='object')
For 7 features: 
 Accuracy: 0.8370957095709571 
 Precision: 0.8114725757259035 
 Recall: 0.8107343306377515 
 F1 score: 0.8110353795291717
best accuracy = 0.8418803418803419, with 4 features, with mutualInformation_classification


 67%|██████▋   | 8/12 [01:05<00:33,  8.28s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8312623925974885 
 Precision: 0.8050789717197263 
 Recall: 0.8026829251889214 
 F1 score: 0.8037528925530317
best accuracy = 0.8418803418803419, with 4 features, with mutualInformation_classification


 75%|███████▌  | 9/12 [01:14<00:25,  8.42s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8650121332450915 
 Precision: 0.8439512744591073 
 Recall: 0.8449276406950634 
 F1 score: 0.8443570275354031
best accuracy = 0.8650121332450915, with 9 features, with mutualInformation_classification


 83%|████████▎ | 10/12 [01:23<00:17,  8.61s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8726087916942787 
 Precision: 0.8543485708059037 
 Recall: 0.8543742923430824 
 F1 score: 0.8542848703022837
best accuracy = 0.8726087916942787, with 10 features, with mutualInformation_classification


 92%|█████████▏| 11/12 [01:32<00:08,  8.75s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8769298827692988 
 Precision: 0.8591263445755859 
 Recall: 0.8596723175863182 
 F1 score: 0.859293075027844
best accuracy = 0.8769298827692988, with 11 features, with mutualInformation_classification


100%|██████████| 12/12 [01:41<00:00,  8.47s/it]


Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8768770764119601 
 Precision: 0.8596365916584602 
 Recall: 0.861064473931903 
 F1 score: 0.8602341087320182
best accuracy = 0.8769298827692988, with 11 features, with mutualInformation_classification
best accuracy = 0.8769298827692988, with 11 features, with mutualInformation_classification

 

------------------------- Analyzing method mutualInformation_reggression -------------------------
The variable order is: 
 ['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek', 'Pandemic', 'temperature', 'humidity', 'pressure', 'report']


  8%|▊         | 1/12 [00:06<01:07,  6.10s/it]

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.4674872477267687 
 Precision: 0.47705782524848334 
 Recall: 0.4647541319355252 
 F1 score: 0.43714360628289295
best accuracy = 0.4674872477267687, with 1 features, with mutualInformation_reggression


 17%|█▋        | 2/12 [00:13<01:06,  6.63s/it]

Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6714856762158561 
 Precision: 0.6370064264543489 
 Recall: 0.6719192795690699 
 F1 score: 0.6449133135053393
best accuracy = 0.6714856762158561, with 2 features, with mutualInformation_reggression


 25%|██▌       | 3/12 [00:19<01:00,  6.73s/it]

Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6707582832999778 
 Precision: 0.6360171223752915 
 Recall: 0.6714384206248697 
 F1 score: 0.6436628139213773
best accuracy = 0.6714856762158561, with 2 features, with mutualInformation_reggression


 33%|███▎      | 4/12 [00:27<00:57,  7.21s/it]

Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.841104431084391 
 Precision: 0.815910176645334 
 Recall: 0.8192294422878018 
 F1 score: 0.817407811144942
best accuracy = 0.841104431084391, with 4 features, with mutualInformation_reggression


 42%|████▏     | 5/12 [00:36<00:52,  7.56s/it]

Index(['month', 'day', 'year', 'hour', 'minute'], dtype='object')
For 5 features: 
 Accuracy: 0.8129542920847269 
 Precision: 0.7854856696125326 
 Recall: 0.7821385186806363 
 F1 score: 0.7836559617341813
best accuracy = 0.841104431084391, with 4 features, with mutualInformation_reggression


 50%|█████     | 6/12 [00:44<00:46,  7.76s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday'], dtype='object')
For 6 features: 
 Accuracy: 0.8043759767805314 
 Precision: 0.775787512290914 
 Recall: 0.7733545110585416 
 F1 score: 0.7744348468554965
best accuracy = 0.841104431084391, with 4 features, with mutualInformation_reggression


 58%|█████▊    | 7/12 [00:52<00:38,  7.78s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek'], dtype='object')
For 7 features: 
 Accuracy: 0.8402749832327298 
 Precision: 0.8136485183783475 
 Recall: 0.8139548185649543 
 F1 score: 0.8137491312900093
best accuracy = 0.841104431084391, with 4 features, with mutualInformation_reggression


 67%|██████▋   | 8/12 [00:59<00:31,  7.80s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8317215133199015 
 Precision: 0.8040973228585631 
 Recall: 0.8029023847235586 
 F1 score: 0.8034379969038864
best accuracy = 0.841104431084391, with 4 features, with mutualInformation_reggression


 75%|███████▌  | 9/12 [01:08<00:24,  8.06s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8676642008518269 
 Precision: 0.8462662001640898 
 Recall: 0.8474962910150948 
 F1 score: 0.846818016279479
best accuracy = 0.8676642008518269, with 9 features, with mutualInformation_reggression


 83%|████████▎ | 10/12 [01:17<00:16,  8.36s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8736700336700337 
 Precision: 0.8539853983760459 
 Recall: 0.8540002719083309 
 F1 score: 0.8539159031045309
best accuracy = 0.8736700336700337, with 10 features, with mutualInformation_reggression


 92%|█████████▏| 11/12 [01:26<00:08,  8.60s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8764890986738593 
 Precision: 0.8579925265941941 
 Recall: 0.8597878449893829 
 F1 score: 0.8588090748129494
best accuracy = 0.8764890986738593, with 11 features, with mutualInformation_reggression


100%|██████████| 12/12 [01:35<00:00,  7.96s/it]


Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'Pandemic', 'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8785505289218997 
 Precision: 0.8621446669999261 
 Recall: 0.8611842744688352 
 F1 score: 0.8615816388652704
best accuracy = 0.8785505289218997, with 12 features, with mutualInformation_reggression
best accuracy = 0.8785505289218997, with 12 features, with mutualInformation_reggression

 

------------------------- Analyzing method mutualInformation_classification2 -------------------------
The variable order is: 
 ['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek', 'pressure', 'holiday', 'year', 'report', 'minute', 'Pandemic']


  8%|▊         | 1/12 [00:06<01:14,  6.80s/it]

Index(['day'], dtype='object')
For 1 features: 
 Accuracy: 0.46399594320486814 
 Precision: 0.4267601819654769 
 Recall: 0.4357202990853155 
 F1 score: 0.4208432893983276
best accuracy = 0.46399594320486814, with 1 features, with mutualInformation_classification2


 17%|█▋        | 2/12 [00:15<01:20,  8.04s/it]

Index(['day', 'temperature'], dtype='object')
For 2 features: 
 Accuracy: 0.5667907921462424 
 Precision: 0.5268054377460563 
 Recall: 0.5424963517291218 
 F1 score: 0.5309205496092934
best accuracy = 0.5667907921462424, with 2 features, with mutualInformation_classification2


 25%|██▌       | 3/12 [00:23<01:12,  8.09s/it]

Index(['day', 'temperature', 'month'], dtype='object')
For 3 features: 
 Accuracy: 0.7661694915254237 
 Precision: 0.7315866596127594 
 Recall: 0.7435499200075989 
 F1 score: 0.736905211404191
best accuracy = 0.7661694915254237, with 3 features, with mutualInformation_classification2


 33%|███▎      | 4/12 [00:32<01:07,  8.45s/it]

Index(['day', 'temperature', 'month', 'humidity'], dtype='object')
For 4 features: 
 Accuracy: 0.8377574111789998 
 Precision: 0.815932796001574 
 Recall: 0.8188256434237123 
 F1 score: 0.8172733446341748
best accuracy = 0.8377574111789998, with 4 features, with mutualInformation_classification2


 42%|████▏     | 5/12 [00:42<01:01,  8.73s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour'], dtype='object')
For 5 features: 
 Accuracy: 0.8717878993881714 
 Precision: 0.853134058426347 
 Recall: 0.8528391088308223 
 F1 score: 0.8529392238216279
best accuracy = 0.8717878993881714, with 5 features, with mutualInformation_classification2


 50%|█████     | 6/12 [00:50<00:52,  8.68s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek'], dtype='object')
For 6 features: 
 Accuracy: 0.8755048786022237 
 Precision: 0.8562588240462725 
 Recall: 0.8578373737476713 
 F1 score: 0.8569891178448316
best accuracy = 0.8755048786022237, with 6 features, with mutualInformation_classification2


 58%|█████▊    | 7/12 [00:59<00:43,  8.70s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8762440354464894 
 Precision: 0.8578430359869635 
 Recall: 0.858929513861596 
 F1 score: 0.8583038749738604
best accuracy = 0.8762440354464894, with 7 features, with mutualInformation_classification2


 67%|██████▋   | 8/12 [01:07<00:34,  8.58s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8783276450511945 
 Precision: 0.8601593965285803 
 Recall: 0.8601219695599712 
 F1 score: 0.8600764248373505
best accuracy = 0.8783276450511945, with 8 features, with mutualInformation_classification2


 75%|███████▌  | 9/12 [01:16<00:26,  8.74s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8782524493050808 
 Precision: 0.8598459141608166 
 Recall: 0.8603179248910526 
 F1 score: 0.8599601592804894
best accuracy = 0.8783276450511945, with 8 features, with mutualInformation_classification2


 83%|████████▎ | 10/12 [01:25<00:17,  8.82s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year', 'report'],
      dtype='object')
For 10 features: 
 Accuracy: 0.876682637462925 
 Precision: 0.8590616244558914 
 Recall: 0.8580546092387723 
 F1 score: 0.8584837694308406
best accuracy = 0.8783276450511945, with 8 features, with mutualInformation_classification2


 92%|█████████▏| 11/12 [01:35<00:08,  8.94s/it]

Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year', 'report', 'minute'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8799977153301348 
 Precision: 0.861915490755273 
 Recall: 0.8610121812464578 
 F1 score: 0.8613993843494805
best accuracy = 0.8799977153301348, with 11 features, with mutualInformation_classification2


100%|██████████| 12/12 [01:44<00:00,  8.68s/it]


Index(['day', 'temperature', 'month', 'humidity', 'hour', 'dayOfTheWeek',
       'pressure', 'holiday', 'year', 'report', 'minute', 'Pandemic'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8767215740105239 
 Precision: 0.8581561656607846 
 Recall: 0.8582748801888848 
 F1 score: 0.8581313870228231
best accuracy = 0.8799977153301348, with 11 features, with mutualInformation_classification2
best accuracy = 0.8799977153301348, with 11 features, with mutualInformation_classification2

 

------------------------- Analyzing method variableThreshold -------------------------
The variable order is: 
 ['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek', 'temperature', 'humidity', 'pressure', 'report']


  8%|▊         | 1/12 [00:06<01:06,  6.02s/it]

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.47697594501718216 
 Precision: 0.46123283499545636 
 Recall: 0.4535698998057736 
 F1 score: 0.43373677168342284
best accuracy = 0.47697594501718216, with 1 features, with variableThreshold


 17%|█▋        | 2/12 [00:12<01:04,  6.49s/it]

Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6759463179628356 
 Precision: 0.6376840536516099 
 Recall: 0.6708673234284543 
 F1 score: 0.6456442603483594
best accuracy = 0.6759463179628356, with 2 features, with variableThreshold


 25%|██▌       | 3/12 [00:19<00:59,  6.64s/it]

Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6728233402251321 
 Precision: 0.6338496410229307 
 Recall: 0.6688201623371147 
 F1 score: 0.6418609329759061
best accuracy = 0.6759463179628356, with 2 features, with variableThreshold


 33%|███▎      | 4/12 [00:27<00:56,  7.05s/it]

Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.8394064872325742 
 Precision: 0.8131125912762343 
 Recall: 0.816360487036261 
 F1 score: 0.8146002846418565
best accuracy = 0.8394064872325742, with 4 features, with variableThreshold


 42%|████▏     | 5/12 [00:35<00:52,  7.49s/it]

Index(['month', 'day', 'year', 'hour', 'minute'], dtype='object')
For 5 features: 
 Accuracy: 0.8112186132227598 
 Precision: 0.7800053990480994 
 Recall: 0.7774483287139795 
 F1 score: 0.7786013855786804
best accuracy = 0.8394064872325742, with 4 features, with variableThreshold


 50%|█████     | 6/12 [00:43<00:46,  7.71s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday'], dtype='object')
For 6 features: 
 Accuracy: 0.8032179930795847 
 Precision: 0.7719162723441313 
 Recall: 0.7687979738340595 
 F1 score: 0.7701985047898838
best accuracy = 0.8394064872325742, with 4 features, with variableThreshold


 58%|█████▊    | 7/12 [00:51<00:38,  7.72s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek'], dtype='object')
For 7 features: 
 Accuracy: 0.8381265881265881 
 Precision: 0.811001061255663 
 Recall: 0.8088895798507066 
 F1 score: 0.8098267906436262
best accuracy = 0.8394064872325742, with 4 features, with variableThreshold


 67%|██████▋   | 8/12 [00:59<00:31,  7.82s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8597386074485311 
 Precision: 0.8354999088815067 
 Recall: 0.8372103407388061 
 F1 score: 0.8362576913353932
best accuracy = 0.8597386074485311, with 8 features, with variableThreshold


 75%|███████▌  | 9/12 [01:08<00:24,  8.19s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8743919388464211 
 Precision: 0.8529135063068856 
 Recall: 0.8542036210000751 
 F1 score: 0.8534643776112176
best accuracy = 0.8743919388464211, with 9 features, with variableThreshold


 83%|████████▎ | 10/12 [01:17<00:16,  8.42s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity', 'pressure'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8797030851310601 
 Precision: 0.8601538311461707 
 Recall: 0.8618122020881667 
 F1 score: 0.8609011546834163
best accuracy = 0.8797030851310601, with 10 features, with variableThreshold


 92%|█████████▏| 11/12 [01:26<00:08,  8.62s/it]

Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8788966318234611 
 Precision: 0.8603338803545204 
 Recall: 0.8604610922523658 
 F1 score: 0.8603264512871974
best accuracy = 0.8797030851310601, with 10 features, with variableThreshold


100%|██████████| 12/12 [01:35<00:00,  7.96s/it]


Index(['month', 'day', 'year', 'hour', 'minute', 'holiday', 'dayOfTheWeek',
       'temperature', 'humidity', 'pressure', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8798557804140498 
 Precision: 0.8612092525385109 
 Recall: 0.8617760273100844 
 F1 score: 0.8614133767357203
best accuracy = 0.8798557804140498, with 12 features, with variableThreshold
best accuracy = 0.8798557804140498, with 12 features, with variableThreshold

 

------------------------- Analyzing method MRMR -------------------------
The variable order is: 
 ['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day', 'year', 'month', 'pressure', 'minute', 'report']


  8%|▊         | 1/12 [00:10<01:52, 10.25s/it]

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.445364546936874 
 Precision: 0.36711652121386185 
 Recall: 0.36552219065728836 
 F1 score: 0.36510817162376774
best accuracy = 0.445364546936874, with 1 features, with MRMR


 17%|█▋        | 2/12 [00:19<01:35,  9.56s/it]

Index(['temperature', 'dayOfTheWeek'], dtype='object')
For 2 features: 
 Accuracy: 0.49534639608117564 
 Precision: 0.45649845503846126 
 Recall: 0.4681133402163512 
 F1 score: 0.457113996802008
best accuracy = 0.49534639608117564, with 2 features, with MRMR


 25%|██▌       | 3/12 [00:27<01:21,  9.01s/it]

Index(['temperature', 'dayOfTheWeek', 'hour'], dtype='object')
For 3 features: 
 Accuracy: 0.6812894183601962 
 Precision: 0.6364657410350126 
 Recall: 0.6419321069098298 
 F1 score: 0.6388879421884763
best accuracy = 0.6812894183601962, with 3 features, with MRMR


 33%|███▎      | 4/12 [00:36<01:11,  8.98s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday'], dtype='object')
For 4 features: 
 Accuracy: 0.7054736842105264 
 Precision: 0.6650403463843507 
 Recall: 0.6715553584046106 
 F1 score: 0.6679209315335166
best accuracy = 0.7054736842105264, with 4 features, with MRMR


 42%|████▏     | 5/12 [00:45<01:01,  8.85s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity'], dtype='object')
For 5 features: 
 Accuracy: 0.8337549777465448 
 Precision: 0.8097664741705943 
 Recall: 0.8090106858527595 
 F1 score: 0.8092990654985255
best accuracy = 0.8337549777465448, with 5 features, with MRMR


 50%|█████     | 6/12 [00:53<00:52,  8.71s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day'], dtype='object')
For 6 features: 
 Accuracy: 0.8774806474313863 
 Precision: 0.8573665682527829 
 Recall: 0.8579653687569234 
 F1 score: 0.8576039943409796
best accuracy = 0.8774806474313863, with 6 features, with MRMR


 58%|█████▊    | 7/12 [01:01<00:42,  8.54s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year'],
      dtype='object')
For 7 features: 
 Accuracy: 0.8776368334507869 
 Precision: 0.8577469530477729 
 Recall: 0.8586185650832814 
 F1 score: 0.8581156325396513
best accuracy = 0.8776368334507869, with 7 features, with MRMR


 67%|██████▋   | 8/12 [01:09<00:33,  8.39s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month'],
      dtype='object')
For 8 features: 
 Accuracy: 0.8783109856504352 
 Precision: 0.859032094037999 
 Recall: 0.8599949664420954 
 F1 score: 0.8594486382782273
best accuracy = 0.8783109856504352, with 8 features, with MRMR


 75%|███████▌  | 9/12 [01:18<00:25,  8.52s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure'],
      dtype='object')
For 9 features: 
 Accuracy: 0.8812014134275619 
 Precision: 0.862765116133125 
 Recall: 0.8633766100791621 
 F1 score: 0.8629970031988867
best accuracy = 0.8812014134275619, with 9 features, with MRMR


 83%|████████▎ | 10/12 [01:27<00:17,  8.63s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure', 'minute'],
      dtype='object')
For 10 features: 
 Accuracy: 0.8796296296296297 
 Precision: 0.860147647323601 
 Recall: 0.8612152685646522 
 F1 score: 0.8606419096240109
best accuracy = 0.8812014134275619, with 9 features, with MRMR


 92%|█████████▏| 11/12 [01:37<00:09,  9.12s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure', 'minute', 'report'],
      dtype='object')
For 11 features: 
 Accuracy: 0.8809118828254193 
 Precision: 0.8626148548618935 
 Recall: 0.8618620810710773 
 F1 score: 0.8621503710615337
best accuracy = 0.8812014134275619, with 9 features, with MRMR


100%|██████████| 12/12 [01:47<00:00,  8.94s/it]

Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday', 'humidity', 'day',
       'year', 'month', 'pressure', 'minute', 'report'],
      dtype='object')
For 12 features: 
 Accuracy: 0.8807546723444524 
 Precision: 0.8626961521262452 
 Recall: 0.861842549531403 
 F1 score: 0.8621867216836552
best accuracy = 0.8812014134275619, with 9 features, with MRMR
best accuracy = 0.8812014134275619, with 9 features, with MRMR

 




