# Boosting Classifiers

In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
from collections import Counter
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn import metrics
from sklearn.metrics import classification_report
from statistics import mean
import math
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import recall_score, accuracy_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

In [2]:
hp_oHe = pd.read_csv('HP_OHE_3class.csv')
hp_oHe.drop(hp_oHe.tail(17).index,inplace=True) 
hp_oHe = hp_oHe.drop('Unnamed: 0',axis=1)

hp_ME = pd.read_csv("harryPotterClean.csv")
hp_ME.drop(hp_ME.tail(17).index,inplace=True) 
hp_ME = hp_ME.drop('Unnamed: 0',axis=1)

hp_OE = pd.read_csv("harryPotterCleanOE.csv")
hp_OE.drop(hp_OE.tail(17).index,inplace=True) 
hp_OE = hp_OE.drop('Unnamed: 0',axis=1)

## Creating Ada Boosting classifier

### Ada Boosting on Decision Trees

In [58]:
mu = ['day',	'temperature',	'month',	'humidity',	'hour',	'pressure','dayOfTheWeek']

x,y=getXandY(hp_oHe)
#split the dataFrame into test and train
X_train, X_test, y_train, y_test = trainTest(x,y)
#Oversample the train dataset with SMOTE
X_train_os, y_train_os=overSampling(X_train, y_train, y_test, smote)
#define the variables order 
X_train_os_r = X_train_os[mu]
X_test_r = X_test[mu]

dt = DecisionTreeClassifier(max_depth=600)
dt.fit(X_train_os_r,y_train_os)
y_pred=dt.predict(X_test_r)
print("accuracy for base model decision tree is: {}".format(metrics.accuracy_score(y_test, y_pred)))
       
adaB_class = AdaBoostClassifier(DecisionTreeClassifier(max_depth=600), n_estimators=700, learning_rate=0.9)
adaB_class.fit(X_train_os_r,y_train_os)
y_pred=adaB_class.predict(X_test_r)
ac=metrics.accuracy_score(y_test, y_pred)
print("accuracy for adaBoost with base as decision tree is: {}".format(metrics.accuracy_score(y_test, y_pred)))

accuracy for base model decision tree is: 0.8693118134947321
accuracy for adaBoost with base as decision tree is: 0.887020847343645


In [59]:
mu = ['day',	'temperature',	'month',	'humidity',	'hour',	'pressure','dayOfTheWeek']

x,y=getXandY(hp_oHe)
#split the dataFrame into test and train
X_train, X_test, y_train, y_test = trainTest(x,y)
#Oversample the train dataset with SMOTE
X_train_os, y_train_os=overSampling(X_train, y_train, y_test, smote)
#define the variables order 
X_train_os_r = X_train_os[mu]
X_test_r = X_test[mu]

dt = DecisionTreeClassifier(criterion='entropy', splitter='random',  max_depth=500, min_samples_split=2)
dt.fit(X_train_os_r,y_train_os)
y_pred=dt.predict(X_test_r)
print("accuracy for base model decision tree is: {}".format(metrics.accuracy_score(y_test, y_pred)))
       
adaB_class = AdaBoostClassifier(dt, n_estimators=700, learning_rate=0.9)
adaB_class.fit(X_train_os_r,y_train_os)
y_pred=adaB_class.predict(X_test_r)
ac=metrics.accuracy_score(y_test, y_pred)
print("accuracy for adaBoost with base as decision tree is: {}".format(metrics.accuracy_score(y_test, y_pred)))

accuracy for base model decision tree is: 0.868013468013468
accuracy for adaBoost with base as decision tree is: 0.8774410774410775


### Random Forest

In [61]:
mrmr = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	
        'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'scattered clouds',
        'thunderstorm with rain',
        'few clouds',	'thunderstorm',	'shower rain',	'heavy intensity rain',	'mist',	'scattered clouds']

#define the variables order 
X_train_os_r = X_train_os[mrmr]
X_test_r = X_test[mrmr]

rf = RandomForestClassifier()
rf.fit(X_train_os_r,y_train_os)
y_pred=rf.predict(X_test_r)
print("accuracy for base model random forest is: {}".format(metrics.accuracy_score(y_test, y_pred)))
       
adaB_class = AdaBoostClassifier(RandomForestClassifier(), n_estimators=100, learning_rate=0.9)
adaB_class.fit(X_train_os,y_train_os)
y_pred=adaB_class.predict(X_test)
ac=metrics.accuracy_score(y_test, y_pred)
print("accuracy for adaBoost with base as random fores is: {}".format(metrics.accuracy_score(y_test, y_pred)))

accuracy for base model random forest is: 0.8725028058361392
accuracy for adaBoost with base as random fores is: 0.8749719416386083


## Gradient Boosting Classifiers

In [81]:
n=[100,250,500,700,1000]
for i in range(len(n)):
    gbc=GradientBoostingClassifier(n_estimators=n[i], learning_rate=1.0, max_depth=None, random_state=0)
    gbc.fit(X_train_os,y_train_os)
    y_pred=gbc.predict(X_test)
    print("The accuracy with {} estimators is {}".format(n[i],gbc.score(X_test,y_test)))

The accuracy with 100 estimators is 0.8489337822671156
The accuracy with 250 estimators is 0.8491582491582491
The accuracy with 500 estimators is 0.8624017957351291
The accuracy with 700 estimators is 0.8619528619528619
The accuracy with 1000 estimators is 0.8619528619528619


In [80]:
lr = [0.001, 0.01, 0.1, 1, 1.5]

for i in range(len(lr)):
    gbc=GradientBoostingClassifier(n_estimators=500, learning_rate=lr[i], max_depth=None)
    gbc.fit(X_train_os,y_train_os)
    print("The accuracy with {} learning rate is {}".format(lr[i],gbc.score(X_test,y_test)))

The accuracy with 0.001 learning rate is 0.8545454545454545
The accuracy with 0.01 learning rate is 0.8500561167227834
The accuracy with 0.1 learning rate is 0.8457912457912458
The accuracy with 1 learning rate is 0.865993265993266
The accuracy with 1.5 learning rate is 0.8599326599326599


### Encoding and feature selection

In [3]:
def getXandY(df):
    df.drop(df.tail(20).index,inplace=True) 
    x = df.drop(['HP_Forbidden_clean'],axis=1)
    y = df.HP_Forbidden_clean
    return(x,y)

def trainTest(x,y):
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30, shuffle=True)
    return(X_train, X_test, y_train, y_test)

#Function to perform oversampling
def overSampling(X_train, y_train, y_test, method):
    X_train_os, y_train_os= method.fit_resample(X_train, y_train)
    # Check the number of records after over sampling
    #print(sorted(Counter(y_train_os).items())) 
    return(X_train_os, y_train_os)

smote = SMOTE(random_state=42)


def testModel(df,var_order,n_vars,n_loops,method):
    highest = 0
    for j in range(1,n_vars):
        #split our dataframe into X and Y
        x,y=getXandY(df)
        #create the lists to store metrics
        acc = []
        rec = []
        preci = []
        f1 = []
        for i in range(n_loops):
            #split the dataFrame into test and train
            X_train, X_test, y_train, y_test = trainTest(x,y)
            #Oversample the train dataset with SMOTE
            X_train_os, y_train_os=overSampling(X_train, y_train, y_test, smote)
            #define the variables order 
            X_train_os_r = X_train_os[var_order]
            X_test_r = X_test[var_order]
            df1= X_train_os_r.iloc[:, 0:j] #use only part of the variables
            
            #create and train decision trees
            gbc=GradientBoostingClassifier(n_estimators=500, learning_rate=1, max_depth=None)
            gbc.fit(df1, y_train_os)
        
            y_pred=gbc.predict(X_test_r.iloc[:, 0:j])
            ac=metrics.accuracy_score(y_test, y_pred)
            acc.append(ac)
            p=metrics.precision_score(y_test, y_pred,average='macro')
            preci.append(p)
            r=metrics.recall_score(y_test, y_pred,average='macro')
            rec.append(r)
            f=metrics.f1_score(y_test, y_pred, average='macro')
            f1.append(f)
        print(df1.columns)
        print("For {} features: \n Accuracy: {} \n Precision: {} \n Recall: {} \n F1 score: {}".format(
        j,mean(acc),mean(preci),mean(rec),mean(f1)))
        
        if mean(acc)>highest:
            highest = mean(acc)
            best = "best accuracy = {}, with {} features, with {}".format(mean(acc),j,method)
        print(best)
        #print(classification_report(y_test, y_pred))
    print(best)
        
def analizeDF(df,order,n_vars,n_loops):
    for i in range(len(order)):
        print('------------------------- Analyzing method {} -------------------------'.format(method[i]))
        print('The variable order is: \n {}'.format(order[i]))
        testModel(df,order[i],n_vars,n_loops,method[i])
        print('\n \n')

### One hot encoding

In [6]:
#Variable order in one hot encoding:
pear_corrO = ['temperature',	'holiday',	'day',	'month',	'pressure',	'dayOfTheWeek',	'shower rain',	'broken clouds',	'fog',	'overcast clouds',	'heavy intensity rain',	'minute',	'haze',	'thunderstorm with light rain',	'scattered clouds',	'clear sky',	'mist',	'light intensity drizzle',	'few clouds',	'thunderstorm',	'very heavy rain',	'moderate rain',	'thunderstorm with rain',	'year',	'light rain',	'humidity',	'hour',	'Pandemic']
kend_corrO = ['temperature',	'holiday',	'month',	'day',	'pressure',	'dayOfTheWeek',	'shower rain',	'broken clouds',	'heavy intensity rain',	'fog',	'year',	'overcast clouds',	'minute',	'haze',	'thunderstorm with light rain',	'mist',	'light intensity drizzle',	'scattered clouds',	'clear sky',	'few clouds',	'thunderstorm',	'very heavy rain',	'moderate rain',	'thunderstorm with rain',	'light rain',	'humidity',	'hour',	'Pandemic']
mutInf_classO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'moderate rain',	'mist',	'overcast clouds',	'clear sky',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',	'very heavy rain',	'fog',	'haze',	'thunderstorm with light rain',	'light intensity drizzle']
mutInf_regO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'moderate rain',	'mist',	'overcast clouds',	'clear sky',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',	'very heavy rain',	'fog',	'haze',	'thunderstorm with light rain',	'light intensity drizzle']
mutInf_class2O = ['day',	'temperature',	'month',	'humidity',	'hour',	'pressure','dayOfTheWeek',	'year',	'holiday',	'shower rain',	'light rain',	'thunderstorm',	'fog',	'broken clouds',	'Pandemic',	'thunderstorm with rain',	'light intensity drizzle',	'thunderstorm with light rain',	'heavy intensity rain',	'mist',	'scattered clouds',	'very heavy rain',	'overcast clouds',	'moderate rain',	'minute',	'haze',	'few clouds',	'clear sky']
varThreO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',]
mrmrO = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	'pressure',	'heavy intensity rain',	'light rain',	'broken clouds',	'scattered clouds',	'thunderstorm with rain',	'few clouds',	'thunderstorm',	'shower rain',	'heavy intensity rain',	'mist',	'scattered clouds',	'very heavy rain',	'overcast clouds',	'moderate rain',	'minute',	'haze',	'few clouds']

orderOHE=[pear_corrO,kend_corrO,mutInf_classO,mutInf_regO,mutInf_class2O,varThreO,mrmrO]

method = ['Pearson_correlation','Kendalls_correlation','mutualInformation_classification',
          'mutualInformation_reggression','mutualInformation_classification2',
          'varianceThreshold','MRMR']

In [None]:
analizeDF(hp_oHe,orderOHE,28,n_loops=3)

  0%|          | 0/27 [00:00<?, ?it/s]

------------------------- Analyzing method Pearson_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek', 'shower rain', 'broken clouds', 'fog', 'overcast clouds', 'heavy intensity rain', 'minute', 'haze', 'thunderstorm with light rain', 'scattered clouds', 'clear sky', 'mist', 'light intensity drizzle', 'few clouds', 'thunderstorm', 'very heavy rain', 'moderate rain', 'thunderstorm with rain', 'year', 'light rain', 'humidity', 'hour', 'Pandemic']


### Manual Encoding

In [4]:
#Replace times ending in 5 and also compacting the rest to have only 6 classes
b=hp_ME.Harry_Potter_and_the_Forbidden.replace([
5, 10, 11, 15, 20, 25, 30, 35, 40, 45,50.0,55.0,60.0,65.0,70.0,75.0,80.0,85.0,90.0,95.0,100.0,105.0,110.0,115.0,120.0,125.0,130.0,135.0,145.0,150.0,180.0], 
[1,1,  1,  1,  1,  1,  1,  2,  2,  2, 2,   2,   2,   3,   3,   3,   3,   3,   3,   3,   3,    3,    3,    3,     3,    3,   3,    3,     3,    3,    3])

df3=pd.DataFrame(b)
df3.rename(columns = {'Harry_Potter_and_the_Forbidden':'HP_Forbidden_clean'}, inplace = True)
hp_bis3=pd.concat([hp_ME, df3], axis=1)
hp3 = hp_bis3.drop('Harry_Potter_and_the_Forbidden',axis=1)
hp3.rename(columns = {'day.1':'dayOfTheWeek'}, inplace = True)
hp3 = hp3[hp3.HP_Forbidden_clean != 0] #delete rows with 0 min
hp3.HP_Forbidden_clean.unique()

array([2., 1., 3.])

In [5]:
HP_ME = hp3

pear_corr = ['temperature','holiday','day','month','pressure','dayOfTheWeek',
            'report','minute','year','humidity','hour',	'Pandemic']
kend_corr = ['temperature','holiday','month','day','pressure','dayOfTheWeek','report','year',
             'minute','humidity','hour','Pandemic']
mutInf_class = ['month','day','year','hour','minute','holiday','dayOfTheWeek','Pandemic',
                'temperature','humidity','pressure','report']
mutInf_reg = ['month','day','year','hour','minute','holiday','dayOfTheWeek','Pandemic',
                'temperature','humidity','pressure','report']
mutInf_class2 = ['day','temperature','month','humidity','hour','dayOfTheWeek','pressure',
                 'holiday','year','report','minute','Pandemic']
varThre = ['month','day','year','hour','minute','holiday','dayOfTheWeek',
           'temperature','humidity','pressure','report']
mrmr = ['temperature','dayOfTheWeek','hour','holiday','humidity','day',
        'year','month','pressure','minute','report']
method = ['Pearson_correlation','Kendalls_correlation','mutualInformation_classification',
          'mutualInformation_reggression','mutualInformation_classification2',
          'variableThreshold','MRMR']

orderME=[pear_corr,kend_corr,mutInf_class,mutInf_reg,mutInf_class2,varThre,mrmr]

In [8]:
analizeDF(HP_ME,orderME,13,n_loops=5)

------------------------- Analyzing method Pearson_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek', 'report', 'minute', 'year', 'humidity', 'hour', 'Pandemic']
Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4558306878306878 
 Precision: 0.36696140267047866 
 Recall: 0.3624067684867311 
 F1 score: 0.35780355925416063
best accuracy = 0.4558306878306878, with 1 features, with Pearson_correlation
Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.46865861411315957 
 Precision: 0.39890508094918165 
 Recall: 0.387015733840894 
 F1 score: 0.38716541229071005
best accuracy = 0.46865861411315957, with 2 features, with Pearson_correlation
Index(['temperature', 'holiday', 'day'], dtype='object')
For 3 features: 
 Accuracy: 0.5805219605346913 
 Precision: 0.5451978892272851 
 Recall: 0.5632085884253596 
 F1 score: 0.550556037988275
best accuracy = 0.58052196053469

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.46487666448373716 
 Precision: 0.4743574134315322 
 Recall: 0.4660861715903708 
 F1 score: 0.440091894105251
best accuracy = 0.46487666448373716, with 1 features, with mutualInformation_classification
Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6723497267759563 
 Precision: 0.6380363775784684 
 Recall: 0.673038894482023 
 F1 score: 0.645771872374717
best accuracy = 0.6723497267759563, with 2 features, with mutualInformation_classification
Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6706938060844824 
 Precision: 0.6352858215357736 
 Recall: 0.6736634925737557 
 F1 score: 0.6428666070417609
best accuracy = 0.6723497267759563, with 2 features, with mutualInformation_classification
Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.8413762875301337 
 Precision: 0.8199663363667714 
 Recall: 0.8126111460728117 
 F1 score: 0.816108441770

Index(['day'], dtype='object')
For 1 features: 
 Accuracy: 0.4530538652242506 
 Precision: 0.4293346129625549 
 Recall: 0.4425891728838678 
 F1 score: 0.42067299761468807
best accuracy = 0.4530538652242506, with 1 features, with mutualInformation_classification2
Index(['day', 'temperature'], dtype='object')
For 2 features: 
 Accuracy: 0.5597382080794403 
 Precision: 0.5215590782345374 
 Recall: 0.5365571456253926 
 F1 score: 0.5252249496771324
best accuracy = 0.5597382080794403, with 2 features, with mutualInformation_classification2
Index(['day', 'temperature', 'month'], dtype='object')
For 3 features: 
 Accuracy: 0.7552994350282486 
 Precision: 0.718698852306614 
 Recall: 0.7316810914025248 
 F1 score: 0.7243697799314215
best accuracy = 0.7552994350282486, with 3 features, with mutualInformation_classification2
Index(['day', 'temperature', 'month', 'humidity'], dtype='object')
For 4 features: 
 Accuracy: 0.8238968092328581 
 Precision: 0.7982025309584097 
 Recall: 0.8043281241509824 

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4631260191008619 
 Precision: 0.36675728583869144 
 Recall: 0.36039239138120477 
 F1 score: 0.35590092449211824
best accuracy = 0.4631260191008619, with 1 features, with MRMR
Index(['temperature', 'dayOfTheWeek'], dtype='object')
For 2 features: 
 Accuracy: 0.4994168416141824 
 Precision: 0.4530423243122243 
 Recall: 0.46087302908773137 
 F1 score: 0.4550518219302518
best accuracy = 0.4994168416141824, with 2 features, with MRMR
Index(['temperature', 'dayOfTheWeek', 'hour'], dtype='object')
For 3 features: 
 Accuracy: 0.6720392431674842 
 Precision: 0.6248354924846848 
 Recall: 0.6325800503665903 
 F1 score: 0.6282562007076329
best accuracy = 0.6720392431674842, with 3 features, with MRMR
Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday'], dtype='object')
For 4 features: 
 Accuracy: 0.6882807017543859 
 Precision: 0.6450491392728093 
 Recall: 0.6540435452900539 
 F1 score: 0.6490520380018026
best accuracy = 0.68

### Ordinal Encoding

In [4]:
hp_OE.rename(columns = {'day.1':'dayOfTheWeek'}, inplace = True)

#Replace times ending in 5 and also compacting the rest to have only 6 classes
a=hp_OE.Harry_Potter_and_the_Forbidden.replace([
5, 10, 11, 15, 20, 25, 30, 35, 40, 45,50.0,55.0,60.0,65.0,70.0,75.0,80.0,85.0,90.0,95.0,100.0,105.0,110.0,115.0,120.0,125.0,130.0,135.0,145.0,150.0,180.0], 
[1,1,  1,  1,  1,  1,  1,  2,  2,  2, 2,   2,   2,   3,   3,   3,   3,   3,   3,   3,   3,    3,    3,    3,     3,    3,   3,    3,     3,    3,    3])

df1=pd.DataFrame(a)
df1.rename(columns = {'Harry_Potter_and_the_Forbidden':'HP_Forbidden_clean'}, inplace = True)
hp_bis=pd.concat([hp_OE, df1], axis=1)
hp2 = hp_bis.drop('Harry_Potter_and_the_Forbidden',axis=1)
hp2 = hp2[hp2.HP_Forbidden_clean != 0] #delete rows with 0 min
hp2.HP_Forbidden_clean.unique()

#Variable order in ordinal encoding:
pear_corrOE = ['temperature',	'holiday',	'day',	'month',	'pressure',	'dayOfTheWeek',	'report',	'minute',	'year',	'humidity',	'hour',	'Pandemic']
kend_corrOE = ['temperature',	'holiday',	'month',	'day',	'pressure',	'dayOfTheWeek',	'report',	'year',	'minute',	'humidity',	'hour',	'Pandemic',]
mutInf_classOE = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'report',]
mutInf_regOE = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'Pandemic',	'temperature',	'humidity',	'pressure',	'report',]
mutInf_class2OE = ['day',	'temperature',	'month',	'humidity',	'hour',	'dayOfTheWeek',	'pressure',	'holiday',	'year',	'report',	'minute',	'Pandemic',]
varThreOE = ['month',	'day',	'year',	'hour',	'minute',	'holiday',	'dayOfTheWeek',	'temperature',	'humidity',	'pressure',	'report']
mrmrOE = ['temperature',	'dayOfTheWeek',	'hour',	'holiday',	'humidity',	'day',	'year',	'month',	'pressure',	'minute',	'report']

orderOE=[pear_corrOE,kend_corrOE,mutInf_classOE,mutInf_regOE,mutInf_class2OE,varThreOE,mrmrOE]

In [8]:
analizeDF(hp2,orderOE,13,n_loops=1)

------------------------- Analyzing method Pearson_correlation -------------------------
The variable order is: 
 ['temperature', 'holiday', 'day', 'month', 'pressure', 'dayOfTheWeek', 'report', 'minute', 'year', 'humidity', 'hour', 'Pandemic']
Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.45136681500317866 
 Precision: 0.35568898714249664 
 Recall: 0.35436889424179996 
 F1 score: 0.3484741078470363
best accuracy = 0.45136681500317866, with 1 features, with Pearson_correlation
Index(['temperature', 'holiday'], dtype='object')
For 2 features: 
 Accuracy: 0.46552089963929555 
 Precision: 0.39330928033002 
 Recall: 0.3837242760377559 
 F1 score: 0.38463614944906804
best accuracy = 0.46552089963929555, with 2 features, with Pearson_correlation
Index(['temperature', 'holiday', 'day'], dtype='object')
For 3 features: 
 Accuracy: 0.573401317187168 
 Precision: 0.5399155744576815 
 Recall: 0.5525917412345013 
 F1 score: 0.5438780226326908
best accuracy = 0.57340131718716

Index(['month'], dtype='object')
For 1 features: 
 Accuracy: 0.4944262295081967 
 Precision: 0.47339753032251214 
 Recall: 0.46242721865554937 
 F1 score: 0.447568517490008
best accuracy = 0.4944262295081967, with 1 features, with mutualInformation_classification
Index(['month', 'day'], dtype='object')
For 2 features: 
 Accuracy: 0.6725760560297658 
 Precision: 0.635130279372454 
 Recall: 0.6628919480395693 
 F1 score: 0.6422233980138229
best accuracy = 0.6725760560297658, with 2 features, with mutualInformation_classification
Index(['month', 'day', 'year'], dtype='object')
For 3 features: 
 Accuracy: 0.6736795967565198 
 Precision: 0.6407138936685287 
 Recall: 0.6715550176442119 
 F1 score: 0.6451102792410146
best accuracy = 0.6736795967565198, with 3 features, with mutualInformation_classification
Index(['month', 'day', 'year', 'hour'], dtype='object')
For 4 features: 
 Accuracy: 0.8426596445029625 
 Precision: 0.8269029909799638 
 Recall: 0.8152650638144786 
 F1 score: 0.82066531980

Index(['day'], dtype='object')
For 1 features: 
 Accuracy: 0.46445497630331756 
 Precision: 0.4291115712081308 
 Recall: 0.43971151426822175 
 F1 score: 0.4273540437995054
best accuracy = 0.46445497630331756, with 1 features, with mutualInformation_classification2
Index(['day', 'temperature'], dtype='object')
For 2 features: 
 Accuracy: 0.5557062146892655 
 Precision: 0.5150678209251849 
 Recall: 0.5309062249771933 
 F1 score: 0.5184312140987093
best accuracy = 0.5557062146892655, with 2 features, with mutualInformation_classification2
Index(['day', 'temperature', 'month'], dtype='object')
For 3 features: 
 Accuracy: 0.7513011993663725 
 Precision: 0.7123972588590388 
 Recall: 0.727438210227814 
 F1 score: 0.7187609348716534
best accuracy = 0.7513011993663725, with 3 features, with mutualInformation_classification2
Index(['day', 'temperature', 'month', 'humidity'], dtype='object')
For 4 features: 
 Accuracy: 0.8166779968275549 
 Precision: 0.7896555440676484 
 Recall: 0.791854314955195

Index(['temperature'], dtype='object')
For 1 features: 
 Accuracy: 0.4595288080242594 
 Precision: 0.3571092446070929 
 Recall: 0.3547745733506619 
 F1 score: 0.34818416274022884
best accuracy = 0.4595288080242594, with 1 features, with MRMR
Index(['temperature', 'dayOfTheWeek'], dtype='object')
For 2 features: 
 Accuracy: 0.49380985750992756 
 Precision: 0.4440164027655731 
 Recall: 0.4520554820640286 
 F1 score: 0.44619235617810366
best accuracy = 0.49380985750992756, with 2 features, with MRMR
Index(['temperature', 'dayOfTheWeek', 'hour'], dtype='object')
For 3 features: 
 Accuracy: 0.6652631578947369 
 Precision: 0.6197477922688237 
 Recall: 0.6247694515725812 
 F1 score: 0.6220912379384546
best accuracy = 0.6652631578947369, with 3 features, with MRMR
Index(['temperature', 'dayOfTheWeek', 'hour', 'holiday'], dtype='object')
For 4 features: 
 Accuracy: 0.6910283438744437 
 Precision: 0.655261906700825 
 Recall: 0.6599951545902605 
 F1 score: 0.6574977983508444
best accuracy = 0.691