# TODO
- try combinations
- test already written functions
- try other methods
- remove unused functions, options
- knn imputation seems too slow, unusable
- split (train, test) before everything (imputation, sampling, etc.)

In [61]:
from preprocessing import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import itertools
import time

In [62]:
def train_test_model(model, X_train, X_test, y_train, y_test, average='weighted', verbose=True):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average=average)
    rec = recall_score(y_test, y_pred, average=average)
    f1 = f1_score(y_test, y_pred, average=average)

    if verbose:
        print(f"Accuracy: {acc}")
        print(f"Precision: {prec}")
        print(f"Recall: {rec}")
        print(f"F1: {f1}")
        print()
    return acc, prec, rec, f1


In [63]:
P = Preprocessor()
df = P.load_data('weatherAUS.csv')

In [64]:
print(P.imputation_types())
print(P.outlier_types())
print(P.encoding_types())
print(P.scaling_types())
print(P.discretize_types())
print(P.sampling_types())


['mice', 'mean', 'median', 'mode', 'mode_', 'drop', 'knn', 'interpolate', 'mode_mean', 'mode_median', 'mode_knn', 'mode_interpolate', 'mode_mode', 'None']
['mean', 'median', 'mode', 'drop', 'None']
['onehot', 'ordinal', 'label', 'None']
['standard', 'minmax', 'robust', 'None']
['equal_width', 'equal_freq', 'None']
['smote', 'random', 'smotenc', 'smoten', 'under', 'sklearn', 'None']


# Steps
- Encode RainToday, RainTomorrow (0, 1)
- Resample (sklearn)
- Imputation (mode_) categorical
- Encoding (label)
- Imputation (mice) numerical
- Outliers (drop)
- Scale (standard) for X_train

In [65]:
combination = {
    'imputation': 'mode_', # categorical: mode
    'imputation2': 'mice', # numeric: mice
    'outlier': 'drop',
    'encoding': 'label',
    'scaling': 'standard',
    'discritization': 'equal_width',
    'sampling': 'sklearn',
}

In [66]:
df_p = df.copy()

In [67]:
# convert df['RainToday'], df['RainTomorrow'] from 'Yes'/'No' to 1/0
df_p['RainToday'] = df_p['RainToday'].map({'Yes': 1, 'No': 0})
df_p['RainTomorrow'] = df_p['RainTomorrow'].map({'Yes': 1, 'No': 0})
df_p

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,0.0,0.0
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,0.0,0.0
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,0.0,0.0
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,0.0,0.0
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,2017-06-21,Uluru,2.8,23.4,0.0,,,E,31.0,SE,...,51.0,24.0,1024.6,1020.3,,,10.1,22.4,0.0,0.0
145456,2017-06-22,Uluru,3.6,25.3,0.0,,,NNW,22.0,SE,...,56.0,21.0,1023.5,1019.1,,,10.9,24.5,0.0,0.0
145457,2017-06-23,Uluru,5.4,26.9,0.0,,,N,37.0,SE,...,53.0,24.0,1021.0,1016.8,,,12.5,26.1,0.0,0.0
145458,2017-06-24,Uluru,7.8,27.0,0.0,,,SE,28.0,SSE,...,51.0,24.0,1019.4,1016.5,3.0,2.0,15.1,26.0,0.0,0.0


In [68]:
df_p = P.sample(df_p, combination['sampling'])
df_p

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,0.0,0.0
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,0.0,0.0
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,0.0,0.0
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,0.0,0.0
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1482,2013-03-21,Albury,14.9,20.2,2.6,,,N,67.0,NNE,...,81.0,82.0,1015.7,1009.7,8.0,8.0,17.8,18.6,1.0,1.0
77215,2015-05-14,Portland,8.4,14.1,2.8,0.6,2.9,WSW,41.0,SSW,...,82.0,83.0,1033.9,1034.4,8.0,8.0,12.2,13.1,1.0,1.0
57755,2016-06-04,Ballarat,7.5,13.1,5.2,,,SE,35.0,SE,...,100.0,95.0,1016.8,1012.3,8.0,8.0,10.0,12.4,1.0,1.0
65977,2014-02-19,MelbourneAirport,15.5,25.7,0.0,7.2,9.3,W,67.0,NW,...,71.0,32.0,999.2,996.9,4.0,6.0,17.9,23.8,0.0,1.0


In [69]:
df_p = P.impute(df_p, combination['imputation'])
df_p

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,0.0,0.0
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,0.0,0.0
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,0.0,0.0
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,0.0,0.0
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1482,2013-03-21,Albury,14.9,20.2,2.6,,,N,67.0,NNE,...,81.0,82.0,1015.7,1009.7,8.0,8.0,17.8,18.6,1.0,1.0
77215,2015-05-14,Portland,8.4,14.1,2.8,0.6,2.9,WSW,41.0,SSW,...,82.0,83.0,1033.9,1034.4,8.0,8.0,12.2,13.1,1.0,1.0
57755,2016-06-04,Ballarat,7.5,13.1,5.2,,,SE,35.0,SE,...,100.0,95.0,1016.8,1012.3,8.0,8.0,10.0,12.4,1.0,1.0
65977,2014-02-19,MelbourneAirport,15.5,25.7,0.0,7.2,9.3,W,67.0,NW,...,71.0,32.0,999.2,996.9,4.0,6.0,17.9,23.8,0.0,1.0


In [70]:
df_p = P.encode(df_p, combination['encoding'])
df_p

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,396,2,13.4,22.9,0.6,,,13,44.0,13,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,0.0,0.0
1,397,2,7.4,25.1,0.0,,,14,44.0,6,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,0.0,0.0
2,398,2,12.9,25.7,0.0,,,15,46.0,13,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,0.0,0.0
3,399,2,9.2,28.0,0.0,,,4,24.0,9,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,0.0,0.0
4,400,2,17.5,32.3,1.0,,,13,41.0,1,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1482,1878,2,14.9,20.2,2.6,,,3,67.0,5,...,81.0,82.0,1015.7,1009.7,8.0,8.0,17.8,18.6,1.0,1.0
77215,2662,33,8.4,14.1,2.8,0.6,2.9,15,41.0,11,...,82.0,83.0,1033.9,1034.4,8.0,8.0,12.2,13.1,1.0,1.0
57755,3049,5,7.5,13.1,5.2,,,9,35.0,9,...,100.0,95.0,1016.8,1012.3,8.0,8.0,10.0,12.4,1.0,1.0
65977,2213,19,15.5,25.7,0.0,7.2,9.3,13,67.0,7,...,71.0,32.0,999.2,996.9,4.0,6.0,17.9,23.8,0.0,1.0


In [71]:
df_p = P.impute(df_p, combination['imputation2'])
df_p



Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,396.0,2.0,13.4,22.9,0.6,5.758410,7.407814,13.0,44.0,13.0,...,71.0,22.0,1007.7,1007.1,8.000000,4.847870,16.9,21.8,0.0,0.0
1,397.0,2.0,7.4,25.1,0.0,5.639122,11.496005,14.0,44.0,6.0,...,44.0,25.0,1010.6,1007.8,1.734548,2.618758,17.2,24.3,0.0,0.0
2,398.0,2.0,12.9,25.7,0.0,7.770166,12.339338,15.0,46.0,13.0,...,38.0,30.0,1007.6,1008.7,1.843146,2.000000,21.0,23.2,0.0,0.0
3,399.0,2.0,9.2,28.0,0.0,5.992142,11.766955,4.0,24.0,9.0,...,45.0,16.0,1017.6,1012.8,1.257526,2.118895,18.1,26.5,0.0,0.0
4,400.0,2.0,17.5,32.3,1.0,6.847603,5.683229,13.0,41.0,1.0,...,82.0,33.0,1010.8,1006.0,7.000000,8.000000,17.8,29.7,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220627,1878.0,2.0,14.9,20.2,2.6,2.399494,1.503404,3.0,67.0,5.0,...,81.0,82.0,1015.7,1009.7,8.000000,8.000000,17.8,18.6,1.0,1.0
220628,2662.0,33.0,8.4,14.1,2.8,0.600000,2.900000,15.0,41.0,11.0,...,82.0,83.0,1033.9,1034.4,8.000000,8.000000,12.2,13.1,1.0,1.0
220629,3049.0,5.0,7.5,13.1,5.2,-0.809179,0.869035,9.0,35.0,9.0,...,100.0,95.0,1016.8,1012.3,8.000000,8.000000,10.0,12.4,1.0,1.0
220630,2213.0,19.0,15.5,25.7,0.0,7.200000,9.300000,13.0,67.0,7.0,...,71.0,32.0,999.2,996.9,4.000000,6.000000,17.9,23.8,0.0,1.0


In [72]:
df_p = P.outlier(df_p, combination['outlier'])
df_p

After dropping outliers:  (170731, 23)


Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,396.0,2.0,13.4,22.9,0.6,5.758410,7.407814,13.0,44.0,13.0,...,71.0,22.0,1007.700000,1007.100000,8.000000,4.847870,16.9,21.8,0.0,0.0
1,397.0,2.0,7.4,25.1,0.0,5.639122,11.496005,14.0,44.0,6.0,...,44.0,25.0,1010.600000,1007.800000,1.734548,2.618758,17.2,24.3,0.0,0.0
2,398.0,2.0,12.9,25.7,0.0,7.770166,12.339338,15.0,46.0,13.0,...,38.0,30.0,1007.600000,1008.700000,1.843146,2.000000,21.0,23.2,0.0,0.0
3,399.0,2.0,9.2,28.0,0.0,5.992142,11.766955,4.0,24.0,9.0,...,45.0,16.0,1017.600000,1012.800000,1.257526,2.118895,18.1,26.5,0.0,0.0
4,400.0,2.0,17.5,32.3,1.0,6.847603,5.683229,13.0,41.0,1.0,...,82.0,33.0,1010.800000,1006.000000,7.000000,8.000000,17.8,29.7,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220626,2237.0,23.0,8.8,15.7,0.0,1.792403,1.807538,7.0,56.0,7.0,...,99.0,87.0,1016.637213,1013.852659,7.341562,7.432745,9.9,14.4,0.0,1.0
220627,1878.0,2.0,14.9,20.2,2.6,2.399494,1.503404,3.0,67.0,5.0,...,81.0,82.0,1015.700000,1009.700000,8.000000,8.000000,17.8,18.6,1.0,1.0
220629,3049.0,5.0,7.5,13.1,5.2,-0.809179,0.869035,9.0,35.0,9.0,...,100.0,95.0,1016.800000,1012.300000,8.000000,8.000000,10.0,12.4,1.0,1.0
220630,2213.0,19.0,15.5,25.7,0.0,7.200000,9.300000,13.0,67.0,7.0,...,71.0,32.0,999.200000,996.900000,4.000000,6.000000,17.9,23.8,0.0,1.0


In [73]:
X_train, X_test, y_train, y_test = P.split(df_p)

In [81]:
print(len(X_train))
print(len(X_test))
print(len(y_train))
print(len(y_test))

136584
34147
136584
34147


In [74]:
print(type(X_train))
print(type(y_train))
print(type(X_test))
print(type(y_test))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [75]:
X_train = P.scale(X_train, combination['scaling'])
X_test = P.scale(X_test, combination['scaling'])

In [76]:
print(type(X_train))
print(type(y_train))
print(type(X_test))
print(type(y_test))


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [77]:
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,-0.927162,1.087925,1.627074,0.913995,2.798731,1.001441,-1.678797,-1.294296,-0.020479,0.469543,...,0.734009,-0.060146,0.799394,-0.179996,-0.355822,0.934941,0.900072,1.528585,0.919505,2.11207
1,-0.792711,-1.25101,0.420976,-0.399549,0.256309,-0.990904,-1.4343,-0.866204,-0.92581,-0.87193,...,-0.635037,1.578757,1.873681,-0.710004,-0.499693,1.340364,1.330897,0.217506,-0.2072,-0.479827
2,1.144952,0.379156,0.897067,1.794667,-0.491462,2.084378,1.510693,-1.722389,2.119395,-1.095509,...,1.605221,-1.46492,-1.642168,0.414256,0.123747,-1.021924,-1.148613,1.000956,1.878726,-0.479827
3,-0.740739,-0.187858,-0.451858,-1.071248,-0.341908,-1.02636,-0.465097,1.488305,0.884853,1.811015,...,1.480762,1.051967,0.213419,-1.304256,-1.219046,0.529519,0.469246,-0.821763,-0.968487,-0.479827
4,-0.427775,-0.258735,1.579464,2.332026,-0.491462,1.653235,1.482468,-1.08025,0.391036,-0.424772,...,0.982927,-1.99171,-2.032818,-0.147874,-0.307865,-1.903017,-1.684881,1.848361,2.47253,-0.479827


In [80]:
X_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
34142,-0.393676,0.880612,0.262107,0.631146,-0.496371,0.481983,-0.068868,-0.864286,-0.930819,-0.656337,...,-1.392925,0.116037,-0.905339,0.35194,-0.002908,-0.016489,-0.194028,-0.066136,0.843279,-0.489772
34143,1.26556,0.596758,-0.356939,-0.558022,1.129095,-0.659325,0.215908,1.067866,-0.359272,-0.433346,...,0.218275,1.051517,-0.17757,-0.817213,-0.544371,0.527846,0.035063,-0.577463,-0.385696,2.068604
34144,1.157817,-1.035406,0.293853,1.240595,-0.496371,0.872538,-0.034579,-1.72302,1.028769,-1.548301,...,-0.401417,0.232972,-1.002375,0.128342,0.268985,0.121627,1.327213,0.429212,1.101212,-0.489772
34145,0.594153,1.590249,0.087505,-0.676939,-0.496371,-0.19815,-0.75223,0.853183,-0.032675,1.796564,...,0.094336,0.817647,0.550198,0.400791,0.540104,0.668822,0.549967,-0.385715,-0.719492,-0.489772
34146,-0.322225,1.02254,-0.277574,-0.825585,-0.496371,-0.441226,0.69435,-0.005551,0.86547,1.796564,...,0.961906,-0.936378,0.259091,0.44887,0.731482,-1.503251,-1.687803,-0.417673,-0.70432,-0.489772


In [94]:
# train = pd.concat([pd.DataFrame(X_train, columns=df_p.columns.drop('RainTomorrow')), pd.DataFrame(y_train, columns=['RainTomorrow'])], axis=1)
# test = pd.concat([pd.DataFrame(X_test, columns=df_p.columns.drop('RainTomorrow')), pd.DataFrame(y_test, columns=['RainTomorrow'])], axis=1)
X_train2 = X_train.rename(columns={x : df_p.columns.drop('RainTomorrow')[x] for x in range(len(df_p.columns.drop('RainTomorrow')))})
X_test2 = X_test.rename(columns={x : df_p.columns.drop('RainTomorrow')[x] for x in range(len(df_p.columns.drop('RainTomorrow')))})

y_train2 = y_train.reset_index(drop=True)
y_test2 = y_test.reset_index(drop=True)

train = pd.concat([X_train2, y_train2], axis=1)
test = pd.concat([X_test2, y_test2], axis=1)

train.to_csv('RF_datasets/train.csv', index=False)
test.to_csv('RF_datasets/test.csv', index=False)


In [95]:
X_train, X_test, y_train, y_test = X_train2, X_test2, y_train2, y_test2

In [96]:
models = [
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    KNeighborsClassifier(),
    GaussianNB()
]

In [97]:
s_t = time.time()
print('Combination:', combination)
for model in models:
    print(model)
    acc, prec, rec, f1 = train_test_model(model, X_train, X_test, y_train, y_test, verbose=True)
    with open('result.log', 'a') as f:
        f.write(f"{model.__class__.__name__}:\n")
        f.write(f"Combination: {combination}\n")
        f.write(f"Accuracy: {acc}\n")
        f.write(f"Precision: {prec}\n")
        f.write(f"Recall: {rec}\n")
        f.write(f"F1: {f1}\n")
        f.write('\n')
    print(f"Time taken: {time.time() - s_t}")
    s_t = time.time()

    print()
with open('result.log', 'a') as f:
    f.write('-'*50 + '\n')

Combination: {'imputation': 'mode_', 'imputation2': 'mice', 'outlier': 'drop', 'encoding': 'label', 'scaling': 'standard', 'discritization': 'equal_width', 'sampling': 'sklearn'}
LogisticRegression()
Accuracy: 0.795531086186195
Precision: 0.7951306248814657
Recall: 0.795531086186195
F1: 0.7946634533779091

Time taken: 0.976529598236084

DecisionTreeClassifier()
Accuracy: 0.902890444255718
Precision: 0.9054301524697594
Recall: 0.902890444255718
F1: 0.9031597514261818

Time taken: 2.5047035217285156

RandomForestClassifier()
Accuracy: 0.9625150086391191
Precision: 0.9632096038081553
Recall: 0.9625150086391191
F1: 0.9625790745363668

Time taken: 43.260782957077026

KNeighborsClassifier()
Accuracy: 0.846340820569889
Precision: 0.8523235260076757
Recall: 0.846340820569889
F1: 0.8468578399719089

Time taken: 10.56873631477356

GaussianNB()
Accuracy: 0.7564061264532755
Precision: 0.7568714983094121
Recall: 0.7564061264532755
F1: 0.7565994414675461

Time taken: 0.14090704917907715



# Again but with P.preprocess(), you can just use it.

In [17]:
df2 = P.load_data('weatherAUS.csv')

In [18]:
X_train2, X_test2, y_train2, y_test2 = P.preprocess(df2)



After dropping outliers:  (170731, 23)


In [19]:
models2 = [
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    KNeighborsClassifier(),
    GaussianNB()
]

In [20]:
s_t = time.time()
print('Combination:', combination)
for model in models2:
    print(model)
    acc, prec, rec, f1 = train_test_model(model, X_train2, X_test2, y_train2, y_test2, verbose=True)
    print(f"Time taken: {time.time() - s_t}")
    s_t = time.time()

    print()

Combination: {'imputation': 'mode_', 'imputation2': 'mice', 'outlier': 'drop', 'encoding': 'label', 'scaling': 'standard', 'discritization': 'equal_width', 'sampling': 'sklearn'}
LogisticRegression()
Accuracy: 0.795531086186195
Precision: 0.7951306248814657
Recall: 0.795531086186195
F1: 0.7946634533779091

Time taken: 0.6542108058929443

DecisionTreeClassifier()
Accuracy: 0.9009283392391718
Precision: 0.9033805610397984
Recall: 0.9009283392391718
F1: 0.9011994508274296

Time taken: 1.9681663513183594

RandomForestClassifier()
Accuracy: 0.9620757313966087
Precision: 0.9628927573053667
Recall: 0.9620757313966087
F1: 0.9621454757251434

Time taken: 25.569432973861694

KNeighborsClassifier()
Accuracy: 0.846340820569889
Precision: 0.8523235260076757
Recall: 0.846340820569889
F1: 0.8468578399719089

Time taken: 4.633833646774292

GaussianNB()
Accuracy: 0.7564061264532755
Precision: 0.7568714983094121
Recall: 0.7564061264532755
F1: 0.7565994414675461

Time taken: 0.10720229148864746

