In [31]:
import pandas as pd 
import numpy as np
%matplotlib inline 
import matplotlib.pyplot as plt
from tqdm import tqdm
import datetime as datetime
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import GradientBoostingRegressor
import sklearn.metrics

import random

import warnings
warnings.filterwarnings(action='ignore')

In [2]:
df_train = pd.read_csv('bpi2017_train.csv', parse_dates = ['time:timestamp'])
df_test = pd.read_csv('bpi2017_test.csv', parse_dates = ['time:timestamp'])
df_val = pd.read_csv('bpi2017_val.csv', parse_dates = ['time:timestamp'])

In [3]:
df_train = df_train.sort_values(by = ['case:concept:name', 'time:timestamp']).reset_index()
df_val = df_val.sort_values(by = ['case:concept:name', 'time:timestamp']).reset_index()
df_test = df_test.sort_values(by = ['case:concept:name', 'time:timestamp']).reset_index()

# Remove obsolete columns
df_train = df_train.drop(['index', 'Unnamed: 0'], axis = 1)
df_val = df_val.drop(['index', 'Unnamed: 0'], axis = 1)
df_test = df_test.drop(['index', 'Unnamed: 0'], axis = 1)

In [4]:
# Cumulative sum function to be used later
def CumSum(lists):
    # Returns the cumulative sum of a list
    length = len(lists)
    cu_list = [sum(lists[0: x: 1]) for x in range(0, length + 1)]
    return cu_list[1: ]   
def time_difference(df):
    # Calculate time difference between each row
    df['time_diff'] = df['time:timestamp'].diff().dt.total_seconds()
    # Set the time difference of the 1st row to 0 as it's currently NaN
    df.at[0, 'time_diff'] = 0
    # Count number of steps per process
    length_per_case_List = df.groupby(['case:concept:name'])['time_diff'].count().tolist()

    # Using the cumulative sum we get all the positions that are a first step in a process
    # And then the time difference can be set to 0
    position_lst = CumSum(length_per_case_List)
    for i in tqdm(position_lst):
        df.at[i, 'time_diff'] = 0
    # For Loop mysteriously creates an empty row at the end of the df, gotta delete it
    df = df.iloc[: -1]

    # Unzip the position list to get the number of each steps of each process, make that into a list
    step_in_process = []
    for x in tqdm(length_per_case_List):
        for y in range(x):
            step_in_process.append(y + 1)
    # Assign position number to each row/process
    df['position'] = step_in_process
    return df

In [5]:
df_train = time_difference(df_train)
df_val = time_difference(df_val)
df_test = time_difference(df_test)

100%|██████████████████████████████████| 16308/16308 [00:00<00:00, 66233.16it/s]
100%|█████████████████████████████████| 16308/16308 [00:00<00:00, 341477.67it/s]
100%|████████████████████████████████████| 4078/4078 [00:00<00:00, 76432.85it/s]
100%|███████████████████████████████████| 4078/4078 [00:00<00:00, 371567.61it/s]
100%|██████████████████████████████████████| 751/751 [00:00<00:00, 68316.18it/s]
100%|█████████████████████████████████████| 751/751 [00:00<00:00, 356608.43it/s]


In [6]:
# factorization of categorical atrtibutes (of interest) of the training data
df_train_f = df_train.copy()
vals, code_Action = pd.factorize(df_train_f['Action'])
df_train_f['Action'] = vals

vals, code_Origin = pd.factorize(df_train_f['EventOrigin'])
df_train_f['EventOrigin'] = vals

vals, code_lifecycle_transition = pd.factorize(df_train_f['lifecycle:transition'])
df_train_f['lifecycle:transition'] = vals

vals, code_loan_goal = pd.factorize(df_train_f['case:LoanGoal'])
df_train_f['case:LoanGoal'] = vals

vals, code_appl_type = pd.factorize(df_train_f['case:ApplicationType'])
df_train_f['case:ApplicationType'] = vals

# vals, code_concept_name = pd.factorize(df_train_f['concept:name'])
# df_train_f['concept:name'] = vals

# and for test data
df_test_f = df_test.copy()
vals, code_Action = pd.factorize(df_test_f['Action'])
df_test_f['Action'] = vals

vals, code_Origin = pd.factorize(df_test_f['EventOrigin'])
df_test_f['EventOrigin'] = vals

vals, code_lifecycle_transition = pd.factorize(df_test_f['lifecycle:transition'])
df_test_f['lifecycle:transition'] = vals

vals, code_loan_goal = pd.factorize(df_test_f['case:LoanGoal'])
df_test_f['case:LoanGoal'] = vals

vals, code_appl_type = pd.factorize(df_test_f['case:ApplicationType'])
df_test_f['case:ApplicationType'] = vals

vals, code_concept_name = pd.factorize(df_test_f['concept:name'])
# df_test_f['concept:name'] = vals

# to see the "code" - so which number corresponds to which class - print the code_... for the attribute of interest

In [47]:
df_train_10 = df_train_f[df_train_f['position']<= 10][:2000]
df_test_10 = df_test_f[df_test_f['position']<= 10][:2000]

df_train_5 = df_train_f[df_train_f['position']<= 5][:2000]
df_test_5 = df_test_f[df_test_f['position']<= 5][:2000]

# in general, the results are much better for the "early positions"

In [8]:
features = ['lifecycle:transition', 'case:LoanGoal', 'case:ApplicationType', 'case:RequestedAmount', 'position']

In [51]:
# X_train = df_train_f[features]
# y_train = df_train_f['time_diff']
# X_test = df_test_f[features]
# y_test = df_test_f['time_diff']

# X_train = df_train_10[features]
# y_train = df_train_10['time_diff']
# X_test = df_test_10[features]
# y_test = df_test_10['time_diff']

X_train = df_train_f[features][:2000]
y_train = df_train_f['time_diff'][:2000]
X_test = df_test_f[features][:2000]
y_test = df_test_f['time_diff'][:2000]

# X_train = df_train_5[features]
# y_train = df_train_5['time_diff']
# X_test = df_test_5[features]
# y_test = df_test_5['time_diff']

In [10]:
r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=100)

er = VotingRegressor([('lr', r1), ('rf', r2)])

er.fit(X_train, y_train)

VotingRegressor(estimators=[('lr', LinearRegression()),
                            ('rf', RandomForestRegressor())])

In [None]:
er.score(X_train, y_train)

In [36]:
reg1 = GradientBoostingRegressor(n_estimators=1000) # default loss has best score
reg2 = RandomForestRegressor(n_estimators=200)
reg3 = LinearRegression()
reg4 = RandomForestRegressor(max_depth=5)
reg5 = RandomForestRegressor(max_depth=10)
# reg6 = linear_model.Lasso(alpha=0.1)

reg1.fit(X_train, y_train)
reg2.fit(X_train, y_train)
reg3.fit(X_train, y_train)
reg4.fit(X_train, y_train)
reg5.fit(X_train, y_train)

ereg = VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3), ("rf2", reg4), ("rf3", reg5)], verbose=True)
ereg.fit(X_train, y_train)

[Voting] ....................... (1 of 5) Processing gb, total=   0.6s
[Voting] ....................... (2 of 5) Processing rf, total=   1.4s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.1s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.4s


VotingRegressor(estimators=[('gb',
                             GradientBoostingRegressor(n_estimators=1000)),
                            ('rf', RandomForestRegressor(n_estimators=200)),
                            ('lr', LinearRegression()),
                            ('rf2', RandomForestRegressor(max_depth=5)),
                            ('rf3', RandomForestRegressor(max_depth=10))],
                verbose=True)

In [37]:
ereg.score(X_train, y_train)

0.6901792698822978

In [10]:
# parameters
n_estimators = [5, 10, 20, 50, 100, 200]
max_depth = [3, 5, 10, 25, 50]
criterion = ['squared_error', 'absolute_error', 'poisson']
normalize = [True, False]
fit_intercept = [True, False]
bootstrap = [True, False]
max_samples = [5, 10, 20, 50, 100, 150] # only if bootstrap=True

parameters = [n_estimators, n_estimators, n_estimators, n_estimators,
              max_depth, max_depth, criterion, criterion, criterion,
              normalize, fit_intercept, bootstrap, bootstrap, bootstrap]

In [52]:
def model_testing(n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2):
    reg1 = GradientBoostingRegressor(n_estimators=n1) # default loss has best score
    reg2 = RandomForestRegressor(n_estimators=n2, criterion=c2, bootstrap=b2)
    reg3 = LinearRegression(normalize=norm, fit_intercept=fi)
    if b4 == True:
        reg4 = RandomForestRegressor(n_estimators=n4, max_depth=m4, criterion=c4, bootstrap=b4, max_samples = ms1)
    else:
        reg4 = RandomForestRegressor(n_estimators=n4, max_depth=m4, criterion=c4, bootstrap=b4)
    if b5 == True:
        reg5 = RandomForestRegressor(n_estimators=n5, max_depth=m5, criterion=c5, bootstrap=b5, max_samples = ms2)
    else:
        reg5 = RandomForestRegressor(n_estimators=n5, max_depth=m5, criterion=c5, bootstrap=b5)

    reg1.fit(X_train, y_train)
    reg2.fit(X_train, y_train)
    reg3.fit(X_train, y_train)
    reg4.fit(X_train, y_train)
    reg5.fit(X_train, y_train)

    ereg = VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3), ("rf2", reg4), ("rf3", reg5)], verbose=True)
    ereg.fit(X_train, y_train)
#     score = ereg.score(X_train, y_train)
    score_train, score_test = ereg.score(X_train, y_train), ereg.score(X_test, y_test)
    mse_train = sklearn.metrics.mean_squared_error(y_true = y_train, y_pred = ereg.predict(X_train))
    mse_test = sklearn.metrics.mean_squared_error(y_true = y_test, y_pred = ereg.predict(X_test))
    return score_train, score_test, mse_train, mse_test

In [45]:
columns = ['n1', 'n2', 'n4', 'n5', 'm4', 'm5', 'c2', 'c4', 'c5',
           'norm', 'fi', 'b2', 'b4', 'b5', 'ms1', 'ms2', 'score_train', 'score_test', 'mse_train', 'mse_test']
df_scores = pd.DataFrame(columns = columns)

In [26]:
for i in tqdm(range(0,3)):
    chosen_params = []
    for p in parameters:
        ind1 = random.randint(0, len(p)-1)
        chosen_params.append(p[ind1])                    
    print(chosen_params)
    
    n1 = chosen_params[0]
    n2 = chosen_params[1] 
    n4 = chosen_params[2]
    n5 = chosen_params[3]
    m4 = chosen_params[4]
    m5 = chosen_params[5] 
    c2 = chosen_params[6] 
    c4 = chosen_params[7] 
    c5 = chosen_params[8] 
    norm = chosen_params[9] 
    fi = chosen_params[10] 
    b2 = chosen_params[11] 
    b4 = chosen_params[12]
    b5 = chosen_params[13] 
    ms1 = None
    ms2 = None
    
    if b4 == True:
        ind = random.randint(0, len(max_samples)-1)
        ms1 = max_samples[ind]
    if b5 == True:
        ind = random.randint(0, len(max_samples)-1)
        ms2 = max_samples[ind]
        
    score = model_testing(n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2)
    values = [n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2, score_train, score_test, mse_train, mse_test]
    df_scores = df_scores.append(dict(zip(columns, values)), ignore_index=True)
df_scores

  0%|                                                     | 0/3 [00:00<?, ?it/s]

[50, 200, 5, 200, 3, 10, 'squared_error', 'absolute_error', 'squared_error', True, True, False, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.5s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


  0%|                                                     | 0/3 [00:01<?, ?it/s]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.4s





NameError: name 'score_train' is not defined

In [53]:
# bootstrap == False is better in general. Next tuning try:
df_scores_2 = pd.DataFrame(columns = columns)
for i in tqdm(range(0,5)):
    chosen_params = []
    for p in parameters:
        ind1 = random.randint(0, len(p)-1)
        chosen_params.append(p[ind1])                    
    print(chosen_params)
    
    n1 = chosen_params[0]
    n2 = chosen_params[1] 
    n4 = chosen_params[2]
    n5 = chosen_params[3]
    m4 = chosen_params[4]
    m5 = chosen_params[5] 
    c2 = chosen_params[6] 
    c4 = chosen_params[7] 
    c5 = chosen_params[8] 
    norm = chosen_params[9] 
    fi = chosen_params[10] 
    b2 = False 
    b4 = False
    b5 = False 
    ms1 = None
    ms2 = None
        
    score_train, score_test, mse_train, mse_test = model_testing(n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2)
    values = [n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2, score_train, score_test, mse_train, mse_test]
    df_scores_2 = df_scores_2.append(dict(zip(columns, values)), ignore_index=True)
df_scores_2

  0%|                                                     | 0/5 [00:00<?, ?it/s]

[200, 50, 5, 200, 50, 5, 'poisson', 'poisson', 'absolute_error', True, False, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   0.4s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 20%|█████████                                    | 1/5 [00:29<01:59, 29.95s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=  14.3s
[100, 50, 10, 200, 3, 50, 'absolute_error', 'absolute_error', 'poisson', True, False, False, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   4.7s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.6s
[Voting] ...................... (5 of 5) Processing rf3, total=   1.1s


 40%|██████████████████                           | 2/5 [00:43<01:00, 20.10s/it]

[5, 100, 10, 200, 25, 10, 'absolute_error', 'squared_error', 'squared_error', True, True, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   9.5s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 60%|███████████████████████████                  | 3/5 [01:03<00:40, 20.35s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.4s
[5, 100, 20, 200, 5, 10, 'absolute_error', 'squared_error', 'squared_error', False, False, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   9.7s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 80%|████████████████████████████████████         | 4/5 [01:24<00:20, 20.39s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.5s
[20, 20, 100, 200, 50, 25, 'poisson', 'squared_error', 'poisson', True, False, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.4s
[Voting] ...................... (5 of 5) Processing rf3, total=   1.0s


100%|█████████████████████████████████████████████| 5/5 [01:27<00:00, 17.49s/it]


Unnamed: 0,n1,n2,n4,n5,m4,m5,c2,c4,c5,norm,fi,b2,b4,b5,ms1,ms2,score_train,score_test,mse_train,mse_test
0,200,50,5,200,50,5,poisson,poisson,absolute_error,True,False,False,False,False,,,0.692882,-0.204519,16736050000.0,68370500000.0
1,100,50,10,200,3,50,absolute_error,absolute_error,poisson,True,False,False,False,False,,,0.677085,-0.082471,17596850000.0,61442830000.0
2,5,100,10,200,25,10,absolute_error,squared_error,squared_error,True,True,False,False,False,,,0.781853,-0.285786,11887660000.0,72983350000.0
3,5,100,20,200,5,10,absolute_error,squared_error,squared_error,False,False,False,False,False,,,0.610497,-0.096571,21225500000.0,62243190000.0
4,20,20,100,200,50,25,poisson,squared_error,poisson,True,False,False,False,False,,,0.834314,-0.303284,9028889000.0,73976580000.0


In [18]:
# bootstrap == False is better in general. Next tuning try:
df_scores_2 = pd.DataFrame(columns = columns)
for i in tqdm(range(0,50)):
    chosen_params = []
    for p in parameters:
        ind1 = random.randint(0, len(p)-1)
        chosen_params.append(p[ind1])                    
    print(chosen_params)
    
    n1 = chosen_params[0]
    n2 = chosen_params[1] 
    n4 = chosen_params[2]
    n5 = chosen_params[3]
    m4 = chosen_params[4]
    m5 = chosen_params[5] 
    c2 = chosen_params[6] 
    c4 = chosen_params[7] 
    c5 = chosen_params[8] 
    norm = chosen_params[9] 
    fi = chosen_params[10] 
    b2 = False
    b4 = False
    b5 = False 
    ms1 = None
    ms2 = None
        
    score = model_testing(n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2)
    values = [n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, b2, b4, b5, ms1, ms2, score]
    df_scores_2 = df_scores_2.append(dict(zip(columns, values)), ignore_index=True)
df_scores_2

  0%|                                                    | 0/50 [00:00<?, ?it/s]

[500, 5, 50, 200, 50, 3, 'poisson', 'squared_error', 'absolute_error', True, True, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.2s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.1s


  2%|▉                                           | 1/50 [00:23<18:55, 23.17s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=  11.3s
[20, 500, 200, 50, 3, 50, 'squared_error', 'absolute_error', 'absolute_error', False, False, True, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.8s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=  11.3s


  4%|█▊                                          | 2/50 [00:55<23:04, 28.85s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   4.3s
[50, 10, 100, 200, 3, 50, 'poisson', 'absolute_error', 'absolute_error', True, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   5.7s


  6%|██▋                                         | 3/50 [01:41<28:28, 36.36s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=  16.9s
[10, 50, 100, 50, 5, 10, 'poisson', 'absolute_error', 'poisson', False, True, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


  8%|███▌                                        | 4/50 [01:55<21:08, 27.59s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   6.9s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.1s
[20, 200, 10, 100, 10, 3, 'absolute_error', 'absolute_error', 'absolute_error', False, False, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=  16.9s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.8s


 10%|████▍                                       | 5/50 [02:42<25:54, 34.54s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   5.7s
[200, 100, 5, 50, 25, 3, 'poisson', 'squared_error', 'absolute_error', True, True, False, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 12%|█████▎                                      | 6/50 [02:48<18:17, 24.95s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   2.9s
[500, 500, 50, 50, 5, 5, 'absolute_error', 'poisson', 'squared_error', False, False, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.2s


 14%|██████▏                                     | 7/50 [04:13<32:00, 44.66s/it]

[Voting] ....................... (2 of 5) Processing rf, total=  42.3s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.1s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[10, 5, 20, 200, 3, 50, 'poisson', 'absolute_error', 'squared_error', False, False, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   1.2s


 16%|███████                                     | 8/50 [04:16<21:58, 31.40s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.3s
[5, 20, 10, 20, 5, 50, 'poisson', 'poisson', 'squared_error', False, False, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 18%|███████▉                                    | 9/50 [04:17<14:47, 21.64s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[100, 10, 10, 20, 3, 10, 'squared_error', 'poisson', 'squared_error', True, False, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 20%|████████▌                                  | 10/50 [04:17<10:01, 15.03s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[100, 10, 50, 200, 5, 3, 'squared_error', 'absolute_error', 'absolute_error', False, True, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   3.4s


 22%|█████████▍                                 | 11/50 [04:47<12:42, 19.55s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=  11.4s
[100, 100, 200, 10, 25, 5, 'absolute_error', 'absolute_error', 'squared_error', True, False, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   8.4s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 24%|██████████▎                                | 12/50 [05:37<18:24, 29.07s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=  16.9s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[500, 500, 200, 100, 50, 10, 'squared_error', 'poisson', 'absolute_error', False, False, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.2s
[Voting] ....................... (2 of 5) Processing rf, total=   0.8s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.4s


 26%|███████████▏                               | 13/50 [05:56<16:03, 26.03s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   8.1s
[500, 50, 20, 5, 10, 25, 'absolute_error', 'squared_error', 'squared_error', True, False, True, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.2s


 28%|████████████                               | 14/50 [06:05<12:31, 20.88s/it]

[Voting] ....................... (2 of 5) Processing rf, total=   4.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[20, 20, 5, 5, 25, 50, 'absolute_error', 'absolute_error', 'absolute_error', True, False, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   1.7s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.4s


 30%|████████████▉                              | 15/50 [08:28<33:36, 57.61s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.6s
[500, 20, 10, 20, 50, 5, 'poisson', 'poisson', 'poisson', False, True, True, False, False]


 32%|█████████████▊                             | 16/50 [08:29<22:55, 40.46s/it]

[Voting] ....................... (1 of 5) Processing gb, total=   0.2s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[20, 200, 200, 500, 5, 10, 'poisson', 'poisson', 'poisson', False, True, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.4s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.2s


 34%|██████████████▌                            | 17/50 [08:32<16:03, 29.20s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.8s
[5, 500, 500, 20, 25, 5, 'poisson', 'squared_error', 'squared_error', True, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   1.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 36%|███████████████▍                           | 18/50 [08:36<11:30, 21.57s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   0.8s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[50, 500, 200, 100, 3, 10, 'squared_error', 'absolute_error', 'absolute_error', False, False, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.8s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=  11.2s


 38%|████████████████▎                          | 19/50 [09:16<14:00, 27.12s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   8.0s
[20, 200, 200, 200, 50, 3, 'absolute_error', 'absolute_error', 'squared_error', True, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=  16.8s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 40%|█████████████████▏                         | 20/50 [10:23<19:37, 39.23s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=  16.8s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.1s
[200, 10, 10, 10, 3, 5, 'poisson', 'poisson', 'squared_error', True, False, True, True, False]


 42%|██████████████████                         | 21/50 [10:23<13:18, 27.54s/it]

[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[20, 200, 10, 10, 50, 5, 'squared_error', 'squared_error', 'squared_error', False, False, True, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s


 44%|██████████████████▉                        | 22/50 [10:24<09:05, 19.49s/it]

[Voting] ....................... (2 of 5) Processing rf, total=   0.3s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[500, 100, 5, 50, 3, 10, 'poisson', 'absolute_error', 'absolute_error', True, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.2s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.3s


 46%|███████████████████▊                       | 23/50 [10:34<07:25, 16.51s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   4.0s
[10, 200, 50, 200, 50, 10, 'squared_error', 'absolute_error', 'squared_error', False, True, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.3s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   4.2s


 48%|████████████████████▋                      | 24/50 [10:43<06:14, 14.42s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.3s
[10, 100, 50, 5, 50, 50, 'poisson', 'absolute_error', 'absolute_error', False, True, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   4.2s


 50%|█████████████████████▌                     | 25/50 [10:53<05:24, 12.97s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.4s
[5, 200, 50, 50, 10, 10, 'squared_error', 'squared_error', 'absolute_error', False, False, False, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.3s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.1s


 52%|██████████████████████▎                    | 26/50 [11:02<04:40, 11.70s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   4.0s
[100, 20, 500, 100, 50, 10, 'absolute_error', 'absolute_error', 'squared_error', True, True, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   1.7s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=  42.2s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.1s


 54%|███████████████████████▏                   | 27/50 [12:30<13:16, 34.63s/it]

[10, 5, 200, 200, 25, 50, 'poisson', 'squared_error', 'absolute_error', True, False, False, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.3s


 56%|████████████████████████                   | 28/50 [13:04<12:40, 34.56s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=  16.8s
[200, 100, 500, 100, 5, 25, 'poisson', 'poisson', 'absolute_error', True, False, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.5s


 58%|████████████████████████▉                  | 29/50 [13:23<10:24, 29.73s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   8.4s
[10, 200, 50, 20, 10, 10, 'absolute_error', 'absolute_error', 'squared_error', False, False, False, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=  16.8s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 60%|█████████████████████████▊                 | 30/50 [14:04<11:07, 33.39s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   4.0s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[5, 50, 200, 10, 25, 50, 'absolute_error', 'poisson', 'poisson', True, True, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   4.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 62%|██████████████████████████▋                | 31/50 [14:14<08:17, 26.18s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   0.4s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[50, 100, 500, 20, 3, 10, 'squared_error', 'poisson', 'absolute_error', False, False, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.4s


 64%|███████████████████████████▌               | 32/50 [14:18<05:53, 19.65s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   1.6s
[100, 50, 500, 5, 5, 10, 'absolute_error', 'absolute_error', 'poisson', False, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   4.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 66%|████████████████████████████▍              | 33/50 [15:35<10:27, 36.92s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=  34.4s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[20, 5, 20, 200, 10, 25, 'squared_error', 'absolute_error', 'squared_error', True, True, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   1.6s


 68%|█████████████████████████████▏             | 34/50 [15:39<07:12, 27.02s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.3s
[100, 5, 500, 10, 3, 10, 'squared_error', 'squared_error', 'absolute_error', False, True, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.3s


 70%|██████████████████████████████             | 35/50 [15:42<04:54, 19.65s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.8s
[5, 500, 20, 200, 50, 25, 'poisson', 'squared_error', 'poisson', True, True, True, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   1.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 72%|██████████████████████████████▉            | 36/50 [15:45<03:25, 14.68s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.4s
[100, 100, 50, 5, 5, 10, 'absolute_error', 'absolute_error', 'poisson', False, True, False, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   8.5s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 74%|███████████████████████████████▊           | 37/50 [16:09<03:47, 17.46s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   3.4s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[5, 50, 20, 200, 5, 25, 'poisson', 'squared_error', 'absolute_error', False, False, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 76%|████████████████████████████████▋          | 38/50 [16:43<04:29, 22.46s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=  16.9s
[100, 100, 50, 500, 25, 50, 'absolute_error', 'absolute_error', 'squared_error', False, True, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   8.4s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   4.2s


 78%|█████████████████████████████████▌         | 39/50 [17:10<04:21, 23.81s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.8s
[5, 20, 10, 100, 10, 25, 'squared_error', 'poisson', 'poisson', True, True, False, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 80%|██████████████████████████████████▍        | 40/50 [17:11<02:48, 16.83s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.2s
[20, 100, 50, 50, 3, 5, 'squared_error', 'squared_error', 'poisson', True, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 82%|███████████████████████████████████▎       | 41/50 [17:11<01:47, 11.95s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.1s
[10, 50, 50, 20, 3, 50, 'poisson', 'absolute_error', 'poisson', True, False, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 84%|████████████████████████████████████       | 42/50 [17:17<01:21, 10.17s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   2.8s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[50, 50, 10, 100, 10, 5, 'poisson', 'squared_error', 'squared_error', False, True, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 86%|████████████████████████████████████▉      | 43/50 [17:18<00:50,  7.27s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.1s
[100, 50, 20, 5, 50, 3, 'absolute_error', 'poisson', 'squared_error', False, False, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s


 88%|█████████████████████████████████████▊     | 44/50 [17:26<00:46,  7.68s/it]

[Voting] ....................... (2 of 5) Processing rf, total=   4.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[20, 20, 100, 20, 3, 25, 'poisson', 'absolute_error', 'squared_error', True, True, False, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s


 90%|██████████████████████████████████████▋    | 45/50 [17:38<00:44,  8.86s/it]

[Voting] ...................... (4 of 5) Processing rf2, total=   5.7s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.0s
[20, 50, 50, 500, 3, 10, 'squared_error', 'poisson', 'poisson', True, False, True, False, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 92%|███████████████████████████████████████▌   | 46/50 [17:40<00:27,  6.78s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.8s
[200, 5, 20, 100, 50, 50, 'squared_error', 'squared_error', 'poisson', False, False, True, False, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 94%|████████████████████████████████████████▍  | 47/50 [17:40<00:14,  4.96s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.2s
[20, 10, 5, 100, 3, 5, 'squared_error', 'poisson', 'squared_error', False, True, False, True, True]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.0s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.0s


 96%|█████████████████████████████████████████▎ | 48/50 [17:41<00:07,  3.56s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   0.1s
[10, 50, 500, 50, 5, 25, 'poisson', 'absolute_error', 'absolute_error', False, False, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.1s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=  34.4s


 98%|██████████████████████████████████████████▏| 49/50 [18:58<00:25, 25.79s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   4.2s
[20, 100, 200, 500, 10, 25, 'squared_error', 'squared_error', 'poisson', True, True, True, True, False]
[Voting] ....................... (1 of 5) Processing gb, total=   0.0s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.3s


100%|███████████████████████████████████████████| 50/50 [19:01<00:00, 22.84s/it]

[Voting] ...................... (5 of 5) Processing rf3, total=   1.0s





Unnamed: 0,n1,n2,n4,n5,m4,m5,c2,c4,c5,norm,fi,b2,b4,b5,ms1,ms2,score
0,500,5,50,200,50,3,poisson,squared_error,absolute_error,True,True,False,False,False,,,0.516609
1,20,500,200,50,3,50,squared_error,absolute_error,absolute_error,False,False,False,False,False,,,0.471101
2,50,10,100,200,3,50,poisson,absolute_error,absolute_error,True,True,False,False,False,,,0.480987
3,10,50,100,50,5,10,poisson,absolute_error,poisson,False,True,False,False,False,,,0.442287
4,20,200,10,100,10,3,absolute_error,absolute_error,absolute_error,False,False,False,False,False,,,0.40337
5,200,100,5,50,25,3,poisson,squared_error,absolute_error,True,True,False,False,False,,,0.507523
6,500,500,50,50,5,5,absolute_error,poisson,squared_error,False,False,False,False,False,,,0.449195
7,10,5,20,200,3,50,poisson,absolute_error,squared_error,False,False,False,False,False,,,0.475312
8,5,20,10,20,5,50,poisson,poisson,squared_error,False,False,False,False,False,,,0.487851
9,100,10,10,20,3,10,squared_error,poisson,squared_error,True,False,False,False,False,,,0.465605


In [21]:
df_scores_2[df_scores_2['score']>0.55].sort_values('score')
# higher m4, m5 seem to be better

Unnamed: 0,n1,n2,n4,n5,m4,m5,c2,c4,c5,norm,fi,b2,b4,b5,ms1,ms2,score
30,5,50,200,10,25,50,absolute_error,poisson,poisson,True,True,False,False,False,,,0.550556
12,500,500,200,100,50,10,squared_error,poisson,absolute_error,False,False,False,False,False,,,0.554192
13,500,50,20,5,10,25,absolute_error,squared_error,squared_error,True,False,False,False,False,,,0.556606
27,10,5,200,200,25,50,poisson,squared_error,absolute_error,True,False,False,False,False,,,0.557477
35,5,500,20,200,50,25,poisson,squared_error,poisson,True,True,False,False,False,,,0.561148
38,100,100,50,500,25,50,absolute_error,absolute_error,squared_error,False,True,False,False,False,,,0.563166
46,200,5,20,100,50,50,squared_error,squared_error,poisson,False,False,False,False,False,,,0.586206


In [None]:
n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi = 1000, 1000, 1000, 1000, 50, 100, 'squared_error', 'squared_error', 'poisson', False, False
reg1 = GradientBoostingRegressor(n_estimators=n1)
reg2 = RandomForestRegressor(n_estimators=n2, criterion=c2)
reg3 = LinearRegression(normalize=norm, fit_intercept=fi)
reg4 = RandomForestRegressor(n_estimators=n4, max_depth=m4, criterion=c4)
reg5 = RandomForestRegressor(n_estimators=n5, max_depth=m5, criterion=c5)


reg1.fit(X_train, y_train)
reg2.fit(X_train, y_train)
reg3.fit(X_train, y_train)
reg4.fit(X_train, y_train)
reg5.fit(X_train, y_train)

ereg = VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3), ("rf2", reg4), ("rf3", reg5)], verbose=True)
ereg.fit(X_train, y_train)
ereg.score(X_train, y_train)

# the most important parameter is the number of estimators - it seems that the higher, the better
# in the next cell, we check if there is a "boundary"

[Voting] ....................... (1 of 5) Processing gb, total= 3.1min


In [52]:
df_estimators = pd.DataFrame()
i = 1
while i < 100:
    n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi = 100*i, 100*i, 100*i, 100*i, 50, 100, 'squared_error', 'squared_error', 'poisson', False, False
    reg1 = GradientBoostingRegressor(n_estimators=n1)
    reg2 = RandomForestRegressor(n_estimators=n2, criterion=c2, bootstrap=False)
    reg3 = LinearRegression(normalize=norm, fit_intercept=fi)
    reg4 = RandomForestRegressor(n_estimators=n4, max_depth=m4, criterion=c4, bootstrap=False)
    reg5 = RandomForestRegressor(n_estimators=n5, max_depth=m5, criterion=c5, bootstrap=False)


    reg1.fit(X_train, y_train)
    reg2.fit(X_train, y_train)
    reg3.fit(X_train, y_train)
    reg4.fit(X_train, y_train)
    reg5.fit(X_train, y_train)

    ereg = VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3), ("rf2", reg4), ("rf3", reg5)], verbose=True)
    ereg.fit(X_train, y_train)
    score = ereg.score(X_train, y_train)
    values = [n1, n2, n4, n5, m4, m5, c2, c4, c5, norm, fi, False, False, False, ms1, ms2, score]
    df_estimators = df_estimators.append(dict(zip(columns, values)), ignore_index=True)
    i = i+10
    
df_estimators
# no significant improvement for more than 1000 estimators

[Voting] ....................... (1 of 5) Processing gb, total=   0.1s
[Voting] ....................... (2 of 5) Processing rf, total=   0.2s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   0.3s
[Voting] ...................... (5 of 5) Processing rf3, total=   0.4s
[Voting] ....................... (1 of 5) Processing gb, total=   0.6s
[Voting] ....................... (2 of 5) Processing rf, total=   2.6s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   2.6s
[Voting] ...................... (5 of 5) Processing rf3, total=   3.9s
[Voting] ....................... (1 of 5) Processing gb, total=   1.2s
[Voting] ....................... (2 of 5) Processing rf, total=   4.9s
[Voting] ....................... (3 of 5) Processing lr, total=   0.0s
[Voting] ...................... (4 of 5) Processing rf2, total=   5.0s
[Votin

Unnamed: 0,n1,n2,n4,n5,m4,m5,c2,c4,c5,norm,fi,b2,b4,b5,ms1,ms2,score
0,100.0,100.0,100.0,100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.828542
1,1100.0,1100.0,1100.0,1100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.844845
2,2100.0,2100.0,2100.0,2100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.849093
3,3100.0,3100.0,3100.0,3100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.851708
4,4100.0,4100.0,4100.0,4100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.853583
5,5100.0,5100.0,5100.0,5100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.854944
6,6100.0,6100.0,6100.0,6100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.856091
7,7100.0,7100.0,7100.0,7100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.856994
8,8100.0,8100.0,8100.0,8100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.857848
9,9100.0,9100.0,9100.0,9100.0,50.0,100.0,squared_error,squared_error,poisson,0.0,0.0,0.0,0.0,0.0,,,0.858427


In [53]:
predictions = ereg.predict(X_train[:200])

In [54]:
# compare the predictions with actual data and check accuracy (proportion of correct predictions)
compare_predictions = pd.DataFrame()
compare_predictions['true'] = y_train[:200]
compare_predictions['predicted'] = predictions
# compare_predictions['position'] = X_test['position'][:200]
compare_predictions['diff'] = compare_predictions['true'] - compare_predictions['predicted']
mse = sum((compare_predictions['true']-compare_predictions['predicted'])**2)/200
mse/1000000000
# compare_predictions[:60]

Unnamed: 0,true,predicted,diff
0,0.0,229.767658,-229.767658
1,0.061,543.030808,-542.969808
2,0.229,2237.595918,-2237.366918
3,66.323,2180.827025,-2114.504025
4,0.007,1778.048526,-1778.041526
5,0.006,1308.276458,-1308.270458
6,78061.258,54799.902828,23261.355172
7,642.724,5745.965547,-5103.241547
8,86815.138,23727.244525,63087.893475
9,169.901,1902.684177,-1732.783177
