# Library

In [1]:
import platform

import numpy as np
import pandas as pd


In [2]:
# Report the interpreter and library versions this notebook was run with.
for label, version in (
    ('Python version:', platform.python_version()),
    ('Numpy version:', np.__version__),
    ('Pandas version:', pd.__version__),
):
    print(label, version)


Python version: 3.8.5
Numpy version: 1.19.0
Pandas version: 1.0.5


In [3]:
# Row count used by the voting cells and to generate the row_id column
# (0 .. TOTAL_TEST - 1) of every ensemble submission.
TOTAL_TEST = 55_970


# Load submissions

In [4]:
# Read the five model submission CSVs, in the same order as before.
# The number after each path is the score the author noted for that file
# (presumably a leaderboard score -- the notebook does not say).
(
    df_mljar,
    df_lgbm_v20,
    df_lgbm_v21,
    df_lgbm_v23_1,
    df_lgbm_v23_2,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        './data/submission/submission_2020-08-06 22:24:43.301671.csv',  # 0.53712
        './data/submission/lightgbm_v20/submission_ensemble_mode.csv',  # 0.52608
        './data/submission/lightgbm_v21/submission_best_mean.csv',  # 0.52854
        './data/submission/lightgbm_v23/submission_weighted_ensemble_mean.csv',  # 0.53007
        './data/submission/lightgbm_v23/submission_best_mode.csv',  # 0.52511
    )
]


# Majority vote

In [5]:
# Unweighted majority vote: each of the five models casts one vote per row and
# the label with more votes wins (five voters, so a tie cannot occur).
# The votes are counted column-wise with NumPy instead of doing one scalar
# `.loc` lookup per row and per model, which yields the same labels much faster.
row_labels = np.arange(TOTAL_TEST)

# One boolean column per model: True where that model predicts open_flag == 0.
# Selecting by explicit labels keeps the KeyError of the row-by-row version
# when a submission does not contain every label 0 .. TOTAL_TEST - 1.
predicts_zero = np.column_stack([
    df.loc[row_labels, 'open_flag'].to_numpy() == 0
    for df in [df_mljar, df_lgbm_v20, df_lgbm_v21, df_lgbm_v23_1, df_lgbm_v23_2]
])

vote_0 = predicts_zero.sum(axis=1)
vote_1 = predicts_zero.shape[1] - vote_0  # every non-zero prediction counts as a vote for 1

# Plain Python list of 0/1 ints, one entry per row.
majority_vote = np.where(vote_0 > vote_1, 0, 1).tolist()



In [6]:
# Assemble the submission frame (int32 row_id 0 .. TOTAL_TEST - 1 next to the
# voted open_flag), write it to disk without the index, then display it.
df_majority_vote = pd.concat(
    [
        pd.Series(np.arange(TOTAL_TEST, dtype=np.int32), name='row_id'),
        pd.Series(majority_vote, name='open_flag'),
    ],
    axis=1,
)
df_majority_vote.to_csv('./data/submission/ensemble/submission_majority_vote.csv', index=False)

df_majority_vote


Unnamed: 0,row_id,open_flag
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
...,...,...
55965,55965,0
55966,55966,0
55967,55967,0
55968,55968,0


# Weighted vote 1

In [7]:
# Weighted vote 1: every model votes for its predicted label with the weight
# listed next to it, and the label with the larger total weight wins.
# The totals are accumulated column-wise with NumPy instead of one scalar
# `.loc` lookup per row and per model; the resulting labels are unchanged.
weighted_models = [
    (df_mljar, 3.5),
    (df_lgbm_v23_2, 2),
    (df_lgbm_v21, 1.5),
    (df_lgbm_v20, 1.5),
    (df_lgbm_v23_1, 1),
]

# Selecting by explicit labels raises KeyError if a submission lacks any of
# the labels 0 .. TOTAL_TEST - 1, as the row-by-row lookup did.
row_labels = np.arange(TOTAL_TEST)
vote_0 = np.zeros(TOTAL_TEST)
vote_1 = np.zeros(TOTAL_TEST)
for df, weight in weighted_models:
    predicts_zero = df.loc[row_labels, 'open_flag'].to_numpy() == 0
    vote_0 += np.where(predicts_zero, weight, 0.0)
    vote_1 += np.where(predicts_zero, 0.0, weight)  # any non-zero prediction votes for 1

# A tie is reported once per tied row and resolved to 0. With the weights above
# (multiples of 0.5 summing to 9.5) a 4.75 / 4.75 split cannot happen, so this
# only matters if the weights are edited.
for tie_number in range(int(np.count_nonzero(vote_0 == vote_1))):
    print('same weight')

# Plain Python list of 0/1 ints, one entry per row.
weighted_vote_1 = np.where(vote_0 >= vote_1, 0, 1).tolist()


In [8]:
# Assemble the submission frame (int32 row_id 0 .. TOTAL_TEST - 1 next to the
# voted open_flag), write it to disk without the index, then display it.
df_weighted_vote_1 = pd.concat(
    [
        pd.Series(np.arange(TOTAL_TEST, dtype=np.int32), name='row_id'),
        pd.Series(weighted_vote_1, name='open_flag'),
    ],
    axis=1,
)
df_weighted_vote_1.to_csv('./data/submission/ensemble/submission_weighted_vote_1.csv', index=False)

df_weighted_vote_1


Unnamed: 0,row_id,open_flag
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
...,...,...
55965,55965,0
55966,55966,0
55967,55967,0
55968,55968,0


# Weighted vote 2

In [9]:
# Weighted vote 2: only three models take part. df_mljar carries 1.5 and the
# two LightGBM submissions carry 1 each, so df_mljar is overruled only when
# both LightGBM submissions agree against it.
# The totals are accumulated column-wise with NumPy instead of one scalar
# `.loc` lookup per row and per model; the resulting labels are unchanged.
weighted_models = [
    (df_mljar, 1.5),
    (df_lgbm_v23_2, 1),
    (df_lgbm_v21, 1),
]

# Selecting by explicit labels raises KeyError if a submission lacks any of
# the labels 0 .. TOTAL_TEST - 1, as the row-by-row lookup did.
row_labels = np.arange(TOTAL_TEST)
vote_0 = np.zeros(TOTAL_TEST)
vote_1 = np.zeros(TOTAL_TEST)
for df, weight in weighted_models:
    predicts_zero = df.loc[row_labels, 'open_flag'].to_numpy() == 0
    vote_0 += np.where(predicts_zero, weight, 0.0)
    vote_1 += np.where(predicts_zero, 0.0, weight)  # any non-zero prediction votes for 1

# A tie is reported once per tied row and resolved to 0. With the weights above
# (multiples of 0.5 summing to 3.5) a 1.75 / 1.75 split cannot happen, so this
# only matters if the weights are edited.
for tie_number in range(int(np.count_nonzero(vote_0 == vote_1))):
    print('same weight')

# Plain Python list of 0/1 ints, one entry per row.
weighted_vote_2 = np.where(vote_0 >= vote_1, 0, 1).tolist()


In [10]:
# Assemble the submission frame (int32 row_id 0 .. TOTAL_TEST - 1 next to the
# voted open_flag), write it to disk without the index, then display it.
df_weighted_vote_2 = pd.concat(
    [
        pd.Series(np.arange(TOTAL_TEST, dtype=np.int32), name='row_id'),
        pd.Series(weighted_vote_2, name='open_flag'),
    ],
    axis=1,
)
df_weighted_vote_2.to_csv('./data/submission/ensemble/submission_weighted_vote_2.csv', index=False)

df_weighted_vote_2


Unnamed: 0,row_id,open_flag
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
...,...,...
55965,55965,0
55966,55966,0
55967,55967,0
55968,55968,0


# Weighted vote 3

In [11]:
# Weighted vote 3: df_mljar carries 3.5 and each of the four LightGBM
# submissions carries 1, so df_mljar is overruled only when all four LightGBM
# submissions agree against it (4 > 3.5).
# The totals are accumulated column-wise with NumPy instead of one scalar
# `.loc` lookup per row and per model; the resulting labels are unchanged.
weighted_models = [
    (df_mljar, 3.5),
    (df_lgbm_v23_2, 1),
    (df_lgbm_v21, 1),
    (df_lgbm_v20, 1),
    (df_lgbm_v23_1, 1),
]

# Selecting by explicit labels raises KeyError if a submission lacks any of
# the labels 0 .. TOTAL_TEST - 1, as the row-by-row lookup did.
row_labels = np.arange(TOTAL_TEST)
vote_0 = np.zeros(TOTAL_TEST)
vote_1 = np.zeros(TOTAL_TEST)
for df, weight in weighted_models:
    predicts_zero = df.loc[row_labels, 'open_flag'].to_numpy() == 0
    vote_0 += np.where(predicts_zero, weight, 0.0)
    vote_1 += np.where(predicts_zero, 0.0, weight)  # any non-zero prediction votes for 1

# A tie is reported once per tied row and resolved to 0. With the weights above
# (multiples of 0.5 summing to 7.5) a 3.75 / 3.75 split cannot happen, so this
# only matters if the weights are edited.
for tie_number in range(int(np.count_nonzero(vote_0 == vote_1))):
    print('same weight')

# Plain Python list of 0/1 ints, one entry per row.
weighted_vote_3 = np.where(vote_0 >= vote_1, 0, 1).tolist()


In [12]:
# Assemble the submission frame (int32 row_id 0 .. TOTAL_TEST - 1 next to the
# voted open_flag), write it to disk without the index, then display it.
df_weighted_vote_3 = pd.concat(
    [
        pd.Series(np.arange(TOTAL_TEST, dtype=np.int32), name='row_id'),
        pd.Series(weighted_vote_3, name='open_flag'),
    ],
    axis=1,
)
df_weighted_vote_3.to_csv('./data/submission/ensemble/submission_weighted_vote_3.csv', index=False)

df_weighted_vote_3


Unnamed: 0,row_id,open_flag
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
...,...,...
55965,55965,0
55966,55966,0
55967,55967,0
55968,55968,1


# Check differences

In [13]:
# Rows on which the majority vote and weighted vote 1 disagree. Both frames are
# stacked and only rows without an identical twin are kept, so each disagreeing
# row_id shows up twice: once with each frame's open_flag.
pd.concat([df_majority_vote, df_weighted_vote_1]).loc[
    lambda stacked: ~stacked.duplicated(keep=False)
]


Unnamed: 0,row_id,open_flag
793,793,1
1128,1128,1
1360,1360,1
1485,1485,1
2472,2472,1
...,...,...
55010,55010,0
55119,55119,0
55349,55349,0
55495,55495,0


In [14]:
# Rows on which the majority vote and weighted vote 2 disagree. Both frames are
# stacked and only rows without an identical twin are kept, so each disagreeing
# row_id shows up twice: once with each frame's open_flag.
pd.concat([df_majority_vote, df_weighted_vote_2]).loc[
    lambda stacked: ~stacked.duplicated(keep=False)
]


Unnamed: 0,row_id,open_flag
278,278,1
478,478,1
743,743,1
793,793,1
1128,1128,1
...,...,...
55295,55295,0
55349,55349,0
55495,55495,0
55729,55729,0


In [15]:
# Rows on which the majority vote and weighted vote 3 disagree. Both frames are
# stacked and only rows without an identical twin are kept, so each disagreeing
# row_id shows up twice: once with each frame's open_flag.
pd.concat([df_majority_vote, df_weighted_vote_3]).loc[
    lambda stacked: ~stacked.duplicated(keep=False)
]


Unnamed: 0,row_id,open_flag
18,18,0
79,79,0
153,153,0
174,174,0
207,207,0
...,...,...
55916,55916,0
55924,55924,1
55930,55930,1
55955,55955,1


In [16]:
# Rows on which weighted vote 1 and weighted vote 2 disagree. Both frames are
# stacked and only rows without an identical twin are kept, so each disagreeing
# row_id shows up twice: once with each frame's open_flag.
pd.concat([df_weighted_vote_1, df_weighted_vote_2]).loc[
    lambda stacked: ~stacked.duplicated(keep=False)
]


Unnamed: 0,row_id,open_flag
278,278,1
478,478,1
743,743,1
1751,1751,1
1829,1829,1
...,...,...
48816,48816,0
49890,49890,0
52260,52260,0
55295,55295,0


In [17]:
# Rows on which weighted vote 1 and weighted vote 3 disagree. Both frames are
# stacked and only rows without an identical twin are kept, so each disagreeing
# row_id shows up twice: once with each frame's open_flag.
pd.concat([df_weighted_vote_1, df_weighted_vote_3]).loc[
    lambda stacked: ~stacked.duplicated(keep=False)
]


Unnamed: 0,row_id,open_flag
18,18,0
79,79,0
153,153,0
174,174,0
207,207,0
...,...,...
55663,55663,1
55924,55924,1
55930,55930,1
55955,55955,1


In [18]:
# Rows on which weighted vote 2 and weighted vote 3 disagree. Both frames are
# stacked and only rows without an identical twin are kept, so each disagreeing
# row_id shows up twice: once with each frame's open_flag.
pd.concat([df_weighted_vote_2, df_weighted_vote_3]).loc[
    lambda stacked: ~stacked.duplicated(keep=False)
]


Unnamed: 0,row_id,open_flag
18,18,0
79,79,0
153,153,0
174,174,0
207,207,0
...,...,...
55729,55729,1
55924,55924,1
55930,55930,1
55955,55955,1
