# Blending the results of the simple models

In [77]:
import pandas as pd

In [78]:
def read_result(filenames):
    result = pd.DataFrame(columns=['id'])
    for i, f in enumerate(result_files):
        res = pd.read_csv(f)
        result[f"target{i}"] = res['target']
        result['id'] = res['id']
    return result

In [79]:
result_files = ['submission_lightgbm_default.csv', 
                'submission_catboost_default.csv',
                'submission_xgboost_default.csv']

In [80]:
results = read_result(result_files)
results.head()

Unnamed: 0,id,target0,target1,target2
0,5,0.140532,0.140137,0.128846
1,6,0.3683,0.395102,0.601259
2,8,0.030049,0.015622,0.02414
3,9,0.177666,0.2176,0.205932
4,11,0.112767,0.140418,0.230093


In [81]:
targets = [t for t in results.columns if t.startswith('target')]
targets

['target0', 'target1', 'target2']

# Mean of targets

In [82]:
avgs = results[targets].mean(axis='columns')

In [83]:
result = pd.DataFrame({'id':results['id'], 'target':avgs})
result

Unnamed: 0,id,target
0,5,0.136505
1,6,0.454887
2,8,0.023270
3,9,0.200399
4,11,0.161093
...,...,...
199995,499983,0.904262
199996,499984,0.035248
199997,499987,0.656495
199998,499994,0.072873


In [84]:
result.to_csv('submission_blendedmean_default.csv', index=False)

# Disagreements

In [86]:
decisions = results[targets]>0.5

In [87]:
disagreement_idx = (decisions.sum(axis='columns')>0) & (decisions.sum(axis='columns')<3)

In [88]:
sum(disagreement_idx)/len(disagreement_idx)

0.05443

In [89]:
results[disagreement_idx]

Unnamed: 0,id,target0,target1,target2
1,6,0.368300,0.395102,0.601259
14,30,0.233673,0.453246,0.584520
103,226,0.514812,0.409622,0.709497
116,259,0.268834,0.277914,0.534294
139,309,0.338097,0.509823,0.415357
...,...,...,...,...
199952,499870,0.518363,0.491683,0.456939
199954,499874,0.446214,0.537936,0.393259
199956,499879,0.529697,0.294653,0.535625
199959,499887,0.396982,0.529461,0.579282


# Median

In [90]:
results

Unnamed: 0,id,target0,target1,target2
0,5,0.140532,0.140137,0.128846
1,6,0.368300,0.395102,0.601259
2,8,0.030049,0.015622,0.024140
3,9,0.177666,0.217600,0.205932
4,11,0.112767,0.140418,0.230093
...,...,...,...,...
199995,499983,0.891375,0.915538,0.905873
199996,499984,0.036511,0.043148,0.026085
199997,499987,0.668240,0.669159,0.632085
199998,499994,0.063410,0.074866,0.080343


In [91]:
medians = results[targets].median(axis='columns')

In [92]:
result = pd.DataFrame({'id':results['id'], 'target':medians})
result

Unnamed: 0,id,target
0,5,0.140137
1,6,0.395102
2,8,0.024140
3,9,0.205932
4,11,0.140418
...,...,...
199995,499983,0.905873
199996,499984,0.036511
199997,499987,0.668240
199998,499994,0.074866


In [93]:
result.to_csv('submission_blendedmedian_default.csv', index=False)

# Power average

In [94]:
power = 8

In [96]:
avgs = results[targets]**8
result = avgs.mean(axis='columns')
result

0         1.256081e-07
1         6.004186e-03
2         2.612067e-13
3         3.084572e-06
4         2.677930e-06
              ...     
199995    4.485509e-01
199996    5.128702e-12
199997    3.514754e-02
199998    9.947864e-10
199999    1.119384e-02
Length: 200000, dtype: float64

In [97]:
result.to_csv('submission_blended_power8avg_default.csv', index=False)