# Ensembling

The models are ensembled by weighted blending of their predictions. The blend combines 3 Field-Aware Factorization Machine (FFM) models and 3 LightGBM models. The blending weights are optimized using the <a href='https://github.com/pklauke/Ensemble'>Ensemble</a> package.

## Load validation and test predictions

In [6]:
# Paths of the three LightGBM test prediction files, one per model.
files = [
    'instance-1/home/klauke_peter/predictions_test_new.csv',
    'predictions_test_2.csv',
    'predictions_test_3.csv',
]

# Read each file, keep every column except the first, and put the results side by side
# (one column per model).
preds_test_lgb = np.hstack([pd.read_csv(path).values[:, 1:] for path in files])
print(preds_test_lgb.shape)
print('Mean: ', preds_test_lgb.mean(axis=0))

(18790469, 3)
Mean:  [0.07576617 0.08030955 0.0792169 ]


In [7]:
# Paths of the three LightGBM validation prediction files, one per model.
files = [
    'instance-1/home/klauke_peter/predictions_valid_new.csv',
    'predictions_valid_2.csv',
    'predictions_valid_3.csv',
]

# Read each file, keep every column except the first, and put the results side by side
# (one column per model).
preds_valid_lgb = np.hstack([pd.read_csv(path).values[:, 1:] for path in files])
print(preds_valid_lgb.shape)
print('Mean: ', preds_valid_lgb.mean(axis=0))

(2327952, 3)
Mean:  [0.06339939 0.0675721  0.06784469]


In [8]:
# Paths of the three FFM test prediction files, one per model.
files = [
    'LibFFM/preds_test_ffm.txt',
    'LibFFM/preds_test_ffm_2.txt',
    'LibFFM/preds_test_ffm_3.txt',
]

# The files are read without a header row; all of their columns are kept and placed side by side.
preds_test_ffm = np.hstack([pd.read_csv(path, header=None).values for path in files])
print(preds_test_ffm.shape)
# A NaN entry in the printed means indicates that the corresponding column contains NaN values.
print('Mean: ', preds_test_ffm.mean(axis=0))

(18790469, 3)
Mean:  [       nan 0.00337221        nan]


In [9]:
# Paths of the three FFM validation prediction files, one per model.
files = [
    'LibFFM/preds_valid_ffm.txt',
    'LibFFM/preds_valid_ffm_2.txt',
    'LibFFM/preds_valid_ffm_3.txt',
]

# The files are read without a header row; all of their columns are kept and placed side by side.
preds_valid_ffm = np.hstack([pd.read_csv(path, header=None).values for path in files])
print(preds_valid_ffm.shape)
print('Mean: ', preds_valid_ffm.mean(axis=0))

(2327952, 3)
Mean:  [0.00267422 0.00279109 0.00279529]


In [None]:
# Load the competition's sample submission (pandas reads the zipped CSV directly) and preview it.
sample_submission_path = 'instance-1/home/klauke_peter/.kaggle/competitions/talkingdata-adtracking-fraud-detection/sample_submission.csv.zip'
df_submission = pd.read_csv(sample_submission_path)
df_submission.head(2)

## Prepare data

In [11]:
# Replace missing FFM predictions, one model column at a time, with the row-wise mean of the
# LightGBM predictions for the same rows. Both FFM arrays are modified in place.
n_ffm_models = preds_valid_ffm.shape[1]
for i in range(n_ffm_models):
    # Validation predictions of FFM model i.
    srs_mask_valid = np.isnan(preds_valid_ffm[:, i])
    print(srs_mask_valid.sum(), ' rows are NaN in valid file')
    preds_valid_ffm[srs_mask_valid, i] = preds_valid_lgb[srs_mask_valid].mean(axis=1)

    # Test predictions of FFM model i.
    srs_mask_test = np.isnan(preds_test_ffm[:, i])
    print(srs_mask_test.sum(), ' rows are NaN in test file')
    preds_test_ffm[srs_mask_test, i] = preds_test_lgb[srs_mask_test].mean(axis=1)

0  rows are NaN in valid file
4748  rows are NaN in test file
0  rows are NaN in valid file
0  rows are NaN in test file
0  rows are NaN in valid file
4748  rows are NaN in test file


In [12]:
# Read the stored validation frame from the HDF5 store (opened read-only) and reduce its size.
with pd.HDFStore('instance-1/home/klauke_peter/store_enc_chunks.h5', mode='r') as store:
    df_valid = store.select('df_valid')
    df_valid = downcast_dtypes(df_valid)
    # to_drop is defined outside this cell; presumably it removes unused columns - confirm.
    df_valid = to_drop(df_valid)
    print('Loaded df_valid')
    # Downcast a second time, now on the output of to_drop.
    df_valid = downcast_dtypes(df_valid)
    print('Finished df_valid, shape:', df_valid.shape)


Loaded df_valid
Finished df_valid, shape: (5000000, 43)


In [13]:
# Keep only the rows whose minute_of_day lies strictly inside one of three windows.
minute = df_valid.minute_of_day
in_first_window = (minute > 295) & (minute < 365)
in_second_window = (minute > 535) & (minute < 665)
in_third_window = (minute > 770) & (minute < 905)
df_valid = df_valid.loc[in_first_window | in_second_window | in_third_window]

# Drop the temporaries so nothing built from the unfiltered frame outlives the filter,
# then ask the garbage collector to reclaim memory.
del minute, in_first_window, in_second_window, in_third_window
gc.collect()

print('valid shape: ', df_valid.shape)

valid shape:  (2327952, 43)


## Check model score

The scores of the Field-Aware Factorization Machine (FFM) models are lower than the scores of the LightGBM models. This is probably caused by non-optimal hyperparameters: the Field-Aware Factorization Machine turned out to be quite unwieldy for this amount of data. Nonetheless, it contributes noticeably to the blend.

In [14]:
from sklearn.metrics import roc_auc_score

# Labels every validation prediction is scored against.
y_valid = df_valid.is_attributed

# AUC of each FFM model, then of the unweighted mean of the FFM models.
for i, ffm_column in enumerate(preds_valid_ffm.T):
    ffm_auc = roc_auc_score(y_valid, ffm_column)
    print('FFM{} AUC (valid): {:0.5f}'.format(i, ffm_auc))

preds_valid_ffm_mean = preds_valid_ffm.mean(axis=1)
ffm_mean_auc = roc_auc_score(y_valid, preds_valid_ffm_mean)
print('FFM AUC (valid): {:0.5f}'.format(ffm_mean_auc))
print('')

# AUC of each LightGBM model, then of the unweighted mean of the LightGBM models.
for i, lgb_column in enumerate(preds_valid_lgb.T):
    lgb_auc = roc_auc_score(y_valid, lgb_column)
    print('LGB{} AUC (valid): {:0.5f}'.format(i, lgb_auc))

preds_valid_lgb_mean = preds_valid_lgb.mean(axis=1)
lgb_mean_auc = roc_auc_score(y_valid, preds_valid_lgb_mean)
print('LGB AUC (valid): {:0.5f}'.format(lgb_mean_auc))

FFM0 AUC (valid): 0.97656
FFM1 AUC (valid): 0.97575
FFM2 AUC (valid): 0.97575
FFM AUC (valid): 0.97610

LGB0 AUC (valid): 0.98352
LGB1 AUC (valid): 0.98242
LGB2 AUC (valid): 0.98259
LGB AUC (valid): 0.98320


## Blending

In [3]:
from ensemble.blending_optimizer import BlendingOptimizer

# One label per row of the stacked prediction matrix passed to `fit`, in row order:
# the three FFM models first, then the three LightGBM models.
model_names = ['ffm 1', 'ffm 2', 'ffm 3', 'lgb 1', 'lgb 2', 'lgb 3']

# The optimizer is given roc_auc_score as the metric used to score a blend.
bo = BlendingOptimizer(roc_auc_score)

# Fit on the validation predictions (one row per model) against the validation labels,
# starting from hand-picked initial weights.
weights = bo.fit(np.concatenate([preds_valid_ffm.T, preds_valid_lgb.T], axis=0),
                 df_valid.loc[:, 'is_attributed'],
                 step=0.01,
                 init_weights=[0.7, 1, 1.3, 0.5, 0, 0])

# Report the header first, then every weight next to its model label. The weights are read
# from `bo.weights`, so the report does not depend on what `fit` returns.
print('Optimized blending weights')
for model, weight in zip(model_names, bo.weights):
    print('{}: {:0.2f}'.format(model, weight))

Optimized blending weights
ffm 1: 0.71
ffm 2: 1.00
ffm 3: 1.30
lgb 1: 0.50
lgb 2: 0.00
lgb 3: 0.01


In [4]:
# Report the score stored on the fitted blending optimizer.
blended_score = bo.score
print('Blended score: ', blended_score)

Blended score:  0.9837291790368998


In [18]:
# Stack the test predictions with one row per model (FFM rows first, then LightGBM rows),
# the same layout that was used for fitting, and blend them with the fitted optimizer.
blend_test = bo.predict(np.vstack([preds_test_ffm.T, preds_test_lgb.T]))

# Store the blended predictions in the submission frame.
df_submission.loc[:, 'is_attributed'] = blend_test

df_submission.head()

Unnamed: 0,click_id,is_attributed
0,0,0.017205
1,1,0.002623
2,2,0.000304
3,3,0.003242
4,4,0.003437


## Save and submit

In [40]:
# Write the submission as a gzip-compressed CSV, without the index column and with
# floats formatted to 8 decimal places.
df_submission.to_csv(
    'submission.csv.gz',
    float_format='%.8f',
    compression='gzip',
    index=False,
)

df_submission.head()

Unnamed: 0,click_id,is_attributed
0,0,0.016878
1,1,0.0026
2,2,0.000302
3,3,0.003211
4,4,0.003404


In [41]:
import os  # kept: other cells may rely on `os` having been imported here
import subprocess

# Message attached to the submission: model mix and the blending weights that were used.
subname = '3xffmada0.2 3xlgb, w=(0.71, 1.00, 1.30, 0.50, 0.00, 0.01)'

# Pass the arguments to the Kaggle CLI as a list instead of a formatted shell string, so the
# message reaches the CLI verbatim even if it contains quotes, `$` or backticks.
submit_result = subprocess.run([
    'kaggle', 'competitions', 'submit',
    '-c', 'talkingdata-adtracking-fraud-detection',
    '-f', 'submission.csv.gz',
    '-m', subname,
])

# Exit code of the CLI as the cell result; 0 means the command succeeded.
submit_result.returncode

0