## Summary

Apply gradient boosting, using the `lightgbm` package, to the problem of predicting customer value.

In [2]:
import time

import sys
sys.path.append('../../common_routines/')

from relevant_functions import (get_train_data,
                                get_test_data,
                                get_rel_cols,
                                get_all_predictor_cols)

import numpy as np
import lightgbm as lgb
import ast
import operator

In [3]:
# Directory handed to get_train_data / get_test_data to locate the input files.
INPUT_DIR = '../../input/'

In [4]:
# Load the training data; the value displayed by this cell is the elapsed wall-clock time in seconds.
ts = time.time()
train = get_train_data(INPUT_DIR)
time.time() - ts

5.190080642700195

### Build a basic model with all predictors

Let us build a simple lightgbm model using all predictors.

In [5]:
# Predictor column names as returned by the shared helper.
# NOTE(review): which columns the helper excludes is defined in relevant_functions - confirm there.
all_predictor_cols = get_all_predictor_cols(train)

In [6]:
# Features and regression target for the baseline model.
# The double brackets keep 'log_target' as a one-column frame rather than a 1-D series
# (assuming train is a pandas DataFrame).
X = train[all_predictor_cols]
Y = train[['log_target']]

In [7]:
# Wrap features and label in a LightGBM Dataset; this is what gets passed to lgb.cv as train_set.
lgb_complete_data = lgb.Dataset(X, Y)

In [8]:
# LightGBM settings shared by every model in this notebook:
# a regression objective scored with mean squared error, and a small learning rate.
lgb_params = dict(
    learning_rate=0.01,
    objective='regression',
    metric='mean_squared_error',
)

In [38]:
ts = time.time()
# 5-fold cross-validation of the baseline model, stopping early once the metric
# has not improved for 5 rounds.
# `stratified` is True by default in lgb.cv; it is set to False to make it work for regression.
model_lgb = lgb.cv(
    params=lgb_params,
    train_set=lgb_complete_data,
    num_boost_round=3000,
    nfold=5,
    stratified=False,
    early_stopping_rounds=5,
)
time.time() - ts

13.012925148010254

In [63]:
# The first series in the CV result is the per-round metric history (presumably the
# fold mean of the MSE - confirm against the lgb.cv docs); its square root is the RMSE.
# Print the RMSE of the last recorded round and the number of rounds recorded.
cv_metric_history = list(model_lgb.values())[0]
print(np.sqrt(cv_metric_history[-1]))
print(len(cv_metric_history))

1.4505464604674227
377


### Let us enhance the model by using only dense predictors

Since the results look encouraging, let us try including only dense predictors.

In [9]:
def get_prediction_score(columns, data=train, params=lgb_params,
                         num_boost_round=1000, nfold=5, early_stopping_rounds=5):
    """Cross-validate a LightGBM regressor restricted to the given columns.

    Parameters
    ----------
    columns : list
        Names of the predictor columns to train on.
    data : DataFrame-like
        Frame holding the predictor columns and a 'log_target' column.
        Defaults to the notebook-level ``train`` frame.
    params : dict
        LightGBM parameters forwarded to ``lgb.cv``.
    num_boost_round : int
        Upper bound on the number of boosting rounds.
    nfold : int
        Number of cross-validation folds.
    early_stopping_rounds : int
        Stop once the validation metric has not improved for this many rounds.

    Returns
    -------
    (best_score, num_iterations) : tuple
        ``best_score`` is the square root of the last value in the first metric
        series returned by ``lgb.cv`` (an RMSE when the metric is mean squared
        error); ``num_iterations`` is the length of that series.
    """
    # Use the frame that was passed in. The previous version read the global
    # `train` here, so the `data` argument was silently ignored.
    X = data[columns]
    Y = data[['log_target']]

    lgb_cv_data = lgb.Dataset(X, Y)

    cv_results = lgb.cv(params=params,
                        train_set=lgb_cv_data,
                        num_boost_round=num_boost_round,
                        nfold=nfold,
                        early_stopping_rounds=early_stopping_rounds,
                        # lgb.cv stratifies folds by default; disabled for regression.
                        stratified=False)

    # First series in the result dict is the per-round metric history.
    scores = list(cv_results.values())[0]
    best_score = np.sqrt(scores[-1])
    num_iterations = len(scores)

    return (best_score, num_iterations)

In [10]:
def get_model_with_predictors_beyond_given_threshold(percent_threshold=10, data=train, params=lgb_params):
    """Score a model built on the columns that get_rel_cols selects for a threshold.

    The column subset comes from ``get_rel_cols(percent_threshold, data)``
    (presumably the columns that are at least that dense - confirm in
    relevant_functions) and is scored with ``get_prediction_score``.

    Returns a tuple ``(best_score, num_iterations, number_of_selected_columns)``.
    """
    selected_cols = get_rel_cols(percent_threshold, data)
    cv_score, cv_rounds = get_prediction_score(selected_cols, data, params)
    return (cv_score, cv_rounds, len(selected_cols))
    

In [67]:
# Threshold percentages to sweep: 5, 10, ..., 35.
PERCENTAGES_TO_CHECK = np.arange(start=5, stop=40, step=5)

# Sweep results, each keyed by the threshold percentage.
percentage_to_prediction_score = {}
percentage_to_num_predictors = {}
percentage_to_num_model_iterations = {}

In [71]:
ts = time.time()
# Cross-validate one model per threshold and record its score, round count and column count.
for percent in PERCENTAGES_TO_CHECK:
    best_score, num_iterations, num_predictors = get_model_with_predictors_beyond_given_threshold(
        percent_threshold=percent
    )
    percentage_to_num_predictors[percent] = num_predictors
    percentage_to_num_model_iterations[percent] = num_iterations
    percentage_to_prediction_score[percent] = best_score
time.time() - ts

46.17691111564636

In [72]:
percentage_to_prediction_score

{5: 1.4505508265683893,
 10: 1.4505508265683893,
 15: 1.4505508265683893,
 20: 1.4505508265683893,
 25: 1.4482976907500078,
 30: 1.4482976907500078,
 35: 1.6380069492289402}

In [75]:
# Refine the sweep: every whole percentage from 21 to 34 inclusive.
PERCENTAGES_TO_CHECK = np.arange(21, 35)

In [76]:
ts = time.time()
# Same sweep as before over the refined thresholds; results are added to the existing dictionaries.
for percent in PERCENTAGES_TO_CHECK:
    best_score, num_iterations, num_predictors = get_model_with_predictors_beyond_given_threshold(
        percent_threshold=percent
    )
    percentage_to_num_predictors[percent] = num_predictors
    percentage_to_num_model_iterations[percent] = num_iterations
    percentage_to_prediction_score[percent] = best_score
time.time() - ts

77.59558486938477

In [77]:
percentage_to_prediction_score

{5: 1.4505508265683893,
 10: 1.4505508265683893,
 15: 1.4505508265683893,
 20: 1.4505508265683893,
 25: 1.4482976907500078,
 30: 1.4482976907500078,
 35: 1.6380069492289402,
 21: 1.4482976907500078,
 22: 1.4482976907500078,
 23: 1.4482976907500078,
 24: 1.4482976907500078,
 26: 1.4482976907500078,
 27: 1.4482976907500078,
 28: 1.4482976907500078,
 29: 1.4482976907500078,
 31: 1.4482976907500078,
 32: 1.4522997102569455,
 33: 1.457118415265635,
 34: 1.508591899656438}

In [78]:
percentage_to_num_predictors

{5: 997,
 10: 376,
 15: 191,
 20: 50,
 25: 40,
 30: 40,
 35: 2,
 21: 40,
 22: 40,
 23: 40,
 24: 40,
 26: 40,
 27: 40,
 28: 40,
 29: 40,
 31: 40,
 32: 39,
 33: 33,
 34: 18}

In [79]:
percentage_to_num_model_iterations

{5: 377,
 10: 377,
 15: 377,
 20: 377,
 25: 424,
 30: 424,
 35: 248,
 21: 424,
 22: 424,
 23: 424,
 24: 424,
 26: 424,
 27: 424,
 28: 424,
 29: 424,
 31: 424,
 32: 388,
 33: 367,
 34: 302}

### Finding the optimal subset of columns

Now, let us find the subset of this superset of 40 columns that gives the best (lowest) cross-validation RMSE. Let us do this using a greedy method: starting from an empty list, repeatedly add the single column that yields the best score.

In [107]:
# Candidate pool for the greedy search: the columns get_rel_cols selects at a 21% threshold
# (the sweep results above report 40 predictors for that threshold).
FULL_REL_COLS_LIST = get_rel_cols(21, train)

In [109]:
def get_prediction_scores_after_column_addition(given_list, data):
    """Score every one-column extension of ``given_list``.

    For each column of FULL_REL_COLS_LIST not already in ``given_list``, the
    column is appended to a copy of the list and the extended list is scored
    with ``get_prediction_score``.

    Returns a list of ``(str(column_list), score)`` pairs sorted by ascending
    score, so the first element is the best extension.
    """
    remaining_cols = (col for col in FULL_REL_COLS_LIST if col not in given_list)

    score_by_cols_repr = {}
    for extra_col in remaining_cols:
        extended_cols = given_list + [extra_col]
        # Lists are not hashable, so the string form of the list is used as the key.
        score_by_cols_repr[str(extended_cols)] = get_prediction_score(extended_cols, data)[0]

    return sorted(score_by_cols_repr.items(), key=operator.itemgetter(1))
    

In [110]:
ts = time.time()

# Greedy forward selection: each pass adds the single column giving the lowest score.
# MAX_NUM_PREDICTORS evaluates to 1 here, so only the first pass runs in this notebook.
columns_list = []
MAX_NUM_PREDICTORS = len(columns_list) + 1
max_columns_list_to_score = {}
while len(columns_list) < MAX_NUM_PREDICTORS:
    col_list_to_score = get_prediction_scores_after_column_addition(columns_list, train)
    best_cols_repr, best_cols_score = col_list_to_score[0]
    max_columns_list_to_score[best_cols_repr] = best_cols_score
    # The winner is stored as the string form of a list; parse it back for the next pass.
    columns_list = ast.literal_eval(best_cols_repr)
time.time() - ts

37.065032958984375

In [111]:
col_list_to_score

[("['f190486d6']", 1.6449866458904614),
 ("['eeb9cd3aa']", 1.6549665060753382),
 ("['c47340d97']", 1.6564248164011501),
 ("['c5a231d81']", 1.6597331712390317),
 ("['66ace2992']", 1.6597427000135743),
 ("['58e2e02e6']", 1.662740227415354),
 ("['adb64ff71']", 1.6637054978615513),
 ("['491b9ee45']", 1.6641022211337322),
 ("['e176a204a']", 1.6641756120490236),
 ("['0572565c2']", 1.6648001845404812),
 ("['1db387535']", 1.6660756055977954),
 ("['1931ccfdd']", 1.667217694693769),
 ("['15ace8c9f']", 1.667416285644309),
 ("['024c577b9']", 1.6674215815208133),
 ("['91f701ba2']", 1.669414765433305),
 ("['0ff32eb98']", 1.6698816518903408),
 ("['26fc93eb7']", 1.6702095430990647),
 ("['963a49cdc']", 1.6710982011036655),
 ("['70feb1494']", 1.671840619636297),
 ("['6619d81fc']", 1.6726918675576397),
 ("['9fd594eec']", 1.6730804789897662),
 ("['703885424']", 1.6736472114556256),
 ("['23310aa6f']", 1.6741381954941077),
 ("['f74e8f13d']", 1.67481747766058),
 ("['58232a6fb']", 1.6761855988254997),
 ("['5c

In [112]:
max_columns_list_to_score

{"['f190486d6']": 1.6449866458904614}

### Full algorithm was run as a Kaggle script (https://www.kaggle.com/babinu/gradient-boosting-lightgbm)

The complete analysis has been done as a kaggle script and the results are pasted below.

In [11]:
max_columns_list_to_score = {"['f190486d6']": 1.6449866458904616,
 "['f190486d6', 'eeb9cd3aa']": 1.5836977665245267,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97']": 1.5504512705557958,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6']": 1.528980861141374,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45']": 1.5118721914054778,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916']": 1.4989457081703554,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71']": 1.4901748694923371,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5']": 1.485975491761902,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb']": 1.4795685931740603,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426']": 1.476432356250629,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec']": 1.4725496110335539,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe']": 1.4682779655310683,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7']": 1.4644155188818466,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867']": 1.4622498914394688,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f']": 1.4588467764291295,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9']": 1.4548849542228728,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992']": 1.4538387654504028,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2']": 1.4525021909388476,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a']": 1.4529145004718886,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494']": 1.451687367202684,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424']": 1.4520826351864002,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d']": 1.4526760589766303,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12']": 1.4512259380695665,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010']": 1.4506379592292695,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd']": 1.4492935807502703,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f']": 1.4484631284738125,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2']": 1.4477678946116617,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488']": 1.4484659492184804,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f']": 1.4487101535982179,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0']": 1.4494657512432392,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1']": 1.4478878780362392,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535']": 1.447354083135638,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212']": 1.447149096266835,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc']": 1.4469956244156605,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc', '324921c7b']": 1.4462793045269746,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc', '324921c7b', '963a49cdc']": 1.4482643361838679,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc', '324921c7b', '963a49cdc', '5c6487af1']": 1.4478135484080445,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc', '324921c7b', '963a49cdc', '5c6487af1', '62e59a501']": 1.4464322861195018,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc', '324921c7b', '963a49cdc', '5c6487af1', '62e59a501', '0ff32eb98']": 1.448641565870212,
 "['f190486d6', 'eeb9cd3aa', 'c47340d97', '58e2e02e6', '491b9ee45', 'd6bb78916', 'adb64ff71', 'b43a7cfd5', '58232a6fb', 'fc99f9426', '9fd594eec', 'fb0f5dbfe', '26fc93eb7', '241f0f867', '15ace8c9f', '024c577b9', '66ace2992', '0572565c2', 'e176a204a', '70feb1494', '703885424', 'f74e8f13d', '58e056e12', '20aa07010', '1931ccfdd', '2ec5b290f', '91f701ba2', '190db8488', '23310aa6f', '1702b5bf0', '6eef030c1', '1db387535', 'fb49e4212', '6619d81fc', '324921c7b', '963a49cdc', '5c6487af1', '62e59a501', '0ff32eb98', 'c5a231d81']": 1.4482964955565283}

In [20]:
# Keys are the string form of each column list: take the key with the lowest score
# (the first such key on ties) and parse it back into a list of column names.
best_cols_repr = min(max_columns_list_to_score, key=max_columns_list_to_score.get)
best_pred_cols = ast.literal_eval(best_cols_repr)

In [21]:
len(best_pred_cols)

35

### Make predictions on the test set.

Let us make predictions over the test set with our best model.

In [23]:
# Training features restricted to the selected columns, plus the target,
# wrapped as the LightGBM Dataset used for the final fit.
X = train[best_pred_cols]
Y = train[['log_target']]

lgb_full_data = lgb.Dataset(X, Y)

In [36]:
# Re-run cross-validation on the selected columns to find out how many boosting rounds
# it used, so that the final model can be trained on the complete data for the same
# number of rounds before predicting on the test set.
ts = time.time()
(score, num_iterations) = get_prediction_score(best_pred_cols, train, lgb_params)
print(score, num_iterations)
time.time() - ts

1.4462793045269748 406


4.756251335144043

In [31]:
ts = time.time()
# Fit the final model on all training rows for the number of rounds found by cross-validation.
# NOTE(review): this cell's execution count (31) is lower than that of the cell that sets
# num_iterations (36); run the notebook top to bottom so num_iterations is current.
model_lgb = lgb.train(params=lgb_params, train_set=lgb_full_data, num_boost_round=num_iterations)
time.time() - ts

0.9945838451385498

In [32]:
# Load the test data; the value displayed by this cell is the elapsed wall-clock time in seconds.
ts = time.time()
test = get_test_data(INPUT_DIR)
time.time() - ts

70.67730712890625

In [33]:
ts = time.time()
new_X = test[best_pred_cols]
# Predict on the log scale and floor negative predictions at zero, so that the
# back-transformed target below can never be negative.
test_log_predictions = np.maximum(model_lgb.predict(new_X), 0)
# Undo the log transform (exp(y) - 1) and store the result on the test frame.
test['target'] = np.exp(test_log_predictions) - 1.0
time.time() - ts

0.5638971328735352

In [35]:
# Write the 'ID' and 'target' columns to the submission CSV, without the index column.
test[['ID', 'target']].to_csv('submission_gradient_boosting_lightgbm.csv', index=False)