In [1]:
import numpy as np
import pandas as pd

from sklearn.impute import (SimpleImputer,KNNImputer)
from sklearn.ensemble import (RandomForestRegressor, IsolationForest)

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

import lightgbm as lgb

from sklearn.metrics import r2_score

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def fill_missing_values(X, n_neighbors = 75, method="KNN"): 
    
    # normalization
    X_std = np.nanstd(X,axis=0,keepdims=True)
    X_ave = np.nanmean(X,axis=0,keepdims=True)
    X_norma = (X-X_ave)/X_std
    
    # use KNNImputer
    imputer = KNNImputer(missing_values=np.nan, n_neighbors=n_neighbors, weights = 'distance') if method=="KNN"\
        else SimpleImputer(missing_values=np.nan, strategy='median')
    
    X_norma_fixed = imputer.fit_transform(X_norma)
    
    return X_norma_fixed

def remove_outliers(X, y):
    print("IsolationForest-Traing data shape before removed: {}".format(X.shape))
    iforest = IsolationForest(max_samples=200, random_state=1, contamination='auto')
    iforest.fit(X)
    iforest_outlier_pred = iforest.predict(X)

    mask = (iforest_outlier_pred!=-1)
    X , y = X[mask, :], y[mask]
    print("IsolationForest-Traing data shape after removed: {}".format(X.shape))
    return X, y

def select_features(X, y, X_test, feature_num=50):
    rf = RandomForestRegressor(n_jobs=-1, n_estimators=80, random_state=1)
    rf.fit(X, y)
    indices = np.asarray(list(rf.feature_importances_)).argsort()[-feature_num:][::-1]
    
    X = np.take(X, indices, axis = 1)
    X_test = np.take(X_test, indices, axis = 1)
    return X, X_test

In [3]:
X_train_data = pd.read_csv('X_train.csv')
y_train_data = pd.read_csv('y_train.csv')
X_test_data = pd.read_csv('X_test.csv')

indices_test = np.array(X_test_data)[:,0]
X_test = np.array(X_test_data)[:,1:]
y_train = np.array(y_train_data)[:,1]
X_train = np.array(X_train_data)[:,1:]

## 1. Imputation of Missing Values
* [Reference](https://scikit-learn.org/stable/modules/impute.html)
* We use median of column instead of mean

In [4]:
X_train_missing_indices = X_train[X_train==np]
X_train = fill_missing_values(X_train, n_neighbors=75)
X_test = fill_missing_values(X_test)
print(X_train.shape)
print(X_test.shape)

  X_norma = (X-X_ave)/X_std
  X_norma = (X-X_ave)/X_std


(1212, 828)
(776, 828)


## 2. Outlier Detection
* [reference_sklearn](https://scikit-learn.org/stable/modules/outlier_detection.html)
* [reference_in_detail](https://practicaldatascience.co.uk/machine-learning/how-to-use-the-isolation-forest-model-for-outlier-detection)

In [5]:
X_train,y_train = remove_outliers(X_train,y_train)

IsolationForest-Traing data shape before removed: (1212, 828)
IsolationForest-Traing data shape after removed: (1206, 828)


## 3. Feature Selection

In [6]:
# X, X_test = feature_reduction(X, X_test,750)
X_train, X_test = select_features(X_train, y_train, X_test,feature_num = 50)
print("Traing data shape after selection: {}".format(X_train.shape))
print("Testing data shape after selection: {}".format(X_test.shape))

Traing data shape after selection: (1206, 50)
Testing data shape after selection: (776, 50)


## 4. Extra Tree

In [7]:
def custom_r2(prediction, train_data):
    """Regular r2 cost function returned as a tuple to be used with lgb"""
    labels = train_data.get_label()
    return 'r2', r2_score(labels, prediction), True

def fit_model_and_pred(degree, X_train, y_train, X_val, y_val, X_test):
   
    params = {
                'task': 'train',
                'boosting_type': 'gbdt',
                'objective': 'regression',
                'num_leaves': 31,
                'learning_rate': 0.05,
                'feature_fraction': 0.5,
                'bagging_fraction': 0.8,
                'bagging_freq': 5,
                'verbose': 0
            }
    
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_val, y_val, reference=lgb_train)

    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=500,
                    feval=custom_r2,
                    valid_sets={lgb_train, lgb_eval},
                    early_stopping_rounds=20)

    y_pred = gbm.predict(X_test) 
    
    return y_pred

def train_k_fold(X, y, fold_num=10):
    kf = KFold(n_splits=fold_num, random_state=None, shuffle=False)
    kf.get_n_splits(X)
    test_score = 0.0
    
    for train_index, test_index in kf.split(X):
        X_train, X_val = X[train_index], X[test_index]
        y_train, y_val = y[train_index], y[test_index]

        y_pred = fit_model_and_pred(1, X_train, y_train, X_val, y_val, X_val)
        score = r2_score(y_val, y_pred)

        print('The obtained validation r2 score is : ',score)
        test_score += score
    print("Validation score: %f"%(test_score/fold_num))
    
def train_k_fold_predict(X, y,X_test, fold_num=10):
    kf = KFold(n_splits=fold_num, random_state=None, shuffle=False)
    kf.get_n_splits(X)
    y_test_predict = np.zeros(X_test.shape[0])
    for train_index, test_index in kf.split(X):
        X_train, X_val = X[train_index], X[test_index]
        y_train, y_val = y[train_index], y[test_index]

        y_pred = fit_model_and_pred(1, X_train, y_train, X_val, y_val, X_test)
        y_test_predict += y_pred

    return y_test_predict/fold_num

In [8]:
train_k_fold(X_train,y_train, fold_num=10) 

You can set `force_col_wise=true` to remove the overhead.
[1]	training's l2: 91.0603	training's r2: 0.0537746	valid_1's l2: 79.2292	valid_1's r2: 0.0106823
Training until validation scores don't improve for 20 rounds
[2]	training's l2: 86.3589	training's r2: 0.102627	valid_1's l2: 74.9689	valid_1's r2: 0.0638794
[3]	training's l2: 81.813	training's r2: 0.149865	valid_1's l2: 70.9189	valid_1's r2: 0.114451
[4]	training's l2: 77.9254	training's r2: 0.190262	valid_1's l2: 67.4326	valid_1's r2: 0.157984
[5]	training's l2: 74.1013	training's r2: 0.229999	valid_1's l2: 64.4841	valid_1's r2: 0.194801
[6]	training's l2: 70.4187	training's r2: 0.268266	valid_1's l2: 61.084	valid_1's r2: 0.237257
[7]	training's l2: 67.1375	training's r2: 0.302361	valid_1's l2: 58.4419	valid_1's r2: 0.270249
[8]	training's l2: 64.2043	training's r2: 0.332841	valid_1's l2: 56.0377	valid_1's r2: 0.30027
[9]	training's l2: 61.2082	training's r2: 0.363974	valid_1's l2: 53.758	valid_1's r2: 0.328735
[10]	training's l2

[87]	training's l2: 11.7375	training's r2: 0.878033	valid_1's l2: 25.3502	valid_1's r2: 0.683457
[88]	training's l2: 11.5965	training's r2: 0.879499	valid_1's l2: 25.3565	valid_1's r2: 0.683379
[89]	training's l2: 11.456	training's r2: 0.880958	valid_1's l2: 25.3718	valid_1's r2: 0.683188
[90]	training's l2: 11.3195	training's r2: 0.882377	valid_1's l2: 25.4067	valid_1's r2: 0.682752
[91]	training's l2: 11.1697	training's r2: 0.883934	valid_1's l2: 25.4765	valid_1's r2: 0.68188
[92]	training's l2: 11.0206	training's r2: 0.885482	valid_1's l2: 25.4575	valid_1's r2: 0.682118
[93]	training's l2: 10.8924	training's r2: 0.886815	valid_1's l2: 25.435	valid_1's r2: 0.682399
[94]	training's l2: 10.7524	training's r2: 0.88827	valid_1's l2: 25.2626	valid_1's r2: 0.684552
[95]	training's l2: 10.6305	training's r2: 0.889536	valid_1's l2: 25.1551	valid_1's r2: 0.685894
[96]	training's l2: 10.5036	training's r2: 0.890855	valid_1's l2: 24.9526	valid_1's r2: 0.688422
[97]	training's l2: 10.3767	traini

[92]	training's l2: 10.7054	training's r2: 0.887621	valid_1's l2: 31.3029	valid_1's r2: 0.659219
[93]	training's l2: 10.5871	training's r2: 0.888863	valid_1's l2: 31.244	valid_1's r2: 0.65986
[94]	training's l2: 10.4574	training's r2: 0.890224	valid_1's l2: 31.173	valid_1's r2: 0.660633
[95]	training's l2: 10.3409	training's r2: 0.891447	valid_1's l2: 31.1814	valid_1's r2: 0.660542
[96]	training's l2: 10.2175	training's r2: 0.892742	valid_1's l2: 31.2114	valid_1's r2: 0.660215
[97]	training's l2: 10.0979	training's r2: 0.893998	valid_1's l2: 31.123	valid_1's r2: 0.661177
[98]	training's l2: 9.95788	training's r2: 0.895468	valid_1's l2: 31.1831	valid_1's r2: 0.660523
[99]	training's l2: 9.81621	training's r2: 0.896955	valid_1's l2: 31.1336	valid_1's r2: 0.661062
[100]	training's l2: 9.69341	training's r2: 0.898244	valid_1's l2: 31.0951	valid_1's r2: 0.661481
[101]	training's l2: 9.57197	training's r2: 0.899519	valid_1's l2: 30.979	valid_1's r2: 0.662745
[102]	training's l2: 9.43586	trai

[182]	training's l2: 4.4593	training's r2: 0.953189	valid_1's l2: 29.0447	valid_1's r2: 0.683803
[183]	training's l2: 4.41467	training's r2: 0.953657	valid_1's l2: 29.0082	valid_1's r2: 0.6842
[184]	training's l2: 4.37984	training's r2: 0.954023	valid_1's l2: 28.9784	valid_1's r2: 0.684525
[185]	training's l2: 4.33588	training's r2: 0.954484	valid_1's l2: 28.9667	valid_1's r2: 0.684652
[186]	training's l2: 4.29581	training's r2: 0.954905	valid_1's l2: 28.9327	valid_1's r2: 0.685023
[187]	training's l2: 4.26654	training's r2: 0.955212	valid_1's l2: 28.9487	valid_1's r2: 0.684848
[188]	training's l2: 4.23394	training's r2: 0.955554	valid_1's l2: 28.9754	valid_1's r2: 0.684557
[189]	training's l2: 4.20248	training's r2: 0.955885	valid_1's l2: 28.9631	valid_1's r2: 0.684692
[190]	training's l2: 4.17918	training's r2: 0.956129	valid_1's l2: 28.999	valid_1's r2: 0.684301
[191]	training's l2: 4.14564	training's r2: 0.956481	valid_1's l2: 29.0324	valid_1's r2: 0.683936
[192]	training's l2: 4.1

[59]	training's l2: 16.254	training's r2: 0.827669	valid_0's l2: 50.8848	valid_0's r2: 0.48262
[60]	training's l2: 16.0089	training's r2: 0.830267	valid_0's l2: 50.6977	valid_0's r2: 0.484522
[61]	training's l2: 15.7325	training's r2: 0.833198	valid_0's l2: 50.5508	valid_0's r2: 0.486015
[62]	training's l2: 15.4869	training's r2: 0.835802	valid_0's l2: 50.4626	valid_0's r2: 0.486913
[63]	training's l2: 15.2354	training's r2: 0.838468	valid_0's l2: 50.0421	valid_0's r2: 0.491188
[64]	training's l2: 14.9977	training's r2: 0.840989	valid_0's l2: 49.8033	valid_0's r2: 0.493615
[65]	training's l2: 14.7852	training's r2: 0.843241	valid_0's l2: 49.6776	valid_0's r2: 0.494894
[66]	training's l2: 14.5763	training's r2: 0.845456	valid_0's l2: 49.6669	valid_0's r2: 0.495002
[67]	training's l2: 14.3866	training's r2: 0.847468	valid_0's l2: 49.696	valid_0's r2: 0.494707
[68]	training's l2: 14.2117	training's r2: 0.849322	valid_0's l2: 49.8309	valid_0's r2: 0.493335
[69]	training's l2: 14.0074	train

[4]	training's l2: 78.2907	training's r2: 0.191468	valid_1's l2: 64.7256	valid_1's r2: 0.167961
[5]	training's l2: 74.3709	training's r2: 0.231948	valid_1's l2: 62.4441	valid_1's r2: 0.19729
[6]	training's l2: 70.552	training's r2: 0.271387	valid_1's l2: 60.1386	valid_1's r2: 0.226926
[7]	training's l2: 67.3318	training's r2: 0.304643	valid_1's l2: 57.7256	valid_1's r2: 0.257946
[8]	training's l2: 64.4565	training's r2: 0.334337	valid_1's l2: 55.7406	valid_1's r2: 0.283462
[9]	training's l2: 61.4005	training's r2: 0.365898	valid_1's l2: 53.5085	valid_1's r2: 0.312155
[10]	training's l2: 58.5208	training's r2: 0.395638	valid_1's l2: 51.7266	valid_1's r2: 0.335061
[11]	training's l2: 55.94	training's r2: 0.42229	valid_1's l2: 50.0649	valid_1's r2: 0.356423
[12]	training's l2: 53.5453	training's r2: 0.447021	valid_1's l2: 48.9061	valid_1's r2: 0.371319
[13]	training's l2: 51.4535	training's r2: 0.468624	valid_1's l2: 47.8473	valid_1's r2: 0.38493
[14]	training's l2: 49.5352	training's r2:

[127]	training's l2: 7.339	training's r2: 0.924208	valid_1's l2: 25.9347	valid_1's r2: 0.666613
[128]	training's l2: 7.28266	training's r2: 0.92479	valid_1's l2: 25.9165	valid_1's r2: 0.666847
[129]	training's l2: 7.21952	training's r2: 0.925442	valid_1's l2: 25.9009	valid_1's r2: 0.667047
[130]	training's l2: 7.15547	training's r2: 0.926103	valid_1's l2: 25.8395	valid_1's r2: 0.667837
[131]	training's l2: 7.09404	training's r2: 0.926738	valid_1's l2: 25.799	valid_1's r2: 0.668357
[132]	training's l2: 7.02551	training's r2: 0.927445	valid_1's l2: 25.7748	valid_1's r2: 0.668668
[133]	training's l2: 6.96347	training's r2: 0.928086	valid_1's l2: 25.809	valid_1's r2: 0.668229
[134]	training's l2: 6.90732	training's r2: 0.928666	valid_1's l2: 25.8313	valid_1's r2: 0.667942
[135]	training's l2: 6.84727	training's r2: 0.929286	valid_1's l2: 25.8673	valid_1's r2: 0.667479
[136]	training's l2: 6.78414	training's r2: 0.929938	valid_1's l2: 25.8113	valid_1's r2: 0.668199
[137]	training's l2: 6.72

[8]	training's l2: 62.8581	training's r2: 0.337966	valid_0's l2: 73.0784	valid_0's r2: 0.229315
[9]	training's l2: 59.9	training's r2: 0.369121	valid_0's l2: 70.8363	valid_0's r2: 0.25296
[10]	training's l2: 57.2342	training's r2: 0.397199	valid_0's l2: 68.8923	valid_0's r2: 0.273461
[11]	training's l2: 54.7594	training's r2: 0.423264	valid_0's l2: 67.8072	valid_0's r2: 0.284904
[12]	training's l2: 52.4723	training's r2: 0.447351	valid_0's l2: 66.2205	valid_0's r2: 0.301639
[13]	training's l2: 50.3482	training's r2: 0.469723	valid_0's l2: 64.8433	valid_0's r2: 0.316162
[14]	training's l2: 48.2989	training's r2: 0.491307	valid_0's l2: 64.0475	valid_0's r2: 0.324555
[15]	training's l2: 46.5192	training's r2: 0.510051	valid_0's l2: 63.2185	valid_0's r2: 0.333298
[16]	training's l2: 44.7987	training's r2: 0.528171	valid_0's l2: 62.3746	valid_0's r2: 0.342197
[17]	training's l2: 43.2542	training's r2: 0.544439	valid_0's l2: 61.2693	valid_0's r2: 0.353853
[18]	training's l2: 41.716	training'

The obtained validation r2 score is :  0.49548933691152863
You can set `force_col_wise=true` to remove the overhead.
[1]	training's l2: 88.2331	training's r2: 0.0540176	valid_0's l2: 106.021	valid_0's r2: 0.0246361
Training until validation scores don't improve for 20 rounds
[2]	training's l2: 83.7998	training's r2: 0.101549	valid_0's l2: 102.248	valid_0's r2: 0.0593459
[3]	training's l2: 79.3691	training's r2: 0.149052	valid_0's l2: 98.8955	valid_0's r2: 0.0901913
[4]	training's l2: 75.4913	training's r2: 0.190627	valid_0's l2: 95.1762	valid_0's r2: 0.124408
[5]	training's l2: 71.7393	training's r2: 0.230854	valid_0's l2: 91.5202	valid_0's r2: 0.158042
[6]	training's l2: 68.135	training's r2: 0.269497	valid_0's l2: 88.0664	valid_0's r2: 0.189816
[7]	training's l2: 64.9842	training's r2: 0.303278	valid_0's l2: 85.1575	valid_0's r2: 0.216577
[8]	training's l2: 62.0527	training's r2: 0.334708	valid_0's l2: 82.4191	valid_0's r2: 0.241769
[9]	training's l2: 59.1716	training's r2: 0.365597	

[96]	training's l2: 9.3761	training's r2: 0.899475	valid_0's l2: 43.7562	valid_0's r2: 0.597456
[97]	training's l2: 9.26272	training's r2: 0.900691	valid_0's l2: 43.8139	valid_0's r2: 0.596925
[98]	training's l2: 9.15464	training's r2: 0.901849	valid_0's l2: 43.8697	valid_0's r2: 0.596412
[99]	training's l2: 9.04911	training's r2: 0.902981	valid_0's l2: 43.8265	valid_0's r2: 0.596809
[100]	training's l2: 8.9401	training's r2: 0.90415	valid_0's l2: 43.8129	valid_0's r2: 0.596934
[101]	training's l2: 8.83727	training's r2: 0.905252	valid_0's l2: 43.8341	valid_0's r2: 0.596739
[102]	training's l2: 8.74729	training's r2: 0.906217	valid_0's l2: 43.9004	valid_0's r2: 0.59613
[103]	training's l2: 8.64398	training's r2: 0.907324	valid_0's l2: 43.9834	valid_0's r2: 0.595366
[104]	training's l2: 8.53503	training's r2: 0.908492	valid_0's l2: 43.8983	valid_0's r2: 0.596149
[105]	training's l2: 8.41908	training's r2: 0.909736	valid_0's l2: 43.8514	valid_0's r2: 0.596581
[106]	training's l2: 8.3302	

[70]	training's l2: 13.7879	training's r2: 0.854862	valid_1's l2: 48.7251	valid_1's r2: 0.477727
[71]	training's l2: 13.576	training's r2: 0.857093	valid_1's l2: 48.7139	valid_1's r2: 0.477847
[72]	training's l2: 13.3778	training's r2: 0.859179	valid_1's l2: 48.7522	valid_1's r2: 0.477437
[73]	training's l2: 13.1931	training's r2: 0.861124	valid_1's l2: 48.7555	valid_1's r2: 0.477402
[74]	training's l2: 12.9998	training's r2: 0.863158	valid_1's l2: 48.8653	valid_1's r2: 0.476225
[75]	training's l2: 12.7956	training's r2: 0.865307	valid_1's l2: 48.8086	valid_1's r2: 0.476832
[76]	training's l2: 12.5941	training's r2: 0.867428	valid_1's l2: 48.7393	valid_1's r2: 0.477575
[77]	training's l2: 12.3958	training's r2: 0.869516	valid_1's l2: 48.6758	valid_1's r2: 0.478256
[78]	training's l2: 12.2272	training's r2: 0.871291	valid_1's l2: 48.57	valid_1's r2: 0.479389
[79]	training's l2: 12.0454	training's r2: 0.873204	valid_1's l2: 48.5191	valid_1's r2: 0.479935
[80]	training's l2: 11.8814	train

[79]	training's l2: 12.062	training's r2: 0.873851	valid_0's l2: 40.6232	valid_0's r2: 0.536762
[80]	training's l2: 11.9212	training's r2: 0.875323	valid_0's l2: 40.7265	valid_0's r2: 0.535585
[81]	training's l2: 11.7449	training's r2: 0.877168	valid_0's l2: 40.8118	valid_0's r2: 0.534613
[82]	training's l2: 11.5916	training's r2: 0.878771	valid_0's l2: 40.9191	valid_0's r2: 0.533389
[83]	training's l2: 11.4496	training's r2: 0.880256	valid_0's l2: 41.0318	valid_0's r2: 0.532104
[84]	training's l2: 11.3078	training's r2: 0.881739	valid_0's l2: 40.9477	valid_0's r2: 0.533063
[85]	training's l2: 11.1606	training's r2: 0.883278	valid_0's l2: 40.9355	valid_0's r2: 0.533201
[86]	training's l2: 11.0236	training's r2: 0.884711	valid_0's l2: 40.8362	valid_0's r2: 0.534334
[87]	training's l2: 10.9089	training's r2: 0.88591	valid_0's l2: 40.8976	valid_0's r2: 0.533633
[88]	training's l2: 10.7745	training's r2: 0.887316	valid_0's l2: 40.7113	valid_0's r2: 0.535758
[89]	training's l2: 10.6586	trai

[3]	training's l2: 79.7445	training's r2: 0.144955	valid_1's l2: 96.0738	valid_1's r2: 0.128735
[4]	training's l2: 76.2439	training's r2: 0.18249	valid_1's l2: 91.917	valid_1's r2: 0.166432
[5]	training's l2: 72.4995	training's r2: 0.222638	valid_1's l2: 87.6762	valid_1's r2: 0.204891
[6]	training's l2: 68.9222	training's r2: 0.260995	valid_1's l2: 84.0488	valid_1's r2: 0.237787
[7]	training's l2: 65.7917	training's r2: 0.294561	valid_1's l2: 80.8881	valid_1's r2: 0.26645
[8]	training's l2: 63.0528	training's r2: 0.323929	valid_1's l2: 77.9848	valid_1's r2: 0.292779
[9]	training's l2: 60.2224	training's r2: 0.354277	valid_1's l2: 74.7028	valid_1's r2: 0.322543
[10]	training's l2: 57.5402	training's r2: 0.383036	valid_1's l2: 71.8448	valid_1's r2: 0.348461
[11]	training's l2: 55.1579	training's r2: 0.408581	valid_1's l2: 69.0907	valid_1's r2: 0.373437
[12]	training's l2: 52.8616	training's r2: 0.433202	valid_1's l2: 66.4485	valid_1's r2: 0.397399
[13]	training's l2: 50.7432	training's r

You can set `force_col_wise=true` to remove the overhead.
[1]	training's l2: 89.4688	training's r2: 0.0554017	valid_1's l2: 93.1071	valid_1's r2: 0.0273311
Training until validation scores don't improve for 20 rounds
[2]	training's l2: 84.7837	training's r2: 0.104866	valid_1's l2: 90.1075	valid_1's r2: 0.0586676
[3]	training's l2: 80.4232	training's r2: 0.150903	valid_1's l2: 86.7507	valid_1's r2: 0.093735
[4]	training's l2: 76.5937	training's r2: 0.191335	valid_1's l2: 84.1656	valid_1's r2: 0.120741
[5]	training's l2: 72.7777	training's r2: 0.231624	valid_1's l2: 81.0054	valid_1's r2: 0.153755
[6]	training's l2: 69.1622	training's r2: 0.269796	valid_1's l2: 78.5606	valid_1's r2: 0.179295
[7]	training's l2: 65.7938	training's r2: 0.305358	valid_1's l2: 76.8357	valid_1's r2: 0.197314
[8]	training's l2: 62.9213	training's r2: 0.335686	valid_1's l2: 74.7175	valid_1's r2: 0.219443
[9]	training's l2: 59.9001	training's r2: 0.367584	valid_1's l2: 72.877	valid_1's r2: 0.23867
[10]	training's 

[116]	training's l2: 8.10066	training's r2: 0.914474	valid_1's l2: 39.7091	valid_1's r2: 0.585168
[117]	training's l2: 8.03175	training's r2: 0.915202	valid_1's l2: 39.6614	valid_1's r2: 0.585666
[118]	training's l2: 7.95083	training's r2: 0.916056	valid_1's l2: 39.7961	valid_1's r2: 0.584259
[119]	training's l2: 7.86988	training's r2: 0.916911	valid_1's l2: 39.7507	valid_1's r2: 0.584734
[120]	training's l2: 7.79853	training's r2: 0.917664	valid_1's l2: 39.7689	valid_1's r2: 0.584543
[121]	training's l2: 7.72889	training's r2: 0.918399	valid_1's l2: 39.6736	valid_1's r2: 0.585539
[122]	training's l2: 7.65504	training's r2: 0.919179	valid_1's l2: 39.5735	valid_1's r2: 0.586584
[123]	training's l2: 7.57802	training's r2: 0.919992	valid_1's l2: 39.5454	valid_1's r2: 0.586878
[124]	training's l2: 7.50241	training's r2: 0.920791	valid_1's l2: 39.5665	valid_1's r2: 0.586658
[125]	training's l2: 7.42289	training's r2: 0.92163	valid_1's l2: 39.5473	valid_1's r2: 0.586858
[126]	training's l2: 

[232]	training's l2: 3.36318	training's r2: 0.964492	valid_1's l2: 37.4136	valid_1's r2: 0.609149
[233]	training's l2: 3.33786	training's r2: 0.964759	valid_1's l2: 37.3741	valid_1's r2: 0.609561
[234]	training's l2: 3.31805	training's r2: 0.964969	valid_1's l2: 37.4367	valid_1's r2: 0.608907
[235]	training's l2: 3.29515	training's r2: 0.96521	valid_1's l2: 37.3798	valid_1's r2: 0.609502
[236]	training's l2: 3.276	training's r2: 0.965412	valid_1's l2: 37.3187	valid_1's r2: 0.61014
[237]	training's l2: 3.25908	training's r2: 0.965591	valid_1's l2: 37.2591	valid_1's r2: 0.610763
[238]	training's l2: 3.24	training's r2: 0.965793	valid_1's l2: 37.2487	valid_1's r2: 0.610871
[239]	training's l2: 3.22178	training's r2: 0.965985	valid_1's l2: 37.1804	valid_1's r2: 0.611585
[240]	training's l2: 3.2016	training's r2: 0.966198	valid_1's l2: 37.1971	valid_1's r2: 0.61141
[241]	training's l2: 3.18134	training's r2: 0.966412	valid_1's l2: 37.1594	valid_1's r2: 0.611804
[242]	training's l2: 3.16554	

In [9]:
# Y_test_pred = train_k_fold_predict(X_train, y_train,X_test)
# final_res = np.vstack((indices_test, Y_test_pred)).T

In [10]:
# df_res = pd.DataFrame(final_res)
# df_res.to_csv("our_result.csv", header = ["id", "y"], index=False)