In [27]:
!pip install bayesian-optimization

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from scipy.stats import rankdata
import lightgbm as lgb
from sklearn import metrics
import gc
import warnings
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

In [29]:
df = pd.read_csv("/content/drive/MyDrive/weatherAUS.csv")
df.dropna(subset=["RainTomorrow"], inplace=True)

In [30]:
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,No,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,No,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No


In [31]:
df.shape

(142193, 23)

In [32]:
train_df = df.copy()
train_df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,No,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,No,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No


In [33]:
df.RainTomorrow.value_counts()


No     110316
Yes     31877
Name: RainTomorrow, dtype: int64

The data set is unbalanced

Handling Null Values

In [None]:
num = train_df._get_numeric_data().columns
cat = set(train_df.columns) - set(num)
loc_for_miss = train_df["Location"].unique().tolist()
ls = []
def removeNull(columns):
  j=0
  while j<=len(columns)-1:
      for i in range(len(loc_for_miss)):    
          ls.append(str(train_df.loc[df["Location"] == loc_for_miss[i], columns[j]].median())) #great outliers so used median

      for i in range(len(loc_for_miss)):
          train_df.loc[train_df["Location"] == loc_for_miss[i], columns[j]] = train_df.loc[df["Location"] == loc_for_miss[i], columns[j]].fillna(ls[i])
      j+=1
      train_df[columns] = train_df[columns].astype(float)

removeNull(num)

In [35]:
train_df.isna().sum()


Date                 0
Location             0
MinTemp              0
MaxTemp              0
Rainfall             0
Evaporation          0
Sunshine             0
WindGustDir       9330
WindGustSpeed        0
WindDir9am       10013
WindDir3pm        3778
WindSpeed9am         0
WindSpeed3pm         0
Humidity9am          0
Humidity3pm          0
Pressure9am          0
Pressure3pm          0
Cloud9am             0
Cloud3pm             0
Temp9am              0
Temp3pm              0
RainToday         1406
RainTomorrow         0
dtype: int64

In [36]:
# df.loc[df["Location"] == 'Albury', "MinTemp"].isna().value_counts()

In [37]:
ls_WGD = []
loc_for_miss = train_df["Location"].unique()
for j in range(len(loc_for_miss)):
  df_allNanWGD = train_df.loc[train_df["Location"]== loc_for_miss[j]]
  if (df_allNanWGD["WindGustDir"].isnull().all()):
    ls_WGD.append(loc_for_miss[j])

In [38]:
ls_WGD

['Newcastle', 'Albany']

In [39]:
train_df.loc[train_df["Location"] == "Newcastle", "WindGustDir"] = "W"
train_df.loc[train_df["Location"] == "Albany", "WindGustDir"] = "SW"

In [40]:
# plt.scatter(train_df[(train_df["Location"] == "Woomera")]["WindGustDir"], train_df["RainTomorrow"])

In [41]:
categorical = ['RainToday', 'WindDir9am', 'WindDir3pm', 'WindGustDir']
loc_for_miss = ['Albury','BadgerysCreek', 'Cobar', 'CoffsHarbour', 'Moree','Newcastle','NorahHead','NorfolkIsland',
                'Penrith','Richmond','Sydney','SydneyAirport','WaggaWagga','Williamtown','Wollongong','Canberra',
                'Tuggeranong','MountGinini','Ballarat','Bendigo','Sale','MelbourneAirport','Melbourne','Mildura',
                'Nhil','Portland','Watsonia','Dartmoor','Brisbane','Cairns','GoldCoast','Townsville','Adelaide',
                'MountGambier','Nuriootpa','Woomera','Albany','Witchcliffe','PearceRAAF','PerthAirport','Perth',
                'SalmonGums','Walpole','Hobart','Launceston','AliceSprings','Darwin','Katherine','Uluru']
ls = []
ls_allNAN = []
def removeNull(columns):
    for j in range(len(columns)):
        ls = []
        for i in range(len(loc_for_miss)): 
            ls.append(train_df.loc[train_df["Location"] == loc_for_miss[i], columns[j]].mode()[0])
        for i in range(len(loc_for_miss)):
            train_df.loc[train_df["Location"] == loc_for_miss[i], columns[j]] = train_df.loc[train_df["Location"] == loc_for_miss[i], columns[j]].fillna(ls[i])
        train_df[columns] = train_df[columns].astype(object)
    
removeNull(categorical)

In [42]:
train_df.isnull().sum()

Date             0
Location         0
MinTemp          0
MaxTemp          0
Rainfall         0
Evaporation      0
Sunshine         0
WindGustDir      0
WindGustSpeed    0
WindDir9am       0
WindDir3pm       0
WindSpeed9am     0
WindSpeed3pm     0
Humidity9am      0
Humidity3pm      0
Pressure9am      0
Pressure3pm      0
Cloud9am         0
Cloud3pm         0
Temp9am          0
Temp3pm          0
RainToday        0
RainTomorrow     0
dtype: int64

###Feature Engineering

In [43]:
# df.isnull().sum()

In [44]:
# df.shape

In [45]:
#Adding extra columns by splitting date column
train_df['Year']=[d.split('-')[0] for d in train_df.Date]
train_df['Year'].astype(float)
train_df['Month']=[d.split('-')[1] for d in train_df.Date]
train_df['Day']=[d.split('-')[2] for d in train_df.Date]
train_df.drop(columns=["Date"], axis=1, inplace=True)

In [46]:
#performing labelEncoding on y i.e RainTommorrow
le = LabelEncoder()
train_df['RainTomorrow']= le.fit_transform(train_df['RainTomorrow'])
train_df.RainTomorrow.value_counts()

0    110316
1     31877
Name: RainTomorrow, dtype: int64

In [47]:
train_df_WindGustDir = pd.get_dummies(train_df["WindGustDir"], prefix="1")
train_df_WindDir9am = pd.get_dummies(train_df["WindDir9am"], prefix="2")
train_df_WindDir3pm = pd.get_dummies(train_df["WindDir3pm"], prefix="3")
train_df_RainToday = pd.get_dummies(train_df["RainToday"])
train_location_ohe = pd.get_dummies(train_df["Location"])
train_df_new = pd.concat([train_df, train_df_WindGustDir, train_df_WindDir9am, train_df_WindDir3pm, train_df_RainToday, train_location_ohe], axis=1)
train_df_new.drop(columns=["WindGustDir", "WindDir9am", "WindDir3pm", "RainToday", "Location"], axis=1, inplace=True)

In [48]:
target = 'RainTomorrow'
x_train_df = train_df_new.drop("RainTomorrow", axis=1)
predictors = x_train_df.columns.values.tolist()[:]

In [49]:
train_df_new.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,...,Townsville,Tuggeranong,Uluru,WaggaWagga,Walpole,Watsonia,Williamtown,Witchcliffe,Wollongong,Woomera
0,13.4,22.9,0.6,9.1,9.1,44.0,20.0,24.0,71.0,22.0,...,0,0,0,0,0,0,0,0,0,0
1,7.4,25.1,0.0,9.1,9.1,44.0,4.0,22.0,44.0,25.0,...,0,0,0,0,0,0,0,0,0,0
2,12.9,25.7,0.0,9.1,9.1,46.0,19.0,26.0,38.0,30.0,...,0,0,0,0,0,0,0,0,0,0
3,9.2,28.0,0.0,9.1,9.1,24.0,11.0,9.0,45.0,16.0,...,0,0,0,0,0,0,0,0,0,0
4,17.5,32.3,1.0,9.1,9.1,41.0,7.0,20.0,82.0,33.0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
# predictors
train_df_new.columns.values

array(['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
       'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am',
       'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Cloud9am',
       'Cloud3pm', 'Temp9am', 'Temp3pm', 'RainTomorrow', 'Year', 'Month',
       'Day', '1_E', '1_ENE', '1_ESE', '1_N', '1_NE', '1_NNE', '1_NNW',
       '1_NW', '1_S', '1_SE', '1_SSE', '1_SSW', '1_SW', '1_W', '1_WNW',
       '1_WSW', '2_E', '2_ENE', '2_ESE', '2_N', '2_NE', '2_NNE', '2_NNW',
       '2_NW', '2_S', '2_SE', '2_SSE', '2_SSW', '2_SW', '2_W', '2_WNW',
       '2_WSW', '3_E', '3_ENE', '3_ESE', '3_N', '3_NE', '3_NNE', '3_NNW',
       '3_NW', '3_S', '3_SE', '3_SSE', '3_SSW', '3_SW', '3_W', '3_WNW',
       '3_WSW', 'No', 'Yes', 'Adelaide', 'Albany', 'Albury',
       'AliceSprings', 'BadgerysCreek', 'Ballarat', 'Bendigo', 'Brisbane',
       'Cairns', 'Canberra', 'Cobar', 'CoffsHarbour', 'Dartmoor',
       'Darwin', 'GoldCoast', 'Hobart', 'Katherine', 'Launceston',
       'Melbourne'

In [51]:
bayesian_tr_index, bayesian_val_index  = list(StratifiedKFold(n_splits=2, shuffle=True, random_state=1).split(train_df_new, train_df_new.RainTomorrow.values))[0]

These bayesian_tr_index and bayesian_val_index indexes will be used for the bayesian optimization as training and validation index of training dataset.

In [52]:
bayesian_tr_index

array([     0,      3,      5, ..., 142189, 142191, 142192])

Black box function to be optimized (LightGBM)

In [53]:
def LGB_bayesian(
    num_leaves,  # int
    min_data_in_leaf,  # int
    learning_rate,
    min_sum_hessian_in_leaf,    # int  
    feature_fraction,
    lambda_l1,
    lambda_l2,
    min_gain_to_split,
    max_depth):
    
    # LightGBM expects next three parameters need to be integer. So we make them integer
    num_leaves = int(num_leaves)
    min_data_in_leaf = int(min_data_in_leaf)
    max_depth = int(max_depth)

    assert type(num_leaves) == int
    assert type(min_data_in_leaf) == int
    assert type(max_depth) == int

    param = {
        'num_leaves': num_leaves,
        'max_bin': 63,
        'min_data_in_leaf': min_data_in_leaf,
        'learning_rate': learning_rate,
        'min_sum_hessian_in_leaf': min_sum_hessian_in_leaf,
        'bagging_fraction': 1.0,
        'bagging_freq': 5,
        'feature_fraction': feature_fraction,
        'lambda_l1': lambda_l1,
        'lambda_l2': lambda_l2,
        'min_gain_to_split': min_gain_to_split,
        'max_depth': max_depth,
        'save_binary': True, 
        'seed': 1337,
        'feature_fraction_seed': 1337,
        'bagging_seed': 1337,
        'drop_seed': 1337,
        'data_random_seed': 1337,
        'objective': 'binary',
        'boosting_type': 'gbdt',
        'verbose': 1,
        'metric': 'auc',
        'is_unbalance': True,
        'boost_from_average': False,
        'verbose':-1,   

    }    
    
    
    xg_train = lgb.Dataset(train_df_new.iloc[bayesian_tr_index][predictors].values,
                           label=train_df_new.iloc[bayesian_tr_index][target].values,
                           feature_name=predictors,
                           free_raw_data = False
                           )
    xg_valid = lgb.Dataset(train_df_new.iloc[bayesian_val_index][predictors].values,
                           label=train_df_new.iloc[bayesian_val_index][target].values,
                           feature_name=predictors,
                           free_raw_data = False
                           )   

    num_round = 5000
    clf = lgb.train(param, xg_train, num_round, valid_sets = [xg_valid], verbose_eval=250, early_stopping_rounds = 50)
    
    predictions = clf.predict(train_df_new.iloc[bayesian_val_index][predictors].values, num_iteration=clf.best_iteration)   
    
    score = metrics.roc_auc_score(train_df_new.iloc[bayesian_val_index][target].values, predictions)
    
    return score

The above LGB_bayesian function will act as black box function for Bayesian optimization. I already defined the the trainng and validation dataset for LightGBM inside the LGB_bayesian function.

The LGB_bayesian function takes values for num_leaves, min_data_in_leaf, learning_rate, min_sum_hessian_in_leaf, feature_fraction, lambda_l1, lambda_l2, min_gain_to_split, max_depth from Bayesian optimization framework. Keep in mind that num_leaves, min_data_in_leaf, and max_depth should be integer for LightGBM. But Bayesian Optimization sends continous vales to function. So I force them to be integer. I am only going to find optimal parameter values of them.

In [54]:
# Bounded region of parameter space
bounds_LGB = {
    'num_leaves': (5, 20), 
    'min_data_in_leaf': (5, 20),  
    'learning_rate': (0.01, 0.3),
    'min_sum_hessian_in_leaf': (0.00001, 0.01),    
    'feature_fraction': (0.05, 0.5),
    'lambda_l1': (0, 5.0), 
    'lambda_l2': (0, 5.0), 
    'min_gain_to_split': (0, 1.0),
    'max_depth':(3,15),
}

Let's put all of them in BayesianOptimization object

In [55]:
from bayes_opt import BayesianOptimization

In [56]:
LGB_BO = BayesianOptimization(LGB_bayesian, bounds_LGB, random_state=13)

Now, let's the the key space (parameters) we are going to optimize:

In [57]:
print(LGB_BO.space.keys)

['feature_fraction', 'lambda_l1', 'lambda_l2', 'learning_rate', 'max_depth', 'min_data_in_leaf', 'min_gain_to_split', 'min_sum_hessian_in_leaf', 'num_leaves']


I have created the BayesianOptimization object (LGB_BO), it will not work until I call maximize. Before calling it, I want to explain two parameters of BayesianOptimization object (LGB_BO) which we can pass to maximize:

init_points: How many initial random runs of random exploration we want to perform. In our case LGB_bayesian will be called n_iter times.
n_iter: How many runs of bayesian optimization we want to perform after number of init_points runs.
Now, it's time to call the function from Bayesian optimization framework to maximize. I allow LGB_BO object to run for 5 init_points (exploration) and 5 n_iter (exploitation).

In [58]:
init_points = 5
n_iter = 5

In [59]:
print('-' * 130)

with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    LGB_BO.maximize(init_points=init_points, n_iter=n_iter)

----------------------------------------------------------------------------------------------------------------------------------
|   iter    |  target   | featur... | lambda_l1 | lambda_l2 | learni... | max_depth | min_da... | min_ga... | min_su... | num_le... |
-------------------------------------------------------------------------------------------------------------------------------------
Training until validation scores don't improve for 50 rounds
[250]	valid_0's auc: 0.89037
Early stopping, best iteration is:
[387]	valid_0's auc: 0.890997
| [0m1        [0m | [0m0.891    [0m | [0m0.4      [0m | [0m1.188    [0m | [0m4.121    [0m | [0m0.2901   [0m | [0m14.67    [0m | [0m11.8     [0m | [0m0.609    [0m | [0m0.007758 [0m | [0m14.62    [0m |
Training until validation scores don't improve for 50 rounds
[250]	valid_0's auc: 0.869954
[500]	valid_0's auc: 0.879415
[750]	valid_0's auc: 0.883873
[1000]	valid_0's auc: 0.88656
[1250]	valid_0's auc: 0.888366
[1500]	valid

As the optimization is done, let's see what is the maximum value we have got.


In [60]:
LGB_BO.max['target']

0.8952502265443587

In [61]:
LGB_BO.max['params']

{'feature_fraction': 0.31393193317588663,
 'lambda_l1': 4.592234475725691,
 'lambda_l2': 3.019797930443967,
 'learning_rate': 0.04675496484634876,
 'max_depth': 10.019480917103467,
 'min_data_in_leaf': 19.984789783155488,
 'min_gain_to_split': 0.19046835383531202,
 'min_sum_hessian_in_leaf': 0.00023038709571279175,
 'num_leaves': 6.571637296909833}

###Train the model

Using the parameters that we got above, we train the model

In [69]:
param_lgb = {
        'num_leaves': int(LGB_BO.max['params']['num_leaves']), # remember to int here
        'max_bin': 63,
        'min_data_in_leaf': int(LGB_BO.max['params']['min_data_in_leaf']), # remember to int here
        'learning_rate': LGB_BO.max['params']['learning_rate'],
        'min_sum_hessian_in_leaf': LGB_BO.max['params']['min_sum_hessian_in_leaf'],
        'bagging_fraction': 1.0, 
        'bagging_freq': 5, 
        'feature_fraction': LGB_BO.max['params']['feature_fraction'],
        'lambda_l1': LGB_BO.max['params']['lambda_l1'],
        'lambda_l2': LGB_BO.max['params']['lambda_l2'],
        'min_gain_to_split': LGB_BO.max['params']['min_gain_to_split'],
        'max_depth': int(LGB_BO.max['params']['max_depth']), # remember to int here
        'save_binary': True,
        'seed': 1337,
        'feature_fraction_seed': 1337,
        'bagging_seed': 1337,
        'drop_seed': 1337,
        'data_random_seed': 1337,
        'objective': 'binary',
        'boosting_type': 'gbdt',
        'verbose': 1,
        'metric': 'auc',
        'is_unbalance': True,
        'boost_from_average': False,
        'verbose':-1, 
    }

In [73]:
# param_lgb['num_leaves']

In [71]:
#10 k folds
nfold = 10

skf = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=2019)

oof = np.zeros(len(train_df))
# predictions = np.zeros(len(test_df))

i = 1
for train_index, valid_index in skf.split(train_df_new, train_df.RainTomorrow.values):
    print("\nfold {}".format(i))
    xg_train = lgb.Dataset(train_df_new.iloc[train_index][predictors].values,
                           label=train_df_new.iloc[train_index][target].values,
                           feature_name=predictors,
                           free_raw_data = False
                           )
    xg_valid = lgb.Dataset(train_df_new.iloc[valid_index][predictors].values,
                           label=train_df_new.iloc[valid_index][target].values,
                           feature_name=predictors,
                           free_raw_data = False
                           )   

    nround = 8523
    clf = lgb.train(param_lgb, xg_train, nround, valid_sets = [xg_valid], verbose_eval=250)
    oof[valid_index] = clf.predict(train_df_new.iloc[valid_index][predictors].values, num_iteration=nround) 
    
    # predictions += clf.predict(test_df[predictors], num_iteration=nround) / nfold
    i = i + 1

print("\n\nCV AUC: {:<0.4f}".format(metrics.roc_auc_score(train_df_new.RainTomorrow.values, oof)))


fold 1




[250]	valid_0's auc: 0.867867
[500]	valid_0's auc: 0.877529
[750]	valid_0's auc: 0.882494
[1000]	valid_0's auc: 0.885713
[1250]	valid_0's auc: 0.887994
[1500]	valid_0's auc: 0.889769
[1750]	valid_0's auc: 0.891267
[2000]	valid_0's auc: 0.89223
[2250]	valid_0's auc: 0.893142
[2500]	valid_0's auc: 0.893788
[2750]	valid_0's auc: 0.89445
[3000]	valid_0's auc: 0.895247
[3250]	valid_0's auc: 0.895668
[3500]	valid_0's auc: 0.89623
[3750]	valid_0's auc: 0.896606
[4000]	valid_0's auc: 0.896921
[4250]	valid_0's auc: 0.897245
[4500]	valid_0's auc: 0.897503
[4750]	valid_0's auc: 0.897837
[5000]	valid_0's auc: 0.898038
[5250]	valid_0's auc: 0.898203
[5500]	valid_0's auc: 0.898434
[5750]	valid_0's auc: 0.89868
[6000]	valid_0's auc: 0.898842
[6250]	valid_0's auc: 0.899006
[6500]	valid_0's auc: 0.899201
[6750]	valid_0's auc: 0.899282
[7000]	valid_0's auc: 0.899301
[7250]	valid_0's auc: 0.899301
[7500]	valid_0's auc: 0.899301
[7750]	valid_0's auc: 0.899301
[8000]	valid_0's auc: 0.899301
[8250]	valid_0'



[250]	valid_0's auc: 0.868368
[500]	valid_0's auc: 0.877162
[750]	valid_0's auc: 0.881697
[1000]	valid_0's auc: 0.884771
[1250]	valid_0's auc: 0.887124
[1500]	valid_0's auc: 0.888872
[1750]	valid_0's auc: 0.890199
[2000]	valid_0's auc: 0.891126
[2250]	valid_0's auc: 0.892045
[2500]	valid_0's auc: 0.892902
[2750]	valid_0's auc: 0.893533
[3000]	valid_0's auc: 0.894057
[3250]	valid_0's auc: 0.894561
[3500]	valid_0's auc: 0.894927
[3750]	valid_0's auc: 0.895514
[4000]	valid_0's auc: 0.895781
[4250]	valid_0's auc: 0.896119
[4500]	valid_0's auc: 0.896496
[4750]	valid_0's auc: 0.896863
[5000]	valid_0's auc: 0.897154
[5250]	valid_0's auc: 0.897464
[5500]	valid_0's auc: 0.897716
[5750]	valid_0's auc: 0.897905
[6000]	valid_0's auc: 0.898141
[6250]	valid_0's auc: 0.89836
[6500]	valid_0's auc: 0.898601
[6750]	valid_0's auc: 0.898696
[7000]	valid_0's auc: 0.898794
[7250]	valid_0's auc: 0.898776
[7500]	valid_0's auc: 0.898769
[7750]	valid_0's auc: 0.898769
[8000]	valid_0's auc: 0.898769
[8250]	valid



[250]	valid_0's auc: 0.86978
[500]	valid_0's auc: 0.878723
[750]	valid_0's auc: 0.883357
[1000]	valid_0's auc: 0.886368
[1250]	valid_0's auc: 0.888446
[1500]	valid_0's auc: 0.890094
[1750]	valid_0's auc: 0.891236
[2000]	valid_0's auc: 0.892179
[2250]	valid_0's auc: 0.893024
[2500]	valid_0's auc: 0.893783
[2750]	valid_0's auc: 0.894375
[3000]	valid_0's auc: 0.894966
[3250]	valid_0's auc: 0.895408
[3500]	valid_0's auc: 0.895745
[3750]	valid_0's auc: 0.89614
[4000]	valid_0's auc: 0.896421
[4250]	valid_0's auc: 0.896769
[4500]	valid_0's auc: 0.896988
[4750]	valid_0's auc: 0.897308
[5000]	valid_0's auc: 0.897509
[5250]	valid_0's auc: 0.897706
[5500]	valid_0's auc: 0.897852
[5750]	valid_0's auc: 0.897963
[6000]	valid_0's auc: 0.898139
[6250]	valid_0's auc: 0.898217
[6500]	valid_0's auc: 0.898403
[6750]	valid_0's auc: 0.898434
[7000]	valid_0's auc: 0.898452
[7250]	valid_0's auc: 0.898452
[7500]	valid_0's auc: 0.898452
[7750]	valid_0's auc: 0.898452
[8000]	valid_0's auc: 0.898452
[8250]	valid_



[250]	valid_0's auc: 0.876473
[500]	valid_0's auc: 0.885541
[750]	valid_0's auc: 0.890103
[1000]	valid_0's auc: 0.893003
[1250]	valid_0's auc: 0.895191
[1500]	valid_0's auc: 0.896722
[1750]	valid_0's auc: 0.89776
[2000]	valid_0's auc: 0.898616
[2250]	valid_0's auc: 0.899357
[2500]	valid_0's auc: 0.90014
[2750]	valid_0's auc: 0.900763
[3000]	valid_0's auc: 0.901151
[3250]	valid_0's auc: 0.901704
[3500]	valid_0's auc: 0.902102
[3750]	valid_0's auc: 0.902539
[4000]	valid_0's auc: 0.902878
[4250]	valid_0's auc: 0.903172
[4500]	valid_0's auc: 0.903503
[4750]	valid_0's auc: 0.90389
[5000]	valid_0's auc: 0.904345
[5250]	valid_0's auc: 0.904608
[5500]	valid_0's auc: 0.904854
[5750]	valid_0's auc: 0.905133
[6000]	valid_0's auc: 0.905302
[6250]	valid_0's auc: 0.905414
[6500]	valid_0's auc: 0.905574
[6750]	valid_0's auc: 0.905778
[7000]	valid_0's auc: 0.905853
[7250]	valid_0's auc: 0.90584
[7500]	valid_0's auc: 0.90584
[7750]	valid_0's auc: 0.90584
[8000]	valid_0's auc: 0.90584
[8250]	valid_0's a



[250]	valid_0's auc: 0.875798
[500]	valid_0's auc: 0.885498
[750]	valid_0's auc: 0.89083
[1000]	valid_0's auc: 0.893745
[1250]	valid_0's auc: 0.895837
[1500]	valid_0's auc: 0.897614
[1750]	valid_0's auc: 0.898678
[2000]	valid_0's auc: 0.899619
[2250]	valid_0's auc: 0.900533
[2500]	valid_0's auc: 0.901096
[2750]	valid_0's auc: 0.901521
[3000]	valid_0's auc: 0.902086
[3250]	valid_0's auc: 0.902656
[3500]	valid_0's auc: 0.903017
[3750]	valid_0's auc: 0.903439
[4000]	valid_0's auc: 0.903676
[4250]	valid_0's auc: 0.903946
[4500]	valid_0's auc: 0.904231
[4750]	valid_0's auc: 0.904473
[5000]	valid_0's auc: 0.904794
[5250]	valid_0's auc: 0.905027
[5500]	valid_0's auc: 0.905216
[5750]	valid_0's auc: 0.905371
[6000]	valid_0's auc: 0.905667
[6250]	valid_0's auc: 0.905826
[6500]	valid_0's auc: 0.905955
[6750]	valid_0's auc: 0.905984
[7000]	valid_0's auc: 0.906004
[7250]	valid_0's auc: 0.905999
[7500]	valid_0's auc: 0.905999
[7750]	valid_0's auc: 0.905999
[8000]	valid_0's auc: 0.905999
[8250]	valid



[250]	valid_0's auc: 0.870367
[500]	valid_0's auc: 0.878784
[750]	valid_0's auc: 0.883276
[1000]	valid_0's auc: 0.886109
[1250]	valid_0's auc: 0.888007
[1500]	valid_0's auc: 0.889589
[1750]	valid_0's auc: 0.890616
[2000]	valid_0's auc: 0.891557
[2250]	valid_0's auc: 0.89245
[2500]	valid_0's auc: 0.893064
[2750]	valid_0's auc: 0.893802
[3000]	valid_0's auc: 0.894427
[3250]	valid_0's auc: 0.895087
[3500]	valid_0's auc: 0.895582
[3750]	valid_0's auc: 0.896024
[4000]	valid_0's auc: 0.896471
[4250]	valid_0's auc: 0.896893
[4500]	valid_0's auc: 0.897224
[4750]	valid_0's auc: 0.897459
[5000]	valid_0's auc: 0.897823
[5250]	valid_0's auc: 0.898222
[5500]	valid_0's auc: 0.898456
[5750]	valid_0's auc: 0.898658
[6000]	valid_0's auc: 0.898928
[6250]	valid_0's auc: 0.899165
[6500]	valid_0's auc: 0.899376
[6750]	valid_0's auc: 0.899614
[7000]	valid_0's auc: 0.89963
[7250]	valid_0's auc: 0.89963
[7500]	valid_0's auc: 0.89963
[7750]	valid_0's auc: 0.89963
[8000]	valid_0's auc: 0.89963
[8250]	valid_0's 



[250]	valid_0's auc: 0.868045
[500]	valid_0's auc: 0.87728
[750]	valid_0's auc: 0.882263
[1000]	valid_0's auc: 0.885554
[1250]	valid_0's auc: 0.88783
[1500]	valid_0's auc: 0.889397
[1750]	valid_0's auc: 0.890693
[2000]	valid_0's auc: 0.891589
[2250]	valid_0's auc: 0.892369
[2500]	valid_0's auc: 0.893183
[2750]	valid_0's auc: 0.893604
[3000]	valid_0's auc: 0.894163
[3250]	valid_0's auc: 0.894685
[3500]	valid_0's auc: 0.895061
[3750]	valid_0's auc: 0.895399
[4000]	valid_0's auc: 0.895717
[4250]	valid_0's auc: 0.896037
[4500]	valid_0's auc: 0.896349
[4750]	valid_0's auc: 0.89668
[5000]	valid_0's auc: 0.896967
[5250]	valid_0's auc: 0.8972
[5500]	valid_0's auc: 0.897371
[5750]	valid_0's auc: 0.897638
[6000]	valid_0's auc: 0.897804
[6250]	valid_0's auc: 0.897935
[6500]	valid_0's auc: 0.898159
[6750]	valid_0's auc: 0.898226
[7000]	valid_0's auc: 0.898288
[7250]	valid_0's auc: 0.89829
[7500]	valid_0's auc: 0.89829
[7750]	valid_0's auc: 0.89829
[8000]	valid_0's auc: 0.89829
[8250]	valid_0's auc



[250]	valid_0's auc: 0.874197
[500]	valid_0's auc: 0.883405
[750]	valid_0's auc: 0.887723
[1000]	valid_0's auc: 0.890411
[1250]	valid_0's auc: 0.892397
[1500]	valid_0's auc: 0.893882
[1750]	valid_0's auc: 0.89495
[2000]	valid_0's auc: 0.895675
[2250]	valid_0's auc: 0.896375
[2500]	valid_0's auc: 0.897088
[2750]	valid_0's auc: 0.897498
[3000]	valid_0's auc: 0.898035
[3250]	valid_0's auc: 0.898434
[3500]	valid_0's auc: 0.898917
[3750]	valid_0's auc: 0.899418
[4000]	valid_0's auc: 0.899779
[4250]	valid_0's auc: 0.900254
[4500]	valid_0's auc: 0.900516
[4750]	valid_0's auc: 0.900711
[5000]	valid_0's auc: 0.901045
[5250]	valid_0's auc: 0.901357
[5500]	valid_0's auc: 0.901519
[5750]	valid_0's auc: 0.901685
[6000]	valid_0's auc: 0.901971
[6250]	valid_0's auc: 0.902135
[6500]	valid_0's auc: 0.902319
[6750]	valid_0's auc: 0.90248
[7000]	valid_0's auc: 0.902595
[7250]	valid_0's auc: 0.902589
[7500]	valid_0's auc: 0.90258
[7750]	valid_0's auc: 0.90258
[8000]	valid_0's auc: 0.90258
[8250]	valid_0's



[250]	valid_0's auc: 0.86876
[500]	valid_0's auc: 0.877956
[750]	valid_0's auc: 0.883137
[1000]	valid_0's auc: 0.886136
[1250]	valid_0's auc: 0.888268
[1500]	valid_0's auc: 0.889887
[1750]	valid_0's auc: 0.891238
[2000]	valid_0's auc: 0.892333
[2250]	valid_0's auc: 0.893239
[2500]	valid_0's auc: 0.894035
[2750]	valid_0's auc: 0.894791
[3000]	valid_0's auc: 0.89548
[3250]	valid_0's auc: 0.896006
[3500]	valid_0's auc: 0.896479
[3750]	valid_0's auc: 0.897057
[4000]	valid_0's auc: 0.897526
[4250]	valid_0's auc: 0.89803
[4500]	valid_0's auc: 0.898387
[4750]	valid_0's auc: 0.898735
[5000]	valid_0's auc: 0.899087
[5250]	valid_0's auc: 0.899397
[5500]	valid_0's auc: 0.899528
[5750]	valid_0's auc: 0.899711
[6000]	valid_0's auc: 0.899889
[6250]	valid_0's auc: 0.900038
[6500]	valid_0's auc: 0.900148
[6750]	valid_0's auc: 0.90027
[7000]	valid_0's auc: 0.900298
[7250]	valid_0's auc: 0.900298
[7500]	valid_0's auc: 0.900298
[7750]	valid_0's auc: 0.900298
[8000]	valid_0's auc: 0.900298
[8250]	valid_0'



[250]	valid_0's auc: 0.8698
[500]	valid_0's auc: 0.878793
[750]	valid_0's auc: 0.883537
[1000]	valid_0's auc: 0.886341
[1250]	valid_0's auc: 0.888228
[1500]	valid_0's auc: 0.889743
[1750]	valid_0's auc: 0.890877
[2000]	valid_0's auc: 0.891795
[2250]	valid_0's auc: 0.892645
[2500]	valid_0's auc: 0.893256
[2750]	valid_0's auc: 0.893725
[3000]	valid_0's auc: 0.894353
[3250]	valid_0's auc: 0.894794
[3500]	valid_0's auc: 0.895061
[3750]	valid_0's auc: 0.895431
[4000]	valid_0's auc: 0.895764
[4250]	valid_0's auc: 0.896144
[4500]	valid_0's auc: 0.896381
[4750]	valid_0's auc: 0.89679
[5000]	valid_0's auc: 0.897044
[5250]	valid_0's auc: 0.897254
[5500]	valid_0's auc: 0.897489
[5750]	valid_0's auc: 0.897669
[6000]	valid_0's auc: 0.897825
[6250]	valid_0's auc: 0.897924
[6500]	valid_0's auc: 0.898084
[6750]	valid_0's auc: 0.898053
[7000]	valid_0's auc: 0.898058
[7250]	valid_0's auc: 0.898058
[7500]	valid_0's auc: 0.898058
[7750]	valid_0's auc: 0.898058
[8000]	valid_0's auc: 0.898058
[8250]	valid_0