# Parameter Tuning : Xgboost

In [1]:
import numpy as np
import pandas as pd
from scipy import sparse

### Data Load

In [2]:
X_train = sparse.load_npz("data/X_train.npz")
X_train

<95674x10425 sparse matrix of type '<class 'numpy.float64'>'
	with 1575631 stored elements in Compressed Sparse Row format>

In [3]:
X_test = sparse.load_npz("data/X_test.npz")
X_test

<95674x10425 sparse matrix of type '<class 'numpy.float64'>'
	with 1584735 stored elements in Compressed Sparse Row format>

In [4]:
y_train = pd.read_csv("data/X_train_TripType.csv")
y_train = y_train["TripType"]

print(y_train.shape)
y_train[:5]

(95674,)


0    999.0
1     30.0
2     26.0
3      8.0
4      8.0
Name: TripType, dtype: float64

In [5]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)

print(y_train.shape)
y_train[:5]

(95674,)


array([37, 22, 18,  5,  5])

### HyperParameter Search(Random Search)

In [7]:
import xgboost as xgb

num_epoch = 10
n_estimators = 100

dtrain = xgb.DMatrix(X_train, label=y_train)

hyperparameters_list = []

for epoch in range(num_epoch):

    np.random.seed(None)
    learning_rate = np.random.uniform(low=0.1, high=0.5)
    max_depth = np.random.randint(low=5, high=15)
    reg_alpha = 10 ** np.random.uniform(high=1.0, low=-10.0)
    reg_lambda = 10 ** np.random.uniform(high=1.0, low=-10.0)
    np.random.seed(None)

 

    params = {
        'booster': 'gbtree',
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        'eta': learning_rate,
        'max_depth': max_depth,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda,
        'num_class': len(np.unique(y_train)),
        'n_jobs': 8,
        'silent': 1,
    }

    dtrain = xgb.DMatrix(X_train, label=y_train)
    result = xgb.cv(params, dtrain, n_estimators, nfold=5, metrics={'mlogloss'})
    score = result["test-mlogloss-mean"].min()
    num_best_round = result["test-mlogloss-mean"].idxmin() + 1

    np.random.seed(None)

    print("{0:3} num_round = {1}, learning_rate = {2:.6f}, max_depth = {3}, reg_alpha = {4:.10f}, reg_lambda = {5:.10f}, score = {6:.5f}" \
          .format(epoch, num_best_round, learning_rate, max_depth, reg_alpha, reg_lambda, score))

    hyperparameters_list.append({

        'epoch': epoch,
        'n_estimators': num_best_round,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda,
        'score': score,
        
    })

    tmp = pd.DataFrame.from_dict(hyperparameters_list)
    tmp = tmp.sort_values(by="score", ascending=True)

    tmp.to_csv("hyperparameters/parameters01.csv")


hyperparameters_list = pd.DataFrame.from_dict(hyperparameters_list)
hyperparameters_list = hyperparameters_list.sort_values(by="score", ascending=True)

print(hyperparameters_list.shape)

hyperparameters_list.head()

will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.


  0 num_round = 100, learning_rate = 0.350029, max_depth = 5, reg_alpha = 0.0002214152, reg_lambda = 0.0000000329, score = 0.79453
  1 num_round = 100, learning_rate = 0.433329, max_depth = 8, reg_alpha = 1.1259080079, reg_lambda = 0.0015721334, score = 0.99754
  2 num_round = 1, learning_rate = 0.487687, max_depth = 10, reg_alpha = 0.0000000002, reg_lambda = 0.0014117298, score = 1.44036
  3 num_round = 100, learning_rate = 0.259794, max_depth = 10, reg_alpha = 0.0000886337, reg_lambda = 0.0003268571, score = 0.73457
  4 num_round = 100, learning_rate = 0.178601, max_depth = 7, reg_alpha = 0.0000000001, reg_lambda = 0.0499822792, score = 0.70055
  5 num_round = 82, learning_rate = 0.283044, max_depth = 12, reg_alpha = 0.0027881801, reg_lambda = 0.0000001144, score = 0.76155
  6 num_round = 1, learning_rate = 0.477913, max_depth = 5, reg_alpha = 0.0000000011, reg_lambda = 0.0045379917, score = 1.56475
  7 num_round = 100, learning_rate = 0.110012, max_depth = 5, reg_alpha = 0.000044620

Unnamed: 0,epoch,learning_rate,max_depth,n_estimators,reg_alpha,reg_lambda,score
4,4,0.178601,7,100,1.053684e-10,0.04998228,0.700547
9,9,0.179919,14,98,0.0006163092,0.001730106,0.719794
3,3,0.259794,10,100,8.863369e-05,0.0003268571,0.734573
7,7,0.110012,5,100,4.462066e-05,4.834013e-06,0.750065
5,5,0.283044,12,82,0.00278818,1.143694e-07,0.761554
