In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import xgboost as xgb
from math import ceil
from math import sqrt
from math import acos
from operator import itemgetter
from itertools import product
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import gc
import seaborn as sns
from scipy import stats
from collections import defaultdict
from tqdm import tqdm
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe , space_eval
import time

# Training new models on chymotrypsin-immunopeptides

In [2]:
def load_data(vector_filename, ion_type):
    # Read file
    if vector_filename.split(".")[-1] == "pkl":
        vectors = pd.read_pickle(vector_filename)
    elif vector_filename.split(".")[-1] == "h5":
        # vectors = pd.read_hdf(vector_filename, key='table', stop=1000)
        vectors = pd.read_hdf(vector_filename, key="table")
    else:
        print("Unsuported feature vector format")
        exit(1)

    # Extract targets for given ion type
    target_names = list(vectors.columns[vectors.columns.str.contains("targets")])
    if not "targets_{}".format(ion_type) in target_names:
        print("Targets for {} could not be found in vector file.".format(ion_type))
        print("Vector file only contains these targets: {}".format(target_names))
        exit(1)

    targets = vectors.pop("targets_{}".format(ion_type))
    target_names.remove("targets_{}".format(ion_type))
    for n in target_names:
        vectors.pop(n)

    # Get psmids
    psmids = vectors.pop("psmid")

    return (vectors, targets, psmids)

In [3]:
def get_params_combinations(params):
    keys, values = zip(*params.items())
    combinations = [dict(zip(keys, v)) for v in product(*values)]
    return(combinations)

In [4]:
def get_best_params(df, params_grid):
    params = {}
    best = df[df['test-rmse-mean'] == df['test-rmse-mean'].min()]
    best_rmse = df['test-rmse-mean'].min()
    for p in params_grid.keys():
        params[p] = best[p].iloc[0]
    # num_boost_round = best['boosting-round'].iloc[0]
    return(params, best_rmse)

In [5]:
def gridsearch(xtrain, params, params_grid):
    cols = ['boosting-round', 'test-rmse-mean', 'test-rmse-std', 'train-rmse-mean', 'train-rmse-std']
    cols.extend(sorted(params_grid.keys()))
    result = pd.DataFrame(columns=cols)

    count = 1
    combinations = get_params_combinations(params_grid)

    for param_overrides in combinations:
        print("Working on combination {}/{}".format(count, len(combinations)))
        count += 1
        params.update(param_overrides)
        tmp = xgb.cv(params, xtrain, nfold=5, num_boost_round=200, early_stopping_rounds=10, verbose_eval=10)
        tmp['boosting-round'] = tmp.index
        for param in param_overrides.keys():
            tmp[param] = param_overrides[param]
        result = result.append(tmp)

    print("Grid search ready!\n")

    return(result)

In [6]:
def ms2pip_pearson(true, pred):
    """
    Return pearson of tic-normalized, log-transformed intensities, 
    the MS2PIP way.
    """
    #tic_norm = lambda x: x / np.sum(x)
    # log_transform = lambda x: np.log2(x + 0.001)
    corr = pearsonr(
        true, 
        pred
    )[0]
    return (corr)

In [7]:
def spectral_angle(true, pred, epsilon=1e-7):
    """
    Return square root normalized spectral angle.
    See https://doi.org/10.1074/mcp.O113.036475
    """
    
    de_log = lambda x: (2**x)-0.001
    l2_normalize = lambda x: x / sqrt(max(sum(x**2), epsilon))
    
    pred_norm = l2_normalize(de_log(pred))
    true_norm = l2_normalize(de_log(true))
    
    spectral_angle = 1 - (2 * acos(np.dot(pred_norm, true_norm)) / np.pi)

    return (spectral_angle)

# New scoring method

In [8]:
class Scorer:
    def __init__(self,psmids):
        self.psmids = psmids
        
    def psm_score(self,targets, predictions):
        tmp = pd.DataFrame(columns=["psmids", "targets", "predictions"])
        tmp["psmids"] = np.array(self.psmids)
        tmp["targets"] = np.array(targets)
        tmp["predictions"] = np.array(predictions)
        tmp2 = tmp.groupby("psmids").agg({'predictions': list, 'targets': list}).reset_index()
        spectral_corr = []
        pearson_corr = []
        for spectra in range(0, len(tmp2["psmids"])):
            spectral_corr.append(spectral_angle(np.array(tmp2.targets.loc[spectra]), np.array(tmp2.predictions.loc[spectra])))
            pearson_corr.append(ms2pip_pearson(np.array(tmp2.targets.loc[spectra]), np.array(tmp2.predictions.loc[spectra])))
        return (pearson_corr, spectral_corr) 

# Hyperparameter optimization B ions

In [9]:
vectors, targets, psmids = load_data("data/immunopeptide_chymotrypsin.pkl", "B")

In [33]:
len(psmids.unique())

520579

In [13]:
upeps = psmids.unique()
np.random.shuffle(upeps)
test_psms = upeps[:int(len(upeps) * 0.1)]

train_vectors = vectors[~psmids.isin(test_psms)]
train_targets = targets[~psmids.isin(test_psms)]
train_psmids = psmids[~psmids.isin(test_psms)]

test_vectors = vectors[psmids.isin(test_psms)]
test_targets = targets[psmids.isin(test_psms)]
test_psmids = psmids[psmids.isin(test_psms)]

In [14]:
pd.Series(test_psmids.unique()).str.contains('PXD010154').sum()

5960

In [15]:
pd.Series(train_psmids.unique()).str.contains('PXD010154').sum()

54428

In [16]:
xtrain = xgb.DMatrix(train_vectors, label=train_targets)
xtest = xgb.DMatrix(test_vectors, label=test_targets)
evallist = [(xtrain, 'train'),(xtest, 'test')]

In [19]:
space= {
    'eta': hp.loguniform('eta', np.log(0.01), np.log(1)),
    'max_depth': hp.quniform('max_depth', 3, 18, 1),
    'max_leaves': hp.quniform('max_leaves', 5, 500, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
    'gamma' : hp.uniform ('gamma', 0.0,1),
    'min_child_weight' : hp.quniform('min_child_weight', 0, 500, 1),
    'subsample': hp.quniform('subsample', 0.5, 1 ,0.1),
    'reg_alpha': hp.quniform('reg_alpha', 0, 5 ,0.1)
}

In [22]:
def objective(space):
    params = {
    "nthread": 64,
    "objective": "reg:squarederror",
    "eval_metric": 'rmse',
    "eta": space["eta"],
    "max_depth": int(space['max_depth']),
    "grow_policy":"lossguide",
    "max_leaves":int(space["max_leaves"]),
    "min_child_weight": int(space["min_child_weight"]),
    "gamma": space['gamma'],
    "reg_lambda" : space['reg_lambda'],
    "colsample_bytree": space['colsample_bytree'],
    "subsample" : space["subsample"],
    "reg_alpha" : space["reg_alpha"]
    }
    print(params)
    tmp = xgb.cv(params, xtrain, nfold=4, num_boost_round=400, early_stopping_rounds=10, verbose_eval=True)
    rmse = tmp['test-rmse-mean'].min()

    print ("rmse:", rmse)
    return {'loss': rmse, 'status': STATUS_OK, "params": params}

In [23]:
trials = Trials()
best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 25,
                        trials = trials)


best_hyperparams

{'nthread': 64, 'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'eta': 0.19930579190931164, 'max_depth': 11, 'grow_policy': 'lossguide', 'max_leaves': 475, 'min_child_weight': 305, 'gamma': 0.572100811979309, 'reg_lambda': 0.9647796716443167, 'colsample_bytree': 0.6310208275906879, 'subsample': 0.6000000000000001, 'reg_alpha': 0.2}
[0]	train-rmse:7.59217+0.00037	test-rmse:7.59221+0.00090

[1]	train-rmse:6.11557+0.00161	test-rmse:6.11578+0.00197

[2]	train-rmse:4.93899+0.00195	test-rmse:4.93933+0.00205

[3]	train-rmse:4.00398+0.00179	test-rmse:4.00418+0.00176

[4]	train-rmse:3.26443+0.00143	test-rmse:3.26486+0.00158

[5]	train-rmse:2.68346+0.00128	test-rmse:2.68409+0.00145

[6]	train-rmse:2.23058+0.00159	test-rmse:2.23146+0.00177

[7]	train-rmse:1.87950+0.00179	test-rmse:1.88067+0.00208

[8]	train-rmse:1.61204+0.00198	test-rmse:1.61361+0.00242

[9]	train-rmse:1.41136+0.00184	test-rmse:1.41333+0.00251

[10]	train-rmse:1.26542+0.00206	test-rmse:1.26778+0.00274

[11]	train-rmse:1.1

[268]	train-rmse:0.75649+0.00119	test-rmse:0.79532+0.00019

[269]	train-rmse:0.75630+0.00118	test-rmse:0.79525+0.00021

[270]	train-rmse:0.75613+0.00123	test-rmse:0.79518+0.00022

[271]	train-rmse:0.75594+0.00122	test-rmse:0.79510+0.00023

[272]	train-rmse:0.75580+0.00120	test-rmse:0.79504+0.00022

[273]	train-rmse:0.75564+0.00125	test-rmse:0.79498+0.00022

[274]	train-rmse:0.75552+0.00123	test-rmse:0.79493+0.00021

[275]	train-rmse:0.75528+0.00123	test-rmse:0.79481+0.00025

[276]	train-rmse:0.75512+0.00120	test-rmse:0.79474+0.00024

[277]	train-rmse:0.75497+0.00122	test-rmse:0.79468+0.00025

[278]	train-rmse:0.75473+0.00116	test-rmse:0.79458+0.00021

[279]	train-rmse:0.75455+0.00117	test-rmse:0.79450+0.00019

[280]	train-rmse:0.75433+0.00116	test-rmse:0.79440+0.00016

[281]	train-rmse:0.75420+0.00117	test-rmse:0.79436+0.00016

[282]	train-rmse:0.75399+0.00121	test-rmse:0.79427+0.00021

[283]	train-rmse:0.75383+0.00118	test-rmse:0.79420+0.00022

[284]	train-rmse:0.75362+0.00117	test-rm

[0]	train-rmse:9.27642+0.00032	test-rmse:9.27685+0.00106                       

[1]	train-rmse:9.11491+0.00034	test-rmse:9.11532+0.00105                       

[2]	train-rmse:8.95691+0.00029	test-rmse:8.95699+0.00105                       

[3]	train-rmse:8.80080+0.00035	test-rmse:8.80114+0.00096                       

[4]	train-rmse:8.64833+0.00030	test-rmse:8.64838+0.00100                       

[5]	train-rmse:8.49788+0.00029	test-rmse:8.49817+0.00097                       

[6]	train-rmse:8.35089+0.00031	test-rmse:8.35073+0.00095                       

[7]	train-rmse:8.20597+0.00029	test-rmse:8.20596+0.00094                       

[8]	train-rmse:8.06341+0.00031	test-rmse:8.06359+0.00098                       

[9]	train-rmse:7.92361+0.00027	test-rmse:7.92381+0.00095                       

[10]	train-rmse:7.78636+0.00029	test-rmse:7.78658+0.00095                      

[11]	train-rmse:7.65151+0.00024	test-rmse:7.65171+0.00090                      

[12]	train-rmse:7.51914+0.00

[202]	train-rmse:0.86156+0.00044	test-rmse:0.87556+0.00060                     

[203]	train-rmse:0.85978+0.00041	test-rmse:0.87387+0.00062                     

[204]	train-rmse:0.85801+0.00042	test-rmse:0.87219+0.00061                     

[205]	train-rmse:0.85633+0.00041	test-rmse:0.87060+0.00062                     

[206]	train-rmse:0.85467+0.00040	test-rmse:0.86903+0.00064                     

[207]	train-rmse:0.85302+0.00038	test-rmse:0.86748+0.00067                     

[208]	train-rmse:0.85140+0.00041	test-rmse:0.86596+0.00062                     

[209]	train-rmse:0.84982+0.00042	test-rmse:0.86448+0.00062                     

[210]	train-rmse:0.84830+0.00045	test-rmse:0.86305+0.00059                     

[211]	train-rmse:0.84686+0.00045	test-rmse:0.86169+0.00060                     

[212]	train-rmse:0.84543+0.00043	test-rmse:0.86037+0.00063                     

[213]	train-rmse:0.84400+0.00039	test-rmse:0.85903+0.00067                     

[214]	train-rmse:0.84264+0.0

[0]	train-rmse:3.22583+0.00026	test-rmse:3.22605+0.00051                       

[1]	train-rmse:1.52134+0.00843	test-rmse:1.52171+0.00898                       

[2]	train-rmse:1.18222+0.00483	test-rmse:1.18275+0.00505                       

[3]	train-rmse:1.11637+0.00648	test-rmse:1.11695+0.00676                       

[4]	train-rmse:1.09414+0.00473	test-rmse:1.09479+0.00486                       

[5]	train-rmse:1.07578+0.00530	test-rmse:1.07647+0.00505                       

[6]	train-rmse:1.05787+0.00468	test-rmse:1.05862+0.00495                       

[7]	train-rmse:1.04562+0.00359	test-rmse:1.04649+0.00365                       

[8]	train-rmse:1.03590+0.00389	test-rmse:1.03689+0.00448                       

[9]	train-rmse:1.02607+0.00171	test-rmse:1.02714+0.00296                       

[10]	train-rmse:1.01909+0.00195	test-rmse:1.02020+0.00298                      

[11]	train-rmse:1.01189+0.00359	test-rmse:1.01306+0.00323                      

[12]	train-rmse:1.00619+0.00

[202]	train-rmse:0.85411+0.00093	test-rmse:0.87061+0.00093                     

[203]	train-rmse:0.85383+0.00090	test-rmse:0.87040+0.00096                     

[204]	train-rmse:0.85359+0.00075	test-rmse:0.87023+0.00085                     

[205]	train-rmse:0.85338+0.00080	test-rmse:0.87008+0.00084                     

[206]	train-rmse:0.85309+0.00067	test-rmse:0.86986+0.00077                     

[207]	train-rmse:0.85279+0.00075	test-rmse:0.86965+0.00070                     

[208]	train-rmse:0.85248+0.00072	test-rmse:0.86942+0.00076                     

[209]	train-rmse:0.85224+0.00080	test-rmse:0.86927+0.00077                     

[210]	train-rmse:0.85203+0.00081	test-rmse:0.86914+0.00075                     

[211]	train-rmse:0.85180+0.00079	test-rmse:0.86899+0.00077                     

[212]	train-rmse:0.85159+0.00075	test-rmse:0.86885+0.00069                     

[213]	train-rmse:0.85135+0.00087	test-rmse:0.86869+0.00066                     

[214]	train-rmse:0.85108+0.0

[0]	train-rmse:8.35520+0.00031	test-rmse:8.35555+0.00090                       

[1]	train-rmse:7.39685+0.00027	test-rmse:7.39694+0.00081                       

[2]	train-rmse:6.55181+0.00021	test-rmse:6.55183+0.00079                       

[3]	train-rmse:5.80704+0.00023	test-rmse:5.80726+0.00070                       

[4]	train-rmse:5.15144+0.00030	test-rmse:5.15162+0.00055                       

[5]	train-rmse:4.57431+0.00045	test-rmse:4.57471+0.00037                       

[6]	train-rmse:4.06700+0.00049	test-rmse:4.06748+0.00037                       

[7]	train-rmse:3.62122+0.00055	test-rmse:3.62196+0.00040                       

[8]	train-rmse:3.23027+0.00054	test-rmse:3.23119+0.00030                       

[9]	train-rmse:2.88817+0.00049	test-rmse:2.88922+0.00031                       

[10]	train-rmse:2.58902+0.00069	test-rmse:2.59036+0.00045                      

[11]	train-rmse:2.32837+0.00035	test-rmse:2.32998+0.00029                      

[12]	train-rmse:2.10197+0.00

[202]	train-rmse:0.73655+0.00052	test-rmse:0.77948+0.00048                     

[203]	train-rmse:0.73632+0.00053	test-rmse:0.77938+0.00046                     

[204]	train-rmse:0.73608+0.00056	test-rmse:0.77927+0.00045                     

[205]	train-rmse:0.73576+0.00047	test-rmse:0.77909+0.00042                     

[206]	train-rmse:0.73547+0.00051	test-rmse:0.77896+0.00044                     

[207]	train-rmse:0.73531+0.00049	test-rmse:0.77889+0.00043                     

[208]	train-rmse:0.73510+0.00051	test-rmse:0.77878+0.00044                     

[209]	train-rmse:0.73488+0.00048	test-rmse:0.77868+0.00043                     

[210]	train-rmse:0.73465+0.00052	test-rmse:0.77858+0.00040                     

[211]	train-rmse:0.73436+0.00056	test-rmse:0.77845+0.00040                     

[212]	train-rmse:0.73406+0.00055	test-rmse:0.77832+0.00040                     

[213]	train-rmse:0.73375+0.00058	test-rmse:0.77816+0.00039                     

[214]	train-rmse:0.73358+0.0

[0]	train-rmse:3.64074+0.00024	test-rmse:3.64097+0.00020                               

[1]	train-rmse:1.66849+0.00070	test-rmse:1.66933+0.00126                               

[2]	train-rmse:1.12609+0.00324	test-rmse:1.12797+0.00312                               

[3]	train-rmse:1.00788+0.00264	test-rmse:1.01045+0.00176                               

[4]	train-rmse:0.97776+0.00294	test-rmse:0.98097+0.00172                               

[5]	train-rmse:0.95912+0.00256	test-rmse:0.96300+0.00226                               

[6]	train-rmse:0.94535+0.00121	test-rmse:0.94997+0.00190                               

[7]	train-rmse:0.93430+0.00041	test-rmse:0.93959+0.00133                               

[8]	train-rmse:0.92646+0.00065	test-rmse:0.93236+0.00055                               

[9]	train-rmse:0.91886+0.00165	test-rmse:0.92540+0.00094                               

[10]	train-rmse:0.91241+0.00046	test-rmse:0.91967+0.00060                              

[11]	train-rmse:0.906

[184]	train-rmse:0.76888+0.00079	test-rmse:0.83062+0.00079                             

[185]	train-rmse:0.76863+0.00079	test-rmse:0.83056+0.00078                             

[186]	train-rmse:0.76838+0.00079	test-rmse:0.83047+0.00079                             

[187]	train-rmse:0.76807+0.00071	test-rmse:0.83040+0.00078                             

[188]	train-rmse:0.76769+0.00074	test-rmse:0.83031+0.00079                             

[189]	train-rmse:0.76737+0.00062	test-rmse:0.83020+0.00079                             

[190]	train-rmse:0.76696+0.00075	test-rmse:0.83004+0.00066                             

[191]	train-rmse:0.76666+0.00081	test-rmse:0.82994+0.00068                             

[192]	train-rmse:0.76647+0.00083	test-rmse:0.82992+0.00069                             

[193]	train-rmse:0.76610+0.00080	test-rmse:0.82985+0.00067                             

[194]	train-rmse:0.76578+0.00084	test-rmse:0.82975+0.00065                             

[195]	train-rmse:0.76

[366]	train-rmse:0.72306+0.00112	test-rmse:0.82174+0.00053                              

[367]	train-rmse:0.72284+0.00121	test-rmse:0.82170+0.00050                              

[368]	train-rmse:0.72260+0.00122	test-rmse:0.82167+0.00047                              

[369]	train-rmse:0.72240+0.00129	test-rmse:0.82164+0.00045                              

[370]	train-rmse:0.72220+0.00130	test-rmse:0.82163+0.00045                              

[371]	train-rmse:0.72196+0.00129	test-rmse:0.82160+0.00045                              

[372]	train-rmse:0.72175+0.00133	test-rmse:0.82156+0.00043                              

[373]	train-rmse:0.72154+0.00137	test-rmse:0.82154+0.00041                              

[374]	train-rmse:0.72134+0.00136	test-rmse:0.82153+0.00040                              

[375]	train-rmse:0.72109+0.00133	test-rmse:0.82151+0.00040                              

[376]	train-rmse:0.72077+0.00134	test-rmse:0.82146+0.00036                              

[377]	trai

[142]	train-rmse:0.79357+0.00049	test-rmse:0.81699+0.00071                              

[143]	train-rmse:0.79293+0.00053	test-rmse:0.81650+0.00063                              

[144]	train-rmse:0.79229+0.00063	test-rmse:0.81602+0.00053                              

[145]	train-rmse:0.79156+0.00052	test-rmse:0.81545+0.00061                              

[146]	train-rmse:0.79107+0.00052	test-rmse:0.81511+0.00062                              

[147]	train-rmse:0.79037+0.00038	test-rmse:0.81456+0.00075                              

[148]	train-rmse:0.78966+0.00045	test-rmse:0.81401+0.00073                              

[149]	train-rmse:0.78906+0.00055	test-rmse:0.81357+0.00069                              

[150]	train-rmse:0.78846+0.00060	test-rmse:0.81313+0.00080                              

[151]	train-rmse:0.78789+0.00065	test-rmse:0.81269+0.00082                              

[152]	train-rmse:0.78730+0.00075	test-rmse:0.81225+0.00085                              

[153]	trai

[324]	train-rmse:0.73452+0.00017	test-rmse:0.77864+0.00088                              

[325]	train-rmse:0.73432+0.00018	test-rmse:0.77853+0.00086                              

[326]	train-rmse:0.73411+0.00016	test-rmse:0.77843+0.00085                              

[327]	train-rmse:0.73393+0.00017	test-rmse:0.77833+0.00088                              

[328]	train-rmse:0.73376+0.00016	test-rmse:0.77824+0.00089                              

[329]	train-rmse:0.73362+0.00016	test-rmse:0.77816+0.00089                              

[330]	train-rmse:0.73348+0.00014	test-rmse:0.77809+0.00089                              

[331]	train-rmse:0.73337+0.00012	test-rmse:0.77803+0.00088                              

[332]	train-rmse:0.73318+0.00018	test-rmse:0.77793+0.00088                              

[333]	train-rmse:0.73298+0.00017	test-rmse:0.77784+0.00087                              

[334]	train-rmse:0.73282+0.00022	test-rmse:0.77774+0.00087                              

[335]	trai

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[70]	train-rmse:0.85029+0.00116	test-rmse:0.85856+0.00138                               

[71]	train-rmse:0.84908+0.00118	test-rmse:0.85747+0.00125                               

[72]	train-rmse:0.84790+0.00114	test-rmse:0.85639+0.00122                               

[73]	train-rmse:0.84684+0.00106	test-rmse:0.85542+0.00103                               

[74]	train-rmse:0.84585+0.00106	test-rmse:0.85453+0.00105                               

[75]	train-rmse:0.84479+0.00105	test-rmse:0.85359+0.00090                               

[76]	train-rmse:0.84373+0.00084	test-rmse:0.85265+0.00092                               

[77]	train-rmse:0.84265+0.00092	test-rmse:0.85168+0.00097                               

[78]	train-rmse:0.84157+0.00088	test-rmse:0.85072+0.00080                               

[79]	train-rmse:0.84074+0.00092	test-rmse:0.84999+0.00080                               

[80]	train-rmse:0.83971+0.00090	test-rmse:0.84908+0.00065                               

[81]	train

[252]	train-rmse:0.77901+0.00016	test-rmse:0.80146+0.00068                              

[253]	train-rmse:0.77883+0.00018	test-rmse:0.80135+0.00067                              

[254]	train-rmse:0.77864+0.00014	test-rmse:0.80123+0.00067                              

[255]	train-rmse:0.77850+0.00019	test-rmse:0.80114+0.00069                              

[256]	train-rmse:0.77831+0.00018	test-rmse:0.80101+0.00072                              

[257]	train-rmse:0.77811+0.00020	test-rmse:0.80087+0.00072                              

[258]	train-rmse:0.77790+0.00022	test-rmse:0.80074+0.00071                              

[259]	train-rmse:0.77770+0.00022	test-rmse:0.80061+0.00068                              

[260]	train-rmse:0.77753+0.00022	test-rmse:0.80050+0.00067                              

[261]	train-rmse:0.77740+0.00021	test-rmse:0.80042+0.00066                              

[262]	train-rmse:0.77726+0.00022	test-rmse:0.80033+0.00061                              

[263]	trai

[27]	train-rmse:1.15000+0.00042	test-rmse:1.15683+0.00026                                

[28]	train-rmse:1.10610+0.00039	test-rmse:1.11342+0.00047                                

[29]	train-rmse:1.06767+0.00062	test-rmse:1.07545+0.00040                                

[30]	train-rmse:1.03430+0.00055	test-rmse:1.04254+0.00033                                

[31]	train-rmse:1.00498+0.00062	test-rmse:1.01366+0.00014                                

[32]	train-rmse:0.97948+0.00051	test-rmse:0.98859+0.00025                                

[33]	train-rmse:0.95751+0.00053	test-rmse:0.96703+0.00026                                

[34]	train-rmse:0.93831+0.00064	test-rmse:0.94826+0.00015                                

[35]	train-rmse:0.92180+0.00058	test-rmse:0.93213+0.00020                                

[36]	train-rmse:0.90751+0.00080	test-rmse:0.91821+0.00028                                

[37]	train-rmse:0.89503+0.00069	test-rmse:0.90610+0.00052                                


[207]	train-rmse:0.73365+0.00062	test-rmse:0.77800+0.00067                               

[208]	train-rmse:0.73350+0.00063	test-rmse:0.77793+0.00066                               

[209]	train-rmse:0.73326+0.00071	test-rmse:0.77781+0.00069                               

[210]	train-rmse:0.73303+0.00073	test-rmse:0.77771+0.00069                               

[211]	train-rmse:0.73281+0.00070	test-rmse:0.77762+0.00069                               

[212]	train-rmse:0.73257+0.00074	test-rmse:0.77750+0.00065                               

[213]	train-rmse:0.73237+0.00071	test-rmse:0.77740+0.00064                               

[214]	train-rmse:0.73210+0.00068	test-rmse:0.77728+0.00064                               

[215]	train-rmse:0.73183+0.00069	test-rmse:0.77718+0.00064                               

[216]	train-rmse:0.73166+0.00070	test-rmse:0.77710+0.00062                               

[217]	train-rmse:0.73140+0.00071	test-rmse:0.77699+0.00063                               


[387]	train-rmse:0.69915+0.00087	test-rmse:0.76488+0.00040                               

[388]	train-rmse:0.69902+0.00084	test-rmse:0.76484+0.00041                               

[389]	train-rmse:0.69882+0.00083	test-rmse:0.76478+0.00042                               

[390]	train-rmse:0.69871+0.00082	test-rmse:0.76474+0.00042                               

[391]	train-rmse:0.69855+0.00084	test-rmse:0.76469+0.00040                               

[392]	train-rmse:0.69839+0.00076	test-rmse:0.76464+0.00042                               

[393]	train-rmse:0.69822+0.00076	test-rmse:0.76458+0.00044                               

[394]	train-rmse:0.69807+0.00074	test-rmse:0.76453+0.00043                               

[395]	train-rmse:0.69792+0.00071	test-rmse:0.76449+0.00044                               

[396]	train-rmse:0.69776+0.00070	test-rmse:0.76445+0.00044                               

[397]	train-rmse:0.69760+0.00073	test-rmse:0.76440+0.00043                               


[161]	train-rmse:0.96000+0.00097	test-rmse:0.96113+0.00184                               

[162]	train-rmse:0.95963+0.00094	test-rmse:0.96076+0.00181                               

[163]	train-rmse:0.95927+0.00098	test-rmse:0.96040+0.00183                               

[164]	train-rmse:0.95885+0.00078	test-rmse:0.95999+0.00163                               

[165]	train-rmse:0.95845+0.00077	test-rmse:0.95960+0.00162                               

[166]	train-rmse:0.95805+0.00069	test-rmse:0.95921+0.00151                               

[167]	train-rmse:0.95769+0.00055	test-rmse:0.95885+0.00129                               

[168]	train-rmse:0.95736+0.00065	test-rmse:0.95853+0.00135                               

[169]	train-rmse:0.95699+0.00046	test-rmse:0.95816+0.00112                               

[170]	train-rmse:0.95650+0.00034	test-rmse:0.95769+0.00110                               

[171]	train-rmse:0.95620+0.00040	test-rmse:0.95739+0.00118                               


[341]	train-rmse:0.91588+0.00043	test-rmse:0.91791+0.00085                               

[342]	train-rmse:0.91575+0.00041	test-rmse:0.91778+0.00085                               

[343]	train-rmse:0.91561+0.00037	test-rmse:0.91765+0.00086                               

[344]	train-rmse:0.91545+0.00039	test-rmse:0.91751+0.00082                               

[345]	train-rmse:0.91521+0.00031	test-rmse:0.91727+0.00083                               

[346]	train-rmse:0.91507+0.00030	test-rmse:0.91713+0.00084                               

[347]	train-rmse:0.91492+0.00033	test-rmse:0.91698+0.00079                               

[348]	train-rmse:0.91479+0.00032	test-rmse:0.91686+0.00082                               

[349]	train-rmse:0.91467+0.00036	test-rmse:0.91674+0.00078                               

[350]	train-rmse:0.91443+0.00014	test-rmse:0.91650+0.00078                               

[351]	train-rmse:0.91434+0.00016	test-rmse:0.91641+0.00077                               


[115]	train-rmse:0.86575+0.00207	test-rmse:0.88534+0.00246                               

[116]	train-rmse:0.86533+0.00212	test-rmse:0.88504+0.00250                               

[117]	train-rmse:0.86496+0.00206	test-rmse:0.88482+0.00249                               

[118]	train-rmse:0.86460+0.00207	test-rmse:0.88459+0.00253                               

[119]	train-rmse:0.86435+0.00203	test-rmse:0.88444+0.00251                               

[120]	train-rmse:0.86402+0.00212	test-rmse:0.88426+0.00256                               

[121]	train-rmse:0.86376+0.00212	test-rmse:0.88414+0.00256                               

[122]	train-rmse:0.86332+0.00200	test-rmse:0.88385+0.00242                               

[123]	train-rmse:0.86276+0.00213	test-rmse:0.88343+0.00255                               

[124]	train-rmse:0.86239+0.00221	test-rmse:0.88317+0.00263                               

[125]	train-rmse:0.86195+0.00223	test-rmse:0.88284+0.00267                               


[295]	train-rmse:0.81935+0.00141	test-rmse:0.86095+0.00188                               

[296]	train-rmse:0.81916+0.00145	test-rmse:0.86088+0.00189                               

[297]	train-rmse:0.81902+0.00148	test-rmse:0.86083+0.00189                               

[298]	train-rmse:0.81881+0.00145	test-rmse:0.86074+0.00185                               

[299]	train-rmse:0.81860+0.00155	test-rmse:0.86063+0.00193                               

[300]	train-rmse:0.81841+0.00156	test-rmse:0.86059+0.00193                               

[301]	train-rmse:0.81823+0.00149	test-rmse:0.86050+0.00187                               

[302]	train-rmse:0.81811+0.00150	test-rmse:0.86047+0.00188                               

[303]	train-rmse:0.81797+0.00150	test-rmse:0.86044+0.00187                               

[304]	train-rmse:0.81781+0.00147	test-rmse:0.86040+0.00186                               

[305]	train-rmse:0.81763+0.00149	test-rmse:0.86037+0.00186                               


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[263]	train-rmse:0.93202+0.00020	test-rmse:0.93371+0.00124                               

[264]	train-rmse:0.93167+0.00019	test-rmse:0.93336+0.00126                               

[265]	train-rmse:0.93138+0.00023	test-rmse:0.93308+0.00128                               

[266]	train-rmse:0.93104+0.00027	test-rmse:0.93274+0.00132                               

[267]	train-rmse:0.93073+0.00033	test-rmse:0.93243+0.00133                               

[268]	train-rmse:0.93040+0.00032	test-rmse:0.93212+0.00137                               

[269]	train-rmse:0.93008+0.00030	test-rmse:0.93180+0.00134                               

[270]	train-rmse:0.92974+0.00023	test-rmse:0.93146+0.00130                               

[271]	train-rmse:0.92945+0.00026	test-rmse:0.93118+0.00128                               

[272]	train-rmse:0.92910+0.00031	test-rmse:0.93083+0.00134                               

[273]	train-rmse:0.92884+0.00034	test-rmse:0.93057+0.00137                               


[37]	train-rmse:1.83967+0.00193	test-rmse:1.83977+0.00181                                

[38]	train-rmse:1.78713+0.00198	test-rmse:1.78724+0.00185                                

[39]	train-rmse:1.73803+0.00203	test-rmse:1.73815+0.00202                                

[40]	train-rmse:1.69199+0.00181	test-rmse:1.69212+0.00173                                

[41]	train-rmse:1.64939+0.00189	test-rmse:1.64952+0.00176                                

[42]	train-rmse:1.60961+0.00204	test-rmse:1.60974+0.00194                                

[43]	train-rmse:1.57265+0.00196	test-rmse:1.57278+0.00182                                

[44]	train-rmse:1.53845+0.00202	test-rmse:1.53859+0.00181                                

[45]	train-rmse:1.50660+0.00203	test-rmse:1.50674+0.00179                                

[46]	train-rmse:1.47662+0.00180	test-rmse:1.47676+0.00153                                

[47]	train-rmse:1.44895+0.00183	test-rmse:1.44911+0.00146                                


[217]	train-rmse:1.01343+0.00130	test-rmse:1.01398+0.00189                               

[218]	train-rmse:1.01303+0.00126	test-rmse:1.01358+0.00188                               

[219]	train-rmse:1.01261+0.00126	test-rmse:1.01316+0.00187                               

[220]	train-rmse:1.01225+0.00124	test-rmse:1.01280+0.00185                               

[221]	train-rmse:1.01187+0.00117	test-rmse:1.01242+0.00181                               

[222]	train-rmse:1.01146+0.00127	test-rmse:1.01201+0.00186                               

[223]	train-rmse:1.01115+0.00128	test-rmse:1.01171+0.00191                               

[224]	train-rmse:1.01082+0.00126	test-rmse:1.01138+0.00186                               

[225]	train-rmse:1.01052+0.00124	test-rmse:1.01109+0.00181                               

[226]	train-rmse:1.01013+0.00125	test-rmse:1.01069+0.00183                               

[227]	train-rmse:1.00974+0.00124	test-rmse:1.01031+0.00185                               


[397]	train-rmse:0.96928+0.00142	test-rmse:0.97017+0.00150                               

[398]	train-rmse:0.96912+0.00147	test-rmse:0.97001+0.00153                               

[399]	train-rmse:0.96886+0.00147	test-rmse:0.96975+0.00149                               

rmse:                                                                                    
0.9697517500000001                                                                       
{'nthread': 64, 'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'eta': 0.04152319023244187, 'max_depth': 9, 'grow_policy': 'lossguide', 'max_leaves': 217, 'min_child_weight': 162, 'gamma': 0.5760494431042892, 'reg_lambda': 0.4915385382933124, 'colsample_bytree': 0.5260724877176615, 'subsample': 0.6000000000000001, 'reg_alpha': 4.1000000000000005}
[0]	train-rmse:9.05549+0.00026	test-rmse:9.05542+0.00100                                 

[1]	train-rmse:8.68660+0.00045	test-rmse:8.68666+0.00139                                 

[2]	trai

[171]	train-rmse:0.88902+0.00098	test-rmse:0.89291+0.00153                               

[172]	train-rmse:0.88856+0.00094	test-rmse:0.89247+0.00154                               

[173]	train-rmse:0.88797+0.00099	test-rmse:0.89190+0.00141                               

[174]	train-rmse:0.88749+0.00103	test-rmse:0.89145+0.00142                               

[175]	train-rmse:0.88694+0.00086	test-rmse:0.89093+0.00133                               

[176]	train-rmse:0.88643+0.00091	test-rmse:0.89044+0.00137                               

[177]	train-rmse:0.88597+0.00091	test-rmse:0.89001+0.00136                               

[178]	train-rmse:0.88549+0.00079	test-rmse:0.88955+0.00133                               

[179]	train-rmse:0.88503+0.00079	test-rmse:0.88911+0.00140                               

[180]	train-rmse:0.88464+0.00080	test-rmse:0.88875+0.00143                               

[181]	train-rmse:0.88420+0.00069	test-rmse:0.88832+0.00136                               


[351]	train-rmse:0.84059+0.00047	test-rmse:0.84808+0.00120                               

[352]	train-rmse:0.84039+0.00048	test-rmse:0.84791+0.00122                               

[353]	train-rmse:0.84023+0.00049	test-rmse:0.84777+0.00123                               

[354]	train-rmse:0.84008+0.00045	test-rmse:0.84763+0.00123                               

[355]	train-rmse:0.83993+0.00040	test-rmse:0.84750+0.00120                               

[356]	train-rmse:0.83974+0.00046	test-rmse:0.84733+0.00124                               

[357]	train-rmse:0.83961+0.00043	test-rmse:0.84722+0.00121                               

[358]	train-rmse:0.83945+0.00045	test-rmse:0.84707+0.00124                               

[359]	train-rmse:0.83931+0.00044	test-rmse:0.84696+0.00122                               

[360]	train-rmse:0.83918+0.00048	test-rmse:0.84684+0.00124                               

[361]	train-rmse:0.83905+0.00044	test-rmse:0.84672+0.00122                               


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[112]	train-rmse:0.77244+0.00039	test-rmse:0.79895+0.00071                    

[113]	train-rmse:0.77183+0.00038	test-rmse:0.79856+0.00079                    

[114]	train-rmse:0.77122+0.00039	test-rmse:0.79815+0.00080                    

[115]	train-rmse:0.77065+0.00023	test-rmse:0.79776+0.00079                    

[116]	train-rmse:0.77008+0.00025	test-rmse:0.79738+0.00080                    

[117]	train-rmse:0.76953+0.00024	test-rmse:0.79703+0.00075                    

[118]	train-rmse:0.76904+0.00020	test-rmse:0.79672+0.00075                    

[119]	train-rmse:0.76853+0.00026	test-rmse:0.79639+0.00069                    

[120]	train-rmse:0.76795+0.00028	test-rmse:0.79602+0.00065                    

[121]	train-rmse:0.76736+0.00031	test-rmse:0.79563+0.00064                    

[122]	train-rmse:0.76678+0.00036	test-rmse:0.79526+0.00059                    

[123]	train-rmse:0.76632+0.00038	test-rmse:0.79499+0.00056                    

[124]	train-rmse:0.76576+0.00037	test-rm

[316]	train-rmse:0.71201+0.00037	test-rmse:0.76675+0.00067                    

[317]	train-rmse:0.71179+0.00036	test-rmse:0.76667+0.00068                    

[318]	train-rmse:0.71159+0.00040	test-rmse:0.76660+0.00070                    

[319]	train-rmse:0.71141+0.00036	test-rmse:0.76653+0.00071                    

[320]	train-rmse:0.71120+0.00034	test-rmse:0.76645+0.00071                    

[321]	train-rmse:0.71096+0.00037	test-rmse:0.76636+0.00070                    

[322]	train-rmse:0.71080+0.00032	test-rmse:0.76630+0.00072                    

[323]	train-rmse:0.71061+0.00036	test-rmse:0.76623+0.00073                    

[324]	train-rmse:0.71045+0.00035	test-rmse:0.76616+0.00074                    

[325]	train-rmse:0.71028+0.00037	test-rmse:0.76609+0.00075                    

[326]	train-rmse:0.71010+0.00041	test-rmse:0.76603+0.00075                    

[327]	train-rmse:0.70993+0.00039	test-rmse:0.76596+0.00074                    

[328]	train-rmse:0.70975+0.00039	test-rm

[113]	train-rmse:0.70967+0.00046	test-rmse:0.78899+0.00056                    

[114]	train-rmse:0.70899+0.00039	test-rmse:0.78884+0.00057                    

[115]	train-rmse:0.70833+0.00039	test-rmse:0.78870+0.00058                    

[116]	train-rmse:0.70785+0.00040	test-rmse:0.78858+0.00054                    

[117]	train-rmse:0.70725+0.00036	test-rmse:0.78849+0.00051                    

[118]	train-rmse:0.70670+0.00023	test-rmse:0.78839+0.00051                    

[119]	train-rmse:0.70618+0.00020	test-rmse:0.78830+0.00052                    

[120]	train-rmse:0.70558+0.00041	test-rmse:0.78814+0.00054                    

[121]	train-rmse:0.70499+0.00065	test-rmse:0.78803+0.00056                    

[122]	train-rmse:0.70461+0.00076	test-rmse:0.78796+0.00057                    

[123]	train-rmse:0.70401+0.00077	test-rmse:0.78784+0.00054                    

[124]	train-rmse:0.70353+0.00084	test-rmse:0.78777+0.00054                    

[125]	train-rmse:0.70291+0.00071	test-rm

[317]	train-rmse:0.61742+0.00059	test-rmse:0.77916+0.00061                    

[318]	train-rmse:0.61705+0.00057	test-rmse:0.77915+0.00060                    

[319]	train-rmse:0.61655+0.00050	test-rmse:0.77914+0.00060                    

[320]	train-rmse:0.61621+0.00058	test-rmse:0.77914+0.00059                    

[321]	train-rmse:0.61589+0.00065	test-rmse:0.77914+0.00059                    

[322]	train-rmse:0.61552+0.00062	test-rmse:0.77912+0.00057                    

[323]	train-rmse:0.61511+0.00065	test-rmse:0.77910+0.00056                    

[324]	train-rmse:0.61473+0.00065	test-rmse:0.77909+0.00056                    

[325]	train-rmse:0.61432+0.00073	test-rmse:0.77908+0.00056                    

[326]	train-rmse:0.61399+0.00077	test-rmse:0.77907+0.00055                    

[327]	train-rmse:0.61361+0.00076	test-rmse:0.77907+0.00056                    

[328]	train-rmse:0.61332+0.00081	test-rmse:0.77907+0.00055                    

[329]	train-rmse:0.61306+0.00087	test-rm

[139]	train-rmse:0.77910+0.00034	test-rmse:0.80228+0.00063                     

[140]	train-rmse:0.77875+0.00038	test-rmse:0.80205+0.00060                     

[141]	train-rmse:0.77836+0.00038	test-rmse:0.80177+0.00059                     

[142]	train-rmse:0.77800+0.00035	test-rmse:0.80152+0.00058                     

[143]	train-rmse:0.77762+0.00037	test-rmse:0.80126+0.00059                     

[144]	train-rmse:0.77722+0.00029	test-rmse:0.80099+0.00062                     

[145]	train-rmse:0.77683+0.00036	test-rmse:0.80072+0.00060                     

[146]	train-rmse:0.77649+0.00038	test-rmse:0.80049+0.00061                     

[147]	train-rmse:0.77618+0.00036	test-rmse:0.80030+0.00062                     

[148]	train-rmse:0.77577+0.00035	test-rmse:0.80001+0.00061                     

[149]	train-rmse:0.77541+0.00037	test-rmse:0.79977+0.00063                     

[150]	train-rmse:0.77500+0.00031	test-rmse:0.79950+0.00066                     

[151]	train-rmse:0.77463+0.0

[341]	train-rmse:0.73371+0.00021	test-rmse:0.77529+0.00087                     

[342]	train-rmse:0.73352+0.00023	test-rmse:0.77518+0.00083                     

[343]	train-rmse:0.73337+0.00020	test-rmse:0.77510+0.00085                     

[344]	train-rmse:0.73327+0.00021	test-rmse:0.77506+0.00084                     

[345]	train-rmse:0.73315+0.00021	test-rmse:0.77500+0.00083                     

[346]	train-rmse:0.73301+0.00022	test-rmse:0.77494+0.00084                     

[347]	train-rmse:0.73286+0.00023	test-rmse:0.77487+0.00081                     

[348]	train-rmse:0.73277+0.00023	test-rmse:0.77483+0.00083                     

[349]	train-rmse:0.73259+0.00023	test-rmse:0.77474+0.00084                     

[350]	train-rmse:0.73242+0.00027	test-rmse:0.77466+0.00087                     

[351]	train-rmse:0.73231+0.00025	test-rmse:0.77461+0.00088                     

[352]	train-rmse:0.73217+0.00023	test-rmse:0.77455+0.00086                     

[353]	train-rmse:0.73203+0.0

{'colsample_bytree': 0.891381182690278,
 'eta': 0.08060612330262913,
 'gamma': 0.031142279181653326,
 'max_depth': 18.0,
 'max_leaves': 117.0,
 'min_child_weight': 500.0,
 'reg_alpha': 3.4000000000000004,
 'reg_lambda': 0.27245538266226343,
 'subsample': 0.7000000000000001}

In [24]:
hyperoptimization_results = {'test-rmse-mean': [x['loss'] for x in trials.results]}
for key in trials.results[0]["params"].keys():
    hyperoptimization_results[key] = [x['params'][key] for x in trials.results]

In [25]:
df = pd.DataFrame(hyperoptimization_results)

In [26]:
df.head()

Unnamed: 0,test-rmse-mean,nthread,objective,eval_metric,eta,max_depth,grow_policy,max_leaves,min_child_weight,gamma,reg_lambda,colsample_bytree,subsample,reg_alpha
0,0.787612,64,reg:squarederror,rmse,0.199306,11,lossguide,475,305,0.572101,0.96478,0.631021,0.6,0.2
1,0.792407,64,reg:squarederror,rmse,0.017624,16,lossguide,427,230,0.595856,0.941849,0.675908,0.6,4.2
2,0.853171,64,reg:squarederror,rmse,0.683561,7,lossguide,488,386,0.757127,0.400145,0.731999,0.7,4.2
3,0.764726,64,reg:squarederror,rmse,0.116659,13,lossguide,280,321,0.852516,0.581348,0.936509,0.8,4.3
4,0.820941,64,reg:squarederror,rmse,0.630738,10,lossguide,171,324,0.762107,0.657339,0.964968,1.0,0.4


In [27]:
df.to_csv("data/immunopeptide_chymotrypsin_resultsB.csv")

In [28]:
params = {
    "nthread": 24,
    "objective": "reg:squarederror",
    #"objective": "binary:logistic",
    #"eval_metric": 'mae',
    "eval_metric": 'rmse',
    "verbosity": 1,
    "eta": 0.5,
    "max_depth": 9,
    "grow_policy":"lossguide",
    "max_leaves":100,
    "min_child_weight": 300,
    "gamma": 0.1,
    "subsample": 1,
    "lambda" : 0,
    "colsample_bytree": 1,
    "max_delta_step": 0,
}

In [29]:
params.update(best_hyperparams)

In [30]:
params["max_depth"] = int(params["max_depth"])
params["max_leaves"] = int(params["max_leaves"])
params["min_child_weight"] = int(params["min_child_weight"])
params

{'nthread': 24,
 'objective': 'reg:squarederror',
 'eval_metric': 'rmse',
 'verbosity': 1,
 'eta': 0.08060612330262913,
 'max_depth': 18,
 'grow_policy': 'lossguide',
 'max_leaves': 117,
 'min_child_weight': 500,
 'gamma': 0.031142279181653326,
 'subsample': 0.7000000000000001,
 'lambda': 0,
 'colsample_bytree': 0.891381182690278,
 'max_delta_step': 0,
 'reg_alpha': 3.4000000000000004,
 'reg_lambda': 0.27245538266226343}

In [31]:
bst = xgb.train(params, xtrain, 400, evallist, maximize=False, early_stopping_rounds=10)

[0]	train-rmse:8.69017	test-rmse:8.68801
[1]	train-rmse:7.99649	test-rmse:7.99719
[2]	train-rmse:7.36195	test-rmse:7.36278
[3]	train-rmse:6.78067	test-rmse:6.78068
[4]	train-rmse:6.24647	test-rmse:6.24583
[5]	train-rmse:5.75466	test-rmse:5.75498
[6]	train-rmse:5.30516	test-rmse:5.30451
[7]	train-rmse:4.89038	test-rmse:4.89110
[8]	train-rmse:4.51221	test-rmse:4.51234
[9]	train-rmse:4.16482	test-rmse:4.16514
[10]	train-rmse:3.84615	test-rmse:3.84695
[11]	train-rmse:3.55449	test-rmse:3.55548
[12]	train-rmse:3.28767	test-rmse:3.28884
[13]	train-rmse:3.04355	test-rmse:3.04501
[14]	train-rmse:2.82079	test-rmse:2.82216
[15]	train-rmse:2.61730	test-rmse:2.61883
[16]	train-rmse:2.43133	test-rmse:2.43352
[17]	train-rmse:2.26237	test-rmse:2.26468
[18]	train-rmse:2.10852	test-rmse:2.11132
[19]	train-rmse:1.96884	test-rmse:1.97188
[20]	train-rmse:1.84169	test-rmse:1.84516
[21]	train-rmse:1.72719	test-rmse:1.73104
[22]	train-rmse:1.62340	test-rmse:1.62768
[23]	train-rmse:1.53018	test-rmse:1.53502
[2

[194]	train-rmse:0.72337	test-rmse:0.77058
[195]	train-rmse:0.72306	test-rmse:0.77045
[196]	train-rmse:0.72290	test-rmse:0.77036
[197]	train-rmse:0.72269	test-rmse:0.77026
[198]	train-rmse:0.72252	test-rmse:0.77020
[199]	train-rmse:0.72220	test-rmse:0.77006
[200]	train-rmse:0.72210	test-rmse:0.77001
[201]	train-rmse:0.72183	test-rmse:0.76984
[202]	train-rmse:0.72147	test-rmse:0.76965
[203]	train-rmse:0.72112	test-rmse:0.76950
[204]	train-rmse:0.72084	test-rmse:0.76940
[205]	train-rmse:0.72053	test-rmse:0.76926
[206]	train-rmse:0.72032	test-rmse:0.76915
[207]	train-rmse:0.72007	test-rmse:0.76903
[208]	train-rmse:0.71974	test-rmse:0.76890
[209]	train-rmse:0.71937	test-rmse:0.76876
[210]	train-rmse:0.71912	test-rmse:0.76868
[211]	train-rmse:0.71886	test-rmse:0.76860
[212]	train-rmse:0.71860	test-rmse:0.76850
[213]	train-rmse:0.71838	test-rmse:0.76841
[214]	train-rmse:0.71828	test-rmse:0.76837
[215]	train-rmse:0.71800	test-rmse:0.76827
[216]	train-rmse:0.71765	test-rmse:0.76812
[217]	train

[385]	train-rmse:0.68457	test-rmse:0.75731
[386]	train-rmse:0.68440	test-rmse:0.75726
[387]	train-rmse:0.68418	test-rmse:0.75721
[388]	train-rmse:0.68402	test-rmse:0.75716
[389]	train-rmse:0.68387	test-rmse:0.75714
[390]	train-rmse:0.68366	test-rmse:0.75708
[391]	train-rmse:0.68350	test-rmse:0.75703
[392]	train-rmse:0.68341	test-rmse:0.75701
[393]	train-rmse:0.68327	test-rmse:0.75695
[394]	train-rmse:0.68320	test-rmse:0.75692
[395]	train-rmse:0.68310	test-rmse:0.75690
[396]	train-rmse:0.68304	test-rmse:0.75688
[397]	train-rmse:0.68278	test-rmse:0.75685
[398]	train-rmse:0.68265	test-rmse:0.75682
[399]	train-rmse:0.68250	test-rmse:0.75675


In [32]:
bst.save_model("Models/Immunopeptide_chymotrypsin_HCDb.xgboost")

# Fast b-ion model

In [34]:
space= {
    'eta': hp.loguniform('eta', np.log(0.01), np.log(1)),
    'max_depth': hp.quniform('max_depth', 2, 7, 1),
    'max_leaves': hp.quniform('max_leaves', 5, 500, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
    'gamma' : hp.uniform ('gamma', 0.0,1),
    'min_child_weight' : hp.quniform('min_child_weight', 0, 500, 1),
    'subsample': hp.quniform('subsample', 0.5, 1 ,0.1),
    'reg_alpha': hp.quniform('reg_alpha', 0, 5 ,0.1),
    'grow_policy': hp.choice('grow_policy', ("lossguide", "depthwise"))
}

In [35]:
def objective(space):
    params = {
    "nthread": 64,
    "objective": "reg:squarederror",
    "eval_metric": 'rmse',
    "eta": space["eta"],
    "max_depth": int(space['max_depth']),
    "grow_policy":space['grow_policy'],
    "max_leaves":int(space["max_leaves"]),
    "min_child_weight": int(space["min_child_weight"]),
    "gamma": space['gamma'],
    "reg_lambda" : space['reg_lambda'],
    "colsample_bytree": space['colsample_bytree'],
    "subsample" : space["subsample"],
    "reg_alpha" : space["reg_alpha"]
    }
    print(params)
    tmp = xgb.cv(params, xtrain, nfold=4, num_boost_round=400, early_stopping_rounds=10, verbose_eval=True)
    rmse = tmp['test-rmse-mean'].min()

    print ("rmse:", rmse)
    return {'loss': rmse, 'status': STATUS_OK, "params": params}

In [36]:
trials = Trials()
best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 30,
                        trials = trials)


best_hyperparams

{'nthread': 64, 'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'eta': 0.016602908138118818, 'max_depth': 7, 'grow_policy': 'depthwise', 'max_leaves': 405, 'min_child_weight': 164, 'gamma': 0.050256953380391156, 'reg_lambda': 0.6812023260652089, 'colsample_bytree': 0.7988295968988938, 'subsample': 0.6000000000000001, 'reg_alpha': 3.9000000000000004}
[0]	train-rmse:9.28739+0.00035	test-rmse:9.28683+0.00106

[1]	train-rmse:9.13587+0.00033	test-rmse:9.13587+0.00106

[2]	train-rmse:8.98634+0.00034	test-rmse:8.98692+0.00104

[3]	train-rmse:8.84084+0.00032	test-rmse:8.84059+0.00102

[4]	train-rmse:8.69662+0.00025	test-rmse:8.69656+0.00106

[5]	train-rmse:8.55494+0.00023	test-rmse:8.55528+0.00105

[6]	train-rmse:8.41622+0.00027	test-rmse:8.41614+0.00105

[7]	train-rmse:8.27959+0.00024	test-rmse:8.27958+0.00103

[8]	train-rmse:8.14482+0.00023	test-rmse:8.14501+0.00100

[9]	train-rmse:8.01316+0.00025	test-rmse:8.01289+0.00098

[10]	train-rmse:7.88292+0.00029	test-rmse:7.88304+0.00089

[

[268]	train-rmse:1.00403+0.00016	test-rmse:1.00492+0.00105

[269]	train-rmse:1.00346+0.00014	test-rmse:1.00436+0.00105

[270]	train-rmse:1.00286+0.00013	test-rmse:1.00377+0.00103

[271]	train-rmse:1.00230+0.00018	test-rmse:1.00320+0.00108

[272]	train-rmse:1.00172+0.00017	test-rmse:1.00263+0.00106

[273]	train-rmse:1.00124+0.00020	test-rmse:1.00215+0.00109

[274]	train-rmse:1.00067+0.00017	test-rmse:1.00158+0.00112

[275]	train-rmse:1.00013+0.00011	test-rmse:1.00105+0.00116

[276]	train-rmse:0.99961+0.00019	test-rmse:1.00053+0.00120

[277]	train-rmse:0.99910+0.00013	test-rmse:1.00003+0.00112

[278]	train-rmse:0.99857+0.00016	test-rmse:0.99950+0.00116

[279]	train-rmse:0.99808+0.00011	test-rmse:0.99901+0.00111

[280]	train-rmse:0.99761+0.00013	test-rmse:0.99854+0.00116

[281]	train-rmse:0.99707+0.00009	test-rmse:0.99801+0.00112

[282]	train-rmse:0.99648+0.00011	test-rmse:0.99742+0.00112

[283]	train-rmse:0.99600+0.00013	test-rmse:0.99695+0.00110

[284]	train-rmse:0.99556+0.00017	test-rm

[0]	train-rmse:7.52804+0.00031	test-rmse:7.52777+0.00091                       

[1]	train-rmse:6.02016+0.00332	test-rmse:6.02031+0.00395                       

[2]	train-rmse:4.83104+0.00318	test-rmse:4.83117+0.00344                       

[3]	train-rmse:3.89881+0.00301	test-rmse:3.89886+0.00307                       

[4]	train-rmse:3.17375+0.00226	test-rmse:3.17378+0.00256                       

[5]	train-rmse:2.61359+0.00305	test-rmse:2.61361+0.00321                       

[6]	train-rmse:2.18362+0.00477	test-rmse:2.18372+0.00492                       

[7]	train-rmse:1.85994+0.00461	test-rmse:1.86006+0.00483                       

[8]	train-rmse:1.61991+0.00522	test-rmse:1.62011+0.00543                       

[9]	train-rmse:1.44396+0.00591	test-rmse:1.44420+0.00620                       

[10]	train-rmse:1.31853+0.00457	test-rmse:1.31881+0.00493                      

[11]	train-rmse:1.23144+0.00354	test-rmse:1.23176+0.00411                      

[12]	train-rmse:1.17019+0.00

[202]	train-rmse:0.85991+0.00071	test-rmse:0.86624+0.00140                     

[203]	train-rmse:0.85976+0.00074	test-rmse:0.86611+0.00141                     

[204]	train-rmse:0.85952+0.00073	test-rmse:0.86589+0.00137                     

[205]	train-rmse:0.85923+0.00068	test-rmse:0.86563+0.00127                     

[206]	train-rmse:0.85891+0.00069	test-rmse:0.86533+0.00123                     

[207]	train-rmse:0.85868+0.00062	test-rmse:0.86513+0.00120                     

[208]	train-rmse:0.85841+0.00066	test-rmse:0.86489+0.00124                     

[209]	train-rmse:0.85819+0.00067	test-rmse:0.86470+0.00122                     

[210]	train-rmse:0.85805+0.00069	test-rmse:0.86457+0.00126                     

[211]	train-rmse:0.85786+0.00067	test-rmse:0.86440+0.00128                     

[212]	train-rmse:0.85770+0.00071	test-rmse:0.86427+0.00134                     

[213]	train-rmse:0.85751+0.00076	test-rmse:0.86410+0.00137                     

[214]	train-rmse:0.85734+0.0

[0]	train-rmse:8.51484+0.00037	test-rmse:8.51499+0.00103                      

[1]	train-rmse:7.68352+0.00081	test-rmse:7.68371+0.00122                      

[2]	train-rmse:6.93725+0.00068	test-rmse:6.93737+0.00095                      

[3]	train-rmse:6.26806+0.00115	test-rmse:6.26814+0.00061                      

[4]	train-rmse:5.66846+0.00097	test-rmse:5.66842+0.00106                      

[5]	train-rmse:5.13170+0.00122	test-rmse:5.13171+0.00141                      

[6]	train-rmse:4.65145+0.00088	test-rmse:4.65141+0.00100                      

[7]	train-rmse:4.22211+0.00079	test-rmse:4.22216+0.00111                      

[8]	train-rmse:3.83914+0.00084	test-rmse:3.83917+0.00077                      

[9]	train-rmse:3.49770+0.00100	test-rmse:3.49774+0.00088                      

[10]	train-rmse:3.19434+0.00103	test-rmse:3.19439+0.00144                     

[11]	train-rmse:2.92467+0.00080	test-rmse:2.92470+0.00122                     

[12]	train-rmse:2.68641+0.00083	test-rms

[204]	train-rmse:0.97037+0.00122	test-rmse:0.97126+0.00208                    

[205]	train-rmse:0.97015+0.00122	test-rmse:0.97105+0.00209                    

[206]	train-rmse:0.96983+0.00127	test-rmse:0.97073+0.00215                    

[207]	train-rmse:0.96949+0.00133	test-rmse:0.97040+0.00220                    

[208]	train-rmse:0.96921+0.00125	test-rmse:0.97011+0.00212                    

[209]	train-rmse:0.96893+0.00122	test-rmse:0.96984+0.00209                    

[210]	train-rmse:0.96861+0.00122	test-rmse:0.96954+0.00207                    

                                                                              

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[126]	train-rmse:1.05614+0.00073	test-rmse:1.05662+0.00088                    

[127]	train-rmse:1.05461+0.00080	test-rmse:1.05509+0.00096                    

[128]	train-rmse:1.05332+0.00085	test-rmse:1.05382+0.00110                    

[129]	train-rmse:1.05210+0.00069	test-rmse:1.05260+0.00104                    

[130]	train-rmse:1.05095+0.00042	test-rmse:1.05145+0.00102                    

[131]	train-rmse:1.04971+0.00030	test-rmse:1.05022+0.00105                    

[132]	train-rmse:1.04852+0.00028	test-rmse:1.04902+0.00087                    

[133]	train-rmse:1.04725+0.00032	test-rmse:1.04776+0.00087                     

[134]	train-rmse:1.04611+0.00022	test-rmse:1.04663+0.00101                     

[135]	train-rmse:1.04507+0.00032	test-rmse:1.04558+0.00110                     

[136]	train-rmse:1.04396+0.00026	test-rmse:1.04447+0.00102                     

[137]	train-rmse:1.04289+0.00040	test-rmse:1.04340+0.00092                     

[138]	train-rmse:1.04185+0.00055	te

[328]	train-rmse:0.95810+0.00061	test-rmse:0.95919+0.00102                     

[329]	train-rmse:0.95787+0.00066	test-rmse:0.95896+0.00104                     

[330]	train-rmse:0.95761+0.00063	test-rmse:0.95870+0.00107                     

[331]	train-rmse:0.95743+0.00063	test-rmse:0.95852+0.00106                     

[332]	train-rmse:0.95724+0.00063	test-rmse:0.95834+0.00107                     

[333]	train-rmse:0.95704+0.00063	test-rmse:0.95813+0.00104                     

[334]	train-rmse:0.95681+0.00064	test-rmse:0.95792+0.00097                     

[335]	train-rmse:0.95657+0.00059	test-rmse:0.95767+0.00096                     

[336]	train-rmse:0.95634+0.00052	test-rmse:0.95745+0.00099                     

[337]	train-rmse:0.95612+0.00050	test-rmse:0.95723+0.00099                     

[338]	train-rmse:0.95594+0.00050	test-rmse:0.95706+0.00095                     

[339]	train-rmse:0.95572+0.00049	test-rmse:0.95684+0.00093                     

[340]	train-rmse:0.95551+0.0

[121]	train-rmse:0.99882+0.00216	test-rmse:0.99951+0.00126                      

[122]	train-rmse:0.99844+0.00227	test-rmse:0.99914+0.00137                      

[123]	train-rmse:0.99780+0.00238	test-rmse:0.99851+0.00150                      

[124]	train-rmse:0.99733+0.00221	test-rmse:0.99804+0.00134                      

[125]	train-rmse:0.99684+0.00214	test-rmse:0.99755+0.00135                      

[126]	train-rmse:0.99643+0.00198	test-rmse:0.99716+0.00124                      

[127]	train-rmse:0.99577+0.00173	test-rmse:0.99649+0.00104                      

[128]	train-rmse:0.99528+0.00182	test-rmse:0.99600+0.00107                      

[129]	train-rmse:0.99481+0.00193	test-rmse:0.99552+0.00119                      

[130]	train-rmse:0.99430+0.00194	test-rmse:0.99502+0.00116                      

[131]	train-rmse:0.99388+0.00208	test-rmse:0.99462+0.00135                      

[132]	train-rmse:0.99341+0.00208	test-rmse:0.99415+0.00133                      

[133]	train-rmse

[319]	train-rmse:0.93898+0.00207	test-rmse:0.94033+0.00208                      

[320]	train-rmse:0.93878+0.00191	test-rmse:0.94015+0.00195                      

[321]	train-rmse:0.93858+0.00186	test-rmse:0.93994+0.00191                      

[322]	train-rmse:0.93844+0.00184	test-rmse:0.93981+0.00190                      

[323]	train-rmse:0.93831+0.00172	test-rmse:0.93969+0.00180                      

[324]	train-rmse:0.93816+0.00172	test-rmse:0.93955+0.00181                      

[325]	train-rmse:0.93794+0.00165	test-rmse:0.93933+0.00182                      

[326]	train-rmse:0.93779+0.00164	test-rmse:0.93918+0.00183                      

[327]	train-rmse:0.93764+0.00164	test-rmse:0.93904+0.00186                      

[328]	train-rmse:0.93753+0.00160	test-rmse:0.93893+0.00182                      

[329]	train-rmse:0.93741+0.00161	test-rmse:0.93881+0.00182                      

[330]	train-rmse:0.93727+0.00160	test-rmse:0.93868+0.00175                      

[331]	train-rmse

[111]	train-rmse:1.46041+0.00048	test-rmse:1.46068+0.00025                      

[112]	train-rmse:1.44753+0.00047	test-rmse:1.44780+0.00027                      

[113]	train-rmse:1.43504+0.00051	test-rmse:1.43531+0.00026                      

[114]	train-rmse:1.42288+0.00047	test-rmse:1.42315+0.00031                      

[115]	train-rmse:1.41115+0.00051	test-rmse:1.41143+0.00032                      

[116]	train-rmse:1.39990+0.00053	test-rmse:1.40018+0.00035                      

[117]	train-rmse:1.38898+0.00055	test-rmse:1.38926+0.00026                      

[118]	train-rmse:1.37814+0.00051	test-rmse:1.37843+0.00031                      

[119]	train-rmse:1.36780+0.00048	test-rmse:1.36809+0.00034                      

[120]	train-rmse:1.35778+0.00052	test-rmse:1.35806+0.00033                      

[121]	train-rmse:1.34794+0.00052	test-rmse:1.34824+0.00042                      

[122]	train-rmse:1.33841+0.00051	test-rmse:1.33871+0.00040                      

[123]	train-rmse

[309]	train-rmse:1.00790+0.00079	test-rmse:1.00862+0.00037                      

[310]	train-rmse:1.00760+0.00077	test-rmse:1.00833+0.00038                      

[311]	train-rmse:1.00727+0.00075	test-rmse:1.00800+0.00043                      

[312]	train-rmse:1.00701+0.00074	test-rmse:1.00774+0.00043                      

[313]	train-rmse:1.00675+0.00071	test-rmse:1.00749+0.00046                      

[314]	train-rmse:1.00642+0.00070	test-rmse:1.00715+0.00047                      

[315]	train-rmse:1.00612+0.00067	test-rmse:1.00685+0.00051                      

[316]	train-rmse:1.00586+0.00066	test-rmse:1.00660+0.00052                      

[317]	train-rmse:1.00550+0.00062	test-rmse:1.00623+0.00056                      

[318]	train-rmse:1.00520+0.00062	test-rmse:1.00594+0.00054                      

[319]	train-rmse:1.00489+0.00065	test-rmse:1.00563+0.00051                      

[320]	train-rmse:1.00462+0.00064	test-rmse:1.00536+0.00052                      

[321]	train-rmse

[101]	train-rmse:1.08679+0.00106	test-rmse:1.08703+0.00204                      

[102]	train-rmse:1.08593+0.00099	test-rmse:1.08618+0.00201                      

[103]	train-rmse:1.08527+0.00097	test-rmse:1.08552+0.00199                      

[104]	train-rmse:1.08431+0.00099	test-rmse:1.08455+0.00200                      

[105]	train-rmse:1.08355+0.00106	test-rmse:1.08379+0.00209                      

[106]	train-rmse:1.08287+0.00104	test-rmse:1.08311+0.00206                      

[107]	train-rmse:1.08223+0.00106	test-rmse:1.08247+0.00205                      

[108]	train-rmse:1.08154+0.00120	test-rmse:1.08177+0.00214                      

[109]	train-rmse:1.08094+0.00125	test-rmse:1.08118+0.00219                      

[110]	train-rmse:1.08000+0.00140	test-rmse:1.08024+0.00231                      

[111]	train-rmse:1.07913+0.00133	test-rmse:1.07937+0.00224                      

[112]	train-rmse:1.07832+0.00114	test-rmse:1.07857+0.00203                      

[113]	train-rmse

[299]	train-rmse:1.00672+0.00100	test-rmse:1.00728+0.00172                      

[300]	train-rmse:1.00660+0.00101	test-rmse:1.00717+0.00172                      

[301]	train-rmse:1.00644+0.00107	test-rmse:1.00701+0.00175                      

[302]	train-rmse:1.00631+0.00102	test-rmse:1.00688+0.00171                      

[303]	train-rmse:1.00604+0.00091	test-rmse:1.00661+0.00156                      

[304]	train-rmse:1.00585+0.00085	test-rmse:1.00642+0.00147                      

[305]	train-rmse:1.00572+0.00083	test-rmse:1.00629+0.00145                      

[306]	train-rmse:1.00551+0.00096	test-rmse:1.00609+0.00153                      

[307]	train-rmse:1.00536+0.00097	test-rmse:1.00594+0.00157                      

[308]	train-rmse:1.00515+0.00090	test-rmse:1.00574+0.00146                      

[309]	train-rmse:1.00499+0.00099	test-rmse:1.00558+0.00155                      

[310]	train-rmse:1.00474+0.00105	test-rmse:1.00534+0.00164                      

[311]	train-rmse

[108]	train-rmse:0.94832+0.00393	test-rmse:0.95168+0.00330                      

[109]	train-rmse:0.94747+0.00357	test-rmse:0.95085+0.00292                      

[110]	train-rmse:0.94710+0.00348	test-rmse:0.95051+0.00280                      

[111]	train-rmse:0.94638+0.00350	test-rmse:0.94981+0.00289                      

[112]	train-rmse:0.94590+0.00350	test-rmse:0.94936+0.00291                      

[113]	train-rmse:0.94548+0.00349	test-rmse:0.94901+0.00288                      

[114]	train-rmse:0.94500+0.00322	test-rmse:0.94851+0.00258                      

[115]	train-rmse:0.94450+0.00332	test-rmse:0.94801+0.00270                      

[116]	train-rmse:0.94405+0.00324	test-rmse:0.94762+0.00262                      

[117]	train-rmse:0.94335+0.00309	test-rmse:0.94695+0.00252                      

[118]	train-rmse:0.94284+0.00315	test-rmse:0.94645+0.00252                      

[119]	train-rmse:0.94249+0.00317	test-rmse:0.94612+0.00253                      

[120]	train-rmse

[306]	train-rmse:0.89602+0.00165	test-rmse:0.90432+0.00112                      

[307]	train-rmse:0.89588+0.00163	test-rmse:0.90421+0.00113                      

[308]	train-rmse:0.89571+0.00159	test-rmse:0.90407+0.00109                      

[309]	train-rmse:0.89559+0.00160	test-rmse:0.90397+0.00108                      

[310]	train-rmse:0.89539+0.00148	test-rmse:0.90378+0.00091                      

[311]	train-rmse:0.89526+0.00147	test-rmse:0.90367+0.00089                      

[312]	train-rmse:0.89513+0.00146	test-rmse:0.90357+0.00091                      

[313]	train-rmse:0.89499+0.00145	test-rmse:0.90345+0.00092                      

[314]	train-rmse:0.89486+0.00140	test-rmse:0.90335+0.00087                      

[315]	train-rmse:0.89473+0.00139	test-rmse:0.90326+0.00087                      

[316]	train-rmse:0.89460+0.00136	test-rmse:0.90315+0.00086                      

[317]	train-rmse:0.89444+0.00138	test-rmse:0.90303+0.00089                      

[318]	train-rmse

[0]	train-rmse:6.40041+0.00048	test-rmse:6.40051+0.00094                        

[1]	train-rmse:4.40999+0.00256	test-rmse:4.40998+0.00269                        

[2]	train-rmse:3.13136+0.00279	test-rmse:3.13151+0.00299                        

[3]	train-rmse:2.32714+0.00840	test-rmse:2.32710+0.00844                        

[4]	train-rmse:1.85446+0.01048	test-rmse:1.85461+0.01063                        

[5]	train-rmse:1.59085+0.01013	test-rmse:1.59095+0.01037                        

[6]	train-rmse:1.44863+0.00979	test-rmse:1.44877+0.01007                        

[7]	train-rmse:1.37285+0.00720	test-rmse:1.37307+0.00756                        

[8]	train-rmse:1.33006+0.00924	test-rmse:1.33023+0.00967                        

[9]	train-rmse:1.30335+0.01109	test-rmse:1.30354+0.01153                        

[10]	train-rmse:1.28448+0.00783	test-rmse:1.28461+0.00846                       

[11]	train-rmse:1.26874+0.00917	test-rmse:1.26875+0.00975                       

[12]	train-rmse:

[198]	train-rmse:1.02498+0.00230	test-rmse:1.02559+0.00282                      

[199]	train-rmse:1.02468+0.00238	test-rmse:1.02528+0.00293                      

[200]	train-rmse:1.02445+0.00241	test-rmse:1.02505+0.00295                      

[201]	train-rmse:1.02421+0.00250	test-rmse:1.02480+0.00305                      

[202]	train-rmse:1.02374+0.00257	test-rmse:1.02433+0.00310                      

[203]	train-rmse:1.02353+0.00258	test-rmse:1.02413+0.00310                      

[204]	train-rmse:1.02334+0.00260	test-rmse:1.02394+0.00312                      

[205]	train-rmse:1.02308+0.00267	test-rmse:1.02368+0.00315                      

[206]	train-rmse:1.02284+0.00270	test-rmse:1.02344+0.00315                      

[207]	train-rmse:1.02246+0.00277	test-rmse:1.02306+0.00320                      

[208]	train-rmse:1.02195+0.00276	test-rmse:1.02256+0.00308                      

[209]	train-rmse:1.02164+0.00275	test-rmse:1.02224+0.00306                      

[210]	train-rmse

[396]	train-rmse:0.98254+0.00221	test-rmse:0.98340+0.00232                      

[397]	train-rmse:0.98236+0.00225	test-rmse:0.98322+0.00236                      

[398]	train-rmse:0.98219+0.00221	test-rmse:0.98304+0.00231                      

[399]	train-rmse:0.98204+0.00221	test-rmse:0.98289+0.00231                      

rmse:                                                                           
0.98289                                                                         
{'nthread': 64, 'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'eta': 0.11153923627739093, 'max_depth': 5, 'grow_policy': 'depthwise', 'max_leaves': 293, 'min_child_weight': 243, 'gamma': 0.7884619486632751, 'reg_lambda': 0.7194568187544329, 'colsample_bytree': 0.5925226883652641, 'subsample': 0.8, 'reg_alpha': 2.6}
[0]	train-rmse:8.41150+0.00042	test-rmse:8.41111+0.00101                        

[1]	train-rmse:7.49893+0.00068	test-rmse:7.49903+0.00133                        

[2]	train-rmse:6.69

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[229]	train-rmse:1.25961+0.00088	test-rmse:1.25966+0.00066                      

[230]	train-rmse:1.25889+0.00093	test-rmse:1.25894+0.00071                      

[231]	train-rmse:1.25814+0.00098	test-rmse:1.25819+0.00069                      

[232]	train-rmse:1.25730+0.00100	test-rmse:1.25735+0.00067                      

[233]	train-rmse:1.25642+0.00097	test-rmse:1.25648+0.00061                      

[234]	train-rmse:1.25575+0.00098	test-rmse:1.25580+0.00068                      

[235]	train-rmse:1.25502+0.00110	test-rmse:1.25507+0.00078                      

[236]	train-rmse:1.25431+0.00098	test-rmse:1.25436+0.00070                      

[237]	train-rmse:1.25366+0.00103	test-rmse:1.25371+0.00075                      

[238]	train-rmse:1.25300+0.00092	test-rmse:1.25305+0.00073                      

[239]	train-rmse:1.25226+0.00093	test-rmse:1.25231+0.00074                      

[240]	train-rmse:1.25164+0.00096	test-rmse:1.25168+0.00070                      

[241]	train-rmse

[22]	train-rmse:1.28832+0.00163	test-rmse:1.28861+0.00150                      

[23]	train-rmse:1.24916+0.00153	test-rmse:1.24945+0.00134                      

[24]	train-rmse:1.21679+0.00127	test-rmse:1.21710+0.00137                      

[25]	train-rmse:1.18932+0.00141	test-rmse:1.18966+0.00147                      

[26]	train-rmse:1.16641+0.00134	test-rmse:1.16677+0.00149                      

[27]	train-rmse:1.14742+0.00111	test-rmse:1.14780+0.00163                      

[28]	train-rmse:1.13183+0.00074	test-rmse:1.13223+0.00137                      

[29]	train-rmse:1.11818+0.00112	test-rmse:1.11859+0.00154                      

[30]	train-rmse:1.10688+0.00165	test-rmse:1.10731+0.00141                      

[31]	train-rmse:1.09707+0.00149	test-rmse:1.09751+0.00143                      

[32]	train-rmse:1.08897+0.00171	test-rmse:1.08946+0.00161                      

[33]	train-rmse:1.08171+0.00174	test-rmse:1.08219+0.00131                      

[34]	train-rmse:1.07486+0.00

[224]	train-rmse:0.91056+0.00051	test-rmse:0.91260+0.00096                     

[225]	train-rmse:0.91032+0.00052	test-rmse:0.91236+0.00105                     

[226]	train-rmse:0.91011+0.00047	test-rmse:0.91217+0.00106                     

[227]	train-rmse:0.90981+0.00044	test-rmse:0.91187+0.00108                     

[228]	train-rmse:0.90953+0.00044	test-rmse:0.91160+0.00093                     

[229]	train-rmse:0.90927+0.00046	test-rmse:0.91135+0.00091                     

[230]	train-rmse:0.90895+0.00047	test-rmse:0.91104+0.00093                     

[231]	train-rmse:0.90869+0.00050	test-rmse:0.91079+0.00090                     

[232]	train-rmse:0.90845+0.00055	test-rmse:0.91055+0.00096                     

[233]	train-rmse:0.90821+0.00056	test-rmse:0.91032+0.00097                     

[234]	train-rmse:0.90803+0.00053	test-rmse:0.91015+0.00096                     

[235]	train-rmse:0.90781+0.00055	test-rmse:0.90994+0.00100                     

[236]	train-rmse:0.90746+0.0

[19]	train-rmse:1.02491+0.00140	test-rmse:1.02568+0.00223                       

[20]	train-rmse:1.02103+0.00137	test-rmse:1.02181+0.00191                       

[21]	train-rmse:1.01694+0.00326	test-rmse:1.01778+0.00354                       

[22]	train-rmse:1.01295+0.00325	test-rmse:1.01382+0.00350                       

[23]	train-rmse:1.00991+0.00292	test-rmse:1.01083+0.00323                       

[24]	train-rmse:1.00542+0.00291	test-rmse:1.00637+0.00339                       

[25]	train-rmse:1.00255+0.00343	test-rmse:1.00350+0.00371                       

[26]	train-rmse:0.99959+0.00308	test-rmse:1.00055+0.00328                       

[27]	train-rmse:0.99540+0.00248	test-rmse:0.99639+0.00296                       

[28]	train-rmse:0.99327+0.00265	test-rmse:0.99430+0.00317                       

[29]	train-rmse:0.99116+0.00270	test-rmse:0.99219+0.00319                       

[30]	train-rmse:0.98843+0.00287	test-rmse:0.98948+0.00302                       

[31]	train-rmse:

[217]	train-rmse:0.87214+0.00133	test-rmse:0.87758+0.00076                      

[218]	train-rmse:0.87196+0.00134	test-rmse:0.87742+0.00078                      

[219]	train-rmse:0.87170+0.00139	test-rmse:0.87719+0.00083                      

[220]	train-rmse:0.87149+0.00138	test-rmse:0.87700+0.00084                      

[221]	train-rmse:0.87133+0.00136	test-rmse:0.87687+0.00080                      

[222]	train-rmse:0.87111+0.00136	test-rmse:0.87666+0.00084                      

[223]	train-rmse:0.87091+0.00131	test-rmse:0.87647+0.00081                      

[224]	train-rmse:0.87066+0.00137	test-rmse:0.87625+0.00088                      

[225]	train-rmse:0.87056+0.00139	test-rmse:0.87617+0.00092                      

[226]	train-rmse:0.87036+0.00146	test-rmse:0.87600+0.00102                      

[227]	train-rmse:0.87012+0.00147	test-rmse:0.87578+0.00102                      

[228]	train-rmse:0.86989+0.00152	test-rmse:0.87557+0.00106                      

[229]	train-rmse

[9]	train-rmse:1.03414+0.00639	test-rmse:1.03510+0.00677                       

[10]	train-rmse:1.02537+0.00576	test-rmse:1.02642+0.00621                      

[11]	train-rmse:1.01905+0.00500	test-rmse:1.02015+0.00558                      

[12]	train-rmse:1.01191+0.00418	test-rmse:1.01316+0.00438                      

[13]	train-rmse:1.00529+0.00328	test-rmse:1.00663+0.00333                      

[14]	train-rmse:0.99981+0.00364	test-rmse:1.00121+0.00333                      

[15]	train-rmse:0.99501+0.00333	test-rmse:0.99645+0.00309                      

[16]	train-rmse:0.99070+0.00357	test-rmse:0.99221+0.00316                      

[17]	train-rmse:0.98672+0.00370	test-rmse:0.98828+0.00344                      

[18]	train-rmse:0.98283+0.00300	test-rmse:0.98450+0.00287                      

[19]	train-rmse:0.97716+0.00202	test-rmse:0.97896+0.00297                      

[20]	train-rmse:0.97347+0.00235	test-rmse:0.97534+0.00334                      

[21]	train-rmse:0.97028+0.00

[211]	train-rmse:0.84412+0.00043	test-rmse:0.85780+0.00086                     

[212]	train-rmse:0.84393+0.00046	test-rmse:0.85768+0.00091                     

[213]	train-rmse:0.84369+0.00036	test-rmse:0.85749+0.00085                     

[214]	train-rmse:0.84345+0.00036	test-rmse:0.85729+0.00085                     

[215]	train-rmse:0.84327+0.00034	test-rmse:0.85717+0.00084                     

[216]	train-rmse:0.84304+0.00043	test-rmse:0.85700+0.00080                     

[217]	train-rmse:0.84272+0.00030	test-rmse:0.85675+0.00082                     

[218]	train-rmse:0.84258+0.00033	test-rmse:0.85664+0.00088                     

[219]	train-rmse:0.84239+0.00025	test-rmse:0.85650+0.00088                     

[220]	train-rmse:0.84221+0.00023	test-rmse:0.85637+0.00091                     

[221]	train-rmse:0.84195+0.00024	test-rmse:0.85616+0.00100                     

[222]	train-rmse:0.84170+0.00026	test-rmse:0.85597+0.00103                     

[223]	train-rmse:0.84147+0.0

[7]	train-rmse:1.05717+0.00535	test-rmse:1.05768+0.00620                      

[8]	train-rmse:1.04429+0.00625	test-rmse:1.04489+0.00692                      

[9]	train-rmse:1.03536+0.00645	test-rmse:1.03605+0.00729                      

[10]	train-rmse:1.02685+0.00554	test-rmse:1.02773+0.00649                     

[11]	train-rmse:1.01849+0.00344	test-rmse:1.01947+0.00448                     

[12]	train-rmse:1.01097+0.00210	test-rmse:1.01205+0.00314                     

[13]	train-rmse:1.00443+0.00133	test-rmse:1.00558+0.00193                     

[14]	train-rmse:0.99903+0.00102	test-rmse:1.00026+0.00168                     

[15]	train-rmse:0.99455+0.00177	test-rmse:0.99583+0.00264                     

[16]	train-rmse:0.98995+0.00155	test-rmse:0.99131+0.00252                     

[17]	train-rmse:0.98557+0.00179	test-rmse:0.98702+0.00262                     

[18]	train-rmse:0.98152+0.00170	test-rmse:0.98305+0.00270                     

[19]	train-rmse:0.97730+0.00269	test-rms

[211]	train-rmse:0.84509+0.00124	test-rmse:0.85842+0.00121                    

[212]	train-rmse:0.84481+0.00136	test-rmse:0.85820+0.00139                    

[213]	train-rmse:0.84459+0.00135	test-rmse:0.85803+0.00143                    

[214]	train-rmse:0.84436+0.00131	test-rmse:0.85783+0.00128                    

[215]	train-rmse:0.84412+0.00124	test-rmse:0.85765+0.00126                    

[216]	train-rmse:0.84384+0.00130	test-rmse:0.85741+0.00121                    

[217]	train-rmse:0.84368+0.00127	test-rmse:0.85729+0.00116                    

[218]	train-rmse:0.84346+0.00129	test-rmse:0.85713+0.00117                    

[219]	train-rmse:0.84327+0.00119	test-rmse:0.85699+0.00109                    

[220]	train-rmse:0.84305+0.00123	test-rmse:0.85682+0.00115                    

[221]	train-rmse:0.84288+0.00122	test-rmse:0.85668+0.00118                    

[222]	train-rmse:0.84271+0.00128	test-rmse:0.85657+0.00122                    

[223]	train-rmse:0.84251+0.00131	test-rm

[9]	train-rmse:1.03301+0.00557	test-rmse:1.03408+0.00618                      

[10]	train-rmse:1.02402+0.00481	test-rmse:1.02522+0.00551                     

[11]	train-rmse:1.01592+0.00396	test-rmse:1.01727+0.00468                     

[12]	train-rmse:1.00892+0.00405	test-rmse:1.01035+0.00454                     

[13]	train-rmse:1.00321+0.00292	test-rmse:1.00472+0.00363                     

[14]	train-rmse:0.99782+0.00251	test-rmse:0.99939+0.00330                     

[15]	train-rmse:0.99230+0.00314	test-rmse:0.99401+0.00414                     

[16]	train-rmse:0.98793+0.00349	test-rmse:0.98973+0.00434                     

[17]	train-rmse:0.98262+0.00270	test-rmse:0.98445+0.00358                     

[18]	train-rmse:0.97953+0.00264	test-rmse:0.98142+0.00348                     

[19]	train-rmse:0.97649+0.00256	test-rmse:0.97846+0.00351                     

[20]	train-rmse:0.97239+0.00137	test-rmse:0.97446+0.00241                     

[21]	train-rmse:0.96959+0.00208	test-rms

[213]	train-rmse:0.84578+0.00156	test-rmse:0.85993+0.00212                    

[214]	train-rmse:0.84566+0.00155	test-rmse:0.85986+0.00213                    

[215]	train-rmse:0.84546+0.00158	test-rmse:0.85970+0.00214                    

[216]	train-rmse:0.84529+0.00161	test-rmse:0.85958+0.00218                    

[217]	train-rmse:0.84507+0.00161	test-rmse:0.85944+0.00217                    

[218]	train-rmse:0.84486+0.00162	test-rmse:0.85927+0.00221                    

[219]	train-rmse:0.84462+0.00161	test-rmse:0.85909+0.00220                    

[220]	train-rmse:0.84442+0.00158	test-rmse:0.85895+0.00218                    

[221]	train-rmse:0.84398+0.00151	test-rmse:0.85856+0.00197                    

[222]	train-rmse:0.84369+0.00146	test-rmse:0.85832+0.00183                    

[223]	train-rmse:0.84348+0.00147	test-rmse:0.85817+0.00181                    

[224]	train-rmse:0.84326+0.00146	test-rmse:0.85801+0.00183                    

[225]	train-rmse:0.84306+0.00146	test-rm

[11]	train-rmse:1.01103+0.00481	test-rmse:1.01209+0.00393                     

[12]	train-rmse:1.00485+0.00444	test-rmse:1.00599+0.00377                     

[13]	train-rmse:0.99852+0.00434	test-rmse:0.99981+0.00390                     

[14]	train-rmse:0.99377+0.00373	test-rmse:0.99515+0.00341                     

[15]	train-rmse:0.98876+0.00270	test-rmse:0.99013+0.00228                     

[16]	train-rmse:0.98271+0.00234	test-rmse:0.98421+0.00239                     

[17]	train-rmse:0.97899+0.00197	test-rmse:0.98058+0.00189                     

[18]	train-rmse:0.97528+0.00210	test-rmse:0.97701+0.00158                     

[19]	train-rmse:0.97156+0.00220	test-rmse:0.97338+0.00160                     

[20]	train-rmse:0.96875+0.00195	test-rmse:0.97062+0.00127                     

[21]	train-rmse:0.96569+0.00194	test-rmse:0.96762+0.00116                     

[22]	train-rmse:0.96234+0.00230	test-rmse:0.96437+0.00146                     

[23]	train-rmse:0.95927+0.00254	test-rms

[215]	train-rmse:0.84176+0.00166	test-rmse:0.85512+0.00194                    

[216]	train-rmse:0.84160+0.00156	test-rmse:0.85500+0.00185                    

[217]	train-rmse:0.84136+0.00152	test-rmse:0.85481+0.00181                    

[218]	train-rmse:0.84110+0.00134	test-rmse:0.85459+0.00165                    

[219]	train-rmse:0.84085+0.00131	test-rmse:0.85440+0.00163                    

[220]	train-rmse:0.84064+0.00130	test-rmse:0.85425+0.00163                    

[221]	train-rmse:0.84048+0.00131	test-rmse:0.85414+0.00164                    

[222]	train-rmse:0.84026+0.00130	test-rmse:0.85396+0.00164                    

[223]	train-rmse:0.84003+0.00128	test-rmse:0.85378+0.00158                    

[224]	train-rmse:0.83984+0.00131	test-rmse:0.85365+0.00160                    

[225]	train-rmse:0.83961+0.00127	test-rmse:0.85345+0.00154                    

[226]	train-rmse:0.83944+0.00132	test-rmse:0.85332+0.00158                    

[227]	train-rmse:0.83932+0.00134	test-rm

[12]	train-rmse:1.00892+0.00672	test-rmse:1.01040+0.00799                       

[13]	train-rmse:1.00336+0.00629	test-rmse:1.00493+0.00756                       

[14]	train-rmse:0.99650+0.00582	test-rmse:0.99814+0.00709                       

[15]	train-rmse:0.99156+0.00500	test-rmse:0.99326+0.00623                       

[16]	train-rmse:0.98716+0.00482	test-rmse:0.98892+0.00607                       

[17]	train-rmse:0.98240+0.00460	test-rmse:0.98429+0.00581                       

[18]	train-rmse:0.97887+0.00405	test-rmse:0.98090+0.00515                       

[19]	train-rmse:0.97538+0.00328	test-rmse:0.97742+0.00427                       

[20]	train-rmse:0.97152+0.00359	test-rmse:0.97364+0.00460                       

[21]	train-rmse:0.96876+0.00350	test-rmse:0.97100+0.00441                       

[22]	train-rmse:0.96455+0.00364	test-rmse:0.96686+0.00441                       

[23]	train-rmse:0.96175+0.00247	test-rmse:0.96411+0.00332                       

[24]	train-rmse:

[210]	train-rmse:0.84710+0.00058	test-rmse:0.86148+0.00158                      

[211]	train-rmse:0.84685+0.00058	test-rmse:0.86129+0.00162                      

[212]	train-rmse:0.84653+0.00056	test-rmse:0.86103+0.00155                      

[213]	train-rmse:0.84632+0.00052	test-rmse:0.86086+0.00151                      

[214]	train-rmse:0.84614+0.00049	test-rmse:0.86074+0.00147                      

[215]	train-rmse:0.84597+0.00044	test-rmse:0.86063+0.00143                      

[216]	train-rmse:0.84584+0.00048	test-rmse:0.86055+0.00144                      

[217]	train-rmse:0.84567+0.00048	test-rmse:0.86046+0.00143                      

[218]	train-rmse:0.84552+0.00051	test-rmse:0.86036+0.00143                      

[219]	train-rmse:0.84535+0.00051	test-rmse:0.86024+0.00145                      

[220]	train-rmse:0.84518+0.00046	test-rmse:0.86013+0.00143                      

[221]	train-rmse:0.84494+0.00039	test-rmse:0.85994+0.00132                      

[222]	train-rmse

[2]	train-rmse:7.92572+0.00040	test-rmse:7.92541+0.00093                        

[3]	train-rmse:7.47816+0.00050	test-rmse:7.47824+0.00069                        

[4]	train-rmse:7.05789+0.00036	test-rmse:7.05764+0.00080                        

[5]	train-rmse:6.66202+0.00038	test-rmse:6.66219+0.00067                        

[6]	train-rmse:6.28992+0.00043	test-rmse:6.28999+0.00058                        

[7]	train-rmse:5.93995+0.00035	test-rmse:5.94000+0.00060                        

[8]	train-rmse:5.61080+0.00043	test-rmse:5.61085+0.00055                        

[9]	train-rmse:5.30131+0.00050	test-rmse:5.30142+0.00043                        

[10]	train-rmse:5.01032+0.00057	test-rmse:5.01048+0.00042                       

[11]	train-rmse:4.73716+0.00067	test-rmse:4.73714+0.00041                       

[12]	train-rmse:4.48053+0.00077	test-rmse:4.48062+0.00042                       

[13]	train-rmse:4.23940+0.00066	test-rmse:4.23947+0.00039                       

[14]	train-rmse:

[200]	train-rmse:0.91566+0.00010	test-rmse:0.91769+0.00103                      

[201]	train-rmse:0.91531+0.00012	test-rmse:0.91735+0.00092                      

[202]	train-rmse:0.91500+0.00022	test-rmse:0.91705+0.00092                      

[203]	train-rmse:0.91462+0.00024	test-rmse:0.91667+0.00095                      

[204]	train-rmse:0.91426+0.00021	test-rmse:0.91633+0.00099                      

[205]	train-rmse:0.91391+0.00023	test-rmse:0.91598+0.00095                      

[206]	train-rmse:0.91364+0.00026	test-rmse:0.91572+0.00092                      

[207]	train-rmse:0.91332+0.00022	test-rmse:0.91542+0.00084                      

                                                                                

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[326]	train-rmse:0.82408+0.00083	test-rmse:0.83926+0.00078                      

[327]	train-rmse:0.82397+0.00083	test-rmse:0.83919+0.00075                      

[328]	train-rmse:0.82383+0.00084	test-rmse:0.83910+0.00072                      

[329]	train-rmse:0.82369+0.00090	test-rmse:0.83900+0.00071                      

[330]	train-rmse:0.82356+0.00088	test-rmse:0.83891+0.00066                      

[331]	train-rmse:0.82345+0.00091	test-rmse:0.83883+0.00070                      

[332]	train-rmse:0.82327+0.00087	test-rmse:0.83871+0.00071                      

[333]	train-rmse:0.82315+0.00088	test-rmse:0.83862+0.00069                      

[334]	train-rmse:0.82301+0.00089	test-rmse:0.83852+0.00070                      

[335]	train-rmse:0.82289+0.00086	test-rmse:0.83845+0.00072                      

[336]	train-rmse:0.82277+0.00085	test-rmse:0.83837+0.00072                      

[337]	train-rmse:0.82264+0.00092	test-rmse:0.83826+0.00082                      

[338]	train-rmse

[118]	train-rmse:0.87076+0.00169	test-rmse:0.87720+0.00232                      

[119]	train-rmse:0.87025+0.00180	test-rmse:0.87675+0.00241                      

[120]	train-rmse:0.86971+0.00178	test-rmse:0.87625+0.00238                      

[121]	train-rmse:0.86917+0.00160	test-rmse:0.87577+0.00218                      

[122]	train-rmse:0.86853+0.00153	test-rmse:0.87518+0.00222                      

[123]	train-rmse:0.86816+0.00150	test-rmse:0.87487+0.00218                      

[124]	train-rmse:0.86761+0.00156	test-rmse:0.87438+0.00213                      

[125]	train-rmse:0.86733+0.00159	test-rmse:0.87414+0.00213                      

[126]	train-rmse:0.86703+0.00155	test-rmse:0.87388+0.00208                      

[127]	train-rmse:0.86671+0.00156	test-rmse:0.87360+0.00205                      

[128]	train-rmse:0.86628+0.00149	test-rmse:0.87325+0.00194                      

[129]	train-rmse:0.86590+0.00157	test-rmse:0.87290+0.00196                      

[130]	train-rmse

[316]	train-rmse:0.82446+0.00072	test-rmse:0.83888+0.00103                      

[317]	train-rmse:0.82434+0.00068	test-rmse:0.83879+0.00101                      

[318]	train-rmse:0.82419+0.00072	test-rmse:0.83867+0.00097                      

[319]	train-rmse:0.82407+0.00071	test-rmse:0.83860+0.00095                      

[320]	train-rmse:0.82394+0.00071	test-rmse:0.83851+0.00097                      

[321]	train-rmse:0.82378+0.00070	test-rmse:0.83837+0.00097                      

[322]	train-rmse:0.82367+0.00071	test-rmse:0.83830+0.00098                      

[323]	train-rmse:0.82351+0.00069	test-rmse:0.83818+0.00099                      

[324]	train-rmse:0.82338+0.00065	test-rmse:0.83808+0.00099                      

[325]	train-rmse:0.82323+0.00067	test-rmse:0.83798+0.00103                      

[326]	train-rmse:0.82309+0.00069	test-rmse:0.83789+0.00107                      

[327]	train-rmse:0.82298+0.00072	test-rmse:0.83781+0.00109                      

[328]	train-rmse

[108]	train-rmse:1.18401+0.00111	test-rmse:1.18413+0.00074                      

[109]	train-rmse:1.18341+0.00116	test-rmse:1.18352+0.00075                      

[110]	train-rmse:1.18277+0.00119	test-rmse:1.18290+0.00077                      

[111]	train-rmse:1.18216+0.00119	test-rmse:1.18228+0.00086                      

[112]	train-rmse:1.18141+0.00113	test-rmse:1.18153+0.00091                      

[113]	train-rmse:1.18083+0.00109	test-rmse:1.18095+0.00091                      

[114]	train-rmse:1.18012+0.00107	test-rmse:1.18024+0.00085                      

[115]	train-rmse:1.17959+0.00103	test-rmse:1.17972+0.00090                      

[116]	train-rmse:1.17902+0.00100	test-rmse:1.17913+0.00088                      

[117]	train-rmse:1.17850+0.00109	test-rmse:1.17861+0.00098                      

[118]	train-rmse:1.17769+0.00093	test-rmse:1.17780+0.00089                      

[119]	train-rmse:1.17711+0.00091	test-rmse:1.17722+0.00081                      

[120]	train-rmse

[306]	train-rmse:1.11643+0.00149	test-rmse:1.11662+0.00065                      

[307]	train-rmse:1.11629+0.00140	test-rmse:1.11647+0.00059                      

[308]	train-rmse:1.11609+0.00139	test-rmse:1.11628+0.00059                      

[309]	train-rmse:1.11593+0.00140	test-rmse:1.11611+0.00059                      

[310]	train-rmse:1.11580+0.00143	test-rmse:1.11599+0.00061                      

[311]	train-rmse:1.11556+0.00148	test-rmse:1.11575+0.00063                      

[312]	train-rmse:1.11539+0.00148	test-rmse:1.11558+0.00060                      

[313]	train-rmse:1.11508+0.00138	test-rmse:1.11528+0.00050                      

[314]	train-rmse:1.11487+0.00140	test-rmse:1.11506+0.00053                      

[315]	train-rmse:1.11464+0.00139	test-rmse:1.11484+0.00052                      

[316]	train-rmse:1.11443+0.00129	test-rmse:1.11463+0.00046                      

[317]	train-rmse:1.11422+0.00131	test-rmse:1.11442+0.00050                      

[318]	train-rmse

{'colsample_bytree': 0.7848151878076199,
 'eta': 0.3781092476928985,
 'gamma': 0.06996074946187703,
 'grow_policy': 0,
 'max_depth': 7.0,
 'max_leaves': 178.0,
 'min_child_weight': 455.0,
 'reg_alpha': 4.3,
 'reg_lambda': 0.8000693484694821,
 'subsample': 0.8}

In [37]:
hyperoptimization_results = {'test-rmse-mean': [x['loss'] for x in trials.results]}
for key in trials.results[0]["params"].keys():
    hyperoptimization_results[key] = [x['params'][key] for x in trials.results]

In [38]:
df = pd.DataFrame(hyperoptimization_results)

In [39]:
df

Unnamed: 0,test-rmse-mean,nthread,objective,eval_metric,eta,max_depth,grow_policy,max_leaves,min_child_weight,gamma,reg_lambda,colsample_bytree,subsample,reg_alpha
0,0.959987,64,reg:squarederror,rmse,0.016603,7,depthwise,405,164,0.050257,0.681202,0.79883,0.6,3.9
1,0.839292,64,reg:squarederror,rmse,0.206926,7,depthwise,464,126,0.311932,0.187296,0.5417,0.7,0.6
2,0.932049,64,reg:squarederror,rmse,0.100318,5,lossguide,37,116,0.007688,0.243689,0.605985,0.5,1.9
3,0.85946,64,reg:squarederror,rmse,0.252189,6,depthwise,71,122,0.62544,0.446438,0.659243,0.6,4.4
4,0.894177,64,reg:squarederror,rmse,0.904584,5,lossguide,32,447,0.546149,0.154155,0.896689,0.6,1.5
5,0.960251,64,reg:squarederror,rmse,0.156704,4,depthwise,134,321,0.452511,0.700861,0.659554,0.5,2.0
6,1.146492,64,reg:squarederror,rmse,0.028222,3,lossguide,479,351,0.719716,0.234996,0.963862,0.7,1.6
7,0.962821,64,reg:squarederror,rmse,0.899945,3,lossguide,125,31,0.498392,0.955533,0.667469,0.6,2.0
8,1.01013,64,reg:squarederror,rmse,0.068371,4,lossguide,65,4,0.689045,0.152349,0.60016,1.0,3.2
9,0.945339,64,reg:squarederror,rmse,0.033636,6,depthwise,117,387,0.550947,0.629054,0.917246,0.7,0.8


In [40]:
df.to_csv("data/immunopeptide_chymotrypsin_resultsB.csv", mode="a", header=False)


In [41]:
params = {
    "nthread": 64,
    "objective": "reg:squarederror",
    #"objective": "binary:logistic",
    #"eval_metric": 'mae',
    "eval_metric": 'rmse',
    "verbosity": 1,
    "eta": 0.5,
    "max_depth": 9,
    "grow_policy":"lossguide",
    "max_leaves":100,
    "min_child_weight": 300,
    "gamma": 0.1,
    "subsample": 1,
    "reg_lambda" : 0,
    "colsample_bytree": 1,
}

In [42]:
best_hyperparams = space_eval(space, best_hyperparams)
params.update(best_hyperparams)

In [43]:
params["max_depth"] = int(params["max_depth"])
params["max_leaves"] = int(params["max_leaves"])
params["min_child_weight"] = int(params["min_child_weight"])


In [44]:
params

{'nthread': 64,
 'objective': 'reg:squarederror',
 'eval_metric': 'rmse',
 'verbosity': 1,
 'eta': 0.3781092476928985,
 'max_depth': 7,
 'grow_policy': 'lossguide',
 'max_leaves': 178,
 'min_child_weight': 455,
 'gamma': 0.06996074946187703,
 'subsample': 0.8,
 'reg_lambda': 0.8000693484694821,
 'colsample_bytree': 0.7848151878076199,
 'reg_alpha': 4.3}

In [45]:
bst2 = xgb.train(params, xtrain, 400, evallist, maximize=False, early_stopping_rounds=10)

[0]	train-rmse:5.95186	test-rmse:5.95062
[1]	train-rmse:3.81930	test-rmse:3.81917
[2]	train-rmse:2.54050	test-rmse:2.54090
[3]	train-rmse:1.81110	test-rmse:1.81196
[4]	train-rmse:1.41726	test-rmse:1.41861
[5]	train-rmse:1.21577	test-rmse:1.21763
[6]	train-rmse:1.12195	test-rmse:1.12414
[7]	train-rmse:1.07496	test-rmse:1.07737
[8]	train-rmse:1.05086	test-rmse:1.05331
[9]	train-rmse:1.03541	test-rmse:1.03796
[10]	train-rmse:1.02496	test-rmse:1.02744
[11]	train-rmse:1.01630	test-rmse:1.01868
[12]	train-rmse:1.01004	test-rmse:1.01235
[13]	train-rmse:1.00173	test-rmse:1.00425
[14]	train-rmse:0.99733	test-rmse:1.00000
[15]	train-rmse:0.99219	test-rmse:0.99485
[16]	train-rmse:0.98369	test-rmse:0.98645
[17]	train-rmse:0.97813	test-rmse:0.98082
[18]	train-rmse:0.97519	test-rmse:0.97782
[19]	train-rmse:0.97103	test-rmse:0.97369
[20]	train-rmse:0.96673	test-rmse:0.96948
[21]	train-rmse:0.96225	test-rmse:0.96508
[22]	train-rmse:0.95823	test-rmse:0.96103
[23]	train-rmse:0.95652	test-rmse:0.95934
[2

[194]	train-rmse:0.84357	test-rmse:0.85240
[195]	train-rmse:0.84350	test-rmse:0.85235
[196]	train-rmse:0.84342	test-rmse:0.85231
[197]	train-rmse:0.84328	test-rmse:0.85218
[198]	train-rmse:0.84289	test-rmse:0.85180
[199]	train-rmse:0.84274	test-rmse:0.85167
[200]	train-rmse:0.84241	test-rmse:0.85137
[201]	train-rmse:0.84229	test-rmse:0.85129
[202]	train-rmse:0.84215	test-rmse:0.85119
[203]	train-rmse:0.84186	test-rmse:0.85094
[204]	train-rmse:0.84169	test-rmse:0.85082
[205]	train-rmse:0.84152	test-rmse:0.85066
[206]	train-rmse:0.84111	test-rmse:0.85027
[207]	train-rmse:0.84090	test-rmse:0.85009
[208]	train-rmse:0.84082	test-rmse:0.85002
[209]	train-rmse:0.84067	test-rmse:0.84990
[210]	train-rmse:0.84050	test-rmse:0.84981
[211]	train-rmse:0.84029	test-rmse:0.84963
[212]	train-rmse:0.83998	test-rmse:0.84936
[213]	train-rmse:0.83979	test-rmse:0.84919
[214]	train-rmse:0.83960	test-rmse:0.84899
[215]	train-rmse:0.83943	test-rmse:0.84884
[216]	train-rmse:0.83923	test-rmse:0.84869
[217]	train

[385]	train-rmse:0.81535	test-rmse:0.83061
[386]	train-rmse:0.81525	test-rmse:0.83052
[387]	train-rmse:0.81513	test-rmse:0.83044
[388]	train-rmse:0.81502	test-rmse:0.83039
[389]	train-rmse:0.81498	test-rmse:0.83038
[390]	train-rmse:0.81486	test-rmse:0.83028
[391]	train-rmse:0.81477	test-rmse:0.83022
[392]	train-rmse:0.81472	test-rmse:0.83018
[393]	train-rmse:0.81451	test-rmse:0.83003
[394]	train-rmse:0.81426	test-rmse:0.82981
[395]	train-rmse:0.81408	test-rmse:0.82966
[396]	train-rmse:0.81403	test-rmse:0.82964
[397]	train-rmse:0.81390	test-rmse:0.82954
[398]	train-rmse:0.81374	test-rmse:0.82943
[399]	train-rmse:0.81362	test-rmse:0.82936


In [46]:
bst2.save_model("Models/Fast_immunopeptide_chymotrypsin_HCDb.xgboost")