## Import modules, set options

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np

import math
import gc
import copy

from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_absolute_error

import matplotlib.pyplot as plt
import seaborn as sns

from lightgbm import LGBMRegressor
import os


from Utils import *
# os.listdir('../input/imputed')

In [2]:
# Widen pandas' display limits so wide feature tables are shown without truncation.
# `None` is the supported "no limit" value for max_colwidth; the older `-1`
# sentinel has been deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 120)
pd.set_option('display.max_columns', 120)

In [3]:
# Submission template indexed by `id`. The path is built with os.path.join so the
# notebook is not tied to the Windows-only '\\' separator.
submission_csv = pd.read_csv(os.path.join(Config.DATA_PATH, 'sample_submission.csv'), index_col='id')

  mask |= (ar1 == a)


## Submission Model


In [4]:
# Load the test pairs indexed by `id`. The path is built with os.path.join so the
# notebook is not tied to the Windows-only '\\' separator.
# NOTE(review): `train_dtypes` is not defined in the visible cells; presumably it
# comes from `from Utils import *` -- confirm.
test_csv = pd.read_csv(os.path.join(Config.DATA_PATH, 'test.csv'), index_col='id', dtype=train_dtypes)
# Strip the constant 'dsgdb9nsd_' prefix so the molecule name becomes an int32 index.
test_csv['molecule_index'] = test_csv['molecule_name'].str.replace('dsgdb9nsd_', '').astype('int32')
# Keep only the columns used below; `type` is what the training function uses to
# select the submission rows of each coupling type.
test_csv = test_csv[['molecule_index', 'atom_index_0', 'atom_index_1', 'type']]
test_csv.head(10)

Unnamed: 0_level_0,molecule_index,atom_index_0,atom_index_1,type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4658147,4,2,0,2JHC
4658148,4,2,1,1JHC
4658149,4,2,3,3JHH
4658150,4,3,0,1JHC
4658151,4,3,1,2JHC
4658152,15,3,0,1JHC
4658153,15,3,2,3JHC
4658154,15,3,4,2JHH
4658155,15,3,5,2JHH
4658156,15,4,0,1JHC


In [5]:
from sklearn.model_selection import GroupKFold, train_test_split
def train_and_predict_for_one_coupling_type(coupling_type, submission, n_folds=5, n_splits=5, random_state=128,
                                            model_type='lgb'):
    """Cross-validate a model for one coupling type and write its test predictions.

    Reads `{Config.INPUT_XY}/{coupling_type}.csv` (train) and
    `{Config.INPUT_XY}/test_{coupling_type}.csv` (test), splits the training
    rows with GroupKFold using `molecule_index` as the group, trains one model
    per fold and averages the per-fold test predictions.

    Parameters
    ----------
    coupling_type : str
        Coupling type name; used in the file names and to filter rows by `type`.
    submission : pandas.DataFrame
        Modified in place: the `scalar_coupling_constant` column is overwritten
        for rows where the module-level `test_csv['type']` equals `coupling_type`.
    n_folds : int, default 5
        Number of folds actually trained (must be >= 1).
    n_splits : int, default 5
        Number of GroupKFold splits; raised to `n_folds` when smaller.
    random_state : int, default 128
        Currently unused (GroupKFold takes no seed); kept for backward compatibility.
    model_type : {'lgb', 'cat'}, default 'lgb'
        Which regressor to train.

    Returns
    -------
    float
        Mean over the trained folds of log(MAE) on the validation split.

    Raises
    ------
    ValueError
        If `model_type` is not 'lgb' or 'cat', or if `n_folds` is less than 1.
    """
    # Validate before any file I/O so bad arguments fail fast.
    if model_type not in ('lgb', 'cat'):
        raise ValueError(f"Unsupported model_type {model_type!r}; expected 'lgb' or 'cat'")
    if n_folds < 1:
        raise ValueError(f'n_folds must be at least 1, got {n_folds}')

    print(f'*** Training Model for {coupling_type} ***')
    XY_Data = pd.read_csv(f'{Config.INPUT_XY}/{coupling_type}.csv', index_col=0)
    X_data, y_data = build_x_y_data(XY_Data)

    XY_Test = pd.read_csv(f'{Config.INPUT_XY}/test_{coupling_type}.csv', index_col=0)
    X_test, _ = build_x_y_data(XY_Test)
    y_pred = np.zeros(X_test.shape[0], dtype='float32')

    # Group labels keep all pairs of one molecule in the same fold.
    # NOTE(review): assumes the rows of load_train() filtered by type are in the
    # same order as the rows of XY_Data -- confirm against the feature builder.
    train_csv = load_train()
    groups = train_csv[train_csv.type == coupling_type].molecule_index.values

    # The splitter must yield at least as many folds as we intend to train.
    n_splits = max(n_splits, n_folds)
    kfold = GroupKFold(n_splits=n_splits)

    # Columns whose name starts with 'atom_' are passed to the model as categorical.
    categorical_features = [col for col in X_data.columns if col.startswith('atom_')]

    cv_score = 0
    for fold, (train_index, val_index) in enumerate(kfold.split(X_data, y_data, groups=groups)):
        if fold >= n_folds:
            break

        X_train, X_val = X_data.iloc[train_index], X_data.iloc[val_index]
        y_train, y_val = y_data.iloc[train_index], y_data.iloc[val_index]

        if model_type == 'lgb':
            model = LGBMRegressor(**Config.LGB_PARAMS, n_estimators=15000, n_jobs=5)
            model.fit(X_train, y_train,
                      eval_set=[(X_train, y_train), (X_val, y_val)], eval_metric='mae',
                      verbose=100, early_stopping_rounds=1000,
                      categorical_feature=categorical_features)
        else:
            # NOTE(review): CatBoostRegressor and CAT_PARAMS are not imported or
            # defined in the visible cells; presumably provided by
            # `from Utils import *` -- confirm before using model_type='cat'.
            model = CatBoostRegressor(eval_metric='MAE', **CAT_PARAMS, loss_function='MAE')
            model.fit(X_train, y_train, eval_set=(X_val, y_val),
                      cat_features=categorical_features,
                      use_best_model=True,
                      verbose=False)

        y_val_pred = model.predict(X_val)
        val_score = np.log(mean_absolute_error(y_val, y_val_pred))
        print(f'{coupling_type} Fold {fold}, logMAE: {val_score}')

        # Each trained fold contributes 1/n_folds to both the CV score and the test prediction.
        cv_score += val_score / n_folds
        y_pred += model.predict(X_test) / n_folds

    # Relies on the module-level `test_csv` to locate this coupling type's rows in `submission`.
    submission.loc[test_csv['type'] == coupling_type, 'scalar_coupling_constant'] = y_pred
    return cv_score

Let's build a separate model for each type of coupling. The dataset is split into 5 folds grouped by molecule; `N_FOLDS` sets how many of them are actually trained (all 5 below — lower it, e.g. to 3, for a faster run).

The main tuning parameter is the number of atoms. The values used here work well, but accuracy can be improved a bit by tuning them separately for each coupling type.

Checking cross-validation scores for each type:

In [6]:
# Number of cross-validation folds trained for every coupling type.
N_FOLDS = 5

# Work on a copy so the loaded submission template stays untouched.
submission = submission_csv.copy()

# Map each coupling type to its cross-validation score; the training function
# also fills that type's rows of `submission` as a side effect.
cv_scores = dict()
for coupling_type in Config.MODEL_PARAMS.keys():
    cv_score = train_and_predict_for_one_coupling_type(coupling_type, submission, n_folds=N_FOLDS)
    cv_scores.update({coupling_type: cv_score})

*** Training Model for 1JHN ***


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.537618	valid_1's l1: 0.585336
[200]	training's l1: 0.413442	valid_1's l1: 0.490275
[300]	training's l1: 0.351078	valid_1's l1: 0.451444
[400]	training's l1: 0.307937	valid_1's l1: 0.426764
[500]	training's l1: 0.275382	valid_1's l1: 0.409766
[600]	training's l1: 0.249522	valid_1's l1: 0.397709
[700]	training's l1: 0.227688	valid_1's l1: 0.387508
[800]	training's l1: 0.21004	valid_1's l1: 0.380222
[900]	training's l1: 0.194102	valid_1's l1: 0.374254
[1000]	training's l1: 0.180395	valid_1's l1: 0.369353
[1100]	training's l1: 0.168497	valid_1's l1: 0.365107
[1200]	training's l1: 0.157691	valid_1's l1: 0.361835
[1300]	training's l1: 0.148041	valid_1's l1: 0.359033
[1400]	training's l1: 0.139338	valid_1's l1: 0.356513
[1500]	training's l1: 0.131388	valid_1's l1: 0.354285
[1600]	training's l1: 0.123978	valid_1's l1: 0.352345
[1700]	training's l1: 0.117423	valid_1's l1: 0.350892
[1800]	training's l1: 0.1112

[14900]	training's l1: 0.00201781	valid_1's l1: 0.331873
[15000]	training's l1: 0.00197249	valid_1's l1: 0.331868
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00197249	valid_1's l1: 0.331868
1JHN Fold 0, logMAE: -1.1030186004080824


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.537012	valid_1's l1: 0.603523
[200]	training's l1: 0.411885	valid_1's l1: 0.509113
[300]	training's l1: 0.349734	valid_1's l1: 0.466607
[400]	training's l1: 0.308393	valid_1's l1: 0.440447
[500]	training's l1: 0.277049	valid_1's l1: 0.423058
[600]	training's l1: 0.250312	valid_1's l1: 0.408879
[700]	training's l1: 0.229175	valid_1's l1: 0.398289
[800]	training's l1: 0.21107	valid_1's l1: 0.390041
[900]	training's l1: 0.19581	valid_1's l1: 0.384381
[1000]	training's l1: 0.182215	valid_1's l1: 0.378879
[1100]	training's l1: 0.16981	valid_1's l1: 0.374208
[1200]	training's l1: 0.158821	valid_1's l1: 0.370042
[1300]	training's l1: 0.149446	valid_1's l1: 0.366989
[1400]	training's l1: 0.140754	valid_1's l1: 0.364265
[1500]	training's l1: 0.133004	valid_1's l1: 0.362158
[1600]	training's l1: 0.125615	valid_1's l1: 0.359975
[1700]	training's l1: 0.118572	valid_1's l1: 0.357909
[1800]	training's l1: 0.112533

[14800]	training's l1: 0.00205358	valid_1's l1: 0.338623
[14900]	training's l1: 0.00200733	valid_1's l1: 0.338621
[15000]	training's l1: 0.00196486	valid_1's l1: 0.338621
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00196486	valid_1's l1: 0.338621
1JHN Fold 1, logMAE: -1.0830265176329374


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.53977	valid_1's l1: 0.59804
[200]	training's l1: 0.414936	valid_1's l1: 0.498964
[300]	training's l1: 0.351226	valid_1's l1: 0.456103
[400]	training's l1: 0.308706	valid_1's l1: 0.432642
[500]	training's l1: 0.275941	valid_1's l1: 0.414787
[600]	training's l1: 0.249937	valid_1's l1: 0.401873
[700]	training's l1: 0.228674	valid_1's l1: 0.392167
[800]	training's l1: 0.21056	valid_1's l1: 0.384104
[900]	training's l1: 0.195051	valid_1's l1: 0.377895
[1000]	training's l1: 0.181534	valid_1's l1: 0.372706
[1100]	training's l1: 0.169451	valid_1's l1: 0.368555
[1200]	training's l1: 0.158758	valid_1's l1: 0.36498
[1300]	training's l1: 0.148948	valid_1's l1: 0.361909
[1400]	training's l1: 0.140002	valid_1's l1: 0.35928
[1500]	training's l1: 0.131959	valid_1's l1: 0.356816
[1600]	training's l1: 0.124516	valid_1's l1: 0.354865
[1700]	training's l1: 0.117935	valid_1's l1: 0.353084
[1800]	training's l1: 0.111955	v

[14800]	training's l1: 0.00209293	valid_1's l1: 0.333693
[14900]	training's l1: 0.00204362	valid_1's l1: 0.33369
[15000]	training's l1: 0.00199811	valid_1's l1: 0.333689
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00199811	valid_1's l1: 0.333689
1JHN Fold 2, logMAE: -1.0975467621221928


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.541761	valid_1's l1: 0.58421
[200]	training's l1: 0.415321	valid_1's l1: 0.488139
[300]	training's l1: 0.353529	valid_1's l1: 0.447525
[400]	training's l1: 0.310239	valid_1's l1: 0.422468
[500]	training's l1: 0.278294	valid_1's l1: 0.405074
[600]	training's l1: 0.252071	valid_1's l1: 0.392118
[700]	training's l1: 0.230636	valid_1's l1: 0.382064
[800]	training's l1: 0.212324	valid_1's l1: 0.374012
[900]	training's l1: 0.196569	valid_1's l1: 0.36761
[1000]	training's l1: 0.182566	valid_1's l1: 0.362312
[1100]	training's l1: 0.170631	valid_1's l1: 0.358414
[1200]	training's l1: 0.159607	valid_1's l1: 0.354331
[1300]	training's l1: 0.150131	valid_1's l1: 0.351431
[1400]	training's l1: 0.141182	valid_1's l1: 0.348516
[1500]	training's l1: 0.133155	valid_1's l1: 0.346165
[1600]	training's l1: 0.125904	valid_1's l1: 0.343842
[1700]	training's l1: 0.119118	valid_1's l1: 0.341983
[1800]	training's l1: 0.11294

[14800]	training's l1: 0.00206967	valid_1's l1: 0.321552
[14900]	training's l1: 0.00202495	valid_1's l1: 0.321546
[15000]	training's l1: 0.00197925	valid_1's l1: 0.321541
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00197925	valid_1's l1: 0.321541
1JHN Fold 3, logMAE: -1.134631051876189


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.542299	valid_1's l1: 0.594727
[200]	training's l1: 0.417184	valid_1's l1: 0.494472
[300]	training's l1: 0.352835	valid_1's l1: 0.450002
[400]	training's l1: 0.310211	valid_1's l1: 0.42515
[500]	training's l1: 0.277884	valid_1's l1: 0.40858
[600]	training's l1: 0.251792	valid_1's l1: 0.395746
[700]	training's l1: 0.230588	valid_1's l1: 0.385981
[800]	training's l1: 0.212581	valid_1's l1: 0.378146
[900]	training's l1: 0.19692	valid_1's l1: 0.372039
[1000]	training's l1: 0.18366	valid_1's l1: 0.367312
[1100]	training's l1: 0.171647	valid_1's l1: 0.36339
[1200]	training's l1: 0.160567	valid_1's l1: 0.359444
[1300]	training's l1: 0.150619	valid_1's l1: 0.356552
[1400]	training's l1: 0.14169	valid_1's l1: 0.353499
[1500]	training's l1: 0.133527	valid_1's l1: 0.351447
[1600]	training's l1: 0.126252	valid_1's l1: 0.349777
[1700]	training's l1: 0.119529	valid_1's l1: 0.348063
[1800]	training's l1: 0.113211	va

[14900]	training's l1: 0.00202127	valid_1's l1: 0.328152
[15000]	training's l1: 0.00197866	valid_1's l1: 0.328149
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00197866	valid_1's l1: 0.328149
1JHN Fold 4, logMAE: -1.1142873188537183
*** Training Model for 1JHC ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 1.66051	valid_1's l1: 1.67471
[200]	training's l1: 1.41447	valid_1's l1: 1.43559
[300]	training's l1: 1.29766	valid_1's l1: 1.32463
[400]	training's l1: 1.21948	valid_1's l1: 1.25288
[500]	training's l1: 1.16105	valid_1's l1: 1.20039
[600]	training's l1: 1.11565	valid_1's l1: 1.16061
[700]	training's l1: 1.07602	valid_1's l1: 1.1264
[800]	training's l1: 1.04421	valid_1's l1: 1.09958
[900]	training's l1: 1.01476	valid_1's l1: 1.07531
[1000]	training's l1: 0.987843	valid_1's l1: 1.05333
[1100]	training's l1: 0.964155	valid_1's l1: 1.03436
[1200]	training's l1: 0.942556	valid_1's l1: 1.01773
[1300]	training's l1: 0.923184	valid_1's l1: 1.00292
[1400]	training's l1: 0.905723	valid_1's l1: 0.989946
[1500]	training's l1: 0.889057	valid_1's l1: 0.977886
[1600]	training's l1: 0.873192	valid_1's l1: 0.966301
[1700]	training's l1: 0.857542	valid_1's l1: 0.954942
[1800]	training's l1: 0.843985	valid_1's l1: 0.945

1JHC Fold 0, logMAE: -0.38346623764805493


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 1.65115	valid_1's l1: 1.66903
[200]	training's l1: 1.41365	valid_1's l1: 1.438
[300]	training's l1: 1.2992	valid_1's l1: 1.32897
[400]	training's l1: 1.22386	valid_1's l1: 1.25889
[500]	training's l1: 1.16431	valid_1's l1: 1.20566
[600]	training's l1: 1.11829	valid_1's l1: 1.16507
[700]	training's l1: 1.0811	valid_1's l1: 1.13333
[800]	training's l1: 1.04406	valid_1's l1: 1.10139
[900]	training's l1: 1.01431	valid_1's l1: 1.0766
[1000]	training's l1: 0.988869	valid_1's l1: 1.0558
[1100]	training's l1: 0.965882	valid_1's l1: 1.03711
[1200]	training's l1: 0.943448	valid_1's l1: 1.01935
[1300]	training's l1: 0.92285	valid_1's l1: 1.00347
[1400]	training's l1: 0.905338	valid_1's l1: 0.990023
[1500]	training's l1: 0.887401	valid_1's l1: 0.976222
[1600]	training's l1: 0.871979	valid_1's l1: 0.964761
[1700]	training's l1: 0.856829	valid_1's l1: 0.953565
[1800]	training's l1: 0.842956	valid_1's l1: 0.94349
[19

1JHC Fold 1, logMAE: -0.3890450446345543


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 1.66081	valid_1's l1: 1.67279
[200]	training's l1: 1.42005	valid_1's l1: 1.43854
[300]	training's l1: 1.30156	valid_1's l1: 1.32611
[400]	training's l1: 1.22132	valid_1's l1: 1.25168
[500]	training's l1: 1.1621	valid_1's l1: 1.19791
[600]	training's l1: 1.11566	valid_1's l1: 1.15634
[700]	training's l1: 1.07655	valid_1's l1: 1.12202
[800]	training's l1: 1.04386	valid_1's l1: 1.09441
[900]	training's l1: 1.01635	valid_1's l1: 1.07164
[1000]	training's l1: 0.99071	valid_1's l1: 1.05098
[1100]	training's l1: 0.967182	valid_1's l1: 1.03199
[1200]	training's l1: 0.946473	valid_1's l1: 1.01582
[1300]	training's l1: 0.925612	valid_1's l1: 0.999392
[1400]	training's l1: 0.906878	valid_1's l1: 0.985091
[1500]	training's l1: 0.890418	valid_1's l1: 0.972772
[1600]	training's l1: 0.874859	valid_1's l1: 0.96135
[1700]	training's l1: 0.860315	valid_1's l1: 0.950701
[1800]	training's l1: 0.846182	valid_1's l1: 0.9403

1JHC Fold 2, logMAE: -0.3960480457339578


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 1.66579	valid_1's l1: 1.66946
[200]	training's l1: 1.42545	valid_1's l1: 1.43948
[300]	training's l1: 1.30676	valid_1's l1: 1.3282
[400]	training's l1: 1.22777	valid_1's l1: 1.25561
[500]	training's l1: 1.16557	valid_1's l1: 1.19891
[600]	training's l1: 1.11963	valid_1's l1: 1.15799
[700]	training's l1: 1.08169	valid_1's l1: 1.12497
[800]	training's l1: 1.04913	valid_1's l1: 1.09694
[900]	training's l1: 1.01899	valid_1's l1: 1.07169
[1000]	training's l1: 0.994524	valid_1's l1: 1.05215
[1100]	training's l1: 0.970307	valid_1's l1: 1.03242
[1200]	training's l1: 0.948877	valid_1's l1: 1.01564
[1300]	training's l1: 0.928661	valid_1's l1: 1.00002
[1400]	training's l1: 0.910205	valid_1's l1: 0.985763
[1500]	training's l1: 0.892318	valid_1's l1: 0.972104
[1600]	training's l1: 0.876289	valid_1's l1: 0.960237
[1700]	training's l1: 0.861775	valid_1's l1: 0.949946
[1800]	training's l1: 0.846903	valid_1's l1: 0.939

1JHC Fold 3, logMAE: -0.3967982850266533


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 1.65984	valid_1's l1: 1.6712
[200]	training's l1: 1.41866	valid_1's l1: 1.44041
[300]	training's l1: 1.2997	valid_1's l1: 1.32759
[400]	training's l1: 1.22344	valid_1's l1: 1.25796
[500]	training's l1: 1.16103	valid_1's l1: 1.20211
[600]	training's l1: 1.11525	valid_1's l1: 1.16215
[700]	training's l1: 1.07706	valid_1's l1: 1.12971
[800]	training's l1: 1.04331	valid_1's l1: 1.10109
[900]	training's l1: 1.01198	valid_1's l1: 1.07518
[1000]	training's l1: 0.985088	valid_1's l1: 1.0531
[1100]	training's l1: 0.961891	valid_1's l1: 1.03439
[1200]	training's l1: 0.940641	valid_1's l1: 1.01756
[1300]	training's l1: 0.921079	valid_1's l1: 1.00216
[1400]	training's l1: 0.903565	valid_1's l1: 0.988253
[1500]	training's l1: 0.886856	valid_1's l1: 0.975988
[1600]	training's l1: 0.870292	valid_1's l1: 0.963492
[1700]	training's l1: 0.855228	valid_1's l1: 0.952214
[1800]	training's l1: 0.841116	valid_1's l1: 0.94185

1JHC Fold 4, logMAE: -0.3886621951544665
*** Training Model for 2JHH ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.362807	valid_1's l1: 0.368054
[200]	training's l1: 0.302626	valid_1's l1: 0.311385
[300]	training's l1: 0.272911	valid_1's l1: 0.284682
[400]	training's l1: 0.252741	valid_1's l1: 0.267006
[500]	training's l1: 0.237171	valid_1's l1: 0.253503
[600]	training's l1: 0.22457	valid_1's l1: 0.243259
[700]	training's l1: 0.213701	valid_1's l1: 0.234481
[800]	training's l1: 0.205094	valid_1's l1: 0.227769
[900]	training's l1: 0.197256	valid_1's l1: 0.221706
[1000]	training's l1: 0.189943	valid_1's l1: 0.216206
[1100]	training's l1: 0.183694	valid_1's l1: 0.211563
[1200]	training's l1: 0.177686	valid_1's l1: 0.207154
[1300]	training's l1: 0.172022	valid_1's l1: 0.203065
[1400]	training's l1: 0.166855	valid_1's l1: 0.199434
[1500]	training's l1: 0.16255	valid_1's l1: 0.196685
[1600]	training's l1: 0.15843	valid_1's l1: 0.193901
[1700]	training's l1: 0.154612	valid_1's l1: 0.191384
[1800]	training's l1: 0.150836

[15000]	training's l1: 0.0390445	valid_1's l1: 0.139259
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0390445	valid_1's l1: 0.139259
2JHH Fold 0, logMAE: -1.9714206391579985


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.362084	valid_1's l1: 0.368998
[200]	training's l1: 0.302823	valid_1's l1: 0.314147
[300]	training's l1: 0.272422	valid_1's l1: 0.287152
[400]	training's l1: 0.252312	valid_1's l1: 0.269684
[500]	training's l1: 0.237242	valid_1's l1: 0.257163
[600]	training's l1: 0.224278	valid_1's l1: 0.246541
[700]	training's l1: 0.214028	valid_1's l1: 0.238329
[800]	training's l1: 0.20475	valid_1's l1: 0.231026
[900]	training's l1: 0.197001	valid_1's l1: 0.225083
[1000]	training's l1: 0.190242	valid_1's l1: 0.219995
[1100]	training's l1: 0.183642	valid_1's l1: 0.21504
[1200]	training's l1: 0.177601	valid_1's l1: 0.210639
[1300]	training's l1: 0.172001	valid_1's l1: 0.206457
[1400]	training's l1: 0.16709	valid_1's l1: 0.202929
[1500]	training's l1: 0.162729	valid_1's l1: 0.199885
[1600]	training's l1: 0.158709	valid_1's l1: 0.197198
[1700]	training's l1: 0.154762	valid_1's l1: 0.194598
[1800]	training's l1: 0.151244

[15000]	training's l1: 0.0389425	valid_1's l1: 0.140905
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0389425	valid_1's l1: 0.140905
2JHH Fold 1, logMAE: -1.9596917422026616


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.363058	valid_1's l1: 0.369434
[200]	training's l1: 0.304549	valid_1's l1: 0.314616
[300]	training's l1: 0.273853	valid_1's l1: 0.287536
[400]	training's l1: 0.252994	valid_1's l1: 0.269384
[500]	training's l1: 0.238058	valid_1's l1: 0.256685
[600]	training's l1: 0.22619	valid_1's l1: 0.247158
[700]	training's l1: 0.215698	valid_1's l1: 0.238812
[800]	training's l1: 0.207101	valid_1's l1: 0.232071
[900]	training's l1: 0.199093	valid_1's l1: 0.225976
[1000]	training's l1: 0.191771	valid_1's l1: 0.22049
[1100]	training's l1: 0.185124	valid_1's l1: 0.215486
[1200]	training's l1: 0.179214	valid_1's l1: 0.211201
[1300]	training's l1: 0.173549	valid_1's l1: 0.207192
[1400]	training's l1: 0.168369	valid_1's l1: 0.203457
[1500]	training's l1: 0.163846	valid_1's l1: 0.200414
[1600]	training's l1: 0.159487	valid_1's l1: 0.197266
[1700]	training's l1: 0.155458	valid_1's l1: 0.194667
[1800]	training's l1: 0.15177

[15000]	training's l1: 0.0389059	valid_1's l1: 0.140708
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0389059	valid_1's l1: 0.140708
2JHH Fold 2, logMAE: -1.9610710555116164


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.360045	valid_1's l1: 0.362661
[200]	training's l1: 0.303622	valid_1's l1: 0.309649
[300]	training's l1: 0.273358	valid_1's l1: 0.282689
[400]	training's l1: 0.252351	valid_1's l1: 0.264814
[500]	training's l1: 0.23692	valid_1's l1: 0.252023
[600]	training's l1: 0.224176	valid_1's l1: 0.241757
[700]	training's l1: 0.213393	valid_1's l1: 0.233283
[800]	training's l1: 0.204586	valid_1's l1: 0.226642
[900]	training's l1: 0.196531	valid_1's l1: 0.220532
[1000]	training's l1: 0.189526	valid_1's l1: 0.215383
[1100]	training's l1: 0.182765	valid_1's l1: 0.210425
[1200]	training's l1: 0.176545	valid_1's l1: 0.205853
[1300]	training's l1: 0.171055	valid_1's l1: 0.202003
[1400]	training's l1: 0.166461	valid_1's l1: 0.198891
[1500]	training's l1: 0.161939	valid_1's l1: 0.195855
[1600]	training's l1: 0.157628	valid_1's l1: 0.193033
[1700]	training's l1: 0.153772	valid_1's l1: 0.190506
[1800]	training's l1: 0.1501

[15000]	training's l1: 0.0386056	valid_1's l1: 0.138246
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0386056	valid_1's l1: 0.138246
2JHH Fold 3, logMAE: -1.9787239564917325


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.362183	valid_1's l1: 0.368712
[200]	training's l1: 0.304345	valid_1's l1: 0.313535
[300]	training's l1: 0.274865	valid_1's l1: 0.286753
[400]	training's l1: 0.254846	valid_1's l1: 0.269462
[500]	training's l1: 0.238928	valid_1's l1: 0.255866
[600]	training's l1: 0.22612	valid_1's l1: 0.245671
[700]	training's l1: 0.215641	valid_1's l1: 0.237405
[800]	training's l1: 0.206129	valid_1's l1: 0.230028
[900]	training's l1: 0.198057	valid_1's l1: 0.223971
[1000]	training's l1: 0.190414	valid_1's l1: 0.218278
[1100]	training's l1: 0.183723	valid_1's l1: 0.213367
[1200]	training's l1: 0.177838	valid_1's l1: 0.209174
[1300]	training's l1: 0.172413	valid_1's l1: 0.20525
[1400]	training's l1: 0.167405	valid_1's l1: 0.201728
[1500]	training's l1: 0.163034	valid_1's l1: 0.198925
[1600]	training's l1: 0.158977	valid_1's l1: 0.196256
[1700]	training's l1: 0.155181	valid_1's l1: 0.193801
[1800]	training's l1: 0.15145

[15000]	training's l1: 0.0389709	valid_1's l1: 0.140399
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0389709	valid_1's l1: 0.140399
2JHH Fold 4, logMAE: -1.9632670791269073
*** Training Model for 2JHN ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.294263	valid_1's l1: 0.309568
[200]	training's l1: 0.233842	valid_1's l1: 0.25418
[300]	training's l1: 0.203387	valid_1's l1: 0.228954
[400]	training's l1: 0.182961	valid_1's l1: 0.213009
[500]	training's l1: 0.167326	valid_1's l1: 0.201378
[600]	training's l1: 0.155744	valid_1's l1: 0.193167
[700]	training's l1: 0.146	valid_1's l1: 0.186554
[800]	training's l1: 0.137481	valid_1's l1: 0.181009
[900]	training's l1: 0.13011	valid_1's l1: 0.176558
[1000]	training's l1: 0.123702	valid_1's l1: 0.172623
[1100]	training's l1: 0.1179	valid_1's l1: 0.169248
[1200]	training's l1: 0.112611	valid_1's l1: 0.166248
[1300]	training's l1: 0.108001	valid_1's l1: 0.163831
[1400]	training's l1: 0.103822	valid_1's l1: 0.161477
[1500]	training's l1: 0.0997944	valid_1's l1: 0.159423
[1600]	training's l1: 0.0961111	valid_1's l1: 0.157454
[1700]	training's l1: 0.0926849	valid_1's l1: 0.15583
[1800]	training's l1: 0.0895408	

[14900]	training's l1: 0.00988509	valid_1's l1: 0.128831
[15000]	training's l1: 0.00977215	valid_1's l1: 0.128813
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00977215	valid_1's l1: 0.128813
2JHN Fold 0, logMAE: -2.04939736163057


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.293693	valid_1's l1: 0.308355
[200]	training's l1: 0.232371	valid_1's l1: 0.252298
[300]	training's l1: 0.201909	valid_1's l1: 0.226227
[400]	training's l1: 0.182202	valid_1's l1: 0.210435
[500]	training's l1: 0.166592	valid_1's l1: 0.198744
[600]	training's l1: 0.154382	valid_1's l1: 0.189826
[700]	training's l1: 0.144763	valid_1's l1: 0.183336
[800]	training's l1: 0.136241	valid_1's l1: 0.177626
[900]	training's l1: 0.129128	valid_1's l1: 0.173168
[1000]	training's l1: 0.122868	valid_1's l1: 0.169319
[1100]	training's l1: 0.117227	valid_1's l1: 0.165929
[1200]	training's l1: 0.112008	valid_1's l1: 0.162796
[1300]	training's l1: 0.107371	valid_1's l1: 0.160227
[1400]	training's l1: 0.103152	valid_1's l1: 0.157905
[1500]	training's l1: 0.0991848	valid_1's l1: 0.155647
[1600]	training's l1: 0.0955429	valid_1's l1: 0.153742
[1700]	training's l1: 0.0922347	valid_1's l1: 0.152052
[1800]	training's l1: 0.

[14900]	training's l1: 0.00979522	valid_1's l1: 0.125235
[15000]	training's l1: 0.00967708	valid_1's l1: 0.125217
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00967708	valid_1's l1: 0.125217
2JHN Fold 1, logMAE: -2.0777074253530787


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.289187	valid_1's l1: 0.301203
[200]	training's l1: 0.23084	valid_1's l1: 0.248381
[300]	training's l1: 0.198761	valid_1's l1: 0.220746
[400]	training's l1: 0.178666	valid_1's l1: 0.20492
[500]	training's l1: 0.163141	valid_1's l1: 0.193389
[600]	training's l1: 0.151775	valid_1's l1: 0.185594
[700]	training's l1: 0.142196	valid_1's l1: 0.17924
[800]	training's l1: 0.134363	valid_1's l1: 0.174394
[900]	training's l1: 0.12712	valid_1's l1: 0.16983
[1000]	training's l1: 0.121073	valid_1's l1: 0.16613
[1100]	training's l1: 0.115666	valid_1's l1: 0.163195
[1200]	training's l1: 0.110702	valid_1's l1: 0.16043
[1300]	training's l1: 0.106116	valid_1's l1: 0.15805
[1400]	training's l1: 0.10195	valid_1's l1: 0.155847
[1500]	training's l1: 0.0981757	valid_1's l1: 0.153941
[1600]	training's l1: 0.0946193	valid_1's l1: 0.152159
[1700]	training's l1: 0.0914294	valid_1's l1: 0.150659
[1800]	training's l1: 0.088394	va

[14900]	training's l1: 0.00978653	valid_1's l1: 0.125095
[15000]	training's l1: 0.00967355	valid_1's l1: 0.125078
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00967355	valid_1's l1: 0.125078
2JHN Fold 2, logMAE: -2.0785464598950787


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.291448	valid_1's l1: 0.301236
[200]	training's l1: 0.232199	valid_1's l1: 0.247741
[300]	training's l1: 0.201336	valid_1's l1: 0.222023
[400]	training's l1: 0.180649	valid_1's l1: 0.205702
[500]	training's l1: 0.165788	valid_1's l1: 0.194832
[600]	training's l1: 0.15395	valid_1's l1: 0.186491
[700]	training's l1: 0.144255	valid_1's l1: 0.179971
[800]	training's l1: 0.135812	valid_1's l1: 0.174349
[900]	training's l1: 0.128672	valid_1's l1: 0.169684
[1000]	training's l1: 0.122202	valid_1's l1: 0.165806
[1100]	training's l1: 0.11674	valid_1's l1: 0.162654
[1200]	training's l1: 0.111797	valid_1's l1: 0.159971
[1300]	training's l1: 0.107176	valid_1's l1: 0.157417
[1400]	training's l1: 0.102988	valid_1's l1: 0.155175
[1500]	training's l1: 0.0992051	valid_1's l1: 0.153382
[1600]	training's l1: 0.0956394	valid_1's l1: 0.151598
[1700]	training's l1: 0.0923424	valid_1's l1: 0.150111
[1800]	training's l1: 0.08

[15000]	training's l1: 0.00970822	valid_1's l1: 0.124266
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00970822	valid_1's l1: 0.124266
2JHN Fold 3, logMAE: -2.0853284440025797


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.295427	valid_1's l1: 0.300505
[200]	training's l1: 0.233706	valid_1's l1: 0.245826
[300]	training's l1: 0.203577	valid_1's l1: 0.221676
[400]	training's l1: 0.182867	valid_1's l1: 0.205804
[500]	training's l1: 0.167389	valid_1's l1: 0.194544
[600]	training's l1: 0.154941	valid_1's l1: 0.185827
[700]	training's l1: 0.144675	valid_1's l1: 0.179047
[800]	training's l1: 0.136601	valid_1's l1: 0.173999
[900]	training's l1: 0.129423	valid_1's l1: 0.169825
[1000]	training's l1: 0.122839	valid_1's l1: 0.165738
[1100]	training's l1: 0.117006	valid_1's l1: 0.162351
[1200]	training's l1: 0.111955	valid_1's l1: 0.159548
[1300]	training's l1: 0.107526	valid_1's l1: 0.157255
[1400]	training's l1: 0.103311	valid_1's l1: 0.155119
[1500]	training's l1: 0.0993272	valid_1's l1: 0.153169
[1600]	training's l1: 0.0957087	valid_1's l1: 0.151437
[1700]	training's l1: 0.0922748	valid_1's l1: 0.149753
[1800]	training's l1: 0.

[14900]	training's l1: 0.00985231	valid_1's l1: 0.123789
[15000]	training's l1: 0.00973819	valid_1's l1: 0.123774
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.00973819	valid_1's l1: 0.123774
2JHN Fold 4, logMAE: -2.089298129231239
*** Training Model for 2JHC ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.722765	valid_1's l1: 0.725662
[200]	training's l1: 0.611337	valid_1's l1: 0.616341
[300]	training's l1: 0.554353	valid_1's l1: 0.561442
[400]	training's l1: 0.517572	valid_1's l1: 0.526385
[500]	training's l1: 0.48935	valid_1's l1: 0.49993
[600]	training's l1: 0.467363	valid_1's l1: 0.479434
[700]	training's l1: 0.450428	valid_1's l1: 0.463985
[800]	training's l1: 0.435485	valid_1's l1: 0.450293
[900]	training's l1: 0.422653	valid_1's l1: 0.438781
[1000]	training's l1: 0.41122	valid_1's l1: 0.428453
[1100]	training's l1: 0.400796	valid_1's l1: 0.419149
[1200]	training's l1: 0.391203	valid_1's l1: 0.41063
[1300]	training's l1: 0.38248	valid_1's l1: 0.403
[1400]	training's l1: 0.374809	valid_1's l1: 0.396593
[1500]	training's l1: 0.367849	valid_1's l1: 0.390682
[1600]	training's l1: 0.361078	valid_1's l1: 0.38498
[1700]	training's l1: 0.355076	valid_1's l1: 0.380077
[1800]	training's l1: 0.349288	valid

2JHC Fold 0, logMAE: -1.3899488913519698


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.730969	valid_1's l1: 0.733814
[200]	training's l1: 0.616368	valid_1's l1: 0.621529
[300]	training's l1: 0.55976	valid_1's l1: 0.566687
[400]	training's l1: 0.519902	valid_1's l1: 0.528359
[500]	training's l1: 0.492238	valid_1's l1: 0.502148
[600]	training's l1: 0.470232	valid_1's l1: 0.48166
[700]	training's l1: 0.450807	valid_1's l1: 0.463661
[800]	training's l1: 0.435713	valid_1's l1: 0.449952
[900]	training's l1: 0.422292	valid_1's l1: 0.437945
[1000]	training's l1: 0.410885	valid_1's l1: 0.42769
[1100]	training's l1: 0.400754	valid_1's l1: 0.418809
[1200]	training's l1: 0.391971	valid_1's l1: 0.411305
[1300]	training's l1: 0.383525	valid_1's l1: 0.404223
[1400]	training's l1: 0.375923	valid_1's l1: 0.397829
[1500]	training's l1: 0.368568	valid_1's l1: 0.391612
[1600]	training's l1: 0.362146	valid_1's l1: 0.386268
[1700]	training's l1: 0.35602	valid_1's l1: 0.381215
[1800]	training's l1: 0.349852	

2JHC Fold 1, logMAE: -1.390459768074527


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.724327	valid_1's l1: 0.731958
[200]	training's l1: 0.615101	valid_1's l1: 0.624662
[300]	training's l1: 0.558112	valid_1's l1: 0.569308
[400]	training's l1: 0.51969	valid_1's l1: 0.532343
[500]	training's l1: 0.490878	valid_1's l1: 0.505137
[600]	training's l1: 0.468527	valid_1's l1: 0.484189
[700]	training's l1: 0.450229	valid_1's l1: 0.46734
[800]	training's l1: 0.434955	valid_1's l1: 0.453285
[900]	training's l1: 0.42191	valid_1's l1: 0.441622
[1000]	training's l1: 0.410027	valid_1's l1: 0.43106
[1100]	training's l1: 0.399281	valid_1's l1: 0.421596
[1200]	training's l1: 0.39023	valid_1's l1: 0.413795
[1300]	training's l1: 0.381592	valid_1's l1: 0.406355
[1400]	training's l1: 0.373775	valid_1's l1: 0.39983
[1500]	training's l1: 0.366988	valid_1's l1: 0.394196
[1600]	training's l1: 0.360232	valid_1's l1: 0.388527
[1700]	training's l1: 0.354525	valid_1's l1: 0.383868
[1800]	training's l1: 0.348793	va

2JHC Fold 2, logMAE: -1.3759209369904162


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.721911	valid_1's l1: 0.72411
[200]	training's l1: 0.609787	valid_1's l1: 0.614
[300]	training's l1: 0.552723	valid_1's l1: 0.559245
[400]	training's l1: 0.515558	valid_1's l1: 0.524288
[500]	training's l1: 0.486711	valid_1's l1: 0.497482
[600]	training's l1: 0.464392	valid_1's l1: 0.476766
[700]	training's l1: 0.447348	valid_1's l1: 0.461425
[800]	training's l1: 0.432549	valid_1's l1: 0.447847
[900]	training's l1: 0.419058	valid_1's l1: 0.435811
[1000]	training's l1: 0.408308	valid_1's l1: 0.426418
[1100]	training's l1: 0.397804	valid_1's l1: 0.417209
[1200]	training's l1: 0.388846	valid_1's l1: 0.409338
[1300]	training's l1: 0.380631	valid_1's l1: 0.402397
[1400]	training's l1: 0.373156	valid_1's l1: 0.396188
[1500]	training's l1: 0.366033	valid_1's l1: 0.390165
[1600]	training's l1: 0.359244	valid_1's l1: 0.384387
[1700]	training's l1: 0.352851	valid_1's l1: 0.379051
[1800]	training's l1: 0.347232	

2JHC Fold 3, logMAE: -1.3940046938679638


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.723214	valid_1's l1: 0.727708
[200]	training's l1: 0.612751	valid_1's l1: 0.619351
[300]	training's l1: 0.55654	valid_1's l1: 0.564838
[400]	training's l1: 0.517202	valid_1's l1: 0.527238
[500]	training's l1: 0.490259	valid_1's l1: 0.5021
[600]	training's l1: 0.467402	valid_1's l1: 0.480635
[700]	training's l1: 0.450156	valid_1's l1: 0.464849
[800]	training's l1: 0.435136	valid_1's l1: 0.451409
[900]	training's l1: 0.422258	valid_1's l1: 0.439914
[1000]	training's l1: 0.410668	valid_1's l1: 0.429574
[1100]	training's l1: 0.400377	valid_1's l1: 0.420626
[1200]	training's l1: 0.391317	valid_1's l1: 0.412849
[1300]	training's l1: 0.382739	valid_1's l1: 0.405576
[1400]	training's l1: 0.374464	valid_1's l1: 0.398502
[1500]	training's l1: 0.367529	valid_1's l1: 0.392626
[1600]	training's l1: 0.360982	valid_1's l1: 0.387204
[1700]	training's l1: 0.354865	valid_1's l1: 0.3822
[1800]	training's l1: 0.348885	v

2JHC Fold 4, logMAE: -1.3847231070787989
*** Training Model for 3JHH ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.375805	valid_1's l1: 0.379748
[200]	training's l1: 0.317768	valid_1's l1: 0.323762
[300]	training's l1: 0.288205	valid_1's l1: 0.296272
[400]	training's l1: 0.268149	valid_1's l1: 0.278074
[500]	training's l1: 0.25255	valid_1's l1: 0.264137
[600]	training's l1: 0.240306	valid_1's l1: 0.253349
[700]	training's l1: 0.230075	valid_1's l1: 0.244456
[800]	training's l1: 0.221757	valid_1's l1: 0.237397
[900]	training's l1: 0.213934	valid_1's l1: 0.230974
[1000]	training's l1: 0.207429	valid_1's l1: 0.225697
[1100]	training's l1: 0.201795	valid_1's l1: 0.221322
[1200]	training's l1: 0.196162	valid_1's l1: 0.216884
[1300]	training's l1: 0.191147	valid_1's l1: 0.212921
[1400]	training's l1: 0.186738	valid_1's l1: 0.209559
[1500]	training's l1: 0.18249	valid_1's l1: 0.20628
[1600]	training's l1: 0.178511	valid_1's l1: 0.203251
[1700]	training's l1: 0.174917	valid_1's l1: 0.20059
[1800]	training's l1: 0.17129	v

3JHH Fold 0, logMAE: -2.003478115268563


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.376771	valid_1's l1: 0.378257
[200]	training's l1: 0.319146	valid_1's l1: 0.322653
[300]	training's l1: 0.289618	valid_1's l1: 0.294949
[400]	training's l1: 0.269405	valid_1's l1: 0.276518
[500]	training's l1: 0.254275	valid_1's l1: 0.26288
[600]	training's l1: 0.241961	valid_1's l1: 0.252195
[700]	training's l1: 0.231515	valid_1's l1: 0.243106
[800]	training's l1: 0.223197	valid_1's l1: 0.236122
[900]	training's l1: 0.21557	valid_1's l1: 0.229846
[1000]	training's l1: 0.208915	valid_1's l1: 0.224449
[1100]	training's l1: 0.202988	valid_1's l1: 0.219687
[1200]	training's l1: 0.197464	valid_1's l1: 0.215354
[1300]	training's l1: 0.192419	valid_1's l1: 0.211324
[1400]	training's l1: 0.188088	valid_1's l1: 0.208037
[1500]	training's l1: 0.183755	valid_1's l1: 0.204664
[1600]	training's l1: 0.179601	valid_1's l1: 0.201479
[1700]	training's l1: 0.175954	valid_1's l1: 0.198792
[1800]	training's l1: 0.17241

[15000]	training's l1: 0.058013	valid_1's l1: 0.134089
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.058013	valid_1's l1: 0.134089
3JHH Fold 1, logMAE: -2.0092484053706787


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.378315	valid_1's l1: 0.382553
[200]	training's l1: 0.318312	valid_1's l1: 0.324329
[300]	training's l1: 0.288129	valid_1's l1: 0.296012
[400]	training's l1: 0.267997	valid_1's l1: 0.277468
[500]	training's l1: 0.253387	valid_1's l1: 0.264297
[600]	training's l1: 0.240874	valid_1's l1: 0.253042
[700]	training's l1: 0.230788	valid_1's l1: 0.244325
[800]	training's l1: 0.221975	valid_1's l1: 0.236827
[900]	training's l1: 0.214459	valid_1's l1: 0.230602
[1000]	training's l1: 0.207571	valid_1's l1: 0.2249
[1100]	training's l1: 0.20137	valid_1's l1: 0.219791
[1200]	training's l1: 0.195938	valid_1's l1: 0.215394
[1300]	training's l1: 0.190994	valid_1's l1: 0.211487
[1400]	training's l1: 0.186342	valid_1's l1: 0.207845
[1500]	training's l1: 0.18232	valid_1's l1: 0.204807
[1600]	training's l1: 0.178433	valid_1's l1: 0.201908
[1700]	training's l1: 0.174741	valid_1's l1: 0.199124
[1800]	training's l1: 0.171397	

[15000]	training's l1: 0.0578551	valid_1's l1: 0.134859
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0578551	valid_1's l1: 0.134859
3JHH Fold 2, logMAE: -2.0035580596821254


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.380703	valid_1's l1: 0.383405
[200]	training's l1: 0.319611	valid_1's l1: 0.324562
[300]	training's l1: 0.288431	valid_1's l1: 0.295507
[400]	training's l1: 0.26894	valid_1's l1: 0.27771
[500]	training's l1: 0.254069	valid_1's l1: 0.26412
[600]	training's l1: 0.242293	valid_1's l1: 0.253918
[700]	training's l1: 0.231842	valid_1's l1: 0.244972
[800]	training's l1: 0.223571	valid_1's l1: 0.237973
[900]	training's l1: 0.215717	valid_1's l1: 0.231348
[1000]	training's l1: 0.208983	valid_1's l1: 0.225824
[1100]	training's l1: 0.202687	valid_1's l1: 0.220685
[1200]	training's l1: 0.19728	valid_1's l1: 0.216326
[1300]	training's l1: 0.19234	valid_1's l1: 0.212498
[1400]	training's l1: 0.187594	valid_1's l1: 0.208852
[1500]	training's l1: 0.183193	valid_1's l1: 0.205478
[1600]	training's l1: 0.179172	valid_1's l1: 0.202441
[1700]	training's l1: 0.175501	valid_1's l1: 0.199696
[1800]	training's l1: 0.172103	v

[15000]	training's l1: 0.0580877	valid_1's l1: 0.134645
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0580877	valid_1's l1: 0.134645
3JHH Fold 3, logMAE: -2.0050935430519545


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.379574	valid_1's l1: 0.382308
[200]	training's l1: 0.318597	valid_1's l1: 0.32286
[300]	training's l1: 0.288961	valid_1's l1: 0.294859
[400]	training's l1: 0.26903	valid_1's l1: 0.276513
[500]	training's l1: 0.254667	valid_1's l1: 0.263507
[600]	training's l1: 0.24226	valid_1's l1: 0.252411
[700]	training's l1: 0.232258	valid_1's l1: 0.243843
[800]	training's l1: 0.223712	valid_1's l1: 0.236615
[900]	training's l1: 0.215865	valid_1's l1: 0.229971
[1000]	training's l1: 0.209052	valid_1's l1: 0.224304
[1100]	training's l1: 0.203313	valid_1's l1: 0.219728
[1200]	training's l1: 0.197736	valid_1's l1: 0.215256
[1300]	training's l1: 0.192686	valid_1's l1: 0.211327
[1400]	training's l1: 0.187984	valid_1's l1: 0.207582
[1500]	training's l1: 0.183822	valid_1's l1: 0.204447
[1600]	training's l1: 0.180033	valid_1's l1: 0.201561
[1700]	training's l1: 0.176212	valid_1's l1: 0.198708
[1800]	training's l1: 0.1728	v

[15000]	training's l1: 0.0582905	valid_1's l1: 0.134014
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0582905	valid_1's l1: 0.134014
3JHH Fold 4, logMAE: -2.0098080705694343
*** Training Model for 3JHC ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.75675	valid_1's l1: 0.7586
[200]	training's l1: 0.656663	valid_1's l1: 0.661371
[300]	training's l1: 0.604582	valid_1's l1: 0.611244
[400]	training's l1: 0.569169	valid_1's l1: 0.577726
[500]	training's l1: 0.541757	valid_1's l1: 0.551924
[600]	training's l1: 0.518778	valid_1's l1: 0.530392
[700]	training's l1: 0.500728	valid_1's l1: 0.513931
[800]	training's l1: 0.485576	valid_1's l1: 0.499936
[900]	training's l1: 0.471816	valid_1's l1: 0.487378
[1000]	training's l1: 0.460479	valid_1's l1: 0.477197
[1100]	training's l1: 0.449898	valid_1's l1: 0.467738
[1200]	training's l1: 0.440026	valid_1's l1: 0.458883
[1300]	training's l1: 0.431674	valid_1's l1: 0.451535
[1400]	training's l1: 0.424047	valid_1's l1: 0.444918
[1500]	training's l1: 0.416809	valid_1's l1: 0.438617
[1600]	training's l1: 0.410252	valid_1's l1: 0.432945
[1700]	training's l1: 0.404017	valid_1's l1: 0.42759
[1800]	training's l1: 0.398401	

3JHC Fold 0, logMAE: -1.2575690817773812


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.754785	valid_1's l1: 0.756464
[200]	training's l1: 0.655291	valid_1's l1: 0.658219
[300]	training's l1: 0.60353	valid_1's l1: 0.607626
[400]	training's l1: 0.566568	valid_1's l1: 0.5719
[500]	training's l1: 0.538824	valid_1's l1: 0.54565
[600]	training's l1: 0.517555	valid_1's l1: 0.525734
[700]	training's l1: 0.499342	valid_1's l1: 0.508785
[800]	training's l1: 0.484854	valid_1's l1: 0.495434
[900]	training's l1: 0.471189	valid_1's l1: 0.482889
[1000]	training's l1: 0.459986	valid_1's l1: 0.47277
[1100]	training's l1: 0.449901	valid_1's l1: 0.463677
[1200]	training's l1: 0.440206	valid_1's l1: 0.454958
[1300]	training's l1: 0.431588	valid_1's l1: 0.447303
[1400]	training's l1: 0.423823	valid_1's l1: 0.440452
[1500]	training's l1: 0.416507	valid_1's l1: 0.434079
[1600]	training's l1: 0.410027	valid_1's l1: 0.428591
[1700]	training's l1: 0.403905	valid_1's l1: 0.423418
[1800]	training's l1: 0.397992	v

3JHC Fold 1, logMAE: -1.2626838003179817


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.755539	valid_1's l1: 0.758923
[200]	training's l1: 0.656652	valid_1's l1: 0.661301
[300]	training's l1: 0.602209	valid_1's l1: 0.607898
[400]	training's l1: 0.568084	valid_1's l1: 0.574864
[500]	training's l1: 0.539603	valid_1's l1: 0.547599
[600]	training's l1: 0.517839	valid_1's l1: 0.526887
[700]	training's l1: 0.49958	valid_1's l1: 0.509896
[800]	training's l1: 0.484285	valid_1's l1: 0.495761
[900]	training's l1: 0.47139	valid_1's l1: 0.483911
[1000]	training's l1: 0.459859	valid_1's l1: 0.473496
[1100]	training's l1: 0.449784	valid_1's l1: 0.464425
[1200]	training's l1: 0.440516	valid_1's l1: 0.456104
[1300]	training's l1: 0.431809	valid_1's l1: 0.448409
[1400]	training's l1: 0.42418	valid_1's l1: 0.441803
[1500]	training's l1: 0.417117	valid_1's l1: 0.435708
[1600]	training's l1: 0.410595	valid_1's l1: 0.430115
[1700]	training's l1: 0.404368	valid_1's l1: 0.424738
[1800]	training's l1: 0.398574

3JHC Fold 2, logMAE: -1.2615989062104886


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.758621	valid_1's l1: 0.764276
[200]	training's l1: 0.654844	valid_1's l1: 0.660901
[300]	training's l1: 0.601884	valid_1's l1: 0.608921
[400]	training's l1: 0.564646	valid_1's l1: 0.572715
[500]	training's l1: 0.538462	valid_1's l1: 0.547456
[600]	training's l1: 0.517108	valid_1's l1: 0.527234
[700]	training's l1: 0.499364	valid_1's l1: 0.510387
[800]	training's l1: 0.485285	valid_1's l1: 0.497353
[900]	training's l1: 0.471701	valid_1's l1: 0.484798
[1000]	training's l1: 0.460229	valid_1's l1: 0.474439
[1100]	training's l1: 0.449953	valid_1's l1: 0.465111
[1200]	training's l1: 0.440576	valid_1's l1: 0.456807
[1300]	training's l1: 0.432603	valid_1's l1: 0.449797
[1400]	training's l1: 0.42487	valid_1's l1: 0.443106
[1500]	training's l1: 0.417752	valid_1's l1: 0.436948
[1600]	training's l1: 0.411262	valid_1's l1: 0.431379
[1700]	training's l1: 0.405263	valid_1's l1: 0.426327
[1800]	training's l1: 0.3993

3JHC Fold 3, logMAE: -1.2597712817649032


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.754271	valid_1's l1: 0.755367
[200]	training's l1: 0.655242	valid_1's l1: 0.658393
[300]	training's l1: 0.601707	valid_1's l1: 0.606727
[400]	training's l1: 0.564993	valid_1's l1: 0.571581
[500]	training's l1: 0.538493	valid_1's l1: 0.546621
[600]	training's l1: 0.51747	valid_1's l1: 0.526935
[700]	training's l1: 0.498777	valid_1's l1: 0.509522
[800]	training's l1: 0.484622	valid_1's l1: 0.496487
[900]	training's l1: 0.470838	valid_1's l1: 0.483757
[1000]	training's l1: 0.458916	valid_1's l1: 0.473076
[1100]	training's l1: 0.448728	valid_1's l1: 0.464011
[1200]	training's l1: 0.439439	valid_1's l1: 0.455729
[1300]	training's l1: 0.430884	valid_1's l1: 0.448082
[1400]	training's l1: 0.423516	valid_1's l1: 0.441657
[1500]	training's l1: 0.416487	valid_1's l1: 0.435549
[1600]	training's l1: 0.410065	valid_1's l1: 0.430051
[1700]	training's l1: 0.403837	valid_1's l1: 0.424647
[1800]	training's l1: 0.3979

3JHC Fold 4, logMAE: -1.263154924423659
*** Training Model for 3JHN ***


  mask |= (ar1 == a)
New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.245052	valid_1's l1: 0.254022
[200]	training's l1: 0.201821	valid_1's l1: 0.214953
[300]	training's l1: 0.177824	valid_1's l1: 0.194551
[400]	training's l1: 0.161824	valid_1's l1: 0.181455
[500]	training's l1: 0.14943	valid_1's l1: 0.171749
[600]	training's l1: 0.139618	valid_1's l1: 0.16435
[700]	training's l1: 0.131363	valid_1's l1: 0.158483
[800]	training's l1: 0.124386	valid_1's l1: 0.153519
[900]	training's l1: 0.118434	valid_1's l1: 0.149642
[1000]	training's l1: 0.112999	valid_1's l1: 0.14605
[1100]	training's l1: 0.108169	valid_1's l1: 0.142964
[1200]	training's l1: 0.103875	valid_1's l1: 0.140286
[1300]	training's l1: 0.0999727	valid_1's l1: 0.137877
[1400]	training's l1: 0.0964001	valid_1's l1: 0.135805
[1500]	training's l1: 0.0932088	valid_1's l1: 0.134064
[1600]	training's l1: 0.0901698	valid_1's l1: 0.132458
[1700]	training's l1: 0.0873086	valid_1's l1: 0.130838
[1800]	training's l1: 0.0

[14900]	training's l1: 0.012536	valid_1's l1: 0.10442
[15000]	training's l1: 0.0124147	valid_1's l1: 0.104392
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0124147	valid_1's l1: 0.104392
3JHN Fold 0, logMAE: -2.2596033872504275


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.246936	valid_1's l1: 0.25374
[200]	training's l1: 0.203471	valid_1's l1: 0.214059
[300]	training's l1: 0.178559	valid_1's l1: 0.192565
[400]	training's l1: 0.162628	valid_1's l1: 0.179594
[500]	training's l1: 0.150121	valid_1's l1: 0.16995
[600]	training's l1: 0.140294	valid_1's l1: 0.162772
[700]	training's l1: 0.132112	valid_1's l1: 0.156889
[800]	training's l1: 0.124993	valid_1's l1: 0.151843
[900]	training's l1: 0.119094	valid_1's l1: 0.147913
[1000]	training's l1: 0.113663	valid_1's l1: 0.144381
[1100]	training's l1: 0.10902	valid_1's l1: 0.141566
[1200]	training's l1: 0.104602	valid_1's l1: 0.138897
[1300]	training's l1: 0.100567	valid_1's l1: 0.136376
[1400]	training's l1: 0.0970966	valid_1's l1: 0.134377
[1500]	training's l1: 0.0938106	valid_1's l1: 0.132566
[1600]	training's l1: 0.090806	valid_1's l1: 0.130859
[1700]	training's l1: 0.0880181	valid_1's l1: 0.129436
[1800]	training's l1: 0.085

[14900]	training's l1: 0.0125251	valid_1's l1: 0.102238
[15000]	training's l1: 0.0123983	valid_1's l1: 0.102219
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0123983	valid_1's l1: 0.102219
3JHN Fold 1, logMAE: -2.28063630117181


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.245869	valid_1's l1: 0.24989
[200]	training's l1: 0.201671	valid_1's l1: 0.210456
[300]	training's l1: 0.177836	valid_1's l1: 0.191114
[400]	training's l1: 0.161364	valid_1's l1: 0.178552
[500]	training's l1: 0.14883	valid_1's l1: 0.169076
[600]	training's l1: 0.139242	valid_1's l1: 0.162176
[700]	training's l1: 0.131075	valid_1's l1: 0.15645
[800]	training's l1: 0.124211	valid_1's l1: 0.151726
[900]	training's l1: 0.118295	valid_1's l1: 0.147956
[1000]	training's l1: 0.113119	valid_1's l1: 0.144715
[1100]	training's l1: 0.108529	valid_1's l1: 0.141831
[1200]	training's l1: 0.10426	valid_1's l1: 0.139324
[1300]	training's l1: 0.100306	valid_1's l1: 0.137067
[1400]	training's l1: 0.0966595	valid_1's l1: 0.135043
[1500]	training's l1: 0.0934315	valid_1's l1: 0.133258
[1600]	training's l1: 0.0903962	valid_1's l1: 0.131695
[1700]	training's l1: 0.0875826	valid_1's l1: 0.130133
[1800]	training's l1: 0.084

[15000]	training's l1: 0.0123444	valid_1's l1: 0.103739
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0123444	valid_1's l1: 0.103739
3JHN Fold 2, logMAE: -2.2658725275257514


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.247143	valid_1's l1: 0.252515
[200]	training's l1: 0.203008	valid_1's l1: 0.213152
[300]	training's l1: 0.179158	valid_1's l1: 0.193106
[400]	training's l1: 0.163232	valid_1's l1: 0.180301
[500]	training's l1: 0.150657	valid_1's l1: 0.17038
[600]	training's l1: 0.140869	valid_1's l1: 0.163114
[700]	training's l1: 0.132597	valid_1's l1: 0.156996
[800]	training's l1: 0.125393	valid_1's l1: 0.151856
[900]	training's l1: 0.119193	valid_1's l1: 0.147607
[1000]	training's l1: 0.113813	valid_1's l1: 0.14402
[1100]	training's l1: 0.10911	valid_1's l1: 0.141019
[1200]	training's l1: 0.104801	valid_1's l1: 0.138334
[1300]	training's l1: 0.100936	valid_1's l1: 0.135918
[1400]	training's l1: 0.0974138	valid_1's l1: 0.133801
[1500]	training's l1: 0.0941216	valid_1's l1: 0.131868
[1600]	training's l1: 0.0910172	valid_1's l1: 0.130242
[1700]	training's l1: 0.0880965	valid_1's l1: 0.128612
[1800]	training's l1: 0.08

[14900]	training's l1: 0.0125055	valid_1's l1: 0.101944
[15000]	training's l1: 0.0123849	valid_1's l1: 0.101921
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0123849	valid_1's l1: 0.101921
3JHN Fold 3, logMAE: -2.28355338493762


New categorical_feature is ['atom_10', 'atom_2', 'atom_3', 'atom_4', 'atom_5', 'atom_6', 'atom_7', 'atom_8', 'atom_9']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 1000 rounds.
[100]	training's l1: 0.246235	valid_1's l1: 0.257501
[200]	training's l1: 0.201345	valid_1's l1: 0.215582
[300]	training's l1: 0.177415	valid_1's l1: 0.194626
[400]	training's l1: 0.160859	valid_1's l1: 0.180704
[500]	training's l1: 0.148345	valid_1's l1: 0.170813
[600]	training's l1: 0.138207	valid_1's l1: 0.162923
[700]	training's l1: 0.130189	valid_1's l1: 0.157048
[800]	training's l1: 0.12356	valid_1's l1: 0.15228
[900]	training's l1: 0.117629	valid_1's l1: 0.148091
[1000]	training's l1: 0.112371	valid_1's l1: 0.144632
[1100]	training's l1: 0.107777	valid_1's l1: 0.141745
[1200]	training's l1: 0.103663	valid_1's l1: 0.139335
[1300]	training's l1: 0.100042	valid_1's l1: 0.137185
[1400]	training's l1: 0.0964206	valid_1's l1: 0.135067
[1500]	training's l1: 0.0931597	valid_1's l1: 0.133077
[1600]	training's l1: 0.0901067	valid_1's l1: 0.131298
[1700]	training's l1: 0.0873327	valid_1's l1: 0.129776
[1800]	training's l1: 0.0

[15000]	training's l1: 0.0123662	valid_1's l1: 0.103232
Did not meet early stopping. Best iteration is:
[15000]	training's l1: 0.0123662	valid_1's l1: 0.103232
3JHN Fold 4, logMAE: -2.270774089701595


In [7]:
# Tabulate the CV score recorded for each coupling type: one row per entry of `cv_scores`.
pd.DataFrame(list(cv_scores.items()), columns=['type', 'cv_score'])

Unnamed: 0,type,cv_score
0,1JHN,-1.106502
1,1JHC,-0.390804
2,2JHH,-1.966835
3,2JHN,-2.076056
4,2JHC,-1.387011
5,3JHH,-2.006237
6,3JHC,-1.260956
7,3JHN,-2.272088


Mean CV score across all coupling types:

In [8]:
# Collapse the per-type CV scores into a single unweighted average.
np.asarray(list(cv_scores.values())).mean()

-1.558311090702058

Sanity check: every row of the submission should have been filled with a prediction, so no `scalar_coupling_constant` value should still be exactly 0:

In [9]:
# Select the rows whose prediction is exactly 0; the first element of the shape is how many there are.
submission.loc[submission['scalar_coupling_constant'].eq(0)].shape

(0, 1)

In [10]:
# Preview the first ten predictions.
submission.iloc[:10]

Unnamed: 0_level_0,scalar_coupling_constant
id,Unnamed: 1_level_1
4658147,12.797239
4658148,164.06311
4658149,2.448566
4658150,164.06311
4658151,12.797239
4658152,91.457123
4658153,2.108005
4658154,-7.776263
4658155,-9.894598
4658156,91.55661


In [11]:
# Write the final predictions to disk. The output directory is created first
# (no-op if it already exists) so that a missing folder cannot make to_csv
# fail after the whole training run has completed.
os.makedirs(Config.SUBMISSIONS_PATH, exist_ok=True)
# to_csv writes the index by default, so the `id` index becomes the first column.
submission.to_csv(f'{Config.SUBMISSIONS_PATH}/submission.csv')

## a!!!