In [0]:
from google.colab import drive
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive


In [0]:
%cd "/gdrive/My Drive"

/gdrive/My Drive


In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.base import clone
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import math

from tqdm import tqdm
import joblib
import gc

## config

In [0]:
INPUT = './analysis/mole/data/raw/'
TRAIN_PATH = INPUT + 'train.csv'
TEST_PATH = INPUT + 'test.csv'
PREPROCESS = './analysis/mole/data/preprocess/'

MID_MODEL_PATH = PREPROCESS + 'middle_model.pkl'
MODEL_PATH = PREPROCESS + 'model.pkl'
ENCODER_PATH = PREPROCESS + 'le.pkl'

RUN_PLOT = True
TARGET = 'scalar_coupling_constant'
N_FOLDS = 3

atom_weight = {'H': 1.008, 'C': 12.01, 'N': 14.01, 'O':16.00}

## logging

In [0]:
import logging
import logging.handlers


def create_logger(log_file_name):
    logger_ = logging.getLogger('main')
    logger_.setLevel(logging.DEBUG)
    fh = logging.handlers.RotatingFileHandler(
        log_file_name, maxBytes=100000, backupCount=8)
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '[%(levelname)s]%(asctime)s:%(name)s:%(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    # add the handlers to the logger
    logger_.addHandler(fh)
    logger_.addHandler(ch)


def get_logger():
    return logging.getLogger('main')

In [0]:
create_logger('mole.log')

## util

In [0]:
def onehot(_df):
    cat_names = [name for name, col in _df.iteritems() if col.dtype == 'O']
    df_cat = pd.get_dummies(_df[cat_names])
    _df = pd.concat([_df, df_cat], axis=1).drop(cat_names, axis=1)
    return _df

def label_encode(df):
    cat_names = [name for name, col in df.iteritems() if col.dtype == 'O']    
    for cat_name in cat_names:
        print(cat_name)
        le = LabelEncoder()
        le.fit(df[cat_name].values)
        df[cat_name] = le.transform(df[cat_name].values)
    return df

class Encoder:
    def __init__(self):        
        self.encoders = {}
    
    def fit(self, df, cat_names):
        for cat_name in cat_names:
            le = LabelEncoder()
            le.fit(df[cat_name].values)
            self.encoders[cat_name] = le        
    
    def transform(self, df):
        for cat_name in self.encoders.keys():            
            df[cat_name] = self.encoders[cat_name].transform(df[cat_name].values)
            
        return df


def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: 
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    
    return df

## Preprocess

In [0]:
def map_atom_info(df, strct, atom_idx):
    df = pd.merge(df, strct, how = 'left',
                  left_on  = ['molecule_name', f'atom_index_{atom_idx}'],
                  right_on = ['molecule_name',  'atom_index'])
    
    df = df.drop('atom_index', axis=1)
    df = df.rename(columns={'atom': f'atom_{atom_idx}',
                            'x': f'x_{atom_idx}',
                            'y': f'y_{atom_idx}',
                            'z': f'z_{atom_idx}'})
    return df

def calc_dist(df):
    p_0 = df[['x_0', 'y_0', 'z_0']].values
    p_1 = df[['x_1', 'y_1', 'z_1']].values

    df['dist'] = np.linalg.norm(p_0 - p_1, axis=1)
    df['dist_x'] = (df['x_0'] - df['x_1']) ** 2
    df['dist_y'] = (df['y_0'] - df['y_1']) ** 2
    df['dist_z'] = (df['z_0'] - df['z_1']) ** 2

    return df

def divide_type(df):    
    df['type_0'] = df['type'].apply(lambda x: x[0])
    df['type_1'] = df['type'].apply(lambda x: x[1:])
    return df

In [0]:
def feature_engineering(df):
    print("Starting Feature Engineering...")
    g = df.groupby('molecule_name')
    g1 = df.groupby(['molecule_name', 'atom_index_0'])
    g2 = df.groupby(['molecule_name', 'atom_index_1'])
    g3 = df.groupby(['molecule_name', 'atom_1'])
    g4 = df.groupby(['molecule_name', 'type_0'])
    g5 = df.groupby(['molecule_name', 'type'])
    
    df['type_0'] = df['type'].apply(lambda x: x[0])
    df['molecule_couples'] = g['id'].transform('count')
    df['molecule_dist_mean'] = g['dist'].transform('mean')
    df['molecule_dist_min'] = g['dist'].transform('min')
    df['molecule_dist_max'] = g['dist'].transform('max')
    df['atom_0_couples_count'] = g1['id'].transform('count')
    df['atom_1_couples_count'] = g2['id'].transform('count')
    df[f'molecule_atom_index_0_x_1_std'] = g1['x_1'].transform('std')
    df[f'molecule_atom_index_0_y_1_mean'] = g1['y_1'].transform('mean')
    df[f'molecule_atom_index_0_y_1_mean_diff'] = df[f'molecule_atom_index_0_y_1_mean'] - df['y_1']
    df[f'molecule_atom_index_0_y_1_mean_div'] = df[f'molecule_atom_index_0_y_1_mean'] / df['y_1']
    df[f'molecule_atom_index_0_y_1_max'] = g1['y_1'].transform('max')
    df[f'molecule_atom_index_0_y_1_max_diff'] = df[f'molecule_atom_index_0_y_1_max'] - df['y_1']
    df[f'molecule_atom_index_0_y_1_std'] = g1['y_1'].transform('std')
    df[f'molecule_atom_index_0_z_1_std'] = g1['z_1'].transform('std')
    df[f'molecule_atom_index_0_dist_mean'] = g1['dist'].transform('mean')
    df[f'molecule_atom_index_0_dist_mean_diff'] = df[f'molecule_atom_index_0_dist_mean'] - df['dist']
    df[f'molecule_atom_index_0_dist_mean_div'] = df[f'molecule_atom_index_0_dist_mean'] / df['dist']
    df[f'molecule_atom_index_0_dist_max'] = g1['dist'].transform('max')
    df[f'molecule_atom_index_0_dist_max_diff'] = df[f'molecule_atom_index_0_dist_max'] - df['dist']
    df[f'molecule_atom_index_0_dist_max_div'] = df[f'molecule_atom_index_0_dist_max'] / df['dist']
    df[f'molecule_atom_index_0_dist_min'] = g1['dist'].transform('min')
    df[f'molecule_atom_index_0_dist_min_diff'] = df[f'molecule_atom_index_0_dist_min'] - df['dist']
    df[f'molecule_atom_index_0_dist_min_div'] = df[f'molecule_atom_index_0_dist_min'] / df['dist']
    df[f'molecule_atom_index_0_dist_std'] = g1['dist'].transform('std')
    df[f'molecule_atom_index_0_dist_std_diff'] = df[f'molecule_atom_index_0_dist_std'] - df['dist']
    df[f'molecule_atom_index_0_dist_std_div'] = df[f'molecule_atom_index_0_dist_std'] / df['dist']
    df[f'molecule_atom_index_1_dist_mean'] = g2['dist'].transform('mean')
    df[f'molecule_atom_index_1_dist_mean_diff'] = df[f'molecule_atom_index_1_dist_mean'] - df['dist']
    df[f'molecule_atom_index_1_dist_mean_div'] = df[f'molecule_atom_index_1_dist_mean'] / df['dist']
    df[f'molecule_atom_index_1_dist_max'] = g2['dist'].transform('max')
    df[f'molecule_atom_index_1_dist_max_diff'] = df[f'molecule_atom_index_1_dist_max'] - df['dist']
    df[f'molecule_atom_index_1_dist_max_div'] = df[f'molecule_atom_index_1_dist_max'] / df['dist']
    df[f'molecule_atom_index_1_dist_min'] = g2['dist'].transform('min')
    df[f'molecule_atom_index_1_dist_min_diff'] = df[f'molecule_atom_index_1_dist_min'] - df['dist']
    df[f'molecule_atom_index_1_dist_min_div'] = df[f'molecule_atom_index_1_dist_min'] / df['dist']
    df[f'molecule_atom_index_1_dist_std'] = g2['dist'].transform('std')
    df[f'molecule_atom_index_1_dist_std_diff'] = df[f'molecule_atom_index_1_dist_std'] - df['dist']
    df[f'molecule_atom_index_1_dist_std_div'] = df[f'molecule_atom_index_1_dist_std'] / df['dist']
    df[f'molecule_atom_1_dist_mean'] = g3['dist'].transform('mean')
    df[f'molecule_atom_1_dist_min'] = g3['dist'].transform('min')
    df[f'molecule_atom_1_dist_min_diff'] = df[f'molecule_atom_1_dist_min'] - df['dist']
    df[f'molecule_atom_1_dist_min_div'] = df[f'molecule_atom_1_dist_min'] / df['dist']
    df[f'molecule_atom_1_dist_std'] = g3['dist'].transform('std')
    df[f'molecule_atom_1_dist_std_diff'] = df[f'molecule_atom_1_dist_std'] - df['dist']
    df[f'molecule_type_0_dist_std'] = g4['dist'].transform('std')
    df[f'molecule_type_0_dist_std_diff'] = df[f'molecule_type_0_dist_std'] - df['dist']
    df[f'molecule_type_dist_mean'] = g5['dist'].transform('mean')
    df[f'molecule_type_dist_mean_diff'] = df[f'molecule_type_dist_mean'] - df['dist']
    df[f'molecule_type_dist_mean_div'] = df[f'molecule_type_dist_mean'] / df['dist']
    df[f'molecule_type_dist_max'] = g5['dist'].transform('max')
    df[f'molecule_type_dist_min'] = g5['dist'].transform('min')
    df[f'molecule_type_dist_std'] = g5['dist'].transform('std')
    df[f'molecule_type_dist_std_diff'] = df[f'molecule_type_dist_std'] - df['dist']

    # TODO: back
    # df = reduce_mem_usage(df)
    
    return df

In [0]:
def add_1j(df):
    get_logger().info('load df_1j')
    
    df_1j = joblib.load(PREPROCESS + 'df_1j.pkl')
    
    df = df.merge(df_1j, on=['molecule_name', 'atom_index_0', 'atom_index_1'], how='left') 
    
    return df


def add_2j_center_atom(df):    
    get_logger().info('load df_2jsim')
    
    df_2j = joblib.load(PREPROCESS + 'df_2jsim.pkl')  
    
    # atom weight
    df_2j['2j_atom_center_weight'] = df_2j['2j_atom_center'].replace(atom_weight)
    
    # sum of norm
    df_2j['2j_sum_norm_vec'] = df_2j['2j_norm_vec_02'] + df_2j['2j_norm_vec_12']
    
    df = df.merge(df_2j, on=['molecule_name', 'atom_index_0', 'atom_index_1'], how='left')    
    
    # replace missing vlaue to 'nan' for LabelEncoder
    df.loc[df['2j_atom_center'].isnull(), '2j_atom_center'] = 'nan'
    
    return df

def str_sort(s):
    """
    Parameters
    ----------
    x: str   
    """
    # print(s)
    if not isinstance(s, str):
        return s
    elif s[0] > s[1]:
        return s[1] + s[0]
    else:
        return s

def add_3j_center_atom(df):    
    get_logger().info('load df_3jsim')
    
    df_3j = joblib.load(PREPROCESS + 'df_3jsim.pkl')
    
    # atom weight
    s_atom_w0 = df_3j['3j_atom_center_0'].replace(atom_weight)
    s_atom_w1 = df_3j['3j_atom_center_1'].replace(atom_weight)
    df_3j['3j_atom_center_weight'] = s_atom_w0 + s_atom_w1

    # concatenate atom string 'C' + 'C' - > 'CC'
    tmp = df_3j['3j_atom_center_0'] + df_3j['3j_atom_center_1']
    df_3j['3j_atom_center'] = tmp.transform(str_sort)    
    df_3j.drop(['3j_atom_center_0', '3j_atom_center_1'], axis=1, inplace=True)
    
    # sum norm_vec
    df_3j['3j_sum_norm_vec'] = df_3j['3j_norm_vec_02'] + df_3j['3j_norm_vec_13'] + df_3j['3j_norm_vec_23']
    
    df = df.merge(df_3j, on=['molecule_name', 'atom_index_0', 'atom_index_1'], how='left')    
    
    # replace missing vlaue to 'nan' for LabelEncoder
    df.loc[df['3j_atom_center'].isnull(), '3j_atom_center'] = 'nan'    
    
    return df

In [0]:
def drop_col(df_org):
    df = df_org.copy()
    to_drop = ['id', 'molecule_name', 'atom_index_0', 'atom_index_1',
               'x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', #'dist_x', 'dist_y', 'dist_z',
               'atom_0', 'atom_1'
              ]
    df = df.drop(to_drop, axis=1)
    
    return df

In [0]:
def group_mean_log_mae(y_true, y_pred, types, floor=1e-9):
    """
    Fast metric computation for this competition: https://www.kaggle.com/c/champs-scalar-coupling
    Code is from this kernel: https://www.kaggle.com/uberkinder/efficient-metric
    """
    maes = (y_true-y_pred).abs().groupby(types).mean()
    return np.log(maes.map(lambda x: max(x, floor))).mean()

def oof_train(_X, _y, _types):
    """
    Parameters
    ----------
    _X: pd.DataFrame, shape [n_samples, n_features]
    _y: array-like object, shape [n_samples]
    _types: array-like object, shsape [n_samples]
        array of `type` (e.g. 2JHC, 1JHC, 3JHH, etc.)
    """
    # TODO: divide data to training and validation about molecular
    
    models = []
    df_scores = pd.DataFrame(columns=['valid_score'])
    df_pred = pd.DataFrame(index=_X.index).reset_index(drop=True)

    fold = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1)
    for n_fold, (train_idx, valid_idx) in enumerate(fold.split(_X, _types)):
        # prepare data
        X_train, y_train = _X.iloc[train_idx], _y.iloc[train_idx]
        X_valid, y_valid = _X.iloc[valid_idx], _y.iloc[valid_idx]
        print('mean of target. train:{}, valid:{}'.format(y_train.mean(), y_valid.mean()))

        # generate model
        model = gen_model(_X)
        
        # train
        model.fit(X_train, y_train, eval_metric='mae',
                  eval_set=[(X_train, y_train), (X_valid, y_valid)],
                  verbose=100,
                  early_stopping_rounds=100
                  )
        
        # validate
        y_pred = model.predict(X_valid, num_iteration=model.best_iteration_)
        
        types_valid = _types.iloc[valid_idx]
        valid_score = group_mean_log_mae(y_valid, y_pred, types_valid)
        get_logger().info('fold %d valid %f' % (n_fold+1, valid_score))
        
        df_scores = df_scores.append(pd.Series([valid_score], index=['valid_score']), ignore_index=True)
        df_pred.loc[valid_idx, 'proba'] = y_pred
        df_pred.loc[valid_idx, 'y_true'] = y_valid
        models.append(model)
        
        # TODO: back
        # break
    get_logger().info('CV score: %f' % df_scores.mean()[0])
    
    return models, df_scores, df_pred

def oof_predict(_models, _X):
    get_logger().info('Start oof_predict')
    y_pred = np.zeros(_X.shape[0])
        
    for i, model in enumerate(_models):
        get_logger().info('prediction: %d' % i)
        y_pred += model.predict(_X) / len(_models)
    
    get_logger().info('Finish oof_predict')
    return y_pred


def gen_model(_X):
    n_features = _X.shape[1]
    colsample_rate = max(0.7, math.sqrt(n_features)/n_features)
    
    _model = lgb.LGBMRegressor(
        learning_rate=0.2,
        n_estimators=1500,
        num_leaves=128,
        # min_child_weight=15, # good value: 0, 5, 15, 300
        min_child_samples=80,
        subsample=0.7,
        colsample_bytree=1, #colsample_rate,
        objective='regression',
        reg_lambda=0.1,
        reg_alpha=0.1,
        seed=2019
        )
    return _model


In [0]:
def preprocess(df, strct, mode, s_type=None):
    """
    Parameters
    ----------
    df: pd.DataFrame
        dataframe of train.csv or test.csv
    strct: pd.DataFrame
        dataframe of structures.csv
    mode: str
        'train' or 'predict'
    s_type: None or pd.Series
        'type' column (e.g. 1JHC, 2JHH).
        If mode is 'train', the s_type must be specified.
    """
    get_logger().info('Start preprocess()')
    df = add_1j(df)
    df = add_2j_center_atom(df)
    df = add_3j_center_atom(df)
    df = map_atom_info(df, strct, 0)
    df = map_atom_info(df, strct, 1)
    df = calc_dist(df)
    df = divide_type(df)
    df = feature_engineering(df)
    
    display(df.head(10))
    display(df.tail(10))
    
    # encode
    if mode == 'train':
        enc = Encoder()
        enc.fit(df, ['type', 'type_0', 'type_1', 
                     '2j_atom_center', '3j_atom_center'])
        joblib.dump(enc, ENCODER_PATH)
    elif mode == 'predict':
        get_logger().info('loading encoder from %s' % ENCODER_PATH)
        enc = joblib.load(ENCODER_PATH)
    df = enc.transform(df)
        
    use_features = [col for col in df.columns if col not in [TARGET]]
    get_logger().info(use_features)
    df[use_features] = reduce_mem_usage(df[use_features])
    # TODO: back
    # df = add_scc_feature(df, 'fc', mode=mode, s_type=s_type)
    
    get_logger().info('Finish preprocess()')
    return df

In [0]:
def drop_uneffect_feature(df):
    """
    Drop uneffective features from dataframe
    """
    for col in df.columns:
        if len(df[col].unique()) == 1:
            df.drop(col, axis=1, inplace=True)
    return df

### fermi constant

In [0]:
class CNTR:
    """Model to predict fc/sd/pso/dso columns"""
    
    def __init__(self, y_col):
        self.y_col = y_col
        
    def train(self, df_org, scc, s_type):
        """
        Parameters
        ----------
        s_type: pd.Series
            'type' column (e.g. 1JHC, 2JHH)
        """
        df = df_org.copy()
        # Merge
        key_cols = ['molecule_name', 'atom_index_0', 'atom_index_1']
        df = df.merge(scc[key_cols + [self.y_col]], how='left', on=key_cols)
        
        # drop unnecessary cols        
        df = drop_col(df)        
        
        y = df[self.y_col].copy()        
        df.drop([TARGET, self.y_col], axis=1, inplace=True)
        X = df
        
        display(X.head())
        display(y.head())
        models, scores, y_pred = oof_train(X, y, s_type)
        
        # save model
        joblib.dump(models, MID_MODEL_PATH)
        
        self.models_ = models
        self.scores_ = scores
        self.y_pred_ = y_pred
        
    def predict(self, df_org):    
        y_pred = np.zeros(df_org.shape[0])
        
        X = df_org.copy()
        X = drop_col(X)
        
        display(X.head())
        # X = self.preprocess(df_org)
        for model in self.models_:            
            y_pred += model.predict(X) / len(models)
        
        return y_pred
    
    def load_model(self):
        # load pkl by joblib
        self.models_ = joblib.load(MID_MODEL_PATH)

In [0]:
def add_scc_feature(df, cntr_name, mode, s_type=None):
    """
    Parameters
    ----------
    cntr_name: str
        'fc', 'sd', 'pso' or 'dso'
    mode: str
        'train' or 'predict'
    s_type: None or pd.Series
        'type' column (e.g. 1JHC, 2JHH).
        If mode is 'train', the s_type must be specified.
    """
    add_feature = '%s_pred' % cntr_name
    cntr = CNTR(cntr_name)
    if mode == 'train': 
        assert s_type is not None, 's_type must be specified.'
        
        get_logger().info('start loading scalar_coupling_contributions')
        scc = pd.read_csv(INPUT + 'scalar_coupling_contributions.csv')
        get_logger().info('finished loading scalar_coupling_contributions')
        
        # train contribution(fc/sd/pso/dso)
        cntr.train(df, scc, s_type)
    
        display(cntr.y_pred_.head())
        df[add_feature] = cntr.y_pred_
    elif mode == 'predict':
        cntr.load_model()
        y_pred = cntr.predict(df)
        df[add_feature] = y_pred
    
    return df

## Train

In [0]:
df_train = pd.read_csv(TRAIN_PATH)
df_strct = pd.read_csv(INPUT + 'structures.csv')

In [0]:
# TODO: remove
# df_train = df_train[(df_train['type']=='1JHC') | (df_train['type']=='1JHN')]

In [0]:
def train_single_model(df, strct):
    # TODO: back
    df = df.head(10000)

    s_type = df['type'].copy()

    df = preprocess(df, strct, mode='train', s_type=s_type)
    df = drop_col(df)

    y = df[TARGET].copy()
    df.drop([TARGET], axis=1, inplace=True)
    X = df
    
    display(X.head())
    display(y.head())
    models, df_scores, df_pred = oof_train(X, y, s_type)

    joblib.dump(models, MODEL_PATH)
    
    return models, df_scores, df_pred

In [0]:
def train_models_each_type(df, strct):
    # TODO:back
    # df = df.head(10000)
    
    s_type = df['type'].copy()
    
    df = preprocess(df, strct, mode='train', s_type=s_type)
    df = drop_col(df)
    
    model_dict = {}
    score_dict = {}
    pred_dict = {}
    coupling_types = s_type.unique()
    for coup_type in coupling_types:
        get_logger().info('Starting train model(%s)' % coup_type)
        is_the_type = (s_type == coup_type)        
        df_type = df[is_the_type.values]
                
        y = df_type[TARGET]
        df_type.drop([TARGET], axis=1, inplace=True)
        X = df_type
        X = drop_uneffect_feature(X)
        
        get_logger().info('features(%s): %s' % (coup_type, str(X.columns.tolist())))
        display(X.head())
        display(y.head())
        models, df_scores, df_pred = oof_train(X, y, _types=s_type[is_the_type].reset_index(drop=True))
        
        model_dict[coup_type] = models
        score_dict[coup_type] = df_scores
        pred_dict[coup_type] = df_pred
        
    joblib.dump(model_dict, MODEL_PATH)
    
    return model_dict, score_dict, pred_dict

In [0]:
# models, df_scores, df_pred = train_single_model(df_train, df_strct)
model_dict, score_dict, pred_dict = train_models_each_type(df_train, df_strct)

[INFO]2019-06-25 15:07:25,997:main:Start preprocess()
[INFO]2019-06-25 15:07:26,249:main:load df_1j
[INFO]2019-06-25 15:07:30,410:main:load df_2jsim
[INFO]2019-06-25 15:07:40,693:main:load df_3jsim


Starting Feature Engineering...


Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,type,scalar_coupling_constant,1j_nbonds,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,atom_0,x_0,y_0,z_0,atom_1,x_1,y_1,z_1,dist,dist_x,dist_y,dist_z,type_0,type_1,molecule_couples,...,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
0,0,dsgdb9nsd_000001,1,0,1JHC,84.8076,4.0,,,,,,,,,,,,,,,,,,,H,0.00215,-0.006031,0.001976,C,-0.012698,1.085804,0.008001,1.091953,0.00022,1.192105,3.6e-05,1,JHC,10,...,0.727957,1.610344,0.518391,1.474738,1.783157,0.691204,1.632998,1.091953,0.0,1.0,0.345594,-0.746359,0.316492,1.09195,-3e-06,0.999997,1.091953,0.0,1.0,1.091946,-6.680479e-06,0.999994,3e-06,-1.09195,3e-06,1.09195,1.091946,-7e-06,0.999994,3e-06,-1.09195,3e-06,-1.09195,1.09195,-3e-06,0.999997,1.091953,1.091946,3e-06,-1.09195
1,1,dsgdb9nsd_000001,1,2,2JHH,-11.257,,C,1.124187,1.091953,1.091952,-0.333287,12.01,2.183905,,,,,,,,,,,,H,0.00215,-0.006031,0.001976,H,1.011731,1.463751,0.000277,1.78312,1.019253,2.160261,3e-06,2,JHH,10,...,0.727957,1.610344,-0.172776,0.903105,1.783157,3.7e-05,1.000021,1.091953,-0.691167,0.612383,0.345594,-1.437526,0.193814,1.78312,0.0,1.0,1.78312,0.0,1.0,1.78312,0.0,1.0,,,,1.783146,1.78312,0.0,1.0,1.4e-05,-1.783106,1.4e-05,-1.783106,1.783146,2.7e-05,1.000015,1.783158,1.78312,1.4e-05,-1.783106
2,2,dsgdb9nsd_000001,1,3,2JHH,-11.2548,,C,1.124162,1.091953,1.091946,-0.333335,12.01,2.183899,,,,,,,,,,,,H,0.00215,-0.006031,0.001976,H,-0.540815,1.447527,-0.876644,1.783147,0.294812,2.112831,0.771973,2,JHH,10,...,0.727957,1.610344,-0.172803,0.903091,1.783157,9e-06,1.000005,1.091953,-0.691194,0.612374,0.345594,-1.437553,0.193811,1.783153,5e-06,1.000003,1.783158,1e-05,1.000006,1.783147,0.0,1.0,7e-06,-1.78314,4e-06,1.783146,1.78312,-2.8e-05,0.999984,1.4e-05,-1.783134,1.4e-05,-1.783134,1.783146,-1e-06,0.999999,1.783158,1.78312,1.4e-05,-1.783134
3,3,dsgdb9nsd_000001,1,4,2JHH,-11.2543,,C,1.124158,1.091953,1.091948,-0.333347,12.01,2.183901,,,,,,,,,,,,H,0.00215,-0.006031,0.001976,H,-0.523814,1.437933,0.906397,1.783157,0.276638,2.085032,0.817978,2,JHH,10,...,0.727957,1.610344,-0.172812,0.903086,1.783157,0.0,1.0,1.091953,-0.691204,0.612371,0.345594,-1.437563,0.19381,1.783151,-6e-06,0.999997,1.783157,0.0,1.0,1.783148,-8.8131e-06,0.999995,5e-06,-1.783152,3e-06,1.783146,1.78312,-3.7e-05,0.999979,1.4e-05,-1.783143,1.4e-05,-1.783143,1.783146,-1e-05,0.999994,1.783158,1.78312,1.4e-05,-1.783143
4,4,dsgdb9nsd_000001,2,0,1JHC,84.8074,4.0,,,,,,,,,,,,,,,,,,,H,1.011731,1.463751,0.000277,C,-0.012698,1.085804,0.008001,1.091952,1.049455,0.142844,6e-05,1,JHC,10,...,0.891529,1.552753,0.460801,1.421998,1.783158,0.691206,1.633001,1.091952,0.0,1.0,0.399065,-0.692886,0.365461,1.09195,-2e-06,0.999998,1.091953,1e-06,1.000001,1.091946,-5.239448e-06,0.999995,3e-06,-1.091948,3e-06,1.09195,1.091946,-5e-06,0.999995,3e-06,-1.091948,3e-06,-1.091948,1.09195,-2e-06,0.999998,1.091953,1.091946,3e-06,-1.091948
5,5,dsgdb9nsd_000001,2,3,2JHH,-11.2541,,C,1.124153,1.091952,1.091946,-0.333352,12.01,2.183898,,,,,,,,,,,,H,1.011731,1.463751,0.000277,H,-0.540815,1.447527,-0.876644,1.783158,2.410399,0.000263,0.768989,2,JHH,10,...,0.891529,1.552753,-0.230405,0.870788,1.783158,0.0,1.0,1.091952,-0.691206,0.61237,0.399065,-1.384092,0.223797,1.783153,-5e-06,0.999997,1.783158,0.0,1.0,1.783147,-1.016344e-05,0.999994,7e-06,-1.78315,4e-06,1.783146,1.78312,-3.8e-05,0.999979,1.4e-05,-1.783144,1.4e-05,-1.783144,1.783146,-1.1e-05,0.999994,1.783158,1.78312,1.4e-05,-1.783144
6,6,dsgdb9nsd_000001,2,4,2JHH,-11.2548,,C,1.124161,1.091952,1.091948,-0.333337,12.01,2.183899,,,,,,,,,,,,H,1.011731,1.463751,0.000277,H,-0.523814,1.437933,0.906397,1.783148,2.357897,0.000667,0.821055,2,JHH,10,...,0.891529,1.552753,-0.230396,0.870793,1.783158,9e-06,1.000005,1.091952,-0.691197,0.612373,0.399065,-1.384083,0.223798,1.783151,3e-06,1.000001,1.783157,8e-06,1.000005,1.783148,-5.221498e-07,1.0,5e-06,-1.783143,3e-06,1.783146,1.78312,-2.9e-05,0.999984,1.4e-05,-1.783135,1.4e-05,-1.783135,1.783146,-2e-06,0.999999,1.783158,1.78312,1.4e-05,-1.783135
7,7,dsgdb9nsd_000001,3,0,1JHC,84.8093,4.0,,,,,,,,,,,,,,,,,,,H,-0.540815,1.447527,-0.876644,C,-0.012698,1.085804,0.008001,1.091946,0.278907,0.130843,0.782596,1,JHC,10,...,0.635262,1.437547,0.345601,1.3165,1.783148,0.691201,1.632999,1.091946,0.0,1.0,0.488753,-0.603193,0.447598,1.09195,3e-06,1.000003,1.091953,7e-06,1.000006,1.091946,0.0,1.0,3e-06,-1.091943,3e-06,1.09195,1.091946,0.0,1.0,3e-06,-1.091943,3e-06,-1.091943,1.09195,3e-06,1.000003,1.091953,1.091946,3e-06,-1.091943
8,8,dsgdb9nsd_000001,3,4,2JHH,-11.2543,,C,1.124153,1.091946,1.091948,-0.333342,12.01,2.183894,,,,,,,,,,,,H,-0.540815,1.447527,-0.876644,H,-0.523814,1.437933,0.906397,1.783148,0.000289,9.2e-05,3.179235,2,JHH,10,...,0.635262,1.437547,-0.345601,0.806185,1.783148,0.0,1.0,1.091946,-0.691201,0.61237,0.488753,-1.294395,0.274096,1.783151,3e-06,1.000002,1.783157,9e-06,1.000005,1.783148,0.0,1.0,5e-06,-1.783143,3e-06,1.783146,1.78312,-2.8e-05,0.999984,1.4e-05,-1.783134,1.4e-05,-1.783134,1.783146,-2e-06,0.999999,1.783158,1.78312,1.4e-05,-1.783134
9,9,dsgdb9nsd_000001,4,0,1JHC,84.8095,4.0,,,,,,,,,,,,,,,,,,,H,-0.523814,1.437933,0.906397,C,-0.012698,1.085804,0.008001,1.091948,0.261239,0.123994,0.807116,1,JHC,10,...,,1.091948,0.0,1.0,1.091948,0.0,1.0,1.091948,0.0,1.0,,,,1.09195,2e-06,1.000002,1.091953,6e-06,1.000005,1.091946,-1.161979e-06,0.999999,3e-06,-1.091944,3e-06,1.09195,1.091946,-1e-06,0.999999,3e-06,-1.091944,3e-06,-1.091944,1.09195,2e-06,1.000002,1.091953,1.091946,3e-06,-1.091944


Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,type,scalar_coupling_constant,1j_nbonds,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,atom_0,x_0,y_0,z_0,atom_1,x_1,y_1,z_1,dist,dist_x,dist_y,dist_z,type_0,type_1,molecule_couples,...,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
4658137,4658137,dsgdb9nsd_133884,16,8,2JHC,9.11973,,C,1.419871,1.09048,1.50993,-0.506344,12.01,2.60041,,,,,,,,,,,,H,-0.084531,1.110807,-1.796741,C,0.787756,-0.840138,-1.042152,2.266379,0.760884,3.806185,0.569405,2,JHC,78,...,0.916944,2.600236,0.333857,1.147309,3.358747,1.092368,1.481988,1.09048,-1.175899,0.481155,0.679559,-1.58682,0.299844,2.536801,0.270422,1.119319,3.448811,1.182432,1.521728,1.080997,-1.185382,0.476971,0.823964,-1.442415,0.36356,2.625259,1.080997,-1.185382,0.476971,0.794133,-1.472246,0.135944,-2.130435,2.274912,0.008533,1.003765,2.312206,2.209489,0.037188,-2.229191
4658138,4658138,dsgdb9nsd_133884,16,17,3JHH,0.789559,,,,,,,,,1.09048,1.080997,1.50993,-0.506344,-0.581363,1.419871,1.328055,0.823388,24.02,CC,3.681407,H,-0.084531,1.110807,-1.796741,H,1.12655,-1.348733,-1.933838,2.744968,1.466716,6.049335,0.018796,3,JHH,78,...,0.916944,2.600236,-0.144732,0.947274,3.358747,0.61378,1.223602,1.09048,-1.654488,0.397265,0.679559,-2.065408,0.247565,2.705595,-0.039372,0.985657,2.744968,0.0,1.0,2.662406,-0.082561,0.969923,0.041413,-2.703555,0.015087,2.503545,1.784424,-0.9605434,0.650071,0.403132,-2.341835,0.344874,-2.400093,2.706152,-0.038816,0.985859,2.744969,2.6624,0.033847,-2.711121
4658139,4658139,dsgdb9nsd_133884,17,1,3JHN,-0.006537,,,,,,,,,1.080997,1.560647,1.50993,-0.581363,-0.411704,1.328055,2.14749,-0.310911,24.02,CC,4.151574,H,1.12655,-1.348733,-1.933838,N,-1.5711,0.047932,-0.491726,3.362689,7.277316,1.950674,2.079688,3,JHN,78,...,0.755962,2.692607,-0.670082,0.80073,3.495226,0.132537,1.039414,1.080997,-2.281692,0.321468,0.845189,-2.5175,0.251343,2.609557,-0.753132,0.776033,3.362702,1.3e-05,1.000004,2.220388,-1.142301,0.660301,0.583487,-2.779202,0.173518,2.609557,2.220388,-1.142301,0.660301,0.583487,-2.779202,0.344874,-3.017815,3.362695,6e-06,1.000002,3.362702,3.362689,9e-06,-3.36268
4658140,4658140,dsgdb9nsd_133884,17,2,3JHC,1.94438,,,,,,,,,1.080997,1.509931,1.509711,-0.554904,-0.171603,1.357679,2.245745,-0.825532,24.02,CC,4.10064,H,1.12655,-1.348733,-1.933838,C,-0.75085,-0.602182,0.665932,3.292533,3.524629,0.557339,6.758808,3,JHC,78,...,0.755962,2.692607,-0.599926,0.817792,3.495226,0.202693,1.061561,1.080997,-2.211536,0.328318,0.845189,-2.447344,0.256699,2.794773,-0.497761,0.848821,3.470578,0.178044,1.054075,1.09048,-2.202054,0.331198,0.752773,-2.53976,0.22863,2.625259,1.080997,-2.211536,0.328318,0.794133,-2.4984,0.344874,-2.947659,3.210469,-0.082064,0.975076,3.601194,2.469623,0.321635,-2.970899
4658141,4658141,dsgdb9nsd_133884,17,3,2JHC,0.861412,,C,1.357679,1.080997,1.509711,-0.554904,12.01,2.590709,,,,,,,,,,,,H,1.12655,-1.348733,-1.933838,C,0.326978,-1.554195,0.205661,2.293248,0.639315,0.042215,4.577456,2,JHC,78,...,0.755962,2.692607,0.399359,1.174146,3.495226,1.201978,1.524138,1.080997,-1.21225,0.471383,0.845189,-1.448058,0.368556,2.536799,0.243551,1.106204,3.448809,1.155561,1.503897,1.080997,-1.212251,0.471383,0.823962,-1.469286,0.359299,2.625259,1.080997,-1.212251,0.471383,0.794133,-1.499115,0.135944,-2.157304,2.274912,-0.018336,0.992004,2.312206,2.209489,0.037188,-2.25606
4658142,4658142,dsgdb9nsd_133884,17,4,2JHC,3.54345,,C,1.390609,1.080997,1.532828,-0.543759,12.01,2.613826,,,,,,,,,,,,H,1.12655,-1.348733,-1.933838,C,1.629865,-0.747236,0.235262,2.306538,0.253326,0.361798,4.704995,2,JHC,78,...,0.755962,2.692607,0.386069,1.16738,3.495226,1.188688,1.515356,1.080997,-1.225541,0.468667,0.845189,-1.461349,0.366432,2.371793,0.065255,1.028291,3.243547,0.937008,1.40624,1.083421,-1.223117,0.469717,0.734959,-1.571579,0.318642,2.625259,1.080997,-1.225541,0.468666,0.794133,-1.512405,0.135944,-2.170594,2.274912,-0.031626,0.986288,2.312206,2.209489,0.037188,-2.269351
4658143,4658143,dsgdb9nsd_133884,17,5,3JHC,0.568997,,,,,,,,,1.080997,1.553264,1.532828,-0.543759,-0.355358,1.390609,2.225488,-0.813418,24.02,CC,4.16709,H,1.12655,-1.348733,-1.933838,C,1.415947,0.620773,0.939122,3.495226,0.08375,3.878955,8.253901,3,JHC,78,...,0.755962,2.692607,-0.802619,0.770367,3.495226,0.0,1.0,1.080997,-2.414229,0.309278,0.845189,-2.650037,0.241813,2.778225,-0.717001,0.794863,3.601194,0.105968,1.030318,1.094292,-2.400934,0.313082,1.034817,-2.46041,0.296066,2.625259,1.080997,-2.414229,0.309278,0.794133,-2.701093,0.344874,-3.150352,3.210469,-0.284757,0.91853,3.601194,2.469623,0.321635,-3.173592
4658144,4658144,dsgdb9nsd_133884,17,6,3JHC,1.17337,,,,,,,,,1.080997,1.542518,1.50993,-0.581363,-0.179514,1.328055,2.291259,-0.986229,24.02,CC,4.133445,H,1.12655,-1.348733,-1.933838,C,-0.027076,0.747033,0.478506,3.397424,1.330852,4.392233,5.819407,3,JHC,78,...,0.755962,2.692607,-0.704817,0.792544,3.495226,0.097802,1.028787,1.080997,-2.316427,0.318181,0.845189,-2.552235,0.248774,2.631319,-0.766106,0.774504,3.397424,0.0,1.0,2.21175,-1.185674,0.651008,0.551357,-2.846067,0.162287,2.625259,1.080997,-2.316427,0.318181,0.794133,-2.603291,0.344874,-3.05255,3.210469,-0.186955,0.944972,3.601194,2.469623,0.321635,-3.07579
4658145,4658145,dsgdb9nsd_133884,17,7,2JHC,4.76201,,C,1.328055,1.080997,1.50993,-0.581363,12.01,2.590927,,,,,,,,,,,,H,1.12655,-1.348733,-1.933838,C,-0.131901,0.356983,-1.010196,2.312202,1.583699,2.909466,0.853115,2,JHC,78,...,0.755962,2.692607,0.380405,1.164521,3.495226,1.183024,1.511644,1.080997,-1.231205,0.467518,0.845189,-1.467013,0.365534,2.794769,0.482567,1.208704,3.47058,1.158378,1.500985,1.09048,-1.221722,0.47162,0.752773,-1.559429,0.325565,2.625259,1.080997,-1.231205,0.467518,0.794133,-1.518069,0.135944,-2.176258,2.274912,-0.03729,0.983872,2.312206,2.209489,0.037188,-2.275015
4658146,4658146,dsgdb9nsd_133884,17,8,1JHC,117.934,4.0,,,,,,,,,,,,,,,,,,,H,1.12655,-1.348733,-1.933838,C,0.787756,-0.840138,-1.042152,1.080997,0.114781,0.258669,0.795105,1,JHC,78,...,0.755962,2.692607,1.61161,2.490855,3.495226,2.414229,3.233335,1.080997,0.0,1.0,0.845189,-0.235808,0.781861,2.536801,1.455803,2.346722,3.448811,2.367814,3.190398,1.080997,0.0,1.0,0.823964,-0.257034,0.762225,2.625259,1.080997,-1.288042e-07,1.0,0.794133,-0.286864,0.005663,-1.075335,1.089062,0.008064,1.00746,1.094293,1.080997,0.005663,-1.075335


[INFO]2019-06-25 15:08:36,393:main:['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type', '1j_nbonds', '2j_atom_center', '2j_area_021', '2j_norm_vec_02', '2j_norm_vec_12', '2j_cos', '2j_atom_center_weight', '2j_sum_norm_vec', '3j_norm_vec_02', '3j_norm_vec_13', '3j_norm_vec_23', '3j_cos_023', '3j_cos_231', '3j_area_023', '3j_area_231', '3j_dihedral', '3j_atom_center_weight', '3j_atom_center', '3j_sum_norm_vec', 'atom_0', 'x_0', 'y_0', 'z_0', 'atom_1', 'x_1', 'y_1', 'z_1', 'dist', 'dist_x', 'dist_y', 'dist_z', 'type_0', 'type_1', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom_index_0_y_1_std', 'molecule_atom_index_0_z_1_std', 'molecule_atom_index_0_dist_mean', 'molecul

Mem. usage decreased to 919.57 Mb (71.9% reduction)


[INFO]2019-06-25 15:10:17,045:main:Finish preprocess()
[INFO]2019-06-25 15:10:19,188:main:Starting train model(1JHC)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 15:10:24,787:main:features(1JHC): ['1j_nbonds', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom_index_0_y_1_std', 'molecule_atom_index_0_z_1_std', 'molecule_atom_index_0_dist_mean', 'molecule_atom_index_0_dist_mean_diff', 'molecule_atom_index_0_dist_mean_div', 'molecule_atom_index_0_dist_max', 'molecule_ato

Unnamed: 0,1j_nbonds,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
0,4.0,1.091797,0.000221,1.192383,3.6e-05,10,1.506836,1.091797,1.783203,4,4,0.728027,1.358398,0.272949,1.25138,1.463867,0.37793,0.182251,0.728027,1.610352,0.518555,1.474609,1.783203,0.691406,1.632812,1.091797,0.345703,-0.746582,0.316406,1.091797,-3e-06,1.0,1.091797,0.0,1.0,1.091797,-7e-06,1.0,3e-06,-1.091797,3e-06,1.091797,1.091797,-7e-06,1.0,3e-06,-1.091797,3e-06,-1.091797,1.091797,-3e-06,1.0,1.091797,1.091797,3e-06,-1.091797
4,4.0,1.091797,1.049805,0.142822,6e-05,10,1.506836,1.091797,1.783203,3,4,0.300049,1.324219,0.237915,1.219147,1.447266,0.361816,0.206177,0.891602,1.552734,0.460693,1.421875,1.783203,0.691406,1.632812,1.091797,0.39917,-0.692871,0.365479,1.091797,-2e-06,1.0,1.091797,1e-06,1.0,1.091797,-5e-06,1.0,3e-06,-1.091797,3e-06,1.091797,1.091797,-5e-06,1.0,3e-06,-1.091797,3e-06,-1.091797,1.091797,-2e-06,1.0,1.091797,1.091797,3e-06,-1.091797
7,4.0,1.091797,0.278809,0.130859,0.782715,10,1.506836,1.091797,1.783203,2,4,0.361328,1.261719,0.176025,1.162151,1.4375,0.352051,0.249023,0.635254,1.4375,0.345703,1.316406,1.783203,0.691406,1.632812,1.091797,0.48877,-0.603027,0.44751,1.091797,3e-06,1.0,1.091797,7e-06,1.0,1.091797,0.0,1.0,3e-06,-1.091797,3e-06,1.091797,1.091797,0.0,1.0,3e-06,-1.091797,3e-06,-1.091797,1.091797,3e-06,1.0,1.091797,1.091797,3e-06,-1.091797
9,4.0,1.091797,0.26123,0.124023,0.807129,10,1.506836,1.091797,1.783203,1,4,,1.085938,0.0,1.0,1.085938,0.0,,,1.091797,0.0,1.0,1.091797,0.0,1.0,1.091797,,,,1.091797,2e-06,1.0,1.091797,6e-06,1.0,1.091797,-1e-06,1.0,3e-06,-1.091797,3e-06,1.091797,1.091797,-1e-06,1.0,3e-06,-1.091797,3e-06,-1.091797,1.091797,2e-06,1.0,1.091797,1.091797,3e-06,-1.091797
17,2.0,1.066406,0.00021,1.137695,3.5e-05,2,1.642578,1.066406,2.21875,2,1,0.011055,0.556641,-0.575684,0.491541,1.132812,0.0,0.814453,0.00449,1.642578,0.575684,1.540039,2.21875,1.151367,2.080078,1.066406,0.814453,-0.252197,0.763672,1.066406,0.0,1.0,1.066406,0.0,1.0,1.066406,0.0,1.0,,,,1.066406,1.066406,0.0,1.0,,,,,1.066406,0.0,1.0,1.066406,1.066406,,


0      84.8076
4      84.8074
7      84.8093
9      84.8095
17    171.2200
Name: scalar_coupling_constant, dtype: float64

mean of target. train:94.98392857568842, valid:94.96060144118587
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 8.64586	training's l1: 2.06996	valid_1's l2: 10.1169	valid_1's l1: 2.20558
[200]	training's l2: 6.66291	training's l1: 1.82214	valid_1's l2: 8.85262	valid_1's l1: 2.04884
[300]	training's l2: 5.57882	training's l1: 1.66793	valid_1's l2: 8.26804	valid_1's l1: 1.97048
[400]	training's l2: 4.79719	training's l1: 1.54835	valid_1's l2: 7.88005	valid_1's l1: 1.91715
[500]	training's l2: 4.18129	training's l1: 1.44887	valid_1's l2: 7.58983	valid_1's l1: 1.87764
[600]	training's l2: 3.68757	training's l1: 1.36255	valid_1's l2: 7.3739	valid_1's l1: 1.84668
[700]	training's l2: 3.30048	training's l1: 1.29105	valid_1's l2: 7.22403	valid_1's l1: 1.82553
[800]	training's l2: 2.97473	training's l1: 1.22654	valid_1's l2: 7.10147	valid_1's l1: 1.807
[900]	training's l2: 2.67072	training's l1: 1.16437	valid_1's l2: 6.97871	valid_1's l1: 1.78886
[1000]	trai

[INFO]2019-06-25 15:14:40,580:main:fold 1 valid 0.390509


mean of target. train:94.9623837111789, valid:95.00369117020259
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 8.73739	training's l1: 2.07833	valid_1's l2: 10.1335	valid_1's l1: 2.20753
[200]	training's l2: 6.71965	training's l1: 1.82639	valid_1's l2: 8.82007	valid_1's l1: 2.04647
[300]	training's l2: 5.58767	training's l1: 1.67116	valid_1's l2: 8.21795	valid_1's l1: 1.96889
[400]	training's l2: 4.78852	training's l1: 1.55008	valid_1's l2: 7.83144	valid_1's l1: 1.91588
[500]	training's l2: 4.1834	training's l1: 1.45032	valid_1's l2: 7.55731	valid_1's l1: 1.87643
[600]	training's l2: 3.68658	training's l1: 1.36425	valid_1's l2: 7.34933	valid_1's l1: 1.84628
[700]	training's l2: 3.28128	training's l1: 1.2891	valid_1's l2: 7.17751	valid_1's l1: 1.82075
[800]	training's l2: 2.93319	training's l1: 1.22101	valid_1's l2: 7.04353	valid_1's l1: 1.80027
[900]	training's l2: 2.64768	training's l1: 1.16066	valid_1's l2: 6.93773	valid_1's l1: 1.78282
[1000]	trai

[INFO]2019-06-25 15:18:54,519:main:fold 2 valid 0.356513


mean of target. train:94.98214630569106, valid:94.9641659811711
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 8.65151	training's l1: 2.07381	valid_1's l2: 10.1176	valid_1's l1: 2.20701
[200]	training's l2: 6.67842	training's l1: 1.82392	valid_1's l2: 8.87421	valid_1's l1: 2.04953
[300]	training's l2: 5.52762	training's l1: 1.66286	valid_1's l2: 8.24223	valid_1's l1: 1.96659
[400]	training's l2: 4.71393	training's l1: 1.5404	valid_1's l2: 7.84862	valid_1's l1: 1.91362
[500]	training's l2: 4.12793	training's l1: 1.44228	valid_1's l2: 7.57308	valid_1's l1: 1.87305
[600]	training's l2: 3.67143	training's l1: 1.36064	valid_1's l2: 7.38203	valid_1's l1: 1.84492
[700]	training's l2: 3.29205	training's l1: 1.28942	valid_1's l2: 7.23623	valid_1's l1: 1.82364
[800]	training's l2: 2.95925	training's l1: 1.22408	valid_1's l2: 7.11732	valid_1's l1: 1.80552
[900]	training's l2: 2.66535	training's l1: 1.16375	valid_1's l2: 7.00677	valid_1's l1: 1.78949
[1000]	tra

[INFO]2019-06-25 15:23:07,857:main:fold 3 valid 0.392702
[INFO]2019-06-25 15:23:07,921:main:CV score: 0.379908
[INFO]2019-06-25 15:23:07,929:main:Starting train model(2JHH)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 15:23:11,174:main:features(2JHH): ['2j_atom_center', '2j_area_021', '2j_norm_vec_02', '2j_norm_vec_12', '2j_cos', '2j_atom_center_weight', '2j_sum_norm_vec', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom_index_0_y_1_std', 'molecule_atom_index_0_z_1_s

Unnamed: 0,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
1,0,1.124023,1.091797,1.091797,-0.333252,12.007812,2.183594,1.783203,1.019531,2.160156,3e-06,10,1.506836,1.091797,1.783203,4,1,0.728027,1.358398,-0.10498,0.928268,1.463867,0.0,0.182251,0.728027,1.610352,-0.172729,0.90332,1.783203,3.7e-05,1.0,1.091797,-0.691406,0.612305,0.345703,-1.4375,0.193848,1.783203,0.0,1.0,1.783203,0.0,1.0,1.783203,0.0,1.0,,,,1.783203,1.783203,0.0,1.0,1.4e-05,-1.783203,1.4e-05,-1.783203,1.783203,2.7e-05,1.0,1.783203,1.783203,1.4e-05,-1.783203
2,0,1.124023,1.091797,1.091797,-0.333252,12.007812,2.183594,1.783203,0.294922,2.113281,0.771973,10,1.506836,1.091797,1.783203,4,2,0.728027,1.358398,-0.088745,0.938673,1.463867,0.01622,0.182251,0.728027,1.610352,-0.172852,0.90332,1.783203,9e-06,1.0,1.091797,-0.691406,0.612305,0.345703,-1.4375,0.193848,1.783203,5e-06,1.0,1.783203,1e-05,1.0,1.783203,0.0,1.0,7e-06,-1.783203,4e-06,1.783203,1.783203,-2.8e-05,1.0,1.4e-05,-1.783203,1.4e-05,-1.783203,1.783203,-1e-06,1.0,1.783203,1.783203,1.4e-05,-1.783203
3,0,1.124023,1.091797,1.091797,-0.333252,12.007812,2.183594,1.783203,0.276611,2.085938,0.817871,10,1.506836,1.091797,1.783203,4,3,0.728027,1.358398,-0.079163,0.944936,1.463867,0.025818,0.182251,0.728027,1.610352,-0.172852,0.90332,1.783203,0.0,1.0,1.091797,-0.691406,0.612305,0.345703,-1.4375,0.193848,1.783203,-6e-06,1.0,1.783203,0.0,1.0,1.783203,-8.821487e-06,1.0,5e-06,-1.783203,3e-06,1.783203,1.783203,-3.7e-05,1.0,1.4e-05,-1.783203,1.4e-05,-1.783203,1.783203,-1e-05,1.0,1.783203,1.783203,1.4e-05,-1.783203
5,0,1.124023,1.091797,1.091797,-0.333252,12.007812,2.183594,1.783203,2.410156,0.000263,0.769043,10,1.506836,1.091797,1.783203,3,2,0.300049,1.324219,-0.123779,0.914494,1.447266,0.0,0.206177,0.891602,1.552734,-0.230347,0.870605,1.783203,0.0,1.0,1.091797,-0.691406,0.612305,0.39917,-1.383789,0.223755,1.783203,-5e-06,1.0,1.783203,0.0,1.0,1.783203,-1.019239e-05,1.0,7e-06,-1.783203,4e-06,1.783203,1.783203,-3.8e-05,1.0,1.4e-05,-1.783203,1.4e-05,-1.783203,1.783203,-1.1e-05,1.0,1.783203,1.783203,1.4e-05,-1.783203
6,0,1.124023,1.091797,1.091797,-0.333252,12.007812,2.183594,1.783203,2.357422,0.000667,0.821289,10,1.506836,1.091797,1.783203,3,3,0.300049,1.324219,-0.114197,0.920596,1.447266,0.009598,0.206177,0.891602,1.552734,-0.230347,0.870605,1.783203,9e-06,1.0,1.091797,-0.691406,0.612305,0.39917,-1.383789,0.223755,1.783203,3e-06,1.0,1.783203,8e-06,1.0,1.783203,-5.364418e-07,1.0,5e-06,-1.783203,3e-06,1.783203,1.783203,-2.9e-05,1.0,1.4e-05,-1.783203,1.4e-05,-1.783203,1.783203,-2e-06,1.0,1.783203,1.783203,1.4e-05,-1.783203


1   -11.2570
2   -11.2548
3   -11.2543
5   -11.2541
6   -11.2548
Name: scalar_coupling_constant, dtype: float64

mean of target. train:-10.288322401219846, valid:-10.283170689505688
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.347483	training's l1: 0.382348	valid_1's l2: 0.480955	valid_1's l1: 0.432153
[200]	training's l2: 0.235878	training's l1: 0.319879	valid_1's l2: 0.41419	valid_1's l1: 0.39859
[300]	training's l2: 0.177513	training's l1: 0.280552	valid_1's l2: 0.3853	valid_1's l1: 0.382425
[400]	training's l2: 0.138903	training's l1: 0.250854	valid_1's l2: 0.367819	valid_1's l1: 0.372365
[500]	training's l2: 0.11317	training's l1: 0.228201	valid_1's l2: 0.356678	valid_1's l1: 0.365746
[600]	training's l2: 0.0938466	training's l1: 0.209147	valid_1's l2: 0.348877	valid_1's l1: 0.360858
[700]	training's l2: 0.0787019	training's l1: 0.192648	valid_1's l2: 0.342991	valid_1's l1: 0.356932
[800]	training's l2: 0.0666205	training's l1: 0.17829	valid_1's l2: 0.337699	valid_1's l1: 0.353764
[900]	training's l2: 0.0566925	training's l1: 0.165268	valid_1's l2: 0.

[INFO]2019-06-25 15:26:21,730:main:fold 1 valid -1.245093


mean of target. train:-10.285182848189041, valid:-10.289449795567212
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.337708	training's l1: 0.379484	valid_1's l2: 0.473102	valid_1's l1: 0.428151
[200]	training's l2: 0.230728	training's l1: 0.317744	valid_1's l2: 0.413602	valid_1's l1: 0.395575
[300]	training's l2: 0.174774	training's l1: 0.27991	valid_1's l2: 0.388422	valid_1's l1: 0.381175
[400]	training's l2: 0.136602	training's l1: 0.250461	valid_1's l2: 0.371907	valid_1's l1: 0.37143
[500]	training's l2: 0.110451	training's l1: 0.227326	valid_1's l2: 0.361356	valid_1's l1: 0.364991
[600]	training's l2: 0.0911649	training's l1: 0.207921	valid_1's l2: 0.353085	valid_1's l1: 0.359751
[700]	training's l2: 0.0764258	training's l1: 0.191483	valid_1's l2: 0.346897	valid_1's l1: 0.356024
[800]	training's l2: 0.0643747	training's l1: 0.176809	valid_1's l2: 0.342134	valid_1's l1: 0.352954
[900]	training's l2: 0.0553396	training's l1: 0.164372	valid_1's l2

[INFO]2019-06-25 15:29:29,062:main:fold 2 valid -1.231257


mean of target. train:-10.28631024253647, valid:-10.287195006872503
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.340551	training's l1: 0.379125	valid_1's l2: 0.478127	valid_1's l1: 0.430113
[200]	training's l2: 0.233095	training's l1: 0.317888	valid_1's l2: 0.415775	valid_1's l1: 0.397768
[300]	training's l2: 0.175656	training's l1: 0.278973	valid_1's l2: 0.386276	valid_1's l1: 0.381135
[400]	training's l2: 0.138479	training's l1: 0.250747	valid_1's l2: 0.369083	valid_1's l1: 0.371475
[500]	training's l2: 0.112739	training's l1: 0.2279	valid_1's l2: 0.35847	valid_1's l1: 0.365246
[600]	training's l2: 0.0923567	training's l1: 0.208179	valid_1's l2: 0.349879	valid_1's l1: 0.360019
[700]	training's l2: 0.0776903	training's l1: 0.191659	valid_1's l2: 0.343925	valid_1's l1: 0.356094
[800]	training's l2: 0.0656594	training's l1: 0.177262	valid_1's l2: 0.339393	valid_1's l1: 0.353356
[900]	training's l2: 0.0559799	training's l1: 0.164484	valid_1's l2: 

[INFO]2019-06-25 15:32:40,368:main:fold 3 valid -1.206961
[INFO]2019-06-25 15:32:40,409:main:CV score: -1.227770
[INFO]2019-06-25 15:32:40,414:main:Starting train model(1JHN)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 15:32:42,808:main:features(1JHN): ['1j_nbonds', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom_index_0_y_1_std', 'molecule_atom_index_0_z_1_std', 'molecule_atom_index_0_dist_mean', 'molecule_atom_index_0_dist_mean_diff', 'molecule_atom_index_0_dist_

Unnamed: 0,1j_nbonds,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
10,3.0,1.017578,0.003328,1.023438,0.008087,6,1.318359,1.017578,1.619141,3,3,0.730957,1.242188,0.218018,1.212888,1.358398,0.334717,0.188965,0.459717,1.417969,0.400879,1.394531,1.619141,0.601562,1.591797,1.017578,0.347168,-0.669922,0.341309,1.017578,5e-06,1.0,1.017578,1.8e-05,1.0,1.017578,-3e-06,1.0,1.1e-05,-1.017578,1.1e-05,1.017578,1.017578,-3e-06,1.0,1.1e-05,-1.017578,1.1e-05,-1.017578,1.017578,5e-06,1.0,1.017578,1.017578,1.1e-05,-1.017578
13,3.0,1.017578,0.914551,0.112,0.008339,6,1.318359,1.017578,1.619141,2,3,0.339355,1.183594,0.159668,1.155953,1.34375,0.319336,0.22583,0.592773,1.318359,0.300781,1.295898,1.619141,0.601562,1.591797,1.017578,0.425293,-0.591797,0.418213,1.017578,8e-06,1.0,1.017578,2.1e-05,1.0,1.017578,0.0,1.0,1.1e-05,-1.017578,1.1e-05,1.017578,1.017578,0.0,1.0,1.1e-05,-1.017578,1.1e-05,-1.017578,1.017578,8e-06,1.0,1.017578,1.017578,1.1e-05,-1.017578
15,3.0,1.017578,0.230225,0.102051,0.702637,6,1.318359,1.017578,1.619141,1,3,,1.024414,0.0,1.0,1.024414,0.0,,,1.017578,0.0,1.0,1.017578,0.0,1.0,1.017578,,,,1.017578,-1.3e-05,1.0,1.017578,0.0,1.0,1.017578,-2.1e-05,1.0,1.1e-05,-1.017578,1.1e-05,1.017578,1.017578,-2.1e-05,1.0,1.1e-05,-1.017578,1.1e-05,-1.017578,1.017578,-1.3e-05,1.0,1.017578,1.017578,1.1e-05,-1.017578
97,3.0,1.007812,0.724609,0.290527,2.7e-05,9,1.80957,1.004883,2.960938,4,3,0.546875,0.671387,-0.674805,0.498868,1.827148,0.480713,1.09082,0.007298,1.938477,0.930664,1.923828,2.960938,1.953125,2.9375,1.007812,0.809082,-0.198242,0.803223,1.353516,0.345703,1.342773,2.046875,1.040039,2.03125,1.004883,-0.002577,0.997559,0.601074,-0.406494,0.59668,1.353516,1.004883,-0.002577,0.997559,0.601074,-0.406494,0.059509,-0.947754,1.005859,-0.001288,0.998535,1.007812,1.004883,0.001822,-1.005859
101,3.0,1.004883,0.778809,0.231079,0.0001,9,1.80957,1.004883,2.960938,3,3,0.562012,0.286377,-1.05957,0.212805,1.345703,0.0,0.946289,0.004223,1.792969,0.788086,1.78418,2.302734,1.297852,2.291016,1.004883,0.692383,-0.312744,0.688965,1.353516,0.348389,1.34668,2.046875,1.041992,2.037109,1.004883,0.0,1.0,0.601074,-0.403809,0.598145,1.353516,1.004883,0.0,1.0,0.601074,-0.403809,0.059509,-0.945312,1.005859,0.001288,1.000977,1.007812,1.004883,0.001822,-1.00293


10     32.6889
13     32.6891
15     32.6905
97     55.5252
101    54.7359
Name: scalar_coupling_constant, dtype: float64

mean of target. train:47.485131115953955, valid:47.4693919474233
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.624648	training's l1: 0.541048	valid_1's l2: 2.15382	valid_1's l1: 0.968904
[200]	training's l2: 0.259197	training's l1: 0.343725	valid_1's l2: 1.99897	valid_1's l1: 0.931393
[300]	training's l2: 0.132984	training's l1: 0.235042	valid_1's l2: 1.94419	valid_1's l1: 0.918744
[400]	training's l2: 0.0791014	training's l1: 0.168204	valid_1's l2: 1.92133	valid_1's l1: 0.912019
[500]	training's l2: 0.0516213	training's l1: 0.123782	valid_1's l2: 1.91133	valid_1's l1: 0.908589
[600]	training's l2: 0.0364137	training's l1: 0.0938339	valid_1's l2: 1.90239	valid_1's l1: 0.906355
[700]	training's l2: 0.0265617	training's l1: 0.0724438	valid_1's l2: 1.8982	valid_1's l1: 0.905209
[800]	training's l2: 0.020857	training's l1: 0.0569935	valid_1's l2: 1.89687	valid_1's l1: 0.904768
[900]	training's l2: 0.0169834	training's l1: 0.0457997	valid_1's l2: 1.89

[INFO]2019-06-25 15:33:39,717:main:fold 1 valid -0.061536


mean of target. train:47.46433043342862, valid:47.5109936626538
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.595463	training's l1: 0.540844	valid_1's l2: 2.11689	valid_1's l1: 0.967588
[200]	training's l2: 0.225417	training's l1: 0.337989	valid_1's l2: 1.96829	valid_1's l1: 0.930726
[300]	training's l2: 0.105904	training's l1: 0.229604	valid_1's l2: 1.92354	valid_1's l1: 0.918911
[400]	training's l2: 0.0557131	training's l1: 0.162456	valid_1's l2: 1.89831	valid_1's l1: 0.912985
[500]	training's l2: 0.0318623	training's l1: 0.117824	valid_1's l2: 1.88395	valid_1's l1: 0.909755
[600]	training's l2: 0.0197242	training's l1: 0.0881589	valid_1's l2: 1.87594	valid_1's l1: 0.908047
[700]	training's l2: 0.0127574	training's l1: 0.0666686	valid_1's l2: 1.86933	valid_1's l1: 0.906729
[800]	training's l2: 0.00855857	training's l1: 0.0513225	valid_1's l2: 1.8653	valid_1's l1: 0.905964
[900]	training's l2: 0.00602657	training's l1: 0.0400898	valid_1's l2: 1.

[INFO]2019-06-25 15:34:49,206:main:fold 2 valid -0.141915


mean of target. train:47.49019208550947, valid:47.45926856925414
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.620459	training's l1: 0.544839	valid_1's l2: 2.13497	valid_1's l1: 0.962969
[200]	training's l2: 0.257047	training's l1: 0.345068	valid_1's l2: 1.96848	valid_1's l1: 0.924152
[300]	training's l2: 0.131356	training's l1: 0.235935	valid_1's l2: 1.91764	valid_1's l1: 0.912985
[400]	training's l2: 0.0770039	training's l1: 0.168709	valid_1's l2: 1.89312	valid_1's l1: 0.907546
[500]	training's l2: 0.0499244	training's l1: 0.12397	valid_1's l2: 1.88256	valid_1's l1: 0.905159
[600]	training's l2: 0.0351047	training's l1: 0.0934612	valid_1's l2: 1.87437	valid_1's l1: 0.90305
[700]	training's l2: 0.0264882	training's l1: 0.0718834	valid_1's l2: 1.86774	valid_1's l1: 0.901557
[800]	training's l2: 0.0202817	training's l1: 0.0562664	valid_1's l2: 1.86265	valid_1's l1: 0.90057
[900]	training's l2: 0.0161233	training's l1: 0.0448288	valid_1's l2: 1.860

[INFO]2019-06-25 15:36:01,129:main:fold 3 valid -0.244511
[INFO]2019-06-25 15:36:01,144:main:CV score: -0.149321
[INFO]2019-06-25 15:36:01,146:main:Starting train model(2JHN)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 15:36:03,157:main:features(2JHN): ['2j_atom_center', '2j_area_021', '2j_norm_vec_02', '2j_norm_vec_12', '2j_cos', '2j_atom_center_weight', '2j_sum_norm_vec', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom_index_0_y_1_std', 'molecule_atom_index_0_z_1

Unnamed: 0,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
18,0,0.0,1.066406,1.151367,-1.0,12.007812,2.21875,2.21875,0.000907,4.921875,0.0001494884,2,1.642578,1.066406,2.21875,2,1,0.011055,0.556641,0.575684,-29.055043,1.132812,1.151367,0.814453,0.00449,1.642578,-0.575684,0.740234,2.21875,0.0,1.0,1.066406,-1.151367,0.480713,0.814453,-1.404297,0.367188,2.21875,0.0,1.0,2.21875,0.0,1.0,2.21875,0.0,1.0,,,,2.21875,2.21875,0.0,1.0,,,,,2.21875,0.0,1.0,2.21875,2.21875,,
104,0,1.402344,1.109375,1.359375,-0.368652,12.007812,2.46875,2.046875,0.875,3.316406,6.556511e-07,9,1.80957,1.004883,2.960938,2,3,0.051178,0.666992,-0.678711,0.495638,1.345703,0.0,0.959961,0.005436,1.578125,-0.468994,0.770996,2.046875,0.0,1.0,1.109375,-0.937988,0.541992,0.663574,-1.383789,0.323975,1.353516,-0.694336,0.661133,2.046875,0.0,1.0,1.004883,-1.041992,0.490723,0.601074,-1.446289,0.293701,1.353516,1.004883,-1.041992,0.490723,0.601074,-1.446289,0.161255,-1.885742,2.046875,0.0,1.0,2.046875,2.046875,,
400,0,1.458008,1.102539,1.463867,-0.429443,12.007812,2.566406,2.177734,0.276123,3.595703,0.8720703,20,2.130859,1.014648,3.285156,5,4,1.214844,-0.062622,-1.549805,-0.042105,1.487305,0.0,0.936035,0.37793,2.048828,-0.12854,0.940918,3.123047,0.944824,1.433594,1.102539,-1.075195,0.506348,0.73291,-1.445312,0.336426,1.574219,-0.604004,0.722656,2.177734,0.0,1.0,1.014648,-1.163086,0.466064,0.646973,-1.53125,0.296875,2.09375,1.014648,-1.163086,0.466064,0.948242,-1.230469,0.18811,-1.990234,2.132812,-0.044678,0.979492,2.177734,2.089844,0.063171,-2.115234
405,0,1.520508,1.095703,1.463867,-0.317871,12.007812,2.560547,2.089844,0.318604,3.359375,0.6855469,20,2.130859,1.014648,3.285156,4,4,1.170898,0.008034,-1.479492,0.005403,1.487305,0.0,1.06543,0.02417,2.103516,0.014084,1.006836,3.142578,1.054688,1.504883,1.095703,-0.992676,0.524902,0.835938,-1.25293,0.400146,1.574219,-0.514648,0.753906,2.177734,0.089355,1.042969,1.014648,-1.074219,0.48584,0.646973,-1.442383,0.30957,2.09375,1.014648,-1.074219,0.48584,0.948242,-1.140625,0.18811,-1.900391,2.132812,0.044678,1.021484,2.177734,2.089844,0.063171,-2.025391
695,2,1.326172,0.962891,1.407227,-0.207031,16.0,2.371094,1.862305,0.003351,0.032227,3.433594,32,2.222656,1.09082,3.513672,2,7,0.061554,-0.347656,0.344238,0.502479,-0.003431,0.688477,0.486816,0.759277,2.417969,0.555664,1.298828,2.974609,1.111328,1.59668,1.862305,0.0,1.0,0.786133,-1.076172,0.421875,2.779297,0.916504,1.492188,3.341797,1.478516,1.793945,1.862305,0.0,1.0,0.494385,-1.368164,0.265381,2.779297,1.862305,0.0,1.0,0.494385,-1.368164,0.190552,-1.671875,1.862305,0.0,1.0,1.862305,1.862305,,


18      5.182460
104    15.513500
400     0.633806
405     1.378300
695     0.784661
Name: scalar_coupling_constant, dtype: float64

mean of target. train:3.1358723658649814, valid:3.1025161085255877
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.205684	training's l1: 0.298112	valid_1's l2: 0.449114	valid_1's l1: 0.407152
[200]	training's l2: 0.107555	training's l1: 0.220255	valid_1's l2: 0.394698	valid_1's l1: 0.37962
[300]	training's l2: 0.0658666	training's l1: 0.173877	valid_1's l2: 0.37297	valid_1's l1: 0.367888
[400]	training's l2: 0.042396	training's l1: 0.140792	valid_1's l2: 0.360693	valid_1's l1: 0.361188
[500]	training's l2: 0.0283516	training's l1: 0.116183	valid_1's l2: 0.35282	valid_1's l1: 0.356996
[600]	training's l2: 0.0195673	training's l1: 0.097286	valid_1's l2: 0.3476	valid_1's l1: 0.354108
[700]	training's l2: 0.0136924	training's l1: 0.0818421	valid_1's l2: 0.344016	valid_1's l1: 0.351987
[800]	training's l2: 0.00986644	training's l1: 0.0695646	valid_1's l2: 0.341743	valid_1's l1: 0.350696
[900]	training's l2: 0.00719639	training's l1: 0.0595517	valid_1's 

[INFO]2019-06-25 15:37:54,691:main:fold 1 valid -1.574315


mean of target. train:3.106771189731069, valid:3.1607184607934324
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.207612	training's l1: 0.300262	valid_1's l2: 0.431736	valid_1's l1: 0.412424
[200]	training's l2: 0.108174	training's l1: 0.221393	valid_1's l2: 0.381654	valid_1's l1: 0.385501
[300]	training's l2: 0.0646025	training's l1: 0.173847	valid_1's l2: 0.360641	valid_1's l1: 0.373191
[400]	training's l2: 0.0417067	training's l1: 0.141268	valid_1's l2: 0.349161	valid_1's l1: 0.366638
[500]	training's l2: 0.0277662	training's l1: 0.116413	valid_1's l2: 0.341966	valid_1's l1: 0.362307
[600]	training's l2: 0.0188646	training's l1: 0.0966667	valid_1's l2: 0.337395	valid_1's l1: 0.359386
[700]	training's l2: 0.0132111	training's l1: 0.081364	valid_1's l2: 0.334282	valid_1's l1: 0.357539
[800]	training's l2: 0.00933533	training's l1: 0.068739	valid_1's l2: 0.331856	valid_1's l1: 0.356119
[900]	training's l2: 0.00678013	training's l1: 0.058608	valid_1

[INFO]2019-06-25 15:39:42,186:main:fold 2 valid -1.376606


mean of target. train:3.1316172846594754, valid:3.1110262709365695
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.201141	training's l1: 0.296385	valid_1's l2: 0.430008	valid_1's l1: 0.407235
[200]	training's l2: 0.105483	training's l1: 0.218761	valid_1's l2: 0.382641	valid_1's l1: 0.381423
[300]	training's l2: 0.0642842	training's l1: 0.172567	valid_1's l2: 0.363849	valid_1's l1: 0.370476
[400]	training's l2: 0.0415058	training's l1: 0.140393	valid_1's l2: 0.353301	valid_1's l1: 0.36413
[500]	training's l2: 0.0277129	training's l1: 0.115568	valid_1's l2: 0.34695	valid_1's l1: 0.360376
[600]	training's l2: 0.0188332	training's l1: 0.0960154	valid_1's l2: 0.341967	valid_1's l1: 0.357517
[700]	training's l2: 0.0132467	training's l1: 0.0808472	valid_1's l2: 0.338837	valid_1's l1: 0.355582
[800]	training's l2: 0.00948296	training's l1: 0.0685957	valid_1's l2: 0.336577	valid_1's l1: 0.35416
[900]	training's l2: 0.00691488	training's l1: 0.0585748	valid_

[INFO]2019-06-25 15:41:30,021:main:fold 3 valid -1.536088
[INFO]2019-06-25 15:41:30,040:main:CV score: -1.495670
[INFO]2019-06-25 15:41:30,043:main:Starting train model(2JHC)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 15:41:35,927:main:features(2JHC): ['2j_atom_center', '2j_area_021', '2j_norm_vec_02', '2j_norm_vec_12', '2j_cos', '2j_atom_center_weight', '2j_sum_norm_vec', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom_index_0_y_1_std', 'molecule_atom_index_0_z_1

Unnamed: 0,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
20,0,1.558594,1.094727,1.529297,-0.365479,12.007812,2.625,2.181641,0.985352,3.777344,8.940697e-07,27,2.029297,1.094727,3.095703,7,6,0.572754,0.592285,0.596191,-152.615283,1.923828,1.927734,1.134766,0.720703,2.140625,-0.040985,0.981445,3.095703,0.913574,1.418945,1.094727,-1.087891,0.501465,0.660156,-1.522461,0.30249,1.638672,-0.543945,0.750977,2.181641,4.7e-05,1.0,1.094727,-1.087891,0.501465,0.595703,-1.586914,0.272949,1.638672,1.094727,-1.087891,0.501465,0.567871,-1.614258,0.217896,-1.964844,2.181641,1.2e-05,1.0,2.181641,2.181641,2.6e-05,-2.181641
27,0,1.558594,1.094727,1.529297,-0.365479,12.007812,2.625,2.181641,0.296143,3.714844,0.7519531,27,2.029297,1.094727,3.095703,6,6,0.598145,0.370605,0.374512,-95.462501,1.914062,1.917969,1.063477,0.667969,2.205078,0.021729,1.009766,3.095703,0.913574,1.418945,1.094727,-1.087891,0.501465,0.699707,-1.482422,0.320557,1.638672,-0.543945,0.750977,2.181641,5.2e-05,1.0,1.094727,-1.087891,0.501465,0.595703,-1.586914,0.272949,1.638672,1.094727,-1.087891,0.501465,0.567871,-1.614258,0.217896,-1.964844,2.181641,1.7e-05,1.0,2.181641,2.181641,2.6e-05,-2.181641
33,0,1.558594,1.094727,1.529297,-0.365723,12.007812,2.625,2.181641,0.278076,3.679688,0.8066406,27,2.029297,1.094727,3.095703,5,6,0.624023,0.061859,0.065735,-15.934839,1.525391,1.529297,0.836426,0.624023,2.292969,0.109497,1.049805,3.095703,0.913574,1.418945,1.094727,-1.087891,0.501465,0.744629,-1.4375,0.341064,1.638672,-0.543945,0.750977,2.181641,0.0,1.0,1.094727,-1.087891,0.501465,0.595703,-1.586914,0.272949,1.638672,1.094727,-1.087891,0.501465,0.567871,-1.614258,0.217896,-1.964844,2.181641,-3.5e-05,1.0,2.181641,2.181641,2.6e-05,-2.181641
37,0,1.558594,1.094727,1.529297,-0.365479,12.007812,2.625,2.181641,0.296143,3.714844,0.7519531,27,2.029297,1.094727,3.095703,4,6,0.63623,0.177856,-1.347656,0.116545,1.525391,0.0,0.918457,0.44751,1.702148,-0.480469,0.779785,2.181641,0.0,1.0,1.094727,-1.087891,0.501465,0.449951,-1.732422,0.206177,1.638672,-0.543945,0.750977,2.181641,5.2e-05,1.0,1.094727,-1.087891,0.501465,0.595703,-1.586914,0.272949,1.638672,1.094727,-1.087891,0.501465,0.567871,-1.614258,0.217896,-1.964844,2.181641,1.9e-05,1.0,2.181641,2.181641,2.6e-05,-2.181641
41,0,1.558594,1.094727,1.529297,-0.365479,12.007812,2.625,2.181641,0.98584,3.777344,8.34465e-07,27,2.029297,1.094727,3.095703,3,6,0.298584,0.376465,-1.149414,0.246732,1.525391,0.0,1.013672,0.516113,1.680664,-0.501465,0.77002,2.181641,0.0,1.0,1.094727,-1.087891,0.501465,0.548828,-1.633789,0.251465,1.638672,-0.543945,0.750977,2.181641,5.2e-05,1.0,1.094727,-1.087891,0.501465,0.595703,-1.586914,0.272949,1.638672,1.094727,-1.087891,0.501465,0.567871,-1.614258,0.217896,-1.964844,2.181641,1.9e-05,1.0,2.181641,2.181641,2.6e-05,-2.181641


20   -2.37831
27   -2.37862
33   -2.37716
37   -2.37876
41   -2.37852
Name: scalar_coupling_constant, dtype: float64

mean of target. train:-0.2669883847266586, valid:-0.27789653463344455
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 1.77539	training's l1: 0.899384	valid_1's l2: 1.98504	valid_1's l1: 0.93877
[200]	training's l2: 1.38554	training's l1: 0.797223	valid_1's l2: 1.70159	valid_1's l1: 0.864909
[300]	training's l2: 1.17388	training's l1: 0.735288	valid_1's l2: 1.56838	valid_1's l1: 0.826544
[400]	training's l2: 1.01828	training's l1: 0.686935	valid_1's l2: 1.47499	valid_1's l1: 0.799262
[500]	training's l2: 0.898653	training's l1: 0.647523	valid_1's l2: 1.40705	valid_1's l1: 0.778428
[600]	training's l2: 0.801662	training's l1: 0.613541	valid_1's l2: 1.35223	valid_1's l1: 0.761559
[700]	training's l2: 0.727981	training's l1: 0.585692	valid_1's l2: 1.315	valid_1's l1: 0.749605
[800]	training's l2: 0.662274	training's l1: 0.559771	valid_1's l2: 1.28149	valid_1's l1: 0.738541
[900]	training's l2: 0.60329	training's l1: 0.535786	valid_1's l2: 1.25348	valid_1

[INFO]2019-06-25 15:48:30,175:main:fold 1 valid -0.445296


mean of target. train:-0.2697028296664154, valid:-0.2724676518929613
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 1.77853	training's l1: 0.899978	valid_1's l2: 2.00808	valid_1's l1: 0.940044
[200]	training's l2: 1.4028	training's l1: 0.800447	valid_1's l2: 1.73339	valid_1's l1: 0.867057
[300]	training's l2: 1.17169	training's l1: 0.734542	valid_1's l2: 1.58144	valid_1's l1: 0.825153
[400]	training's l2: 1.02026	training's l1: 0.687484	valid_1's l2: 1.49009	valid_1's l1: 0.798739
[500]	training's l2: 0.900342	training's l1: 0.648252	valid_1's l2: 1.42205	valid_1's l1: 0.778883
[600]	training's l2: 0.801549	training's l1: 0.613354	valid_1's l2: 1.36632	valid_1's l1: 0.761274
[700]	training's l2: 0.725932	training's l1: 0.585287	valid_1's l2: 1.32881	valid_1's l1: 0.749504
[800]	training's l2: 0.662803	training's l1: 0.560393	valid_1's l2: 1.29794	valid_1's l1: 0.739399
[900]	training's l2: 0.606764	training's l1: 0.537126	valid_1's l2: 1.2699	valid_

[INFO]2019-06-25 15:55:20,043:main:fold 2 valid -0.439853


mean of target. train:-0.2751820932632042, valid:-0.2615091031497235
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 1.78566	training's l1: 0.901106	valid_1's l2: 1.98982	valid_1's l1: 0.937997
[200]	training's l2: 1.38975	training's l1: 0.798986	valid_1's l2: 1.70833	valid_1's l1: 0.864638
[300]	training's l2: 1.17097	training's l1: 0.734381	valid_1's l2: 1.56904	valid_1's l1: 0.824385
[400]	training's l2: 1.01953	training's l1: 0.687574	valid_1's l2: 1.47765	valid_1's l1: 0.797955
[500]	training's l2: 0.898409	training's l1: 0.647538	valid_1's l2: 1.4097	valid_1's l1: 0.777219
[600]	training's l2: 0.805251	training's l1: 0.614338	valid_1's l2: 1.36094	valid_1's l1: 0.761458
[700]	training's l2: 0.732315	training's l1: 0.586538	valid_1's l2: 1.32368	valid_1's l1: 0.749425
[800]	training's l2: 0.664319	training's l1: 0.560533	valid_1's l2: 1.28891	valid_1's l1: 0.738508
[900]	training's l2: 0.610859	training's l1: 0.538381	valid_1's l2: 1.26373	valid

[INFO]2019-06-25 16:02:35,457:main:fold 3 valid -0.435595
[INFO]2019-06-25 16:02:35,555:main:CV score: -0.440248
[INFO]2019-06-25 16:02:35,567:main:Starting train model(3JHH)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 16:02:38,833:main:features(3JHH): ['3j_norm_vec_02', '3j_norm_vec_13', '3j_norm_vec_23', '3j_cos_023', '3j_cos_231', '3j_area_023', '3j_area_231', '3j_dihedral', '3j_atom_center_weight', '3j_atom_center', '3j_sum_norm_vec', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_

Unnamed: 0,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
23,1.094727,1.094727,1.529297,-0.365479,-0.365479,1.558594,1.558594,0.5,24.015625,0,3.71875,2.542969,0.220337,5.484375,0.765137,27,2.029297,1.094727,3.095703,7,3,0.572754,0.592285,0.994141,-1.474066,1.923828,2.326172,1.134766,0.720703,2.140625,-0.401855,0.841797,3.095703,0.552734,1.216797,1.094727,-1.448242,0.43042,0.660156,-1.882812,0.259521,2.728516,0.184204,1.072266,3.095703,0.552734,1.216797,2.542969,0.0,1.0,0.319092,-2.224609,0.125488,2.341797,1.765625,-0.77832,0.693848,0.530762,-2.011719,0.276367,-2.267578,2.728516,0.184204,1.072266,3.095703,2.542969,0.276367,-2.267578
24,1.094727,1.094727,1.529297,-0.365479,-0.365479,1.558594,1.558594,-1.0,24.015625,0,3.71875,3.095703,4.027344,5.558594,4.3e-05,27,2.029297,1.094727,3.095703,7,4,0.572754,0.592285,1.010742,-1.417203,1.923828,2.341797,1.134766,0.720703,2.140625,-0.95459,0.691895,3.095703,0.0,1.0,1.094727,-2.0,0.35376,0.660156,-2.435547,0.213257,2.486328,-0.608887,0.803223,3.095703,0.0,1.0,1.765625,-1.331055,0.570312,0.547363,-2.548828,0.176758,2.341797,1.765625,-1.331055,0.570312,0.530762,-2.564453,0.276367,-2.820312,2.728516,-0.368408,0.880859,3.095703,2.542969,0.276367,-2.820312
25,1.094727,1.094727,1.529297,-0.365479,-0.365723,1.558594,1.558594,0.5,24.015625,0,3.71875,2.542969,0.23645,5.4375,0.792969,27,2.029297,1.094727,3.095703,7,5,0.572754,0.592285,0.984863,-1.509512,1.923828,2.316406,1.134766,0.720703,2.140625,-0.401855,0.841797,3.095703,0.552734,1.216797,1.094727,-1.448242,0.43042,0.660156,-1.883789,0.259521,2.341797,-0.200684,0.920898,3.095703,0.552734,1.216797,1.765625,-0.77832,0.693848,0.573242,-1.969727,0.225464,2.341797,1.765625,-0.77832,0.693848,0.530762,-2.011719,0.276367,-2.267578,2.728516,0.184204,1.072266,3.095703,2.542969,0.276367,-2.267578
29,1.094727,1.094727,1.529297,-0.365479,-0.365479,1.558594,1.558594,-1.0,24.015625,0,3.71875,3.095703,1.139648,5.40625,3.037109,27,2.029297,1.094727,3.095703,6,3,0.598145,0.370605,0.772461,-0.922044,1.914062,2.316406,1.063477,0.667969,2.205078,-0.891602,0.711914,3.095703,0.0,1.0,1.094727,-2.0,0.35376,0.699707,-2.396484,0.226074,2.728516,-0.368408,0.880859,3.095703,0.0,1.0,2.542969,-0.552734,0.821289,0.319092,-2.777344,0.103027,2.341797,1.765625,-1.331055,0.570312,0.530762,-2.564453,0.276367,-2.820312,2.728516,-0.368408,0.880859,3.095703,2.542969,0.276367,-2.820312
30,1.094727,1.094727,1.529297,-0.365479,-0.365479,1.558594,1.558594,0.5,24.015625,0,3.71875,2.542969,0.220337,5.484375,0.765137,27,2.029297,1.094727,3.095703,6,4,0.598145,0.370605,0.788574,-0.886476,1.914062,2.332031,1.063477,0.667969,2.205078,-0.339111,0.866699,3.095703,0.552734,1.216797,1.094727,-1.448242,0.43042,0.699707,-1.84375,0.275146,2.486328,-0.056396,0.978027,3.095703,0.552734,1.216797,1.765625,-0.77832,0.693848,0.547363,-1.996094,0.215088,2.341797,1.765625,-0.77832,0.693848,0.530762,-2.011719,0.276367,-2.267578,2.728516,0.184204,1.072266,3.095703,2.542969,0.276367,-2.267578


23     3.25281
24    13.69130
25     3.25205
29    13.69240
30     3.25253
Name: scalar_coupling_constant, dtype: float64

mean of target. train:4.768076147147949, valid:4.776917754971605
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.234978	training's l1: 0.330392	valid_1's l2: 0.283202	valid_1's l1: 0.35679
[200]	training's l2: 0.172218	training's l1: 0.28381	valid_1's l2: 0.243288	valid_1's l1: 0.32777
[300]	training's l2: 0.136337	training's l1: 0.25354	valid_1's l2: 0.222239	valid_1's l1: 0.311551
[400]	training's l2: 0.113308	training's l1: 0.231902	valid_1's l2: 0.210531	valid_1's l1: 0.302092
[500]	training's l2: 0.0949013	training's l1: 0.213017	valid_1's l2: 0.200491	valid_1's l1: 0.293664
[600]	training's l2: 0.0811821	training's l1: 0.197715	valid_1's l2: 0.193868	valid_1's l1: 0.287888
[700]	training's l2: 0.0705463	training's l1: 0.184946	valid_1's l2: 0.188993	valid_1's l1: 0.283498
[800]	training's l2: 0.0620771	training's l1: 0.173948	valid_1's l2: 0.185352	valid_1's l1: 0.280184
[900]	training's l2: 0.0545928	training's l1: 0.163552	valid_1's l2: 0.1

[INFO]2019-06-25 16:07:01,964:main:fold 1 valid -1.520597


mean of target. train:4.774106235047843, valid:4.764857593452466
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.233587	training's l1: 0.329518	valid_1's l2: 0.284039	valid_1's l1: 0.355919
[200]	training's l2: 0.170739	training's l1: 0.282726	valid_1's l2: 0.242792	valid_1's l1: 0.326238
[300]	training's l2: 0.136592	training's l1: 0.253176	valid_1's l2: 0.223132	valid_1's l1: 0.310728
[400]	training's l2: 0.112804	training's l1: 0.231072	valid_1's l2: 0.210679	valid_1's l1: 0.300795
[500]	training's l2: 0.0947136	training's l1: 0.212566	valid_1's l2: 0.201155	valid_1's l1: 0.292829
[600]	training's l2: 0.0813403	training's l1: 0.197451	valid_1's l2: 0.194306	valid_1's l1: 0.286879
[700]	training's l2: 0.0709454	training's l1: 0.185071	valid_1's l2: 0.189614	valid_1's l1: 0.282881
[800]	training's l2: 0.0624415	training's l1: 0.174068	valid_1's l2: 0.185787	valid_1's l1: 0.2795
[900]	training's l2: 0.0552569	training's l1: 0.164036	valid_1's l2: 0

[INFO]2019-06-25 16:11:23,440:main:fold 2 valid -1.529050


mean of target. train:4.770887689526883, valid:4.771294700843139
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.23538	training's l1: 0.330965	valid_1's l2: 0.286397	valid_1's l1: 0.357961
[200]	training's l2: 0.171051	training's l1: 0.283143	valid_1's l2: 0.244245	valid_1's l1: 0.327742
[300]	training's l2: 0.136942	training's l1: 0.25436	valid_1's l2: 0.225099	valid_1's l1: 0.313206
[400]	training's l2: 0.112383	training's l1: 0.231304	valid_1's l2: 0.211375	valid_1's l1: 0.302071
[500]	training's l2: 0.0956502	training's l1: 0.213862	valid_1's l2: 0.20332	valid_1's l1: 0.295019
[600]	training's l2: 0.0821148	training's l1: 0.198888	valid_1's l2: 0.196944	valid_1's l1: 0.289592
[700]	training's l2: 0.0709125	training's l1: 0.185398	valid_1's l2: 0.191174	valid_1's l1: 0.284517
[800]	training's l2: 0.0619934	training's l1: 0.17384	valid_1's l2: 0.187162	valid_1's l1: 0.280934
[900]	training's l2: 0.0544224	training's l1: 0.163611	valid_1's l2: 0.1

[INFO]2019-06-25 16:15:43,815:main:fold 3 valid -1.506999
[INFO]2019-06-25 16:15:43,874:main:CV score: -1.518882
[INFO]2019-06-25 16:15:43,880:main:Starting train model(3JHC)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 16:15:51,086:main:features(3JHC): ['3j_norm_vec_02', '3j_norm_vec_13', '3j_norm_vec_23', '3j_cos_023', '3j_cos_231', '3j_area_023', '3j_area_231', '3j_dihedral', '3j_atom_center_weight', '3j_atom_center', '3j_sum_norm_vec', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_

Unnamed: 0,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
58,1.095703,1.201172,1.455078,-0.361572,-1.0,1.486328,0.000355,-0.772949,24.015625,0,3.751953,3.21875,0.959473,9.398438,5.1e-05,15,2.109375,1.061523,3.71875,5,4,0.293213,0.797852,1.989258,-0.669307,1.858398,3.050781,1.34668,0.625488,1.993164,-1.225586,0.619141,3.21875,0.0,1.0,1.095703,-2.123047,0.340332,0.77832,-2.439453,0.241821,2.679688,-0.539551,0.83252,3.21875,0.0,1.0,1.061523,-2.158203,0.329834,1.078125,-2.140625,0.335205,2.193359,1.061523,-2.158203,0.329834,0.972168,-2.246094,0.249634,-2.96875,3.34375,0.124756,1.039062,3.71875,3.21875,0.249634,-2.96875
63,1.095703,1.201172,1.455078,-0.361572,-1.0,1.486328,0.000355,-0.163086,24.015625,0,3.751953,3.21875,0.314209,9.304688,0.744141,15,2.109375,1.061523,3.71875,4,4,0.263428,0.532715,1.724609,-0.446886,1.848633,3.041016,1.396484,0.449463,2.048828,-1.169922,0.636719,3.21875,0.0,1.0,1.095703,-2.123047,0.340332,0.887207,-2.332031,0.275635,2.679688,-0.539551,0.83252,3.21875,4.5e-05,1.0,1.061523,-2.15625,0.329834,1.078125,-2.140625,0.335205,2.193359,1.061523,-2.15625,0.329834,0.972168,-2.246094,0.249634,-2.96875,3.34375,0.124756,1.039062,3.71875,3.21875,0.249634,-2.96875
67,1.095703,1.201172,1.455078,-0.361328,-1.0,1.486328,0.000355,0.936035,24.015625,0,3.751953,3.21875,0.295654,9.242188,0.820801,15,2.109375,1.061523,3.71875,3,4,0.018112,0.093994,1.286133,-0.078889,1.464844,2.65625,1.330078,0.007313,2.142578,-1.076172,0.665527,3.21875,0.0,1.0,1.095703,-2.123047,0.340332,1.061523,-2.15625,0.329834,2.679688,-0.539062,0.83252,3.21875,0.000195,1.0,1.061523,-2.15625,0.329834,1.078125,-2.140625,0.335205,2.193359,1.061523,-2.15625,0.329834,0.972168,-2.246094,0.249634,-2.96875,3.34375,0.124939,1.039062,3.71875,3.21875,0.249634,-2.96875
68,1.061523,1.455078,1.201172,-1.0,-1.0,0.00046,0.000355,0.771484,24.015625,0,3.71875,3.71875,0.002514,13.820312,0.000414,15,2.109375,1.061523,3.71875,3,4,0.018112,0.093994,-1.370117,0.064206,1.464844,0.0,1.330078,0.007313,2.347656,-1.370117,0.631348,3.71875,0.0,1.0,1.061523,-2.65625,0.2854,1.330078,-2.386719,0.35791,1.750977,-1.966797,0.470947,3.71875,0.0,1.0,1.095703,-2.623047,0.294678,1.311523,-2.40625,0.352783,2.193359,1.061523,-2.65625,0.2854,0.972168,-2.746094,0.249634,-3.46875,3.34375,-0.374512,0.899414,3.71875,3.21875,0.249634,-3.46875
108,1.095703,1.53125,1.53125,-0.360352,-0.388916,1.56543,2.160156,0.503418,24.015625,0,4.160156,2.818359,0.066467,6.203125,1.672852,43,2.175781,1.094727,3.505859,7,8,0.619141,0.588867,1.115234,-1.120066,1.948242,2.474609,1.148438,0.80957,2.175781,-0.641602,0.772461,3.078125,0.260986,1.092773,1.095703,-1.72168,0.388916,0.689453,-2.128906,0.244629,2.09375,-0.724121,0.743164,3.505859,0.688477,1.244141,1.094727,-1.723633,0.388428,0.929199,-1.888672,0.329834,2.033203,1.094727,-1.723633,0.388428,0.78125,-2.037109,0.338867,-2.478516,3.046875,0.229492,1.081055,3.505859,2.818359,0.355713,-2.462891


58     4.55166
63     4.55410
67     4.55341
68     2.51865
108    2.51277
Name: scalar_coupling_constant, dtype: float64

mean of target. train:3.690534941137389, valid:3.684338890434173
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.868138	training's l1: 0.644721	valid_1's l2: 0.965642	valid_1's l1: 0.665045
[200]	training's l2: 0.686078	training's l1: 0.57491	valid_1's l2: 0.825467	valid_1's l1: 0.61051
[300]	training's l2: 0.590132	training's l1: 0.533072	valid_1's l2: 0.758676	valid_1's l1: 0.581602
[400]	training's l2: 0.517531	training's l1: 0.500753	valid_1's l2: 0.710555	valid_1's l1: 0.560893
[500]	training's l2: 0.464839	training's l1: 0.475633	valid_1's l2: 0.678689	valid_1's l1: 0.546391
[600]	training's l2: 0.420394	training's l1: 0.453162	valid_1's l2: 0.652227	valid_1's l1: 0.534026
[700]	training's l2: 0.385011	training's l1: 0.434341	valid_1's l2: 0.632317	valid_1's l1: 0.524578
[800]	training's l2: 0.353439	training's l1: 0.41686	valid_1's l2: 0.614283	valid_1's l1: 0.515767
[900]	training's l2: 0.326427	training's l1: 0.401393	valid_1's l2: 0.59969

[INFO]2019-06-25 16:25:07,927:main:fold 1 valid -0.702821


mean of target. train:3.6877465648081365, valid:3.6899156375542
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.873883	training's l1: 0.647198	valid_1's l2: 0.950916	valid_1's l1: 0.668744
[200]	training's l2: 0.693962	training's l1: 0.576483	valid_1's l2: 0.813075	valid_1's l1: 0.612618
[300]	training's l2: 0.58971	training's l1: 0.533351	valid_1's l2: 0.740814	valid_1's l1: 0.582671
[400]	training's l2: 0.520532	training's l1: 0.502233	valid_1's l2: 0.696636	valid_1's l1: 0.563256
[500]	training's l2: 0.464791	training's l1: 0.47594	valid_1's l2: 0.662726	valid_1's l1: 0.54812
[600]	training's l2: 0.421388	training's l1: 0.454484	valid_1's l2: 0.637698	valid_1's l1: 0.536699
[700]	training's l2: 0.385736	training's l1: 0.435396	valid_1's l2: 0.617031	valid_1's l1: 0.526612
[800]	training's l2: 0.354277	training's l1: 0.418247	valid_1's l2: 0.599594	valid_1's l1: 0.51824
[900]	training's l2: 0.326754	training's l1: 0.402555	valid_1's l2: 0.584475	

[INFO]2019-06-25 16:34:14,249:main:fold 2 valid -0.707220


mean of target. train:3.6871272639941557, valid:3.691154245950344
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.874382	training's l1: 0.64624	valid_1's l2: 0.958226	valid_1's l1: 0.667024
[200]	training's l2: 0.700048	training's l1: 0.57849	valid_1's l2: 0.823485	valid_1's l1: 0.614008
[300]	training's l2: 0.591855	training's l1: 0.534875	valid_1's l2: 0.748206	valid_1's l1: 0.583776
[400]	training's l2: 0.519031	training's l1: 0.501356	valid_1's l2: 0.699463	valid_1's l1: 0.561964
[500]	training's l2: 0.463476	training's l1: 0.475268	valid_1's l2: 0.665918	valid_1's l1: 0.547204
[600]	training's l2: 0.418534	training's l1: 0.452242	valid_1's l2: 0.639121	valid_1's l1: 0.534423
[700]	training's l2: 0.384482	training's l1: 0.433633	valid_1's l2: 0.619878	valid_1's l1: 0.524917
[800]	training's l2: 0.353154	training's l1: 0.416065	valid_1's l2: 0.602083	valid_1's l1: 0.516015
[900]	training's l2: 0.327211	training's l1: 0.401048	valid_1's l2: 0.588

[INFO]2019-06-25 16:43:38,859:main:fold 3 valid -0.703641
[INFO]2019-06-25 16:43:38,992:main:CV score: -0.704561
[INFO]2019-06-25 16:43:39,015:main:Starting train model(3JHN)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
[INFO]2019-06-25 16:43:41,647:main:features(3JHN): ['3j_norm_vec_02', '3j_norm_vec_13', '3j_norm_vec_23', '3j_cos_023', '3j_cos_231', '3j_area_023', '3j_area_231', '3j_dihedral', '3j_atom_center_weight', '3j_atom_center', '3j_sum_norm_vec', 'dist', 'dist_x', 'dist_y', 'dist_z', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_

Unnamed: 0,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
73,1.09375,1.155273,1.457031,-0.347412,-1.0,1.493164,0.000536,0.966309,24.015625,0,3.705078,3.162109,0.969238,9.03125,4.4e-05,12,2.033203,1.09375,3.162109,5,3,0.293945,0.802734,1.947266,-0.701237,1.844727,2.990234,1.324219,0.62793,1.982422,-1.180664,0.626953,3.162109,0.0,1.0,1.09375,-2.068359,0.345703,0.755371,-2.40625,0.238892,3.162109,5.1e-05,1.0,3.162109,0.000125,1.0,3.162109,0.0,1.0,6.6e-05,-3.162109,2.1e-05,3.162109,3.162109,0.0,1.0,6.6e-05,-3.162109,6.6e-05,-3.162109,3.162109,5.1e-05,1.0,3.162109,3.162109,6.6e-05,-3.162109
78,1.09375,1.155273,1.457031,-0.347412,-1.0,1.494141,0.000536,-0.706055,24.015625,0,3.705078,3.162109,0.31543,8.9375,0.750977,12,2.033203,1.09375,3.162109,4,3,0.26416,0.541992,1.686523,-0.473586,1.834961,2.980469,1.373047,0.451172,2.033203,-1.128906,0.643066,3.162109,0.0,1.0,1.09375,-2.070312,0.345703,0.862305,-2.300781,0.272705,3.162109,-7.4e-05,1.0,3.162109,0.0,1.0,3.162109,-0.000125,1.0,6.6e-05,-3.162109,2.1e-05,3.162109,3.162109,-0.000125,1.0,6.6e-05,-3.162109,6.6e-05,-3.162109,3.162109,-7.4e-05,1.0,3.162109,3.162109,6.6e-05,-3.162109
82,1.09375,1.155273,1.457031,-0.347412,-1.0,1.494141,0.000536,-0.260254,24.015625,0,3.705078,3.162109,0.296631,8.875,0.826172,12,2.033203,1.09375,3.162109,3,3,0.017807,0.110962,1.255859,-0.096969,1.466797,2.611328,1.308594,0.007175,2.119141,-1.042969,0.67041,3.162109,0.0,1.0,1.09375,-2.068359,0.345703,1.035156,-2.126953,0.327148,3.162109,2.4e-05,1.0,3.162109,9.8e-05,1.0,3.162109,-2.7e-05,1.0,6.6e-05,-3.162109,2.1e-05,3.162109,3.162109,-2.7e-05,1.0,6.6e-05,-3.162109,6.6e-05,-3.162109,3.162109,2.4e-05,1.0,3.162109,3.162109,6.6e-05,-3.162109
213,1.09375,1.369141,1.522461,-0.377686,-0.416016,1.541992,1.895508,0.772949,24.015625,0,3.986328,2.626953,0.069885,6.566406,0.268311,19,1.985352,1.004883,3.357422,5,5,0.730469,0.90918,1.541992,-1.435578,1.911133,2.544922,1.157227,0.577637,1.892578,-0.734863,0.720215,2.626953,0.0,1.0,1.09375,-1.533203,0.41626,0.567871,-2.060547,0.216064,2.166016,-0.461426,0.824219,3.318359,0.69043,1.262695,1.004883,-1.623047,0.382324,1.087891,-1.540039,0.414062,2.166016,1.004883,-1.623047,0.382324,1.087891,-1.540039,0.365967,-2.261719,2.939453,0.312012,1.119141,3.318359,2.626953,0.350098,-2.277344
218,1.089844,1.369141,1.522461,-0.321289,-0.416016,1.571289,1.895508,-0.945312,24.015625,0,3.982422,3.318359,4.199219,6.472656,0.336914,19,1.985352,1.004883,3.357422,4,5,0.597656,0.658691,1.291992,-1.039927,1.785156,2.417969,1.168945,0.541992,2.080078,-1.238281,0.626953,3.318359,0.0,1.0,1.089844,-2.228516,0.328369,0.933105,-2.384766,0.28125,2.166016,-1.152344,0.652832,3.318359,0.0,1.0,1.004883,-2.3125,0.302734,1.087891,-2.230469,0.327881,2.166016,1.004883,-2.3125,0.302734,1.087891,-2.230469,0.365967,-2.951172,2.939453,-0.378418,0.885742,3.318359,2.626953,0.350098,-2.96875


73     0.880802
78     0.880957
82     0.880871
213   -0.052074
218    1.611320
Name: scalar_coupling_constant, dtype: float64

mean of target. train:0.9912910848093136, valid:0.9896074279816763
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.0679074	training's l1: 0.172905	valid_1's l2: 0.126864	valid_1's l1: 0.221975
[200]	training's l2: 0.0385537	training's l1: 0.132187	valid_1's l2: 0.112252	valid_1's l1: 0.205625
[300]	training's l2: 0.0250182	training's l1: 0.107695	valid_1's l2: 0.106024	valid_1's l1: 0.198107
[400]	training's l2: 0.0172184	training's l1: 0.0902358	valid_1's l2: 0.102291	valid_1's l1: 0.193731
[500]	training's l2: 0.012196	training's l1: 0.0766538	valid_1's l2: 0.100113	valid_1's l1: 0.191228
[600]	training's l2: 0.0088899	training's l1: 0.0658484	valid_1's l2: 0.0984864	valid_1's l1: 0.18931
[700]	training's l2: 0.00659961	training's l1: 0.057136	valid_1's l2: 0.0973299	valid_1's l1: 0.187775
[800]	training's l2: 0.00494993	training's l1: 0.0497619	valid_1's l2: 0.0965614	valid_1's l1: 0.186815
[900]	training's l2: 0.00374003	training's l1: 0.043543

[INFO]2019-06-25 16:45:50,318:main:fold 1 valid -2.115853


mean of target. train:0.9900282797472645, valid:0.9921330153410689
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.0665362	training's l1: 0.17141	valid_1's l2: 0.123547	valid_1's l1: 0.219289
[200]	training's l2: 0.038311	training's l1: 0.131967	valid_1's l2: 0.109901	valid_1's l1: 0.203993
[300]	training's l2: 0.0248201	training's l1: 0.107769	valid_1's l2: 0.103346	valid_1's l1: 0.196504
[400]	training's l2: 0.0170714	training's l1: 0.0903531	valid_1's l2: 0.0999268	valid_1's l1: 0.192573
[500]	training's l2: 0.0122111	training's l1: 0.0770191	valid_1's l2: 0.0978756	valid_1's l1: 0.189842
[600]	training's l2: 0.00903547	training's l1: 0.0664369	valid_1's l2: 0.0965752	valid_1's l1: 0.188101
[700]	training's l2: 0.00663286	training's l1: 0.0574072	valid_1's l2: 0.0954158	valid_1's l1: 0.186584
[800]	training's l2: 0.00503176	training's l1: 0.0501613	valid_1's l2: 0.0946469	valid_1's l1: 0.185579
[900]	training's l2: 0.00383359	training's l1: 0.04

[INFO]2019-06-25 16:47:58,949:main:fold 2 valid -2.066710


mean of target. train:0.9908702216613917, valid:0.990449139099707
Training until validation scores don't improve for 100 rounds.
[100]	training's l2: 0.0696875	training's l1: 0.173641	valid_1's l2: 0.12797	valid_1's l1: 0.22157
[200]	training's l2: 0.0397558	training's l1: 0.132914	valid_1's l2: 0.112668	valid_1's l1: 0.205007
[300]	training's l2: 0.0253475	training's l1: 0.108076	valid_1's l2: 0.106301	valid_1's l1: 0.197904
[400]	training's l2: 0.0173393	training's l1: 0.0905601	valid_1's l2: 0.102934	valid_1's l1: 0.194004
[500]	training's l2: 0.0122701	training's l1: 0.0769351	valid_1's l2: 0.100554	valid_1's l1: 0.191067
[600]	training's l2: 0.00893245	training's l1: 0.066145	valid_1's l2: 0.0991038	valid_1's l1: 0.189118
[700]	training's l2: 0.00657579	training's l1: 0.05726	valid_1's l2: 0.0979407	valid_1's l1: 0.187826
[800]	training's l2: 0.00496125	training's l1: 0.049953	valid_1's l2: 0.0972266	valid_1's l1: 0.186842
[900]	training's l2: 0.00377297	training's l1: 0.0437322	v

[INFO]2019-06-25 16:50:06,872:main:fold 3 valid -2.076177
[INFO]2019-06-25 16:50:06,897:main:CV score: -2.086247


In [0]:
for _, df_score in score_dict.items():
    display(df_score.mean()[0])

### Check training result

In [0]:
# sns.distplot(df_pred['proba'])

In [0]:
def feat_importance(_models, _X, _imp_type='gain'):
    df_imp = pd.DataFrame(index=_X.columns)
    for i, model in enumerate(_models):
        df_imp[i] = model.booster_.feature_importance(importance_type=_imp_type)

    df_imp = df_imp.apply(lambda x: x/sum(x))
    df_imp['imp_mean'] = df_imp[list(range(len(models)))].mean(axis=1)
    df_imp['imp_std'] = df_imp[list(range(len(models)))].std(axis=1)
    sorted_imp = df_imp.sort_values(by='imp_mean', ascending=False)
    return sorted_imp

In [0]:
# imp = feat_importance(model_dict['1JHC'], X, _imp_type='gain')
# imp.head(100)

## Predict

In [0]:
df_test = pd.read_csv(TEST_PATH)
df_strct = pd.read_csv(INPUT + 'structures.csv')

In [0]:
def predict_single(df, strct):
    models = joblib.load(MODEL_PATH)

    df_submit = df[['id']].copy()
    df = preprocess(df, strct, mode='predict')
    X = drop_col(df)
    display(X.head())
    
    X.to_csv('test_prepro.csv', index=False)
    
    y_pred = oof_predict(models, X)
    df_submit['scalar_coupling_constant'] = y_pred
    
    return df_submit

In [0]:
def predict_each_type(df, strct):
    # df = df.head(10000)
    model_dict = joblib.load(MODEL_PATH)
    
    s_type = df['type'].copy()
    df_submit = df[['id']].copy()
    
    df = preprocess(df, strct, mode='predict')
    df = drop_col(df)    
    
    coupling_types = s_type.unique()
    print(coupling_types)
    for coup_type in coupling_types:
        
        models = model_dict[coup_type]
        
        get_logger().info('Starting predict target(%s)' % coup_type)
        is_the_type = (s_type == coup_type)
        df_type = df[is_the_type]
                      
        X = df_type
        X = drop_uneffect_feature(X)        
        
        display(X.head())  
        y_pred = oof_predict(models, X)        
        
        df_submit.loc[is_the_type, 'scalar_coupling_constant'] = y_pred
    
    display(df_submit.head())
    print((df_submit[TARGET].isnull()).sum())
    return df_submit

In [0]:
df_submit = predict_each_type(df_test, df_strct)

[INFO]2019-06-25 16:50:48,461:main:Start preprocess()
[INFO]2019-06-25 16:50:48,462:main:load df_1j
[INFO]2019-06-25 16:50:53,185:main:load df_2jsim
[INFO]2019-06-25 16:51:08,124:main:load df_3jsim


Starting Feature Engineering...


Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,type,1j_nbonds,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,atom_0,x_0,y_0,z_0,atom_1,x_1,y_1,z_1,dist,dist_x,dist_y,dist_z,type_0,type_1,molecule_couples,molecule_dist_mean,...,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
0,4658147,dsgdb9nsd_000004,2,0,2JHC,,C,0.0,1.062099,1.199079,-1.0,12.01,2.261178,,,,,,,,,,,,H,-1.661639,0.0,1.0,C,0.599539,0.0,1.0,2.261178,5.112926,0.0,0.0,2,JHC,5,1.993966,...,0.0,2.215518,-0.04566,0.979807,3.323277,1.062099,1.469711,1.062099,-1.199079,0.469711,1.13128,-1.129898,0.500306,1.661639,-0.599539,0.734855,2.261178,0.0,1.0,1.062099,-1.199079,0.469711,0.847877,-1.413301,0.374971,1.661639,1.062099,-1.199079,0.469711,0.692289,-1.56889,0.0,-2.261178,2.261178,0.0,1.0,2.261178,2.261178,0.0,-2.261178
1,4658148,dsgdb9nsd_000004,2,1,1JHC,2.0,,,,,,,,,,,,,,,,,,,H,-1.661639,0.0,1.0,C,-0.599539,0.0,1.0,1.062099,1.128054,0.0,0.0,1,JHC,5,1.993966,...,0.0,2.215518,1.153419,2.085981,3.323277,2.261178,3.128971,1.062099,0.0,1.0,1.13128,0.069181,1.065136,1.661639,0.599539,1.564485,2.261178,1.199079,2.128971,1.062099,0.0,1.0,0.847877,-0.214222,0.798303,1.661639,1.062099,0.0,1.0,0.692289,-0.369811,0.0,-1.062099,1.062099,0.0,1.0,1.062099,1.062099,0.0,-1.062099
2,4658149,dsgdb9nsd_000004,2,3,3JHH,,,,,,,,,1.062099,1.062099,1.199079,-1.0,-1.0,0.0,0.0,,24.02,CC,3.323277,H,-1.661639,0.0,1.0,H,1.661639,0.0,1.0,3.323277,11.044171,0.0,0.0,3,JHH,5,1.993966,...,0.0,2.215518,-1.107759,0.666667,3.323277,0.0,1.0,1.062099,-2.261178,0.319594,1.13128,-2.191997,0.340411,3.323277,0.0,1.0,3.323277,0.0,1.0,3.323277,0.0,1.0,,,,3.323277,3.323277,0.0,1.0,,,,,3.323277,0.0,1.0,3.323277,3.323277,,
3,4658150,dsgdb9nsd_000004,3,0,1JHC,2.0,,,,,,,,,,,,,,,,,,,H,1.661639,0.0,1.0,C,0.599539,0.0,1.0,1.062099,1.128054,0.0,0.0,1,JHC,5,1.993966,...,0.0,1.661639,0.599539,1.564485,2.261178,1.199079,2.128971,1.062099,0.0,1.0,0.847877,-0.214222,0.798303,1.661639,0.599539,1.564485,2.261178,1.199079,2.128971,1.062099,0.0,1.0,0.847877,-0.214222,0.798303,1.661639,1.062099,0.0,1.0,0.692289,-0.369811,0.0,-1.062099,1.062099,0.0,1.0,1.062099,1.062099,0.0,-1.062099
4,4658151,dsgdb9nsd_000004,3,1,2JHC,,C,0.0,1.062099,1.199079,-1.0,12.01,2.261178,,,,,,,,,,,,H,1.661639,0.0,1.0,C,-0.599539,0.0,1.0,2.261178,5.112926,0.0,0.0,2,JHC,5,1.993966,...,0.0,1.661639,-0.599539,0.734855,2.261178,0.0,1.0,1.062099,-1.199079,0.469711,0.847877,-1.413301,0.374971,1.661639,-0.599539,0.734855,2.261178,0.0,1.0,1.062099,-1.199079,0.469711,0.847877,-1.413301,0.374971,1.661639,1.062099,-1.199079,0.469711,0.692289,-1.56889,0.0,-2.261178,2.261178,0.0,1.0,2.261178,2.261178,0.0,-2.261178
5,4658152,dsgdb9nsd_000015,3,0,1JHC,4.0,,,,,,,,,,,,,,,,,,,H,1.005284,1.810158,0.004656,C,-0.014821,1.392412,0.005671,1.102328,1.040614,0.174511,1e-06,1,JHC,18,1.910365,...,0.921126,1.827658,0.72533,1.657999,2.640324,1.537996,2.395226,1.102328,0.0,1.0,0.629906,-0.472422,0.571433,1.973384,0.871056,1.790197,3.262567,2.160239,2.959706,1.092852,-0.009476,0.991404,0.984263,-0.118065,0.892895,1.973384,1.092852,-0.009476094,0.991404,0.938458,-0.16387,0.004893,-1.097435,1.099169,-0.003159,0.997134,1.102328,1.092852,0.004893,-1.097435
6,4658153,dsgdb9nsd_000015,3,2,3JHC,,,,,,,,,1.102328,1.406083,1.406084,-0.372304,-0.381999,1.43854,1.827135,0.490214,28.01,CO,3.914495,H,1.005284,1.810158,0.004656,C,0.637949,-0.553297,-1.113582,2.640324,0.134935,5.585919,1.250458,3,JHC,18,1.910365,...,0.921126,1.827658,-0.812666,0.69221,2.640324,0.0,1.0,1.102328,-1.537996,0.417497,0.629906,-2.010418,0.238572,1.973384,-0.66694,0.747402,3.262566,0.622242,1.235669,1.092852,-1.547472,0.413908,0.984263,-1.656061,0.372781,1.973384,1.092852,-1.547472,0.413908,0.938458,-1.701866,0.321432,-2.318892,2.8476,0.207276,1.078504,3.262567,2.63991,0.321432,-2.318892
7,4658154,dsgdb9nsd_000015,3,4,2JHH,,C,1.156023,1.102328,1.102327,-0.308079,12.01,2.204655,,,,,,,,,,,,H,1.005284,1.810158,0.004656,H,-0.546896,1.793435,-0.872511,1.782964,2.40926,0.00028,0.769422,2,JHH,18,1.910365,...,0.921126,1.827658,0.044694,1.025067,2.640324,0.85736,1.480862,1.102328,-0.680636,0.618256,0.629906,-1.153058,0.353292,1.782964,0.0,1.0,1.782964,0.0,1.0,1.782964,0.0,1.0,,,,1.784327,1.782964,-2.799301e-07,1.0,0.001056,-1.781908,0.001056,-1.781908,1.784327,0.001363,1.000764,1.785018,1.782964,0.001056,-1.781908
8,4658155,dsgdb9nsd_000015,3,5,2JHH,,C,1.140346,1.102328,1.092852,-0.322423,12.01,2.19518,,,,,,,,,,,,H,1.005284,1.810158,0.004656,H,-0.530029,1.72292,0.911017,1.785017,2.357185,0.007611,0.82149,2,JHH,18,1.910365,...,0.921126,1.827658,0.042641,1.023889,2.640324,0.855307,1.479159,1.102328,-0.682689,0.617545,0.629906,-1.15511,0.352885,1.785009,-8e-06,0.999995,1.785017,0.0,1.0,1.785001,-1.6e-05,0.999991,1.1e-05,-1.785005,6e-06,1.784327,1.782964,-0.002052782,0.99885,0.001056,-1.783961,0.001056,-1.783961,1.784327,-0.000689,0.999614,1.785018,1.782964,0.001056,-1.783961
9,4658156,dsgdb9nsd_000015,4,0,1JHC,4.0,,,,,,,,,,,,,,,,,,,H,-0.546896,1.793435,-0.872511,C,-0.014821,1.392412,0.005671,1.102327,0.283103,0.160819,0.771203,1,JHC,18,1.910365,...,1.014181,1.842413,0.740085,1.671385,2.63991,1.537583,2.394851,1.102327,0.0,1.0,0.770397,-0.33193,0.698883,1.973384,0.871057,1.790198,3.262567,2.160239,2.959708,1.092852,-0.009475,0.991404,0.984263,-0.118064,0.892896,1.973384,1.092852,-0.009475326,0.991404,0.938458,-0.163869,0.004893,-1.097434,1.099169,-0.003158,0.997135,1.102328,1.092852,0.004893,-1.097434


Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,type,1j_nbonds,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,atom_0,x_0,y_0,z_0,atom_1,x_1,y_1,z_1,dist,dist_x,dist_y,dist_z,type_0,type_1,molecule_couples,molecule_dist_mean,...,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
2505532,7163679,dsgdb9nsd_133885,14,7,1JHC,4.0,,,,,,,,,,,,,,,,,,,H,0.013699,1.199431,-1.680192,C,-0.095929,0.380424,-0.972098,1.088204,0.012018,0.670772,0.501398,1,JHC,52,2.499017,...,0.882245,2.537945,1.449741,2.332233,3.367225,2.279021,3.094296,1.088204,0.0,1.0,0.688326,-0.399878,0.632534,2.687623,1.599419,2.469779,3.378975,2.290771,3.105093,1.088204,0.0,1.0,0.81451,-0.273695,0.74849,2.465317,1.080482,-0.007721802,0.992904,0.771098,-0.317106,0.005294,-1.08291,1.08594,-0.002265,0.997919,1.091826,1.080482,0.005294,-1.08291
2505533,7163680,dsgdb9nsd_133885,14,8,2JHC,,C,1.40263,1.088204,1.503272,-0.514613,12.01,2.591476,,,,,,,,,,,,H,0.013699,1.199431,-1.680192,C,0.816694,-0.813067,-1.02236,2.264441,0.644802,4.050146,0.432744,2,JHC,52,2.499017,...,0.882245,2.537945,0.273504,1.120782,3.367225,1.102785,1.487001,1.088204,-1.176237,0.480562,0.688326,-1.576114,0.303972,2.268936,0.004495,1.001985,3.367229,1.102789,1.487003,1.080482,-1.183958,0.477152,0.809831,-1.454609,0.35763,2.465317,1.080482,-1.183958,0.477152,0.771098,-1.493343,0.113677,-2.150764,2.299965,0.035524,1.015688,2.32455,2.264441,0.021884,-2.242557
2505534,7163681,dsgdb9nsd_133885,14,15,3JHH,,,,,,,,,1.088204,1.080482,1.503272,-0.514613,-0.595536,1.40263,1.304814,0.839732,24.02,CC,3.671959,H,0.013699,1.199431,-1.680192,H,1.260745,-1.246754,-1.906767,2.755046,1.555125,5.983817,0.051336,3,JHH,52,2.499017,...,0.882245,2.537945,-0.217101,0.921199,3.367225,0.612179,1.222203,1.088204,-1.666842,0.394986,0.688326,-2.06672,0.249842,2.74421,-0.010836,0.996067,2.755046,0.0,1.0,2.737267,-0.017779,0.993547,0.009507,-2.745539,0.003451,2.589861,1.811174,-0.9438719,0.657403,0.381557,-2.373489,0.329958,-2.425088,2.745599,-0.009447,0.996571,2.755049,2.737267,0.008715,-2.746331
2505535,7163682,dsgdb9nsd_133885,15,1,3JHN,,,,,,,,,1.080482,1.563097,1.503272,-0.595536,-0.437273,1.304814,2.113208,-0.403965,24.02,CC,4.146852,H,1.260745,-1.246754,-1.906767,N,-1.564594,0.099125,-0.516941,3.424258,7.98254,1.811389,1.931616,3,JHN,52,2.499017,...,0.700067,2.585814,-0.838445,0.755146,3.424258,0.0,1.0,1.080482,-2.343776,0.315538,0.846251,-2.578007,0.247134,2.632836,-0.791422,0.768878,3.424258,0.0,1.0,2.225959,-1.198299,0.650056,0.613111,-2.811147,0.179049,2.632836,2.225959,-1.198299,0.650056,0.613111,-2.811147,0.329958,-3.0943,3.424254,-4e-06,0.999999,3.424258,3.42425,6e-06,-3.424252
2505536,7163683,dsgdb9nsd_133885,15,2,3JHC,,,,,,,,,1.080482,1.503272,1.519516,-0.563522,-0.177201,1.356301,2.248096,-0.855315,24.02,CC,4.10327,H,1.260745,-1.246754,-1.906767,C,-0.832899,-0.70755,0.604295,3.313536,4.383345,0.29074,6.305435,3,JHC,52,2.499017,...,0.700067,2.585814,-0.727722,0.780379,3.424258,0.110722,1.033415,1.080482,-2.233054,0.326081,0.846251,-2.467285,0.255392,2.687625,-0.625911,0.811105,3.378965,0.065429,1.019746,1.088204,-2.225332,0.328412,0.814509,-2.499027,0.245813,2.465317,1.080482,-2.233054,0.326081,0.771098,-2.542438,0.329958,-2.983578,3.095347,-0.218189,0.934152,3.378975,2.481084,0.328236,-2.9853
2505537,7163684,dsgdb9nsd_133885,15,3,2JHC,,C,1.356301,1.080482,1.519516,-0.563522,12.01,2.599998,,,,,,,,,,,,H,1.260745,-1.246754,-1.906767,C,0.27103,-1.618614,0.144805,2.307978,0.979537,0.13828,4.208946,2,JHC,52,2.499017,...,0.700067,2.585814,0.277835,1.12038,3.424258,1.11628,1.483661,1.080482,-1.227496,0.468151,0.846251,-1.461727,0.366663,2.268935,-0.039043,0.983083,3.367225,1.059247,1.45895,1.080482,-1.227496,0.468151,0.80983,-1.498148,0.350883,2.465317,1.080482,-1.227496,0.468151,0.771098,-1.536881,0.113677,-2.194302,2.299965,-0.008013,0.996528,2.32455,2.264441,0.021884,-2.286095
2505538,7163685,dsgdb9nsd_133885,15,4,2JHC,,C,1.387828,1.080482,1.537739,-0.549815,12.01,2.618222,,,,,,,,,,,,H,1.260745,-1.246754,-1.906767,C,1.528454,-0.755123,0.339508,2.314978,0.071668,0.241701,5.045753,2,JHC,52,2.499017,...,0.700067,2.585814,0.270836,1.116993,3.424258,1.109281,1.479176,1.080482,-1.234495,0.466736,0.846251,-1.468726,0.365555,2.419218,0.104241,1.045029,3.192797,0.877819,1.379191,1.080551,-1.234426,0.466765,0.867553,-1.447424,0.374757,2.465317,1.080482,-1.234495,0.466736,0.771098,-1.54388,0.113677,-2.201301,2.299965,-0.015013,0.993515,2.32455,2.264441,0.021884,-2.293094
2505539,7163686,dsgdb9nsd_133885,15,6,3JHC,,,,,,,,,1.080482,1.531182,1.503272,-0.595536,-0.137431,1.304814,2.279942,-0.976779,24.02,CC,4.114936,H,1.260745,-1.246754,-1.906767,C,-0.0574,0.61121,0.541102,3.34389,1.737507,3.45203,5.992061,3,JHC,52,2.499017,...,0.700067,2.585814,-0.758076,0.773295,3.424258,0.080369,1.024034,1.080482,-2.263407,0.323121,0.846251,-2.497639,0.253074,2.707363,-0.636526,0.809645,3.343894,4e-06,1.000001,2.273275,-1.070615,0.679829,0.530797,-2.813092,0.158736,2.465317,1.080482,-2.263407,0.323121,0.771098,-2.572792,0.329958,-3.013931,3.095347,-0.248542,0.925673,3.378975,2.481084,0.328236,-3.015654
2505540,7163687,dsgdb9nsd_133885,15,7,2JHC,,C,1.304814,1.080482,1.503272,-0.595536,12.01,2.583755,,,,,,,,,,,,H,1.260745,-1.246754,-1.906767,C,-0.095929,0.380424,-0.972098,2.315573,1.840564,2.647707,0.873607,2,JHC,52,2.499017,...,0.700067,2.585814,0.270241,1.116706,3.424258,1.108685,1.478795,1.080482,-1.235091,0.466616,0.846251,-1.469322,0.365461,2.687623,0.37205,1.160673,3.378975,1.063402,1.459239,1.088204,-1.227369,0.46995,0.81451,-1.501063,0.351753,2.465317,1.080482,-1.235091,0.466616,0.771098,-1.544475,0.113677,-2.201896,2.299965,-0.015608,0.993259,2.32455,2.264441,0.021884,-2.293689
2505541,7163688,dsgdb9nsd_133885,15,8,1JHC,4.0,,,,,,,,,,,,,,,,,,,H,1.260745,-1.246754,-1.906767,C,0.816694,-0.813067,-1.02236,1.080482,0.197181,0.188084,0.782177,1,JHC,52,2.499017,...,0.700067,2.585814,1.505331,2.393203,3.424258,2.343776,3.169194,1.080482,0.0,1.0,0.846251,-0.234231,0.783216,2.268936,1.188454,2.099929,3.367229,2.286747,3.116413,1.080482,0.0,1.0,0.809831,-0.270651,0.749509,2.465317,1.080482,-1.960511e-08,1.0,0.771098,-0.309385,0.005294,-1.075188,1.08594,0.005457,1.005051,1.091826,1.080482,0.005294,-1.075188


[INFO]2019-06-25 16:51:57,583:main:loading encoder from ./analysis/mole/data/preprocess/le.pkl
[INFO]2019-06-25 16:52:00,682:main:['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type', '1j_nbonds', '2j_atom_center', '2j_area_021', '2j_norm_vec_02', '2j_norm_vec_12', '2j_cos', '2j_atom_center_weight', '2j_sum_norm_vec', '3j_norm_vec_02', '3j_norm_vec_13', '3j_norm_vec_23', '3j_cos_023', '3j_cos_231', '3j_area_023', '3j_area_231', '3j_dihedral', '3j_atom_center_weight', '3j_atom_center', '3j_sum_norm_vec', 'atom_0', 'x_0', 'y_0', 'z_0', 'atom_1', 'x_1', 'y_1', 'z_1', 'dist', 'dist_x', 'dist_y', 'dist_z', 'type_0', 'type_1', 'molecule_couples', 'molecule_dist_mean', 'molecule_dist_min', 'molecule_dist_max', 'atom_0_couples_count', 'atom_1_couples_count', 'molecule_atom_index_0_x_1_std', 'molecule_atom_index_0_y_1_mean', 'molecule_atom_index_0_y_1_mean_diff', 'molecule_atom_index_0_y_1_mean_div', 'molecule_atom_index_0_y_1_max', 'molecule_atom_index_0_y_1_max_diff', 'molecule_atom

Mem. usage decreased to 485.06 Mb (72.4% reduction)


[INFO]2019-06-25 16:52:48,287:main:Finish preprocess()
[INFO]2019-06-25 16:52:49,313:main:Starting predict target(2JHC)


['2JHC' '1JHC' '3JHH' '3JHC' '2JHH' '1JHN' '3JHN' '2JHN']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
0,0,0.0,1.0625,1.199219,-1.0,12.007812,2.261719,2.261719,5.113281,0.0,0.0,5,1.994141,1.0625,3.324219,3,2,1.130859,0.0,0.0,,0.0,0.0,0.0,0.0,2.214844,-0.045654,0.97998,3.324219,1.0625,1.469727,1.0625,-1.199219,0.469727,1.130859,-1.129883,0.500488,1.662109,-0.599609,0.734863,2.261719,0.0,1.0,1.0625,-1.199219,0.469727,0.847656,-1.413086,0.375,1.662109,1.0625,-1.199219,0.469727,0.692383,-1.569336,0.0,-2.261719,2.261719,0.0,1.0,2.261719,2.261719,0.0,-2.261719
4,0,0.0,1.0625,1.199219,-1.0,12.007812,2.261719,2.261719,5.113281,0.0,0.0,5,1.994141,1.0625,3.324219,2,2,0.847656,0.0,0.0,,0.0,0.0,0.0,0.0,1.662109,-0.599609,0.734863,2.261719,0.0,1.0,1.0625,-1.199219,0.469727,0.847656,-1.413086,0.375,1.662109,-0.599609,0.734863,2.261719,0.0,1.0,1.0625,-1.199219,0.469727,0.847656,-1.413086,0.375,1.662109,1.0625,-1.199219,0.469727,0.692383,-1.569336,0.0,-2.261719,2.261719,0.0,1.0,2.261719,2.261719,0.0,-2.261719
24,0,1.44043,1.084961,1.506836,-0.47168,12.007812,2.591797,2.232422,2.587891,1.533203,0.867676,33,2.201172,1.083984,3.115234,8,6,0.994141,0.61084,-0.168213,0.783957,2.027344,1.248047,0.908691,0.756348,2.332031,0.099487,1.044922,3.115234,0.882812,1.395508,1.084961,-1.148438,0.485596,0.67041,-1.5625,0.300293,1.850586,-0.383057,0.828613,2.232422,0.0,1.0,1.083984,-1.148438,0.485596,0.593262,-1.639648,0.265625,1.850586,1.083984,-1.148438,0.485596,0.557129,-1.675781,0.171631,-2.0625,2.232422,6.1e-05,1.0,2.234375,2.232422,0.000143,-2.232422
25,0,1.44043,1.084961,1.506836,-0.471924,12.007812,2.591797,2.234375,0.098633,4.039062,0.852539,33,2.201172,1.083984,3.115234,8,6,0.994141,0.61084,0.603027,79.604134,2.027344,2.019531,0.908691,0.756348,2.332031,0.099304,1.044922,3.115234,0.882812,1.395508,1.084961,-1.149414,0.485596,0.67041,-1.563477,0.300293,1.850586,-0.383057,0.828613,2.234375,0.0,1.0,1.083984,-1.149414,0.485596,0.593262,-1.640625,0.265625,1.850586,1.083984,-1.149414,0.485596,0.557129,-1.675781,0.171631,-2.0625,2.232422,-0.000153,1.0,2.234375,2.232422,0.000143,-2.232422
32,0,1.44043,1.084961,1.506836,-0.47168,12.007812,2.591797,2.232422,2.642578,1.557617,0.787109,33,2.201172,1.083984,3.115234,7,6,1.007812,0.408203,-0.370605,0.524201,1.514648,0.735352,0.762207,0.742676,2.40625,0.172852,1.077148,3.115234,0.882812,1.395508,1.084961,-1.148438,0.485596,0.688477,-1.544922,0.30835,1.850586,-0.383057,0.828613,2.232422,3e-06,1.0,1.083984,-1.148438,0.485596,0.593262,-1.639648,0.265625,1.850586,1.083984,-1.148438,0.485596,0.557129,-1.675781,0.171631,-2.0625,2.232422,6.4e-05,1.0,2.234375,2.232422,0.000143,-2.232422


[INFO]2019-06-25 16:52:53,065:main:Start oof_predict
[INFO]2019-06-25 16:52:53,068:main:prediction: 0
[INFO]2019-06-25 16:53:47,642:main:prediction: 1
[INFO]2019-06-25 16:54:43,392:main:prediction: 2
[INFO]2019-06-25 16:55:37,072:main:Finish oof_predict
[INFO]2019-06-25 16:55:37,129:main:Starting predict target(1JHC)


Unnamed: 0,1j_nbonds,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
1,2.0,1.0625,1.12793,0.0,0.0,5,1.994141,1.0625,3.324219,3,2,1.130859,0.0,0.0,,0.0,0.0,0.0,0.0,2.214844,1.15332,2.085938,3.324219,2.261719,3.128906,1.0625,1.130859,0.069153,1.06543,1.662109,0.599609,1.564453,2.261719,1.199219,2.128906,1.0625,0.0,1.0,0.847656,-0.214233,0.79834,1.662109,1.0625,0.0,1.0,0.692383,-0.369873,0.0,-1.0625,1.0625,0.0,1.0,1.0625,1.0625,0.0,-1.0625
3,2.0,1.0625,1.12793,0.0,0.0,5,1.994141,1.0625,3.324219,2,2,0.847656,0.0,0.0,,0.0,0.0,0.0,0.0,1.662109,0.599609,1.564453,2.261719,1.199219,2.128906,1.0625,0.847656,-0.214233,0.79834,1.662109,0.599609,1.564453,2.261719,1.199219,2.128906,1.0625,0.0,1.0,0.847656,-0.214233,0.79834,1.662109,1.0625,0.0,1.0,0.692383,-0.369873,0.0,-1.0625,1.0625,0.0,1.0,1.0625,1.0625,0.0,-1.0625
5,4.0,1.102539,1.041016,0.174561,1e-06,18,1.910156,1.092773,3.261719,4,6,0.558594,1.088867,-0.303467,0.782001,1.792969,0.401123,1.108398,0.920898,1.828125,0.725098,1.658203,2.640625,1.538086,2.394531,1.102539,0.629883,-0.472412,0.571289,1.973633,0.871094,1.790039,3.261719,2.160156,2.958984,1.092773,-0.009476,0.991211,0.984375,-0.118042,0.893066,1.973633,1.092773,-0.009476,0.991211,0.938477,-0.163818,0.004894,-1.097656,1.099609,-0.003159,0.99707,1.102539,1.092773,0.004894,-1.097656
9,4.0,1.102539,0.283203,0.160767,0.770996,18,1.910156,1.092773,3.261719,3,6,0.585449,0.854004,-0.538574,0.613333,1.722656,0.330566,1.229492,1.014648,1.842773,0.740234,1.670898,2.640625,1.537109,2.394531,1.102539,0.770508,-0.332031,0.69873,1.973633,0.871094,1.790039,3.261719,2.160156,2.958984,1.092773,-0.009476,0.991211,0.984375,-0.118042,0.893066,1.973633,1.092773,-0.009476,0.991211,0.938477,-0.163818,0.004894,-1.097656,1.099609,-0.003159,0.99707,1.102539,1.092773,0.004894,-1.097656
12,4.0,1.092773,0.265381,0.109253,0.819824,18,1.910156,1.092773,3.261719,2,6,0.46167,0.419678,-0.972656,0.301317,1.392578,0.0,1.375977,0.791504,2.177734,1.084961,1.993164,3.261719,2.169922,2.986328,1.092773,1.53418,0.441406,1.404297,1.973633,0.880371,1.805664,3.261719,2.169922,2.986328,1.092773,0.0,1.0,0.984375,-0.108582,0.900879,1.973633,1.092773,0.0,1.0,0.938477,-0.154419,0.004894,-1.087891,1.099609,0.006317,1.005859,1.102539,1.092773,0.004894,-1.087891


[INFO]2019-06-25 16:55:41,085:main:Start oof_predict
[INFO]2019-06-25 16:55:41,093:main:prediction: 0
[INFO]2019-06-25 16:56:20,665:main:prediction: 1
[INFO]2019-06-25 16:56:57,341:main:prediction: 2
[INFO]2019-06-25 16:57:33,167:main:Finish oof_predict
[INFO]2019-06-25 16:57:33,219:main:Starting predict target(3JHH)


Unnamed: 0,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
2,1.0625,1.0625,1.199219,-1.0,-1.0,0.0,0.0,,24.015625,0,3.324219,3.324219,11.046875,0.0,0.0,5,1.994141,1.0625,3.324219,3,1,1.130859,0.0,0.0,,0.0,0.0,0.0,0.0,2.214844,-1.107422,0.666504,3.324219,0.0,1.0,1.0625,-2.261719,0.31958,1.130859,-2.191406,0.340332,3.324219,0.0,1.0,3.324219,0.0,1.0,3.324219,0.0,1.0,,,,3.324219,3.324219,0.0,1.0,,,,,3.324219,0.0,1.0,3.324219,3.324219,,
27,1.084961,1.083984,1.506836,-0.47168,-0.471924,1.44043,1.44043,-0.809082,24.015625,0,3.675781,3.115234,4.796875,1.501953,3.410156,33,2.201172,1.083984,3.115234,8,2,0.994141,0.61084,-0.180908,0.771492,2.027344,1.235352,0.908691,0.756348,2.332031,-0.783203,0.748535,3.115234,0.000153,1.0,1.084961,-2.03125,0.348145,0.67041,-2.445312,0.21521,2.822266,-0.292969,0.905762,3.115234,0.0,1.0,2.529297,-0.585938,0.812012,0.414307,-2.701172,0.132935,2.623047,1.819336,-1.296875,0.583496,0.496338,-2.619141,0.306152,-2.810547,2.822266,-0.292969,0.905762,3.115234,2.529297,0.306152,-2.810547
28,1.084961,1.083984,1.506836,-0.47168,-0.471924,1.44043,1.44043,1.0,24.015625,0,3.675781,2.529297,4.875,1.526367,0.000765,33,2.201172,1.083984,3.115234,8,3,0.994141,0.61084,-0.171021,0.781161,2.027344,1.245117,0.908691,0.756348,2.332031,-0.197144,0.921875,3.115234,0.585938,1.231445,1.084961,-1.445312,0.428711,0.67041,-1.859375,0.265137,2.488281,-0.041656,0.983398,3.115234,0.585938,1.231445,1.819336,-0.710938,0.71875,0.649414,-1.880859,0.256836,2.623047,1.819336,-0.710938,0.71875,0.496338,-2.033203,0.306152,-2.224609,2.822266,0.292969,1.116211,3.115234,2.529297,0.306152,-2.224609
29,1.084961,1.083984,1.506836,-0.471924,-0.471924,1.44043,1.44043,1.0,24.015625,0,3.675781,2.529297,0.001193,6.402344,0.000195,33,2.201172,1.083984,3.115234,8,4,0.994141,0.61084,1.123047,-1.1905,2.027344,2.539062,0.908691,0.756348,2.332031,-0.197388,0.921875,3.115234,0.585938,1.231445,1.084961,-1.445312,0.428711,0.67041,-1.859375,0.264893,2.822266,0.292725,1.115234,3.115234,0.585938,1.231445,2.529297,-0.000641,0.999512,0.338379,-2.191406,0.133667,2.623047,1.819336,-0.711426,0.71875,0.496338,-2.033203,0.306152,-2.224609,2.822266,0.292725,1.115234,3.115234,2.529297,0.306152,-2.224609
30,1.084961,1.083984,1.506836,-0.471924,-0.471924,1.44043,1.44043,-0.80957,24.015625,0,3.675781,3.115234,0.000296,6.351562,3.359375,33,2.201172,1.083984,3.115234,8,5,0.994141,0.61084,1.113281,-1.21366,2.027344,2.529297,0.908691,0.756348,2.332031,-0.783203,0.748535,3.115234,0.0,1.0,1.084961,-2.03125,0.3479,0.67041,-2.445312,0.21521,2.623047,-0.494141,0.841309,3.115234,0.0,1.0,1.819336,-1.296875,0.583496,0.536133,-2.580078,0.171997,2.623047,1.819336,-1.296875,0.583496,0.496338,-2.619141,0.306152,-2.810547,2.822266,-0.293213,0.905762,3.115234,2.529297,0.306152,-2.810547


[INFO]2019-06-25 16:57:35,437:main:Start oof_predict
[INFO]2019-06-25 16:57:35,441:main:prediction: 0
[INFO]2019-06-25 16:58:07,793:main:prediction: 1
[INFO]2019-06-25 16:58:40,134:main:prediction: 2
[INFO]2019-06-25 16:59:11,374:main:Finish oof_predict
[INFO]2019-06-25 16:59:11,420:main:Starting predict target(3JHC)


Unnamed: 0,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
6,1.102539,1.40625,1.40625,-0.372314,-0.38208,1.438477,1.827148,0.490234,28.015625,2,3.914062,2.640625,0.134888,5.585938,1.25,18,1.910156,1.092773,3.261719,4,6,0.558594,1.088867,1.642578,-1.967963,1.792969,2.347656,1.108398,0.920898,1.828125,-0.8125,0.692383,2.640625,0.0,1.0,1.102539,-1.538086,0.41748,0.629883,-2.009766,0.238525,1.973633,-0.666992,0.747559,3.261719,0.62207,1.235352,1.092773,-1.547852,0.413818,0.984375,-1.65625,0.372803,1.973633,1.092773,-1.547852,0.413818,0.938477,-1.702148,0.321533,-2.318359,2.847656,0.207275,1.078125,3.261719,2.640625,0.321533,-2.318359
10,1.102539,1.40625,1.40625,-0.372314,-0.38208,1.438477,1.827148,0.490967,28.015625,2,3.914062,2.640625,1.404297,5.507812,0.058105,18,1.910156,1.092773,3.261719,3,6,0.585449,0.854004,1.407227,-1.543497,1.722656,2.275391,1.229492,1.014648,1.842773,-0.797363,0.697754,2.640625,0.0,1.0,1.102539,-1.537109,0.41748,0.770508,-1.869141,0.291748,1.973633,-0.666504,0.747559,3.261719,0.622559,1.236328,1.092773,-1.546875,0.414062,0.984375,-1.655273,0.372803,1.973633,1.092773,-1.546875,0.414062,0.938477,-1.701172,0.321533,-2.318359,2.847656,0.207642,1.079102,3.261719,2.640625,0.321533,-2.318359
13,1.092773,1.40625,1.40625,-0.300537,-0.38208,1.46582,1.827148,-1.0,28.015625,2,3.904297,3.261719,1.364258,5.179688,4.097656,18,1.910156,1.092773,3.261719,2,6,0.46167,0.419678,0.972656,-0.758287,1.392578,1.945312,1.375977,0.791504,2.177734,-1.084961,0.66748,3.261719,0.0,1.0,1.092773,-2.169922,0.334961,1.53418,-1.728516,0.470215,1.973633,-1.289062,0.60498,3.261719,0.0,1.0,1.092773,-2.169922,0.334961,0.984375,-2.277344,0.301758,1.973633,1.092773,-2.169922,0.334961,0.938477,-2.324219,0.321533,-2.941406,2.847656,-0.415039,0.873047,3.261719,2.640625,0.321533,-2.941406
14,1.102539,1.40625,1.40625,-0.372314,-0.38208,1.438477,1.827148,0.490234,28.015625,2,3.914062,2.640625,0.023956,2.716797,4.230469,18,1.910156,1.092773,3.261719,4,6,0.708496,-0.260254,-1.652344,-0.186972,1.392578,0.0,1.254883,0.558105,1.828125,-0.8125,0.692383,2.640625,0.0,1.0,1.102539,-1.538086,0.41748,0.629883,-2.009766,0.238525,1.973633,-0.666992,0.747559,3.261719,0.62207,1.235352,1.092773,-1.547852,0.413818,0.984375,-1.65625,0.372803,1.973633,1.092773,-1.547852,0.413818,0.938477,-1.702148,0.321533,-2.318359,2.847656,0.207275,1.078125,3.261719,2.640625,0.321533,-2.318359
18,1.102539,1.40625,1.40625,-0.372314,-0.38208,1.438477,1.827148,0.490967,28.015625,2,3.914062,2.640625,2.916016,2.660156,1.393555,18,1.910156,1.092773,3.261719,3,6,0.366211,-0.267578,-1.660156,-0.192157,1.392578,0.0,1.537109,0.62207,1.842773,-0.797363,0.697754,2.640625,0.0,1.0,1.102539,-1.538086,0.41748,0.770508,-1.869141,0.291748,1.973633,-0.666504,0.747559,3.261719,0.622559,1.235352,1.092773,-1.546875,0.414062,0.984375,-1.655273,0.372803,1.973633,1.092773,-1.546875,0.414062,0.938477,-1.701172,0.321533,-2.318359,2.847656,0.207642,1.079102,3.261719,2.640625,0.321533,-2.318359


[INFO]2019-06-25 16:59:15,287:main:Start oof_predict
[INFO]2019-06-25 16:59:15,290:main:prediction: 0
[INFO]2019-06-25 17:00:25,111:main:prediction: 1
[INFO]2019-06-25 17:01:37,089:main:prediction: 2
[INFO]2019-06-25 17:02:49,679:main:Finish oof_predict
[INFO]2019-06-25 17:02:49,730:main:Starting predict target(2JHH)


Unnamed: 0,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
7,0,1.15625,1.102539,1.102539,-0.308105,12.007812,2.205078,1.783203,2.410156,0.00028,0.769531,18,1.910156,1.092773,3.261719,4,1,0.558594,1.088867,-0.70459,0.607141,1.792969,0.0,1.108398,0.920898,1.828125,0.044708,1.025391,2.640625,0.857422,1.480469,1.102539,-0.680664,0.618164,0.629883,-1.15332,0.353271,1.783203,0.0,1.0,1.783203,0.0,1.0,1.783203,0.0,1.0,,,,1.78418,1.783203,-2.980232e-07,1.0,0.001056,-1.782227,0.001056,-1.782227,1.78418,0.001363,1.000977,1.785156,1.783203,0.001056,-1.782227
8,0,1.140625,1.102539,1.092773,-0.32251,12.007812,2.195312,1.785156,2.357422,0.00761,0.821289,18,1.910156,1.092773,3.261719,4,2,0.558594,1.088867,-0.634277,0.63199,1.792969,0.070496,1.108398,0.920898,1.828125,0.042633,1.023438,2.640625,0.855469,1.479492,1.102539,-0.682617,0.617676,0.629883,-1.155273,0.352783,1.785156,-8e-06,1.0,1.785156,0.0,1.0,1.785156,-1.6e-05,1.0,1.1e-05,-1.785156,6e-06,1.78418,1.783203,-0.002052307,0.999023,0.001056,-1.78418,0.001056,-1.78418,1.78418,-0.00069,0.999512,1.785156,1.783203,0.001056,-1.78418
11,0,1.140625,1.102539,1.092773,-0.32251,12.007812,2.195312,1.785156,0.000284,0.004971,3.181641,18,1.910156,1.092773,3.261719,3,2,0.585449,0.854004,-0.869141,0.495677,1.722656,0.0,1.229492,1.014648,1.842773,0.057404,1.032227,2.640625,0.85498,1.478516,1.102539,-0.682617,0.617676,0.770508,-1.014648,0.431641,1.785156,8e-06,1.0,1.785156,1.6e-05,1.0,1.785156,0.0,1.0,1.1e-05,-1.785156,6e-06,1.78418,1.783203,-0.002037048,0.999023,0.001056,-1.78418,0.001056,-1.78418,1.78418,-0.000673,0.999512,1.785156,1.783203,0.001056,-1.78418
16,0,1.15625,1.102539,1.102539,-0.308105,12.007812,2.205078,1.783203,2.410156,0.0003,0.767578,18,1.910156,1.092773,3.261719,4,1,0.708496,-0.260254,-0.021652,1.090742,1.392578,1.630859,1.254883,0.558105,1.828125,0.044678,1.025391,2.640625,0.857422,1.480469,1.102539,-0.680664,0.618164,0.629883,-1.15332,0.353271,1.783203,0.0,1.0,1.783203,0.0,1.0,1.783203,0.0,1.0,,,,1.78418,1.783203,0.0,1.0,0.001056,-1.782227,0.001056,-1.782227,1.78418,0.001364,1.000977,1.785156,1.783203,0.001056,-1.782227
17,0,1.140625,1.102539,1.092773,-0.32251,12.007812,2.195312,1.785156,0.211304,1.920898,1.054688,18,1.910156,1.092773,3.261719,4,2,0.708496,-0.260254,1.381836,0.158571,1.392578,3.035156,1.254883,0.558105,1.828125,0.042633,1.023438,2.640625,0.855469,1.479492,1.102539,-0.682617,0.617676,0.629883,-1.155273,0.352783,1.785156,-9e-06,1.0,1.785156,0.0,1.0,1.785156,-1.7e-05,1.0,1.2e-05,-1.785156,7e-06,1.78418,1.783203,-0.002054214,0.999023,0.001056,-1.78418,0.001056,-1.78418,1.78418,-0.00069,0.999512,1.785156,1.783203,0.001056,-1.78418


[INFO]2019-06-25 17:02:52,047:main:Start oof_predict
[INFO]2019-06-25 17:02:52,051:main:prediction: 0
[INFO]2019-06-25 17:03:14,829:main:prediction: 1
[INFO]2019-06-25 17:03:37,106:main:prediction: 2
[INFO]2019-06-25 17:03:59,335:main:Finish oof_predict
[INFO]2019-06-25 17:03:59,380:main:Starting predict target(1JHN)


Unnamed: 0,1j_nbonds,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
56,3.0,1.008789,0.595215,0.224487,0.198608,14,1.924805,1.008789,3.197266,4,4,0.854492,0.629883,-0.730469,0.463045,1.803711,0.442871,1.140625,0.10022,1.81543,0.806152,1.798828,2.5,1.491211,2.478516,1.008789,0.630859,-0.378174,0.625,1.928711,0.919434,1.911133,3.197266,2.1875,3.167969,1.008789,-0.000327,0.999512,1.099609,0.090393,1.089844,1.928711,1.008789,-0.000327,0.999512,1.017578,0.008858,0.000189,-1.008789,1.008789,-0.000162,1.0,1.008789,1.008789,0.000189,-1.008789
60,3.0,1.008789,0.799805,0.196167,0.0215,14,1.924805,1.008789,3.197266,3,4,0.702148,0.238892,-1.12207,0.175554,1.360352,0.0,1.017578,0.121948,2.070312,1.061523,2.052734,3.197266,2.1875,3.169922,1.008789,1.095703,0.08667,1.085938,1.928711,0.919922,1.912109,3.197266,2.1875,3.167969,1.008789,0.0,1.0,1.099609,0.090759,1.089844,1.928711,1.008789,0.0,1.0,1.017578,0.009186,0.000189,-1.008789,1.008789,0.000165,1.0,1.008789,1.008789,0.000189,-1.008789
65,3.0,1.008789,0.60791,0.202759,0.207642,14,1.924805,1.008789,3.197266,4,4,0.690918,-0.226196,0.397217,0.362886,1.360352,1.984375,1.24707,0.100037,1.81543,0.806152,1.798828,2.5,1.491211,2.478516,1.008789,0.630859,-0.378174,0.625,1.928711,0.919434,1.911133,3.197266,2.1875,3.167969,1.008789,-0.000328,0.999512,1.099609,0.090393,1.089844,1.928711,1.008789,-0.00033,0.999512,1.017578,0.008858,0.000189,-1.008789,1.008789,-0.000165,1.0,1.008789,1.008789,0.000189,-1.008789
69,3.0,1.008789,0.001417,0.996094,0.019806,14,1.924805,1.008789,3.197266,3,4,0.702148,0.238892,0.862305,-0.383241,1.360352,1.984375,1.017578,0.121948,2.070312,1.061523,2.052734,3.197266,2.1875,3.167969,1.008789,1.095703,0.086609,1.085938,1.928711,0.919922,1.912109,3.197266,2.1875,3.169922,1.008789,0.0,1.0,1.099609,0.090759,1.089844,1.928711,1.008789,-2e-06,1.0,1.017578,0.009186,0.000189,-1.008789,1.008789,0.000163,1.0,1.008789,1.008789,0.000189,-1.008789
156,3.0,1.006836,0.006924,0.257812,0.749023,22,2.082031,1.006836,3.402344,4,5,0.245972,-0.225952,-0.264404,-5.898284,1.478516,1.44043,1.354492,0.599121,1.854492,0.847168,1.841797,2.242188,1.235352,2.226562,1.006836,0.570801,-0.435791,0.566895,1.867188,0.859863,1.854492,2.113281,1.105469,2.097656,1.006836,0.0,1.0,0.481689,-0.525391,0.478516,1.867188,1.006836,0.0,1.0,0.481689,-0.525391,0.041046,-0.96582,1.006836,0.0,1.0,1.006836,1.006836,,


[INFO]2019-06-25 17:04:01,591:main:Start oof_predict
[INFO]2019-06-25 17:04:01,593:main:prediction: 0
[INFO]2019-06-25 17:04:06,633:main:prediction: 1
[INFO]2019-06-25 17:04:12,735:main:prediction: 2
[INFO]2019-06-25 17:04:19,385:main:Finish oof_predict
[INFO]2019-06-25 17:04:19,421:main:Starting predict target(3JHN)


Unnamed: 0,3j_norm_vec_02,3j_norm_vec_13,3j_norm_vec_23,3j_cos_023,3j_cos_231,3j_area_023,3j_area_231,3j_dihedral,3j_atom_center_weight,3j_atom_center,3j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
58,1.008789,1.388672,1.388672,-0.461182,-0.399414,1.243164,1.767578,0.864746,26.015625,1,3.787109,2.5,0.169922,6.042969,0.040741,14,1.924805,1.008789,3.197266,4,4,0.854492,0.629883,1.253906,-1.010844,1.803711,2.427734,1.140625,0.10022,1.81543,-0.685059,0.726074,2.5,0.0,1.0,1.008789,-1.491211,0.403564,0.630859,-1.869141,0.252197,1.928711,-0.571777,0.771484,3.197266,0.696289,1.27832,1.008789,-1.491211,0.40332,1.099609,-1.401367,0.439697,1.928711,1.008789,-1.491211,0.40332,1.017578,-1.482422,0.401855,-2.097656,2.847656,0.348145,1.139648,3.197266,2.5,0.401855,-2.097656
62,1.008789,1.388672,1.388672,-0.384033,-0.399414,1.292969,1.767578,-0.96875,26.015625,1,3.785156,3.197266,4.316406,5.890625,0.009445,14,1.924805,1.008789,3.197266,3,4,0.702148,0.238892,0.862305,-0.383241,1.360352,1.984375,1.017578,0.121948,2.070312,-1.125977,0.647461,3.197266,0.0,1.0,1.008789,-2.1875,0.315674,1.095703,-2.101562,0.342773,1.928711,-1.267578,0.603516,3.197266,0.0,1.0,1.008789,-2.1875,0.315674,1.099609,-2.097656,0.343994,1.928711,1.008789,-2.1875,0.315674,1.017578,-2.177734,0.401855,-2.794922,2.847656,-0.348145,0.891113,3.197266,2.5,0.401855,-2.794922
63,1.008789,1.388672,1.388672,-0.461182,-0.399414,1.243164,1.767578,0.864746,26.015625,1,3.787109,2.5,3.853516,2.353516,0.044891,14,1.924805,1.008789,3.197266,4,4,0.690918,-0.226196,-1.586914,-0.16623,1.360352,0.0,1.24707,0.100037,1.81543,-0.685059,0.726074,2.5,0.0,1.0,1.008789,-1.491211,0.403564,0.630859,-1.869141,0.252197,1.928711,-0.571777,0.771484,3.197266,0.696289,1.27832,1.008789,-1.491211,0.40332,1.099609,-1.401367,0.439697,1.928711,1.008789,-1.491211,0.40332,1.017578,-1.482422,0.401855,-2.097656,2.847656,0.348145,1.139648,3.197266,2.5,0.401855,-2.097656
67,1.008789,1.388672,1.388672,-0.384033,-0.399414,1.292969,1.767578,-0.96875,26.015625,1,3.785156,3.197266,1.313477,8.890625,0.01062,14,1.924805,1.008789,3.197266,3,4,0.702148,0.238892,-1.12207,0.175554,1.360352,0.0,1.017578,0.121948,2.070312,-1.125977,0.647461,3.197266,0.0,1.0,1.008789,-2.1875,0.315674,1.095703,-2.101562,0.342773,1.928711,-1.267578,0.603516,3.197266,0.0,1.0,1.008789,-2.1875,0.315674,1.099609,-2.097656,0.343994,1.928711,1.008789,-2.1875,0.315674,1.017578,-2.177734,0.401855,-2.794922,2.847656,-0.348145,0.891113,3.197266,2.5,0.401855,-2.794922
408,1.116211,1.362305,1.541992,-0.433838,-0.38208,1.551758,1.942383,1.0,24.015625,0,4.019531,2.560547,4.480469,2.072266,0.000154,10,1.972656,1.004883,3.357422,3,3,0.744141,0.176636,-1.148438,0.133281,1.325195,0.0,1.060547,0.005013,1.979492,-0.580078,0.773438,2.560547,0.0,1.0,1.116211,-1.443359,0.436279,0.762207,-1.797852,0.297852,1.524414,-1.036133,0.595215,2.560547,0.0,1.0,1.004883,-1.554688,0.392578,0.896973,-1.663086,0.350342,1.524414,1.004883,-1.554688,0.392578,0.896973,-1.663086,0.460938,-2.097656,2.560547,0.0,1.0,2.560547,2.560547,,


[INFO]2019-06-25 17:04:20,930:main:Start oof_predict
[INFO]2019-06-25 17:04:20,932:main:prediction: 0
[INFO]2019-06-25 17:04:32,782:main:prediction: 1
[INFO]2019-06-25 17:04:44,434:main:prediction: 2
[INFO]2019-06-25 17:04:56,147:main:Finish oof_predict
[INFO]2019-06-25 17:04:56,189:main:Starting predict target(2JHN)


Unnamed: 0,2j_atom_center,2j_area_021,2j_norm_vec_02,2j_norm_vec_12,2j_cos,2j_atom_center_weight,2j_sum_norm_vec,dist,dist_x,dist_y,dist_z,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_x_1_std,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_std,molecule_atom_index_0_z_1_std,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_min,molecule_type_dist_std,molecule_type_dist_std_diff
141,0,1.480469,1.095703,1.451172,-0.364258,12.007812,2.546875,2.113281,0.328613,3.720703,0.41333,22,2.082031,1.006836,3.402344,6,5,0.498779,0.660645,0.62207,17.239294,1.824219,1.785156,1.154297,0.78125,2.082031,-0.030411,0.98584,3.191406,1.078125,1.510742,1.095703,-1.017578,0.518555,0.719727,-1.392578,0.340576,1.867188,-0.245728,0.883789,2.113281,1.3e-05,1.0,1.006836,-1.105469,0.476562,0.481689,-1.630859,0.228027,1.867188,1.006836,-1.105469,0.476562,0.481689,-1.630859,0.154541,-1.958008,2.082031,-0.030807,0.985352,2.113281,2.044922,0.035919,-2.076172
147,0,1.480469,1.095703,1.451172,-0.364258,12.007812,2.546875,2.113281,1.373047,2.962891,0.126587,22,2.082031,1.006836,3.402344,5,5,0.166748,0.440674,0.402344,11.500521,1.824219,1.785156,1.141602,0.827637,2.142578,0.02977,1.013672,3.191406,1.078125,1.510742,1.095703,-1.017578,0.518555,0.787598,-1.325195,0.372803,1.867188,-0.24585,0.883789,2.113281,0.0,1.0,1.006836,-1.105469,0.476562,0.481689,-1.630859,0.228027,1.867188,1.006836,-1.105469,0.476562,0.481689,-1.630859,0.154541,-1.958008,2.082031,-0.030807,0.985352,2.113281,2.044922,0.035919,-2.076172
152,0,1.510742,1.09082,1.451172,-0.296387,12.007812,2.541016,2.056641,0.00197,3.1875,1.041992,22,2.082031,1.006836,3.402344,4,5,0.19104,0.094788,0.056488,2.474538,1.478516,1.44043,0.969727,0.818359,2.158203,0.10083,1.048828,2.96875,0.912598,1.443359,1.09082,-0.966797,0.530273,0.803223,-1.253906,0.390381,1.867188,-0.190308,0.907715,2.113281,0.055481,1.027344,1.006836,-1.049805,0.489502,0.481689,-1.575195,0.234131,1.867188,1.006836,-1.049805,0.489502,0.481689,-1.575195,0.154541,-1.902344,2.082031,0.024673,1.011719,2.113281,2.044922,0.035919,-2.021484
160,0,1.402344,1.108398,1.359375,-0.366211,12.007812,2.46875,2.044922,0.13208,3.207031,0.842285,22,2.082031,1.006836,3.402344,3,5,0.2229,0.282959,0.244507,7.383074,1.478516,1.44043,1.094727,0.653809,2.185547,0.140625,1.068359,3.402344,1.358398,1.664062,1.108398,-0.936523,0.541992,1.154297,-0.891113,0.564453,1.867188,-0.178101,0.913086,2.113281,0.067749,1.033203,1.006836,-1.038086,0.492432,0.481689,-1.563477,0.235596,1.867188,1.006836,-1.038086,0.492432,0.481689,-1.563477,0.154541,-1.890625,2.082031,0.036926,1.017578,2.113281,2.044922,0.035919,-2.009766
264,0,1.475586,1.104492,1.455078,-0.396729,12.007812,2.558594,2.146484,0.719727,3.802734,0.089783,37,2.210938,1.085938,3.359375,6,7,0.782715,0.666992,0.64502,30.452684,1.819336,1.797852,1.151367,0.747559,2.015625,-0.131348,0.938965,2.640625,0.493408,1.229492,1.104492,-1.042969,0.51416,0.589844,-1.557617,0.274658,2.148438,0.001721,1.000977,2.191406,0.043915,1.020508,2.091797,-0.056335,0.973633,0.042542,-2.105469,0.019821,2.148438,2.091797,-0.056335,0.973633,0.042542,-2.105469,0.186523,-1.960938,2.148438,0.001721,1.000977,2.191406,2.091797,0.042542,-2.105469


[INFO]2019-06-25 17:04:57,941:main:Start oof_predict
[INFO]2019-06-25 17:04:57,943:main:prediction: 0
[INFO]2019-06-25 17:05:07,599:main:prediction: 1
[INFO]2019-06-25 17:05:17,001:main:prediction: 2
[INFO]2019-06-25 17:05:26,299:main:Finish oof_predict


Unnamed: 0,id,scalar_coupling_constant
0,4658147,19.269504
1,4658148,196.538797
2,4658149,2.000638
3,4658150,190.612833
4,4658151,17.258757


0


In [0]:
display(df_submit.head())
df_submit.to_csv('submission.csv', index=False)

Unnamed: 0,id,scalar_coupling_constant
0,4658147,19.269504
1,4658148,196.538797
2,4658149,2.000638
3,4658150,190.612833
4,4658151,17.258757


In [0]:
df_submit.shape

(2505542, 2)