In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from imblearn.under_sampling import RandomUnderSampler
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import re
import preprocessing_mojule 
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import preprocessing_mojule as pm
from urllib.request import urlopen
import pickle
import japanize_matplotlib 
from sklearn.metrics import mean_squared_error

In [2]:

class Results:
    """Clean and feature-engineer a raw race-results table.

    Attributes:
        pre_df: the frame passed to the constructor, kept untouched.
        before_scale: snapshot of the engineered, all-numeric frame taken
            just before it is handed to ``pm.CustomSTDScaler``.
        post_df: the frame returned by the scaler.
    """

    def __init__(self, pre_df):
        self.pre_df = pre_df
        self.before_scale = None
        self.post_df = self.preprocessing(self.pre_df)

    def preprocessing(self, pre_df):
        """Return the scaled feature frame built from ``pre_df``.

        Steps, in order: drop rows whose finishing position is not purely
        numeric, parse sex/age and body weight/weight change out of their
        combined text columns, one-hot encode sex, add jockey-rate and
        weight-ratio features, drop the columns listed in ``drop_features``,
        cast everything to float and run the project scaler.

        Args:
            pre_df: raw results frame. It must contain every column named
                below, otherwise pandas raises ``KeyError``.

        Returns:
            The frame produced by ``pm.CustomSTDScaler.fit_transform``.

        Side effects:
            Stores the pre-scaling frame in ``self.before_scale``.
        """
        results = pre_df.copy()

        # Keep only rows whose finishing position contains digits alone.
        # The pattern is a raw string: "\D" in a normal literal is an invalid
        # escape sequence. ``.copy()`` makes the filtered frame independent so
        # the column assignments below do not trigger SettingWithCopyWarning.
        finished = ~results["着順"].astype(str).str.contains(r"\D")
        results = results[finished].copy()
        results["着順"] = results["着順"].astype(int)

        # "性齢" is a sex character followed by the age.
        results["性齢"] = results["性齢"].astype(str)
        results["性"] = results["性齢"].map(lambda text: text[0])
        results["年齢"] = results["性齢"].map(lambda text: text[1:]).astype(int)

        # "馬体重" looks like "480(+2)": weight, then the change in parentheses.
        # Splitting on "(" (instead of slicing three characters) also copes
        # with weights that are not exactly three digits long.
        results["体重"] = results["馬体重"].map(lambda text: text.split("(")[0]).astype(float)
        results["増減"] = results["馬体重"].str.split("(").map(lambda parts: int(parts[-1][:-1]))

        results["人気"] = results["人気"].astype(int)
        results["単勝"] = results["単勝"].astype(float)

        results = pd.concat([results, pd.get_dummies(results["性"])], axis=1)

        # Jockey rates multiplied by the number of rides.
        results["勝率*騎乗回数"] = results["勝率"] * results["騎乗回数"]
        results["連対率*騎乗回数"] = results["連対率"] * results["騎乗回数"]
        results["複勝率*騎乗回数"] = results["複勝率"] * results["騎乗回数"]

        # Weight change and carried weight relative to body weight.
        results["増減/体重"] = results["増減"] / results["体重"]
        results["斤量/体重"] = results["斤量"] / results["体重"]

        drop_features = [
            "枠番", "馬名", "性齢", "騎手", "タイム", 'タイム_線形', "着差", "馬体重", "調教師", "horse_id",
            "jockey_id", "length", "race_type", "weather", "condition", "date", "性",
            '芝', 'ダ', '障', '短距離', 'マイル距離', '芝_ave_order',
            'ダ_ave_order', '短距離_ave_order', 'マイル距離_ave_order', '芝得意', '短距離得意',
            '中距離_ave_order', '長距離_ave_order', 'マイル距離得意', '中距離得意', '長距離得意',
            "タイム/length",
        ]
        results = results.drop(drop_features, axis=1)
        results = results.astype(float)
        results["着順"] = results["着順"].astype(int)

        # Store a copy: if the scaler modifies its argument in place, an alias
        # would silently stop being a "before scaling" snapshot.
        self.before_scale = results.copy()

        # NOTE(review): presumably these are the columns the scaler leaves
        # unscaled -- confirm against preprocessing_mojule.CustomSTDScaler.
        keep_features = [
            '着順', 'セ', '牝', '牡', "馬番", "length_match", "race_type_match",
            'タイム/length', "人気", 'タイム/length_scaled', "score",
        ]
        scaler = pm.CustomSTDScaler(keep_features)
        results = scaler.fit_transform(results)

        return results
 



class Evaluater():
    """Fit a LightGBM regressor on a hold-out split and back-test betting rules.

    The constructor splits ``X``/``y`` 70/30 into (train+validation)/test and
    then 70/30 again into train/validation. The columns in
    ``_NON_FEATURE_COLUMNS`` travel with the feature frames because bet
    settlement needs them, but they are removed before the model sees the data.

    Payout tables are passed around as ``pay_dict``: a mapping from the index
    labels of the feature frame to a DataFrame whose rows are looked up with
    ``.loc["単勝"]``, ``.loc["複勝"]`` and ``.loc["馬連"]`` and whose cells
    ``"該当馬"`` / ``"金"`` hold parallel sequences (horse numbers, payouts).
    """

    # Needed for settling bets, never used as model inputs.
    _NON_FEATURE_COLUMNS = ["馬番", "単勝", "人気"]
    # Number of 100-yen win tickets bought per popularity rank.
    _TICKETS_BY_POPULARITY = {rank: 1 for rank in range(1, 19)}

    def __init__(self, X, y):
        train_x, self.__X_test, train_y, self.__y_test = train_test_split(
            X, y, test_size=0.3, shuffle=True, random_state=1)
        # The validation part is stored but not consumed by fit().
        self.__X_train, self.__X_val, self.__y_train, self.__y_val = train_test_split(
            train_x, train_y, test_size=0.3, shuffle=True, random_state=2)
        self.__params = None
        self.__params = self.params
        self.__model = None
        # Copy of the parameters the cached model was trained with.
        self.__fitted_params = None

    @property
    def params(self):
        """LightGBM parameters; filled with defaults on first access.

        The defaults are assigned only while no parameters are set, so reading
        this property after ``params`` has been set returns the stored dict
        (previously that path raised ``UnboundLocalError``).
        """
        if self.__params is None:
            self.__params = {
                'objective': 'regression',
                'random_state': 57,
                'metric': 'l1',
                'feature_pre_filter': False,
                'lambda_l1': 0.0,
                'lambda_l2': 0.0,
                'num_leaves': 31,
                'feature_fraction': 0.6,
                'bagging_fraction': 1.0,
                'bagging_freq': 0,
                'min_child_samples': 20,
                'num_iterations': 1000
            }
        return self.__params

    @params.setter
    def params(self, params):
        self.__params = params

    @property
    def model(self):
        """The most recently fitted model, or None before the first fit."""
        return self.__model

    @property
    def X_test(self):
        """Test-split features, including the non-feature columns."""
        return self.__X_test

    def _ensure_fitted(self):
        """Return the model, training it only if missing or params changed."""
        if self.__model is None or self.__fitted_params != self.__params:
            self.fit()
        return self.__model

    def _raw_predict(self, X):
        """Return the regressor's raw output for ``X``."""
        features = X.drop(self._NON_FEATURE_COLUMNS, axis=1)
        return np.asarray(self._ensure_fitted().predict(features), dtype=float)

    def predict(self, X_test, threshold=0):
        """Flag the rows whose within-group z-score reaches ``threshold``.

        Raw predictions are standardised inside each group of rows sharing an
        index label (mean and sample standard deviation of that group).

        Args:
            X_test: feature frame containing the ``_NON_FEATURE_COLUMNS``.
            threshold: minimum standardised score for a row to be flagged.

        Returns:
            Float array of 0.0/1.0 aligned with the rows of ``X_test``. Rows
            whose z-score is undefined (group of one row, or zero spread) get
            0.0 instead of keeping their raw regression value.

        Side effects:
            Stores the standardised scores in ``self.pred`` and fits the model
            when no up-to-date model is cached.
        """
        # Index with the frame that was actually passed in, not a global.
        scores = pd.DataFrame({"pred": self._raw_predict(X_test)}, index=X_test.index)
        grouped = scores.groupby(scores.index)["pred"]
        scaled = (scores["pred"] - grouped.transform("mean")) / grouped.transform("std")
        self.pred = scaled.rename("pred_scaled")
        # NaN compares False, so undefined scores are never flagged.
        return (scaled >= threshold).to_numpy().astype(float)

    def fit(self):
        """Train a fresh LGBMRegressor on the training split and return it."""
        self.__model = lgb.LGBMRegressor(**self.__params)
        self.__model.fit(
            self.__X_train.drop(self._NON_FEATURE_COLUMNS, axis=1),
            self.__y_train,
        )
        # Remember what the model was trained with so later parameter changes
        # (including in-place edits of the dict) trigger a refit.
        self.__fitted_params = dict(self.__params)
        return self.__model

    def RSME(self, X_test, y_test):
        """Root-mean-squared error of the raw predictions against ``y_test``.

        NOTE(review): this used to return the *mean squared* error of the
        thresholded 0/1 picks, which matches neither the method name nor the
        regression target. It now scores the raw regression output and takes
        the square root.
        """
        return float(np.sqrt(mean_squared_error(y_test, self._raw_predict(X_test))))

    def importance(self):
        """Return feature importances sorted from most to least important."""
        model = self._ensure_fitted()
        names = self.__X_train.drop(self._NON_FEATURE_COLUMNS, axis=1).columns
        table = pd.DataFrame({"features": names, "importance": model.feature_importances_})
        return table.sort_values("importance", ascending=False)

    @staticmethod
    def _yen(amount):
        """Parse a payout such as "1,230" into an int."""
        return int(str(amount).replace(",", ""))

    @staticmethod
    def _payout_row(pay_df, bet_type):
        """Return (horse numbers as ints, payout entries) for one bet type.

        Reads without writing back, so the caller's ``pay_dict`` is untouched.
        """
        row = pay_df.loc[bet_type]
        return [int(number) for number in row["該当馬"]], list(row["金"])

    def _single_payout(self, horses, amounts, number):
        """Payout for one 100-yen ticket on ``number``, 0 when it is not listed."""
        if number not in horses:
            return 0
        return self._yen(amounts[horses.index(number)])

    def _settle_tansho(self, pay_df, number, popularity):
        """Settle win tickets on one horse; returns (payback, invested)."""
        tickets = self._TICKETS_BY_POPULARITY.get(popularity, 1)
        horses, amounts = self._payout_row(pay_df, "単勝")
        return self._single_payout(horses, amounts, number) * tickets, 100 * tickets

    def _settle_fukusho(self, pay_df, numbers):
        """Settle one race: a win ticket per horse, plus a place ticket per
        horse when at least two horses were picked. Returns (payback, invested)."""
        win_horses, win_amounts = self._payout_row(pay_df, "単勝")
        invested = 100 * len(numbers)
        payback = sum(self._single_payout(win_horses, win_amounts, n) for n in numbers)
        if len(numbers) >= 2:
            place_horses, place_amounts = self._payout_row(pay_df, "複勝")
            invested += 100 * len(numbers)
            payback += sum(self._single_payout(place_horses, place_amounts, n) for n in numbers)
        return payback, invested

    def _settle_umaren(self, pay_df, numbers):
        """Settle one race: a win ticket per horse plus one quinella ticket for
        every pair of picked horses. Returns (payback, invested)."""
        win_horses, win_amounts = self._payout_row(pay_df, "単勝")
        pair_horses, pair_amounts = self._payout_row(pay_df, "馬連")
        invested = 100 * len(numbers)
        payback = sum(self._single_payout(win_horses, win_amounts, n) for n in numbers)
        invested += (len(numbers) * (len(numbers) - 1) // 2) * 100
        # The quinella pays when every listed horse is among the picks. The
        # payout comes from the quinella row (it used to be read from the win row).
        if pair_horses and set(pair_horses) <= set(numbers):
            payback += self._yen(pair_amounts[0])
        return payback, invested

    def cal(self, X_test, pay_dict, threshold=0, fukusho=False, tansho=False, umaren=False):
        """Back-test one betting rule on ``X_test``.

        Args:
            X_test: feature frame; its index labels are the keys of ``pay_dict``.
            pay_dict: payout tables, see the class docstring.
            threshold: passed to ``predict`` to choose the horses to buy.
            fukusho, tansho, umaren: exactly one must be true.

        Returns:
            ``(invested, payback_sum, payback_sum - invested)`` in yen. All
            three are 0 when no horse reaches the threshold.

        Raises:
            ValueError: if no rule or more than one rule is selected.
        """
        if all([not tansho, not fukusho, not umaren]):
            raise ValueError("賭け方を指定してください")
        if sum([tansho, fukusho, umaren]) != 1:
            raise ValueError("賭け方の指定は一つだけです")

        pred = self.predict(X_test, threshold)
        purchased = X_test.loc[pred == 1, self._NON_FEATURE_COLUMNS]

        invested = 0
        payback_sum = 0
        if tansho:
            rows = zip(purchased.index, purchased["馬番"], purchased["人気"])
            for race_id, number, popularity in rows:
                payback, stake = self._settle_tansho(pay_dict[race_id], int(number), popularity)
                payback_sum += payback
                invested += stake
        else:
            settle = self._settle_fukusho if fukusho else self._settle_umaren
            # One settlement per race, with every picked horse of that race.
            for race_id, picked in purchased.groupby(purchased.index)["馬番"]:
                payback, stake = settle(pay_dict[race_id], [int(n) for n in picked])
                payback_sum += payback
                invested += stake

        return invested, payback_sum, payback_sum - invested

    def visualize(self, X_test, bins=100, fukusho=False, tansho=False, umaren=False, pay_dict=None):
        """Sweep the threshold over [0, 2) and collect back-test statistics.

        Args:
            X_test: feature frame forwarded to ``cal``.
            bins: number of evenly spaced thresholds, ``2 * i / bins``.
            fukusho, tansho, umaren: betting rule, forwarded to ``cal``.
            pay_dict: payout tables; when omitted, the module-level
                ``pay_dict`` is used as before.

        Returns:
            Dict of equally long arrays: ``"invested"`` (in 100-yen tickets),
            ``"kaishuuritu"`` (payback / invested in percent, 100 when nothing
            was bought), ``"div"`` (profit in yen) and ``"threshold"``.
        """
        if pay_dict is None:
            try:
                pay_dict = globals()["pay_dict"]
            except KeyError:
                raise NameError("name 'pay_dict' is not defined; pass pay_dict explicitly") from None

        thresholds = []
        kaishuuritu = []
        divs = []
        investeds = []
        for i in tqdm(range(bins)):
            threshold = (i / bins) * 2
            invested, payback_sum, div = self.cal(
                X_test, pay_dict, threshold, tansho=tansho, fukusho=fukusho, umaren=umaren)
            thresholds.append(threshold)
            divs.append(div)
            investeds.append(invested / 100)
            # Always append exactly one value so all arrays stay aligned.
            kaishuuritu.append(100.0 if invested == 0 else (payback_sum / invested) * 100)

        return {
            "invested": np.array(investeds),
            "kaishuuritu": np.array(kaishuuritu),
            "div": np.array(divs),
            "threshold": np.array(thresholds),
        }

    def integral(self, d=None, h=None, bins=100, fukusho=True):
        """Trapezoidal area of (return rate - 100) over normalised investment.

        Args:
            d: investment values (x axis); scaled by their maximum.
            h: return rates in percent (y axis); 100 is subtracted first.
            bins, fukusho: used to compute ``d`` and ``h`` through
                ``visualize`` on the test split when both are omitted.

        Returns:
            The area as a float; 0.0 when fewer than two points are available
            or when the largest investment is 0.

        Raises:
            ValueError: if only one of ``d`` and ``h`` is given.
        """
        if d is None and h is None:
            res = self.visualize(self.X_test, bins=bins, fukusho=fukusho)
            d, h = res["invested"], res["kaishuuritu"]
        elif d is None or h is None:
            raise ValueError("d and h must be given together")

        points = sorted(zip(d, h))
        if len(points) < 2:
            return 0.0
        x = np.array([point[0] for point in points], dtype=float)
        y = np.array([point[1] for point in points], dtype=float) - 100
        largest = x.max()
        if largest == 0:
            return 0.0
        x = x / largest
        return float(np.sum((y[:-1] + y[1:]) * np.diff(x) / 2))

In [3]:
# Pickled lookup tables used when settling bets and scoring pedigrees.
# NOTE: unpickling executes arbitrary code; only load files you produced yourself.
with open("pay_dict.pickle", "rb") as f:
    pay_dict = pickle.load(f)
with open("dict_horse_ped.pickle", "rb") as f:
    horse_ped_dict = pickle.load(f)

# Pickled pandas objects.
results_addinfo = pd.read_pickle("result_addinfo.pickle")
horse_history = pd.read_pickle("horse_history.pickle")
jockey_history = pd.read_pickle("jockey_history.pickle")

# One entry per distinct index label of horse_history, holding the rows
# (or the single row) stored under that label.
dict_horse_history = {
    label: horse_history.loc[label]
    for label in horse_history.index.unique()
}

In [6]:
# Run the preprocessing pipeline and take the scaled frame.
instance = Results(results_addinfo)
data = instance.post_df

# Columns excluded from the model inputs.
drop_features = [
    '賞金_ave_2', '賞金_sum_2', '賞金_ave_4', '賞金_sum_4',
    '順番_ave_4', '順番_ave_distorted', '順番_ave_2_distorted',
    '順番_ave_4_distorted',
    '増減', "着順", "騎乗回数",
]

# Target is "score"; the features are everything else minus drop_features.
# A single non-mutating drop replaces the two-step drop with inplace=True.
y = data["score"]
X = data.drop(columns=["score", *drop_features])

In [7]:
# Build the evaluator; its constructor splits X/y into shuffled
# train/validation/test parts (70/30, then 70/30 of the remainder).
ev = Evaluater(X, y)

In [368]:
# Order-preserving split (shuffle=False): the last 30% of the rows become the
# test set, then the last 30% of the remainder becomes the validation set.
# NOTE(review): presumably the rows are in chronological order -- confirm.
#
# train_test_split returns (X_train, X_test, y_train, y_test). The second
# value is the test *features*; it used to be unpacked into `y_train`, which
# left X_test undefined and briefly stored features under a target name.
tmp_x, X_test, tmp_y, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(tmp_x, tmp_y, test_size=0.3, shuffle=False)

In [369]:
import optuna.integration.lightgbm as lgb_o

# Wrap the ordered train/validation splits as datasets for the optuna tuner.
# NOTE(review): unlike Evaluater.fit, nothing here drops "馬番", "単勝" and
# "人気", so they are tuned as features if X still contains them -- confirm
# this is intended.
lgb_train = lgb_o.Dataset(X_train.to_numpy(), y_train.to_numpy())
lgb_val = lgb_o.Dataset(X_val.to_numpy(), y_val.to_numpy())

In [370]:
# Fixed settings for the tuner; the remaining hyper-parameters are searched
# step by step (the log shows feature_fraction first, then num_leaves).
params = dict(
    objective="regression",
    random_state=57,
    metric="mae",
)

# Both datasets are passed as validation sets, the training set first.
lgb_clf_o = lgb_o.train(
    params,
    lgb_train,
    valid_sets=(lgb_train, lgb_val),
)


[I 2023-12-20 09:55:01,604] A new study created in memory with name: no-name-3fc38583-2872-4035-a7dd-9d3205c3c71f
feature_fraction, val_score: inf:   0%|                                                          | 0/7 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001518 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001632:  14%|######4                                      | 1/7 [00:01<00:11,  1.84s/it][I 2023-12-20 09:55:03,452] Trial 0 finished with value: 0.0016317381363111336 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 0.0016317381363111336.
feature_fraction, val_score: 0.001632:  14%|######4                                      | 1/7 [00:01<00:11,  1.84s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002546 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001628:  29%|############8                                | 2/7 [00:03<00:09,  1.88s/it][I 2023-12-20 09:55:05,363] Trial 1 finished with value: 0.0016278663894701356 and parameters: {'feature_fraction': 0.7}. Best is trial 1 with value: 0.0016278663894701356.
feature_fraction, val_score: 0.001628:  29%|############8                                | 2/7 [00:03<00:09,  1.88s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001628:  43%|###################2                         | 3/7 [00:05<00:07,  1.90s/it][I 2023-12-20 09:55:07,290] Trial 2 finished with value: 0.0016349058760860614 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 1 with value: 0.0016278663894701356.
feature_fraction, val_score: 0.001628:  43%|###################2                         | 3/7 [00:05<00:07,  1.90s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000971 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001628:  57%|#########################7                   | 4/7 [00:07<00:05,  1.98s/it][I 2023-12-20 09:55:09,387] Trial 3 finished with value: 0.0016303639850354416 and parameters: {'feature_fraction': 1.0}. Best is trial 1 with value: 0.0016278663894701356.
feature_fraction, val_score: 0.001628:  57%|#########################7                   | 4/7 [00:07<00:05,  1.98s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001623:  71%|################################1            | 5/7 [00:09<00:03,  1.86s/it][I 2023-12-20 09:55:11,025] Trial 4 finished with value: 0.0016231643397176085 and parameters: {'feature_fraction': 0.4}. Best is trial 4 with value: 0.0016231643397176085.
feature_fraction, val_score: 0.001623:  71%|################################1            | 5/7 [00:09<00:03,  1.86s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001622:  86%|######################################5      | 6/7 [00:11<00:01,  1.84s/it][I 2023-12-20 09:55:12,819] Trial 5 finished with value: 0.0016220249609950812 and parameters: {'feature_fraction': 0.6}. Best is trial 5 with value: 0.0016220249609950812.
feature_fraction, val_score: 0.001622:  86%|######################################5      | 6/7 [00:11<00:01,  1.84s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction, val_score: 0.001622: 100%|#############################################| 7/7 [00:13<00:00,  1.96s/it][I 2023-12-20 09:55:15,042] Trial 6 finished with value: 0.0016328075075402004 and parameters: {'feature_fraction': 0.5}. Best is trial 5 with value: 0.0016220249609950812.
feature_fraction, val_score: 0.001622: 100%|#############################################| 7/7 [00:13<00:00,  1.92s/it]
num_leaves, val_score: 0.001622:   0%|                                                          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:   5%|##5                                               | 1/20 [00:03<00:58,  3.07s/it][I 2023-12-20 09:55:18,117] Trial 7 finished with value: 0.0016392329005846563 and parameters: {'num_leaves': 73}. Best is trial 7 with value: 0.0016392329005846563.
num_leaves, val_score: 0.001622:   5%|##5                                               | 1/20 [00:03<00:58,  3.07s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001400 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  10%|#####                                             | 2/20 [00:08<01:19,  4.40s/it][I 2023-12-20 09:55:23,448] Trial 8 finished with value: 0.001644603875583225 and parameters: {'num_leaves': 136}. Best is trial 7 with value: 0.0016392329005846563.
num_leaves, val_score: 0.001622:  10%|#####                                             | 2/20 [00:08<01:19,  4.40s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000682 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  15%|#######5                                          | 3/20 [00:11<01:03,  3.75s/it][I 2023-12-20 09:55:26,429] Trial 9 finished with value: 0.0016325183841884856 and parameters: {'num_leaves': 49}. Best is trial 9 with value: 0.0016325183841884856.
num_leaves, val_score: 0.001622:  15%|#######5                                          | 3/20 [00:11<01:03,  3.75s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000222 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  20%|##########                                        | 4/20 [00:24<01:56,  7.28s/it][I 2023-12-20 09:55:39,129] Trial 10 finished with value: 0.0016421192109194779 and parameters: {'num_leaves': 249}. Best is trial 9 with value: 0.0016325183841884856.
num_leaves, val_score: 0.001622:  20%|##########                                        | 4/20 [00:24<01:56,  7.28s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000380 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  25%|############5                                     | 5/20 [00:49<03:29, 13.94s/it][I 2023-12-20 09:56:04,878] Trial 11 finished with value: 0.001645392943795017 and parameters: {'num_leaves': 250}. Best is trial 9 with value: 0.0016325183841884856.
num_leaves, val_score: 0.001622:  25%|############5                                     | 5/20 [00:49<03:29, 13.94s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002885 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  30%|###############                                   | 6/20 [01:00<02:57, 12.68s/it][I 2023-12-20 09:56:15,121] Trial 12 finished with value: 0.0016548618565417207 and parameters: {'num_leaves': 159}. Best is trial 9 with value: 0.0016325183841884856.
num_leaves, val_score: 0.001622:  30%|###############                                   | 6/20 [01:00<02:57, 12.68s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000365 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  35%|#################5                                | 7/20 [01:01<01:57,  9.02s/it][I 2023-12-20 09:56:16,603] Trial 13 finished with value: 0.0016294940615092398 and parameters: {'num_leaves': 9}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  35%|#################5                                | 7/20 [01:01<01:57,  9.02s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002266 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  40%|####################                              | 8/20 [01:10<01:46,  8.89s/it][I 2023-12-20 09:56:25,209] Trial 14 finished with value: 0.001640661492347403 and parameters: {'num_leaves': 192}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  40%|####################                              | 8/20 [01:10<01:46,  8.89s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002431 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  45%|######################5                           | 9/20 [01:13<01:19,  7.22s/it][I 2023-12-20 09:56:28,761] Trial 15 finished with value: 0.0016348113201073104 and parameters: {'num_leaves': 71}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  45%|######################5                           | 9/20 [01:13<01:19,  7.22s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001676 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  50%|########################5                        | 10/20 [01:22<01:16,  7.63s/it][I 2023-12-20 09:56:37,320] Trial 16 finished with value: 0.0016438226616002191 and parameters: {'num_leaves': 194}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  50%|########################5                        | 10/20 [01:22<01:16,  7.63s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000365 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  55%|##########################9                      | 11/20 [01:29<01:07,  7.53s/it][I 2023-12-20 09:56:44,603] Trial 17 finished with value: 0.0016398260994868771 and parameters: {'num_leaves': 111}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  55%|##########################9                      | 11/20 [01:29<01:07,  7.53s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  60%|#############################4                   | 12/20 [01:30<00:44,  5.53s/it][I 2023-12-20 09:56:45,570] Trial 18 finished with value: 0.0016335138870197954 and parameters: {'num_leaves': 5}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  60%|#############################4                   | 12/20 [01:30<00:44,  5.53s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000370 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  65%|###############################8                 | 13/20 [01:42<00:51,  7.42s/it][I 2023-12-20 09:56:57,337] Trial 19 finished with value: 0.0016488655030958538 and parameters: {'num_leaves': 216}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  65%|###############################8                 | 13/20 [01:42<00:51,  7.42s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  70%|##################################3              | 14/20 [01:48<00:42,  7.10s/it][I 2023-12-20 09:57:03,709] Trial 20 finished with value: 0.001636843823071655 and parameters: {'num_leaves': 113}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  70%|##################################3              | 14/20 [01:48<00:42,  7.10s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002442 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  75%|####################################7            | 15/20 [01:55<00:35,  7.03s/it][I 2023-12-20 09:57:10,557] Trial 21 finished with value: 0.0016475149213318967 and parameters: {'num_leaves': 158}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  75%|####################################7            | 15/20 [01:55<00:35,  7.03s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000372 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  80%|#######################################2         | 16/20 [01:58<00:23,  5.93s/it][I 2023-12-20 09:57:13,931] Trial 22 finished with value: 0.0016325044421735102 and parameters: {'num_leaves': 43}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  80%|#######################################2         | 16/20 [01:58<00:23,  5.93s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002936 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  85%|#########################################6       | 17/20 [02:04<00:17,  5.90s/it][I 2023-12-20 09:57:19,777] Trial 23 finished with value: 0.0016359902298399168 and parameters: {'num_leaves': 89}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  85%|#########################################6       | 17/20 [02:04<00:17,  5.90s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000432 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  90%|############################################1    | 18/20 [02:21<00:18,  9.20s/it][I 2023-12-20 09:57:36,656] Trial 24 finished with value: 0.0016493484059607495 and parameters: {'num_leaves': 227}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  90%|############################################1    | 18/20 [02:21<00:18,  9.20s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000464 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622:  95%|##############################################5  | 19/20 [02:33<00:10, 10.07s/it][I 2023-12-20 09:57:48,746] Trial 25 finished with value: 0.0016457358479173945 and parameters: {'num_leaves': 174}. Best is trial 13 with value: 0.0016294940615092398.
num_leaves, val_score: 0.001622:  95%|##############################################5  | 19/20 [02:33<00:10, 10.07s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002440 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


num_leaves, val_score: 0.001622: 100%|#################################################| 20/20 [02:37<00:00,  8.07s/it][I 2023-12-20 09:57:52,154] Trial 26 finished with value: 0.0016226425090577771 and parameters: {'num_leaves': 30}. Best is trial 26 with value: 0.0016226425090577771.
num_leaves, val_score: 0.001622: 100%|#################################################| 20/20 [02:37<00:00,  7.86s/it]
bagging, val_score: 0.001622:   0%|                                                             | 0/10 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  10%|#####3                                               | 1/10 [00:02<00:23,  2.64s/it][I 2023-12-20 09:57:54,801] Trial 27 finished with value: 0.0016493596512061894 and parameters: {'bagging_fraction': 0.7755074963387172, 'bagging_freq': 6}. Best is trial 27 with value: 0.0016493596512061894.
bagging, val_score: 0.001622:  10%|#####3                                               | 1/10 [00:02<00:23,  2.64s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002369 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  20%|##########6                                          | 2/10 [00:06<00:24,  3.10s/it][I 2023-12-20 09:57:58,222] Trial 28 finished with value: 0.0016677700132282393 and parameters: {'bagging_fraction': 0.4170954987434374, 'bagging_freq': 1}. Best is trial 27 with value: 0.0016493596512061894.
bagging, val_score: 0.001622:  20%|##########6                                          | 2/10 [00:06<00:24,  3.10s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002628 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  30%|###############9                                     | 3/10 [00:09<00:21,  3.06s/it][I 2023-12-20 09:58:01,232] Trial 29 finished with value: 0.001632913094235269 and parameters: {'bagging_fraction': 0.963611255779326, 'bagging_freq': 2}. Best is trial 29 with value: 0.001632913094235269.
bagging, val_score: 0.001622:  30%|###############9                                     | 3/10 [00:09<00:21,  3.06s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002609 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  40%|#####################2                               | 4/10 [00:11<00:17,  2.96s/it][I 2023-12-20 09:58:04,046] Trial 30 finished with value: 0.0016788791463913484 and parameters: {'bagging_fraction': 0.43356247325722497, 'bagging_freq': 7}. Best is trial 29 with value: 0.001632913094235269.
bagging, val_score: 0.001622:  40%|#####################2                               | 4/10 [00:11<00:17,  2.96s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  50%|##########################5                          | 5/10 [00:14<00:14,  2.88s/it][I 2023-12-20 09:58:06,773] Trial 31 finished with value: 0.0016528367346325499 and parameters: {'bagging_fraction': 0.6973461134159451, 'bagging_freq': 4}. Best is trial 29 with value: 0.001632913094235269.
bagging, val_score: 0.001622:  50%|##########################5                          | 5/10 [00:14<00:14,  2.88s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002360 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  60%|###############################8                     | 6/10 [00:19<00:14,  3.60s/it][I 2023-12-20 09:58:11,778] Trial 32 finished with value: 0.0016514569298060348 and parameters: {'bagging_fraction': 0.6397454656696505, 'bagging_freq': 4}. Best is trial 29 with value: 0.001632913094235269.
bagging, val_score: 0.001622:  60%|###############################8                     | 6/10 [00:19<00:14,  3.60s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001073 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  70%|#####################################                | 7/10 [00:23<00:10,  3.63s/it][I 2023-12-20 09:58:15,464] Trial 33 finished with value: 0.001631196475935086 and parameters: {'bagging_fraction': 0.9914842655828336, 'bagging_freq': 5}. Best is trial 33 with value: 0.001631196475935086.
bagging, val_score: 0.001622:  70%|#####################################                | 7/10 [00:23<00:10,  3.63s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000376 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  80%|##########################################4          | 8/10 [00:26<00:07,  3.55s/it][I 2023-12-20 09:58:18,844] Trial 34 finished with value: 0.0016644787215408846 and parameters: {'bagging_fraction': 0.5449699697036939, 'bagging_freq': 2}. Best is trial 33 with value: 0.001631196475935086.
bagging, val_score: 0.001622:  80%|##########################################4          | 8/10 [00:26<00:07,  3.55s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000454 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622:  90%|###############################################7     | 9/10 [00:30<00:03,  3.50s/it][I 2023-12-20 09:58:22,229] Trial 35 finished with value: 0.00164160347508959 and parameters: {'bagging_fraction': 0.8387652257786804, 'bagging_freq': 7}. Best is trial 33 with value: 0.001631196475935086.
bagging, val_score: 0.001622:  90%|###############################################7     | 9/10 [00:30<00:03,  3.50s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


bagging, val_score: 0.001622: 100%|####################################################| 10/10 [00:33<00:00,  3.43s/it][I 2023-12-20 09:58:25,495] Trial 36 finished with value: 0.0016707644602210253 and parameters: {'bagging_fraction': 0.5633454603231086, 'bagging_freq': 3}. Best is trial 33 with value: 0.001631196475935086.
bagging, val_score: 0.001622: 100%|####################################################| 10/10 [00:33<00:00,  3.33s/it]
feature_fraction_stage2, val_score: 0.001622:   0%|                                              | 0/6 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002672 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction_stage2, val_score: 0.001622:  17%|######3                               | 1/6 [00:02<00:12,  2.45s/it][I 2023-12-20 09:58:27,955] Trial 37 finished with value: 0.0016220249609950812 and parameters: {'feature_fraction': 0.584}. Best is trial 37 with value: 0.0016220249609950812.
feature_fraction_stage2, val_score: 0.001622:  17%|######3                               | 1/6 [00:02<00:12,  2.45s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000358 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction_stage2, val_score: 0.001622:  33%|############6                         | 2/6 [00:05<00:10,  2.69s/it][I 2023-12-20 09:58:30,819] Trial 38 finished with value: 0.0016328075075402004 and parameters: {'feature_fraction': 0.52}. Best is trial 37 with value: 0.0016220249609950812.
feature_fraction_stage2, val_score: 0.001622:  33%|############6                         | 2/6 [00:05<00:10,  2.69s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000373 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction_stage2, val_score: 0.001622:  50%|###################                   | 3/6 [00:07<00:07,  2.64s/it][I 2023-12-20 09:58:33,392] Trial 39 finished with value: 0.0016361321403403478 and parameters: {'feature_fraction': 0.552}. Best is trial 37 with value: 0.0016220249609950812.
feature_fraction_stage2, val_score: 0.001622:  50%|###################                   | 3/6 [00:07<00:07,  2.64s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001658 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction_stage2, val_score: 0.001622:  67%|#########################3            | 4/6 [00:10<00:05,  2.56s/it][I 2023-12-20 09:58:35,844] Trial 40 finished with value: 0.0016377722210092166 and parameters: {'feature_fraction': 0.6479999999999999}. Best is trial 37 with value: 0.0016220249609950812.
feature_fraction_stage2, val_score: 0.001622:  67%|#########################3            | 4/6 [00:10<00:05,  2.56s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction_stage2, val_score: 0.001622:  83%|###############################6      | 5/6 [00:12<00:02,  2.46s/it][I 2023-12-20 09:58:38,114] Trial 41 finished with value: 0.0016278663894701356 and parameters: {'feature_fraction': 0.6799999999999999}. Best is trial 37 with value: 0.0016220249609950812.
feature_fraction_stage2, val_score: 0.001622:  83%|###############################6      | 5/6 [00:12<00:02,  2.46s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000388 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


feature_fraction_stage2, val_score: 0.001622: 100%|######################################| 6/6 [00:16<00:00,  2.78s/it][I 2023-12-20 09:58:41,518] Trial 42 finished with value: 0.0016220249609950812 and parameters: {'feature_fraction': 0.616}. Best is trial 37 with value: 0.0016220249609950812.
feature_fraction_stage2, val_score: 0.001622: 100%|######################################| 6/6 [00:16<00:00,  2.67s/it]
regularization_factors, val_score: 0.001622:   0%|                                              | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001855 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519






regularization_factors, val_score: 0.001622:   5%|#9                                    | 1/20 [00:00<00:18,  1.02it/s][I 2023-12-20 09:58:42,508] Trial 43 finished with value: 0.0016895831615519807 and parameters: {'lambda_l1': 0.6563890674383593, 'lambda_l2': 4.299056882931866e-08}. Best is trial 43 with value: 0.0016895831615519807.
regularization_factors, val_score: 0.001622:   5%|#9                                    | 1/20 [00:00<00:18,  1.02it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002231 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  10%|###8                                  | 2/20 [00:03<00:36,  2.05s/it][I 2023-12-20 09:58:45,311] Trial 44 finished with value: 0.0016314227019212683 and parameters: {'lambda_l1': 1.1292879205260062e-08, 'lambda_l2': 2.6921402014629017}. Best is trial 44 with value: 0.0016314227019212683.
regularization_factors, val_score: 0.001622:  10%|###8                                  | 2/20 [00:03<00:36,  2.05s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  15%|#####7                                | 3/20 [00:06<00:40,  2.40s/it][I 2023-12-20 09:58:48,137] Trial 45 finished with value: 0.0016312589851434115 and parameters: {'lambda_l1': 1.7016801186158288e-06, 'lambda_l2': 0.00048657685813592783}. Best is trial 45 with value: 0.0016312589851434115.
regularization_factors, val_score: 0.001622:  15%|#####7                                | 3/20 [00:06<00:40,  2.40s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519






regularization_factors, val_score: 0.001622:  20%|#######6                              | 4/20 [00:07<00:28,  1.76s/it][I 2023-12-20 09:58:48,907] Trial 46 finished with value: 0.0018918455822163765 and parameters: {'lambda_l1': 5.985528730320578, 'lambda_l2': 1.209994925594426e-08}. Best is trial 45 with value: 0.0016312589851434115.
regularization_factors, val_score: 0.001622:  20%|#######6                              | 4/20 [00:07<00:28,  1.76s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  25%|#########5                            | 5/20 [00:10<00:33,  2.23s/it][I 2023-12-20 09:58:51,983] Trial 47 finished with value: 0.001628382159360871 and parameters: {'lambda_l1': 0.0029013512307943523, 'lambda_l2': 7.792653410765815}. Best is trial 47 with value: 0.001628382159360871.
regularization_factors, val_score: 0.001622:  25%|#########5                            | 5/20 [00:10<00:33,  2.23s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  30%|###########4                          | 6/20 [00:12<00:31,  2.27s/it][I 2023-12-20 09:58:54,325] Trial 48 finished with value: 0.0016262138049432796 and parameters: {'lambda_l1': 0.00028657718481907036, 'lambda_l2': 0.00018801733419459706}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  30%|###########4                          | 6/20 [00:12<00:31,  2.27s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002508 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  35%|#############3                        | 7/20 [00:15<00:30,  2.35s/it][I 2023-12-20 09:58:56,843] Trial 49 finished with value: 0.0016330358126225948 and parameters: {'lambda_l1': 0.0032134709092053572, 'lambda_l2': 0.0032907138787242286}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  35%|#############3                        | 7/20 [00:15<00:30,  2.35s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001808 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  40%|###############2                      | 8/20 [00:17<00:29,  2.44s/it][I 2023-12-20 09:58:59,467] Trial 50 finished with value: 0.0016309912265234346 and parameters: {'lambda_l1': 2.6762129575645295e-08, 'lambda_l2': 2.520911948808735e-06}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  40%|###############2                      | 8/20 [00:17<00:29,  2.44s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002294 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  45%|#################1                    | 9/20 [00:21<00:31,  2.89s/it][I 2023-12-20 09:59:03,355] Trial 51 finished with value: 0.0016363671321976184 and parameters: {'lambda_l1': 3.600090991092059e-05, 'lambda_l2': 0.028161860114878718}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  45%|#################1                    | 9/20 [00:21<00:31,  2.89s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001640 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  50%|##################5                  | 10/20 [00:23<00:23,  2.39s/it]



[I 2023-12-20 09:59:04,636] Trial 52 finished with value: 0.0016311981163973906 and parameters: {'lambda_l1': 0.046204494986767576, 'lambda_l2': 5.409043796247451e-06}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  50%|##################5                  | 10/20 [00:23<00:23,  2.39s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  55%|####################3                | 11/20 [00:25<00:22,  2.49s/it][I 2023-12-20 09:59:07,333] Trial 53 finished with value: 0.0016334350591697177 and parameters: {'lambda_l1': 1.1826362176934404e-05, 'lambda_l2': 1.0000920303520344e-05}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  55%|####################3                | 11/20 [00:25<00:22,  2.49s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002630 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  60%|######################2              | 12/20 [00:28<00:19,  2.46s/it][I 2023-12-20 09:59:09,733] Trial 54 finished with value: 0.0016267484706749974 and parameters: {'lambda_l1': 5.586646408183726e-07, 'lambda_l2': 0.08450822338161211}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  60%|######################2              | 12/20 [00:28<00:19,  2.46s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  65%|########################             | 13/20 [00:30<00:17,  2.50s/it][I 2023-12-20 09:59:12,331] Trial 55 finished with value: 0.001629436185122353 and parameters: {'lambda_l1': 0.0005671327013265436, 'lambda_l2': 9.98624559726526e-05}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  65%|########################             | 13/20 [00:30<00:17,  2.50s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000406 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519






regularization_factors, val_score: 0.001622:  70%|#########################9           | 14/20 [00:32<00:13,  2.28s/it][I 2023-12-20 09:59:14,102] Trial 56 finished with value: 0.0016305128809563647 and parameters: {'lambda_l1': 0.05080058387713178, 'lambda_l2': 4.346587275802295e-07}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  70%|#########################9           | 14/20 [00:32<00:13,  2.28s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  75%|###########################7         | 15/20 [00:34<00:11,  2.28s/it][I 2023-12-20 09:59:16,369] Trial 57 finished with value: 0.0016369544837321094 and parameters: {'lambda_l1': 5.473730993559222e-05, 'lambda_l2': 0.0001215456733101376}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  75%|###########################7         | 15/20 [00:34<00:11,  2.28s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  80%|#############################6       | 16/20 [00:37<00:10,  2.52s/it][I 2023-12-20 09:59:19,444] Trial 58 finished with value: 0.0016319159705257872 and parameters: {'lambda_l1': 6.908390256717932e-07, 'lambda_l2': 0.23761693302689035}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  80%|#############################6       | 16/20 [00:37<00:10,  2.52s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000915 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519




regularization_factors, val_score: 0.001622:  80%|#############################6       | 16/20 [00:40<00:10,  2.52s/it]



regularization_factors, val_score: 0.001622:  85%|###############################4     | 17/20 [00:40<00:07,  2.43s/it][I 2023-12-20 09:59:21,666] Trial 59 finished with value: 0.0016274616331652528 and parameters: {'lambda_l1': 0.03713398163241429, 'lambda_l2': 0.0030591114852380158}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  85%|###############################4     | 17/20 [00:40<00:07,  2.43s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000403 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  90%|#################################3   | 18/20 [00:43<00:05,  2.77s/it][I 2023-12-20 09:59:25,238] Trial 60 finished with value: 0.0016293603772740096 and parameters: {'lambda_l1': 0.00016283237940938145, 'lambda_l2': 3.502276099382809e-07}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  90%|#################################3   | 18/20 [00:43<00:05,  2.77s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001062 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622:  95%|###################################1 | 19/20 [00:46<00:02,  2.77s/it][I 2023-12-20 09:59:27,995] Trial 61 finished with value: 0.0016316478194200526 and parameters: {'lambda_l1': 2.0591069046596326e-07, 'lambda_l2': 0.17273490380505574}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622:  95%|###################################1 | 19/20 [00:46<00:02,  2.77s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002101 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


regularization_factors, val_score: 0.001622: 100%|#####################################| 20/20 [00:48<00:00,  2.59s/it][I 2023-12-20 09:59:30,171] Trial 62 finished with value: 0.0016318052803044828 and parameters: {'lambda_l1': 4.179614154189947e-06, 'lambda_l2': 0.023092105276304634}. Best is trial 48 with value: 0.0016262138049432796.
regularization_factors, val_score: 0.001622: 100%|#####################################| 20/20 [00:48<00:00,  2.43s/it]
min_child_samples, val_score: 0.001622:   0%|                                                    | 0/5 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000378 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


min_child_samples, val_score: 0.001622:  20%|########8                                   | 1/5 [00:03<00:12,  3.01s/it][I 2023-12-20 09:59:33,195] Trial 63 finished with value: 0.0016368802696244155 and parameters: {'min_child_samples': 25}. Best is trial 63 with value: 0.0016368802696244155.
min_child_samples, val_score: 0.001622:  20%|########8                                   | 1/5 [00:03<00:12,  3.01s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


min_child_samples, val_score: 0.001622:  40%|#################6                          | 2/5 [00:06<00:09,  3.20s/it][I 2023-12-20 09:59:36,528] Trial 64 finished with value: 0.0016323977553120853 and parameters: {'min_child_samples': 50}. Best is trial 64 with value: 0.0016323977553120853.
min_child_samples, val_score: 0.001622:  40%|#################6                          | 2/5 [00:06<00:09,  3.20s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


min_child_samples, val_score: 0.001622:  60%|##########################4                 | 3/5 [00:08<00:05,  2.67s/it][I 2023-12-20 09:59:38,556] Trial 65 finished with value: 0.001627624259347788 and parameters: {'min_child_samples': 10}. Best is trial 65 with value: 0.001627624259347788.
min_child_samples, val_score: 0.001622:  60%|##########################4                 | 3/5 [00:08<00:05,  2.67s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


min_child_samples, val_score: 0.001622:  80%|###################################2        | 4/5 [00:11<00:03,  3.00s/it][I 2023-12-20 09:59:42,079] Trial 66 finished with value: 0.001633198724925455 and parameters: {'min_child_samples': 5}. Best is trial 65 with value: 0.001627624259347788.
min_child_samples, val_score: 0.001622:  80%|###################################2        | 4/5 [00:11<00:03,  3.00s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2559
[LightGBM] [Info] Number of data points in the train set: 21185, number of used features: 20
[LightGBM] [Info] Start training from score -0.061519


min_child_samples, val_score: 0.001622: 100%|############################################| 5/5 [00:14<00:00,  2.88s/it][I 2023-12-20 09:59:44,726] Trial 67 finished with value: 0.0016371461733758884 and parameters: {'min_child_samples': 100}. Best is trial 65 with value: 0.001627624259347788.
min_child_samples, val_score: 0.001622: 100%|############################################| 5/5 [00:14<00:00,  2.91s/it]


In [None]:
# Base URL of race pages on db.netkeiba.com; make_url() builds race URLs from its own copy of this same prefix.
url = "https://db.netkeiba.com/race/"

def scraping_horse_ped(horse_id_dic, pre_horse_ped_dic={}):
    """Collect parent / grandparent links from each horse page.

    Args:
        horse_id_dic: mapping of horse_id -> URL of the page to fetch.
        pre_horse_ped_dic: results of an earlier run. It is copied, never
            mutated, and ids already present in it are not fetched again.

    Returns:
        dict of horse_id -> {"1_parent": (...), "2_parent": (...)} where each
        member is an ``(id, name)`` tuple built from the ``<a>`` tags found
        inside ``<dd class="DB_ProfHead_dd_01">``: anchors 0 and 3 go to
        "1_parent", anchors 1, 2, 4 and 5 go to "2_parent".

    Any exception is printed and ends the scrape early; everything gathered
    up to that point is still returned.
    """
    pedigrees = pre_horse_ped_dic.copy()
    for horse_id, url in tqdm(horse_id_dic.items()):
        try:
            if horse_id in pedigrees:
                continue
            # One second between requests.
            time.sleep(1)
            response = requests.get(url)
            response.encoding = "EUC-JP"
            page = BeautifulSoup(response.text, "html.parser")
            anchors = page.find("dd", attrs={"class":"DB_ProfHead_dd_01"}).find_all("a")
            # The id is the 4th "/"-separated piece of the anchor's HTML text.
            relatives = [(str(a).split("/")[3], a.get_text()) for a in anchors]
            pedigrees[horse_id] = {
                "1_parent": (relatives[0], relatives[3]),
                "2_parent": (relatives[1], relatives[2], relatives[4], relatives[5]),
            }
        except Exception as e:
            print(e)
            break
    return pedigrees

def scraping_results(race_url_dic, pre_race_results={}):
    """Scrape the result table of every race and attach horse / jockey ids.

    Args:
        race_url_dic: mapping of race_id -> race page URL.
        pre_race_results: results of an earlier run. It is copied, never
            mutated, and race ids already present in it are skipped.

    Returns:
        dict of race_id -> DataFrame (first table that ``pd.read_html`` finds
        on the page) with two extra columns, ``horse_id`` and ``jockey_id``,
        taken from the first run of digits in the matching link hrefs.

    Pages raising ``UnicodeDecodeError`` or ``IndexError`` are skipped. Any
    other exception (including a manual interrupt, exactly like the previous
    bare ``except``) is reported with its URL and stops the scrape; whatever
    was collected so far is still returned.
    """
    # The only delay is this single pause before the loop starts.
    time.sleep(0.5)
    race_results = pre_race_results.copy()
    for race_id, race_url in tqdm(race_url_dic.items()):
        if race_id in race_results:
            continue

        try:
            # Note: the page is fetched twice, once by pandas for the table
            # and once by requests for the link hrefs.
            df_tmp = pd.read_html(race_url)[0]
            res = requests.get(race_url)
            res.encoding = "EUC-JP"
            soup = BeautifulSoup(res.text, "html.parser")
            result_table = soup.find("table", summary="レース結果")
            horse_links = result_table.find_all("a", attrs={"href": re.compile("/horse")})
            jockey_links = result_table.find_all("a", attrs={"href": re.compile("/jockey")})
            # Raw strings: "\d" in a normal literal is an invalid escape
            # sequence and triggers a SyntaxWarning on recent Python versions.
            df_tmp["horse_id"] = [re.findall(r"\d+", link["href"])[0] for link in horse_links]
            df_tmp["jockey_id"] = [re.findall(r"\d+", link["href"])[0] for link in jockey_links]
            race_results[race_id] = df_tmp
        except (UnicodeDecodeError, IndexError):
            continue
        except BaseException as exc:
            # Same catch-all scope as before, but the cause is no longer hidden.
            print(race_url, repr(exc))
            break
    return race_results

def make_url(year=2023):
    """Enumerate candidate race ids and their db.netkeiba.com URLs for a year.

    A race id is the concatenation of the year and four zero-padded two-digit
    fields: place (1-10), kai (1-5), day (1-12) and race (1-12). Every
    combination is generated without checking that the race exists.

    Args:
        year: year prefix of the generated ids (int or str). Defaults to
            2023, the value that used to be hard-coded.

    Returns:
        dict of race_id (str) -> race page URL, in place/kai/day/race order.
    """
    url = "https://db.netkeiba.com/race/"
    race_url_dic = {}
    for place in range(1, 11):
        for kai in range(1, 6):
            for day in range(1, 13):
                for race in range(1, 13):
                    race_id = f"{year}{place:02d}{kai:02d}{day:02d}{race:02d}"
                    race_url_dic[race_id] = url + race_id
    return race_url_dic

def dic_to_df(dic):
    """Stack a dict of DataFrames into one frame indexed by the dict keys.

    Every row coming from ``dic[key]`` gets ``key`` as its index label.

    Args:
        dic: mapping of key -> DataFrame.

    Returns:
        The row-wise concatenation of all frames (``sort=False``), or an
        empty DataFrame when ``dic`` is empty.

    The input frames are left untouched: ``dic.copy()`` is only a shallow
    copy, so re-labelling the frames it holds would overwrite the index of
    the caller's own objects. Each frame is therefore copied first.
    """
    relabelled_frames = []
    for key, frame in dic.items():
        relabelled = frame.copy()
        relabelled.index = [key] * len(relabelled)
        relabelled_frames.append(relabelled)
    if not relabelled_frames:
        # pd.concat raises on an empty list; an empty frame is a usable result.
        return pd.DataFrame()
    return pd.concat(relabelled_frames, sort=False)

def horse_pre_prizes_ave(x, gap=365):
    """Mean prize ("賞金") of a horse over the ``gap`` days before a race.

    Args:
        x: row-like object providing ``x["horse_id"]`` and ``x["date"]``.
        gap: length of the look-back window in days. Races on ``x["date"]``
            itself or later are excluded.

    Returns:
        The mean of "賞金" over the races inside the window, read from the
        module-level ``dict_horse_history``. Returns 0 when the horse is not
        in that dict, when no race falls inside the window, when the mean is
        NaN, or when an expected column is missing.
    """
    horse_id = x["horse_id"]
    today = x["date"]
    try:
        if horse_id not in dict_horse_history:
            return 0
        df_horse = dict_horse_history[horse_id]
        if isinstance(df_horse, pd.Series):
            # A Series entry is handled as one single past race.
            if today - df_horse["日付"] > timedelta(gap) or today <= df_horse["日付"]:
                return 0
            average_prize = df_horse["賞金"]
        else:
            elapsed = today - df_horse["日付"]
            in_window = (elapsed <= timedelta(gap)) & (elapsed > timedelta(0))
            average_prize = df_horse[in_window]["賞金"].mean()
        if np.isnan(average_prize):
            return 0
        return average_prize
    except KeyError:
        # The message used to format a `race_id` name that does not exist in
        # this function. Report the row label instead (presumably the race id
        # when applied row-wise to a frame indexed by race - confirm).
        print(f"エラー race_id = {getattr(x, 'name', None)}, horse_id = {horse_id}, today = {today}")
        return 0

def horse_pre_prizes_sum(x, gap=365):
    """Total prize ("賞金") of a horse over the ``gap`` days before a race.

    Args:
        x: row-like object providing ``x["horse_id"]`` and ``x["date"]``.
        gap: length of the look-back window in days. Races on ``x["date"]``
            itself or later are excluded.

    Returns:
        The sum of "賞金" over the races inside the window, read from the
        module-level ``dict_horse_history``. Returns 0 when the horse is not
        in that dict, when its single recorded race is outside the window,
        when the total is NaN, or when an expected column is missing.
    """
    horse_id = x["horse_id"]
    today = x["date"]
    try:
        if horse_id not in dict_horse_history:
            return 0
        df_horse = dict_horse_history[horse_id]

        if isinstance(df_horse, pd.Series):
            # A Series entry is handled as one single past race.
            if df_horse["日付"] >= today or today - df_horse["日付"] > timedelta(gap):
                return 0
            total_prize = df_horse["賞金"]
        else:
            elapsed = today - df_horse["日付"]
            in_window = (elapsed <= timedelta(gap)) & (elapsed > timedelta(0))
            total_prize = df_horse[in_window]["賞金"].sum()

        if np.isnan(total_prize):
            return 0
        return total_prize
    except KeyError:
        # The message used to format a `race_id` name that does not exist in
        # this function. Report the row label instead (presumably the race id
        # when applied row-wise to a frame indexed by race - confirm).
        print(f"エラー race_id = {getattr(x, 'name', None)}, horse_id = {horse_id}, today = {today}")
        return 0


def horse_pre_order_ave(x, gap=365):
    """Mean finishing position ("着順") of a horse over the ``gap`` days before a race.

    Args:
        x: row-like object providing ``x["horse_id"]`` and ``x["date"]``.
        gap: length of the look-back window in days. Races on ``x["date"]``
            itself or later are excluded.

    Returns:
        The mean of "着順" over the races inside the window, read from the
        module-level ``dict_horse_history``. Entries that ``int()`` rejects
        with ``ValueError`` count as the substitute value 6.75. The same
        substitute is returned when the horse is not in the dict, when the
        mean is NaN, or when an expected column is missing.
    """
    # Neutral fallback: average of mean(1..16) = 8.5 and mean(1..9) = 5, i.e. 6.75.
    substitute_num = (sum(range(1, 17)) / 16 + sum(range(1, 10)) / 9) / 2

    def to_int(s):
        """Convert a finishing position to int, or to the substitute if not numeric."""
        try:
            return int(s)
        except ValueError:
            return substitute_num

    horse_id = x["horse_id"]
    today = x["date"]
    try:
        if horse_id not in dict_horse_history:
            return substitute_num
        df_horse = dict_horse_history[horse_id]

        if isinstance(df_horse, pd.Series):
            # A Series entry is handled as one single past race.
            if df_horse["日付"] >= today or today - df_horse["日付"] > timedelta(gap):
                return substitute_num
            average_order = to_int(df_horse["着順"])
        else:
            elapsed = today - df_horse["日付"]
            in_window = (elapsed <= timedelta(gap)) & (elapsed > timedelta(0))
            average_order = df_horse[in_window]["着順"].map(to_int).mean()

        if np.isnan(average_order):
            return substitute_num
        return average_order
    except KeyError:
        # The message used to format a `race_id` name that does not exist in
        # this function. Report the row label instead (presumably the race id
        # when applied row-wise to a frame indexed by race - confirm).
        print(f"エラー race_id = {getattr(x, 'name', None)}, horse_id = {horse_id}, today = {today}")
        return substitute_num
    
def scraping_infos(race_url_dic , pre_race_infos={}):
    """Scrape per-race metadata from the ``data_intro`` block of each race page.

    Args:
        race_url_dic: mapping of race_id -> race page URL.
        pre_race_infos: results of an earlier run; it is copied (not mutated)
            and race ids already present in it are skipped.

    Returns:
        dict of race_id -> dict that may contain the keys "condition",
        "weather", "date", "length" and "race_type", depending on which
        tokens were found in the page text.

    Pages raising UnicodeDecodeError or AttributeError are skipped; any other
    exception prints the URL and stops the scrape, returning what was
    collected so far.
    """
    # Single pause before the loop; there is no delay between requests.
    time.sleep(1)
    race_infos = pre_race_infos.copy()
    for race_id, race_url in tqdm(race_url_dic.items()):
        if race_id in race_infos.keys():
            continue
        try:
            res = requests.get(race_url)
            res.encoding = "EUC-JP"
            soup = BeautifulSoup(res.text, "html.parser")
            # Text of the first two <p> elements of <div class="data_intro">, joined together.
            texts = soup.find("div", attrs={"class":"data_intro"}).find_all("p")[0].get_text() + soup.find("div", attrs={"class":"data_intro"}).find_all("p")[1].get_text()
            # Tokenise into runs of word characters; the fields below are picked out token by token.
            texts = re.findall(r"\w+", texts)
            race_infos[race_id] = {}
            for idx, text in enumerate(texts):
                # "condition" / "weather" are the tokens that FOLLOW the marker token.
                # If the marker is the last token, texts[idx + 1] raises IndexError,
                # which lands in the catch-all handler below and stops the scrape.
                if text in ["芝" , "ダート"]:
                    race_infos[race_id]["condition"] = texts[idx + 1]
                if text in ["天候"]:
                    race_infos[race_id]["weather"] = texts[idx + 1]
                # Any token containing one of the substrings "2000".."2024" is stored as
                # the date; later matches overwrite earlier ones.
                # NOTE(review): years from 2025 on are never matched, and any token that
                # merely contains such digits also matches - confirm this is intended.
                for date in [str(i) for i in range(2000, 2025)]:
                    if date in text:
                        race_infos[race_id]["date"] = text
                # For tokens containing "m": keep the four characters before the last one.
                if "m" in text:
                    race_infos[race_id]["length"] = text[-5:-1]
                # Once race_type has been set to "障" it is not updated by later tokens.
                if "race_type" in race_infos[race_id].keys() and race_infos[race_id]["race_type"] == "障":
                    continue
                # No break: if a token contains several of these markers, the last one in this list wins.
                for race_type in ["障", "芝", "ダート"]:
                    if race_type in text:
                        race_infos[race_id]["race_type"] = race_type

        except UnicodeDecodeError:
            continue
        except AttributeError:
            # e.g. soup.find(...) returned None because the data_intro div is missing.
            continue
        except:
            # Bare except: also catches KeyboardInterrupt, so the partial result is still returned.
            print(race_url)
            break 
    return race_infos 

def scraping_horse(horse_id_dic, pre_horse_history={}):
    """Fetch the table at index 3 of every horse page.

    Args:
        horse_id_dic: mapping of horse_id -> horse page URL.
        pre_horse_history: earlier results; copied, never mutated. Ids that
            are already present are fetched again and overwritten.

    Returns:
        dict of horse_id -> the fourth table ``pd.read_html`` finds on the page.

    Exceptions raised while reading a page are not caught here.
    """
    collected = pre_horse_history.copy()
    # One pause up front; the loop itself issues requests without delay.
    time.sleep(1)
    progress = tqdm(horse_id_dic.items())
    for key, page_url in progress:
        page_tables = pd.read_html(page_url)
        collected[key] = page_tables[3]
    return collected

def make_horse_url(horse_id_list):
    """Map each horse id to its db.netkeiba.com horse page URL.

    Args:
        horse_id_list: iterable of horse ids; each one is converted with
            ``str`` when building the URL.

    Returns:
        dict of ``horse_id -> "https://db.netkeiba.com/horse/<horse_id>"``,
        keyed by the ids exactly as they were given.
    """
    prefix = "https://db.netkeiba.com/" + "horse/"
    return {hid: prefix + str(hid) for hid in horse_id_list}

def scraping_jockey(jockey_id_dic, pre_jockey_history=None):
    """Fetch the table at index 2 of every jockey page.

    Args:
        jockey_id_dic: mapping of jockey_id -> jockey page URL.
        pre_jockey_history: optional earlier results; copied, never mutated.
            Ids already present are fetched again and overwritten. May be
            omitted (starts from an empty dict), matching the other
            ``scraping_*`` helpers whose "previous results" argument is
            optional as well.

    Returns:
        dict of jockey_id -> the third table ``pd.read_html`` finds on the page.

    Exceptions raised while reading a page are not caught here.
    """
    jockey_history = {} if pre_jockey_history is None else pre_jockey_history.copy()
    for jockey_id, jockey_url in tqdm(jockey_id_dic.items()):
        # One second between requests.
        time.sleep(1)
        jockey_history[jockey_id] = pd.read_html(jockey_url)[2]

    return jockey_history

def make_jockey_url(jockey_id_list):
    """Map each jockey id to its db.netkeiba.com jockey page URL.

    Args:
        jockey_id_list: iterable of jockey ids; each one is converted with
            ``str`` when building the URL.

    Returns:
        dict of ``jockey_id -> "https://db.netkeiba.com/jockey/<jockey_id>"``,
        keyed by the ids exactly as they were given.
    """
    return {
        jid: f"https://db.netkeiba.com/jockey/{str(jid)}"
        for jid in jockey_id_list
    }

def scraping_pay(race_url_dic, pre_pay={}):
    """Scrape the payout tables of every race page.

    Args:
        race_url_dic: mapping of race_id -> race page URL.
        pre_pay: results of an earlier run. It is copied, never mutated, and
            race ids already present in it are skipped.

    Returns:
        dict of race_id -> DataFrame built from tables 1 and 2 of the page,
        indexed by "種類", whose "該当馬", "金" and "人気" cells are lists of
        strings obtained by splitting on "br", "-" or "→".

    Pages raising ``UnicodeDecodeError`` or ``IndexError`` are reported and
    skipped. Any other exception (including a manual interrupt, exactly like
    the previous bare ``except``) is reported and stops the scrape; whatever
    was collected so far is still returned.
    """
    pay = pre_pay.copy()
    for race_id, url in tqdm(race_url_dic.items()):
        if race_id in pay:
            continue

        try:
            time.sleep(0.1)
            # Context manager closes the HTTP response even if read() fails.
            with urlopen(url) as f:
                html = f.read()
            # Replace <br /> with a literal "br" marker, presumably so that values
            # separated by line breaks inside a cell can be split apart below.
            html = html.replace(b"<br />", b"br")
            # Parse once and reuse the result instead of parsing the page twice.
            tables = pd.read_html(html)
            df = pd.concat([tables[1], tables[2]], axis=0)
            df.columns = ["種類", "該当馬", "金", "人気"]
            df = df.set_index("種類")
            for column in ("人気", "該当馬", "金"):
                df[column] = df[column].map(lambda x: re.split("br|-|→", x))
            pay[race_id] = df
        except (UnicodeDecodeError, IndexError):
            print((race_id, url))
            continue
        except BaseException as exc:
            # Same catch-all scope as before, but the cause is no longer hidden.
            print((race_id, url), repr(exc))
            break
    return pay