In [1]:
import pandas as pd
import numpy as np
import datetime
from tqdm.notebook import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import requests
from bs4 import BeautifulSoup
import time
import re
from urllib.request import urlopen
from itertools import combinations, permutations
import matplotlib.pyplot as plt

In [2]:
pd.set_option('display.max_columns', 50)

In [3]:
race_results = pd.read_pickle("../data/original_data/race_results.pickle")

In [4]:
def preprocessing(race_results):
    df = race_results.copy()
    # 着順に数字以外の文字列が含まれているものを取り除く
    df['着順'] = pd.to_numeric(df['着順'], errors='coerce')
    df.dropna(subset=['着順'], inplace=True)
    df['着順'] = df['着順'].astype(int)
    df['rank'] = df['着順'].map(lambda x:1 if x<4 else 0)

    # 性齢を性と年齢に分ける
    df["gender"] = df["性齢"].map(lambda x: str(x)[0])
    df["age"] = df["性齢"].map(lambda x: str(x)[1:]).astype(int)

    # 馬体重を体重と体重変化に分ける
    df["weight"] = df["馬体重"].str.split("(", expand=True)[0]
    df["change_weight"] = df["馬体重"].str.split("(", expand=True)[1].str[:-1]
    df['weight'] = pd.to_numeric(df['weight'], errors='coerce')
    df['change_weight'] = pd.to_numeric(df['change_weight'], errors='coerce')
    df.dropna(subset=['weight',"change_weight"], inplace=True)
    df["weight"] = df["weight"].astype(int)
    df["change_weight"] = df["change_weight"].astype(int)
    
    #出走数追加
    df['n_horses'] = df.index.map(df.index.value_counts())

    # 単勝をfloatに変換
    df["単勝"] = df["単勝"].astype(float)

    # 距離は10の位を切り捨てる
    df["course_len"] = df["course_len"].astype(float) // 100

    # 人気をfloatに変換
    df["人気"] = df["人気"].astype(float)

    # 日付をdatetimeに変更
    df["date"] = pd.to_datetime(df["date"], format="%Y年%m月%d日")

    # 枠番をintに変更
    df["枠番"] = df["枠番"].astype(int)

    # 不要な列を削除
    df.drop(["タイム", "着差", "調教師", "性齢", "馬体重", '馬名', '騎手'], axis=1, inplace=True)
    #開催場所
    df['place'] = df.index.map(lambda x:str(x)[4:6])
	
    return df

In [5]:
p_race_results = preprocessing(race_results)

In [6]:
horse_results = pd.read_pickle("../data/original_data/horse_results.pickle")

In [7]:
def horse_preprocessing(horse_results):
  df = horse_results[['日付', '着順', '賞金', "頭数"]]

  # 数値以外の欠損値を削除
  df["着順"] = pd.to_numeric(df["着順"], errors="coerce")
  df.dropna(subset=["着順"], inplace=True)
  df["着順"] = df["着順"].astype(int)

  # 賞金のNANを0で埋める
  df["賞金"].fillna(0, inplace=True)

  # (1 - 着順/頭数)の計算
  df["着順/頭数"] = (1 - df["着順"] / df["頭数"]) * 100

  # 日付型に変換
  df["date"] = pd.to_datetime(df["日付"])
  df.drop(["日付"], axis=1, inplace=True)
  return df

In [8]:
p_horse_results = horse_preprocessing(horse_results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["着順"] = pd.to_numeric(df["着順"], errors="coerce")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(subset=["着順"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["着順"] = df["着順"].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-d

In [9]:
def Merge_race_with_horse(race_results, horse_results, n_samples={3, 5, 7, "all"}):
  df_r = race_results.copy()
  df_h = horse_results.copy()
  date_list = df_r["date"].unique()
  for n_sample in n_samples:
    merged_all_df = pd.DataFrame()
    for date in date_list:
      df = df_r[df_r["date"] == date]
      horse_id_list = df["horse_id"]
      target_df = df_h.loc[horse_id_list]
      if n_sample == "all":
        filtered_df = target_df[target_df["date"] < date]
      else:
        filtered_df = target_df[target_df["date"] < date].sort_values("date", ascending = False).groupby(level=0).head(n_sample)
      average_df = filtered_df.groupby(level=0)[["着順", "賞金", "着順/頭数"]].mean()
      average_df.rename(columns={"着順":"着順_{}R".format(n_sample), "賞金":"賞金_{}R".format(n_sample) ,"着順/頭数":"着順/頭数_{}R".format(n_sample)},inplace=True)
      merged_df = pd.merge(df, average_df, how="left", left_on="horse_id", right_index=True)
      merged_all_df = pd.concat([merged_all_df, merged_df])
    df_r = merged_all_df.copy()
  return df_r

In [10]:
m_race_results = Merge_race_with_horse(p_race_results, p_horse_results)

In [20]:
m_horse_id_list = m_race_results[m_race_results["着順_allR"].isnull()]["horse_id"].unique()

In [21]:
horse_id_list = p_race_results["horse_id"].unique()

In [24]:
len(horse_id_list)

26190

In [25]:
len(m_horse_id_list)

19139

In [31]:
m_horse_id_list

array(['2016104246', '2017105100', '2017104177', ..., '2020104795',
       '2020100913', '2020103115'], dtype=object)

In [27]:
len(set(horse_id_list) & set(m_horse_id_list))

19139

In [34]:
p_horse_results[p_horse_results.index == "2017104177"]

Unnamed: 0,着順,賞金,頭数,着順/頭数,date
2017104177,12,0.0,14.0,14.285714,2021-03-03
2017104177,15,0.0,16.0,6.25,2020-12-19
2017104177,14,0.0,15.0,6.666667,2020-07-11
2017104177,1,510.0,16.0,93.75,2020-06-21
2017104177,6,0.0,16.0,62.5,2020-06-06
2017104177,2,200.0,15.0,86.666667,2020-05-09
2017104177,2,200.0,16.0,87.5,2020-04-12
2017104177,10,0.0,18.0,44.444444,2020-03-08
2017104177,8,0.0,14.0,42.857143,2019-10-26
2017104177,10,0.0,15.0,33.333333,2019-09-15


In [35]:
p_race_results[p_race_results["horse_id"] == "2017104177"]

Unnamed: 0,着順,枠番,馬番,斤量,単勝,人気,course_len,weather,race_type,ground_state,date,horse_id,jockey_id,rank,gender,age,weight,change_weight,n_horses,place
201901010105,2,7,8,54.0,67.0,8.0,15.0,晴,芝,良,2019-07-27,2017104177,1109,1,牡,2,448,0,9,1
201901020209,4,6,6,54.0,6.2,2.0,15.0,晴,芝,良,2019-08-18,2017104177,1109,0,牡,2,456,8,8,1
201901020610,12,6,9,54.0,51.2,8.0,12.0,晴,芝,良,2019-09-01,2017104177,1109,0,牡,2,458,2,14,1
201904030505,8,8,14,52.0,7.7,4.0,14.0,曇,芝,重,2019-10-26,2017104177,1164,0,牡,2,464,2,13,4
201906040403,10,6,11,54.0,24.9,6.0,18.0,晴,芝,良,2019-09-15,2017104177,1109,0,牡,2,462,4,15,6
202003020312,14,7,13,54.0,25.2,10.0,17.0,小雨,ダート,稍重,2020-07-11,2017104177,1143,0,牡,3,480,0,15,3
202005020502,2,5,8,56.0,4.9,3.0,13.0,晴,ダート,良,2020-05-09,2017104177,1092,1,牡,3,476,-4,15,5
202005030103,6,4,7,56.0,6.2,4.0,13.0,曇,ダート,良,2020-06-06,2017104177,1092,0,牡,3,476,0,16,5
202005030604,1,8,16,56.0,8.6,4.0,16.0,晴,ダート,稍重,2020-06-21,2017104177,1092,1,牡,3,480,4,16,5
202006030604,2,7,13,56.0,7.1,4.0,12.0,曇,ダート,稍重,2020-04-12,2017104177,1092,1,牡,3,480,4,16,6


In [232]:
d_df = pd.get_dummies(m_race_results)

In [28]:
d_df

NameError: name 'd_df' is not defined

In [234]:
#時系列に沿って訓練データとテストデータに分ける関数
def split_data(df, test_size=0.3):
    sorted_id_list = df.sort_values("date").index.unique()
    train_id_list = sorted_id_list[: round(len(sorted_id_list) * (1 - test_size))]
    test_id_list = sorted_id_list[round(len(sorted_id_list) * (1 - test_size)) :]
    train = df.loc[train_id_list]
    test = df.loc[test_id_list]
    return train, test

In [235]:
train, valid  = split_data(d_df)

In [236]:
train.shape

(135147, 26511)

In [304]:
valid

Unnamed: 0,着順,枠番,馬番,斤量,単勝,人気,course_len,date,rank,age,weight,change_weight,n_horses,着順_allR,賞金_allR,着順/頭数_allR,着順_3R,賞金_3R,着順/頭数_3R,着順_5R,賞金_5R,着順/頭数_5R,着順_7R,賞金_7R,着順/頭数_7R,...,jockey_id_05622,jockey_id_05623,jockey_id_05624,jockey_id_05625,jockey_id_05626,jockey_id_05627,jockey_id_05628,jockey_id_05629,jockey_id_05630,jockey_id_05631,jockey_id_05632,jockey_id_05633,gender_セ,gender_牝,gender_牡,place_01,place_02,place_03,place_04,place_05,place_06,place_07,place_08,place_09,place_10
202109050103,1,2,4,55.0,5.7,3.0,16.0,2021-11-06,1,2,472,-4,17,7.500000,90.000000,47.500000,7.500000,90.000000,47.500000,7.50,90.0,47.500000,7.500000,90.000000,47.500000,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,2,6,11,55.0,18.7,5.0,16.0,2021-11-06,1,2,522,2,17,5.000000,70.000000,68.750000,5.000000,70.000000,68.750000,5.00,70.0,68.750000,5.000000,70.000000,68.750000,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,3,1,2,55.0,12.9,4.0,16.0,2021-11-06,1,2,452,4,17,4.000000,110.000000,75.000000,4.000000,110.000000,75.000000,4.00,110.0,75.000000,4.000000,110.000000,75.000000,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,4,7,13,54.0,2.0,1.0,16.0,2021-11-06,0,2,496,-8,17,2.250000,202.500000,76.515152,2.333333,176.666667,74.242424,2.25,202.5,76.515152,2.250000,202.500000,76.515152,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
202109050103,5,6,12,55.0,448.1,16.0,16.0,2021-11-06,0,2,442,2,17,10.000000,0.000000,33.333333,10.000000,0.000000,33.333333,10.00,0.0,33.333333,10.000000,0.000000,33.333333,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202306010812,5,6,6,57.0,8.2,5.0,16.0,2023-01-22,0,4,512,-4,9,6.000000,226.714286,53.350221,10.000000,0.000000,23.232323,7.00,266.0,46.356976,6.000000,226.714286,53.350221,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,6,7,7,56.0,4.3,2.0,16.0,2023-01-22,0,5,476,12,9,5.083333,241.366667,64.518782,6.666667,202.000000,53.212670,5.40,275.2,57.344268,4.571429,285.200000,64.812810,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
202306010812,7,5,5,58.0,74.2,9.0,16.0,2023-01-22,0,5,478,2,9,9.105263,110.105263,29.573774,9.000000,0.000000,36.698718,9.40,0.0,26.463675,10.571429,0.000000,21.680403,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,8,8,8,56.0,47.3,8.0,16.0,2023-01-22,0,5,476,2,9,4.571429,206.700000,71.237245,8.666667,37.000000,42.592593,7.60,78.2,50.019841,6.000000,219.542857,61.125283,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [238]:
X_train = train.drop(['rank', 'date', "着順"], axis=1)
y_train = train['rank']
X_valid = valid.drop(['rank', 'date', "着順"], axis=1)
y_valid = valid['rank']

In [239]:
import optuna.integration.lightgbm as lgb_o

In [165]:
# #データセットを作成
# lgb_train = lgb_o.Dataset(X_train.values, y_train.values)
# lgb_valid = lgb_o.Dataset(X_valid.values, y_valid.values)

# params = {
#     'objective': 'binary', #今回は0or1の二値予測なのでbinaryを指定
#     'random_state': 100
# }

# #チューニング実行
# lgb_clf_o = lgb_o.train(params, lgb_train,
#                         valid_sets=(lgb_train, lgb_valid),
#                         verbose_eval=100,
#                         early_stopping_rounds=10,
#                         optuna_seed=100 #optunaのseed固定
#                         )

[32m[I 2023-01-30 02:30:58,014][0m A new study created in memory with name: no-name-2b89d31f-afb5-4512-b9d3-e7af68de781f[0m


[A[A



[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:30:59,681][0m Trial 0 finished with value: 0.40972776524762594 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.40972776524762594.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[29]	valid_0's binary_logloss: 0.355943	valid_1's binary_logloss: 0.409728
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:01,348][0m Trial 1 finished with value: 0.41360824790740874 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.40972776524762594.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[42]	valid_0's binary_logloss: 0.347397	valid_1's binary_logloss: 0.413608
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:03,002][0m Trial 2 finished with value: 0.4086452726520066 and parameters: {'feature_fraction': 1.0}. Best is trial 2 with value: 0.4086452726520066.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[28]	valid_0's binary_logloss: 0.356906	valid_1's binary_logloss: 0.408645
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:04,687][0m Trial 3 finished with value: 0.40940318553653904 and parameters: {'feature_fraction': 0.8}. Best is trial 2 with value: 0.4086452726520066.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[30]	valid_0's binary_logloss: 0.355622	valid_1's binary_logloss: 0.409403
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:06,358][0m Trial 4 finished with value: 0.4129553729811211 and parameters: {'feature_fraction': 0.4}. Best is trial 2 with value: 0.4086452726520066.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[41]	valid_0's binary_logloss: 0.351099	valid_1's binary_logloss: 0.412955
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:08,014][0m Trial 5 finished with value: 0.4098559656341321 and parameters: {'feature_fraction': 0.7}. Best is trial 2 with value: 0.4086452726520066.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[36]	valid_0's binary_logloss: 0.349415	valid_1's binary_logloss: 0.409856
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:09,697][0m Trial 6 finished with value: 0.4116444317577667 and parameters: {'feature_fraction': 0.6}. Best is trial 2 with value: 0.4086452726520066.[0m
feature_fraction, val_score: 0.408645: 100%|##########| 7/7 [00:11<00:00,  1.67s/it]


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[34]	valid_0's binary_logloss: 0.35457	valid_1's binary_logloss: 0.411644




[A[A

[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[19]	valid_0's binary_logloss: 0.297146	valid_1's binary_logloss: 0.423045




[A[A

[A[A[32m[I 2023-01-30 02:31:11,525][0m Trial 7 finished with value: 0.4230449890445591 and parameters: {'num_leaves': 140}. Best is trial 7 with value: 0.4230449890445591.[0m


[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:13,262][0m Trial 8 finished with value: 0.41370116784190597 and parameters: {'num_leaves': 72}. Best is trial 8 with value: 0.41370116784190597.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[27]	valid_0's binary_logloss: 0.31263	valid_1's binary_logloss: 0.413701
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[25]	valid_0's binary_logloss: 0.288654	valid_1's binary_logloss: 0.420465




[A[A

[A[A[32m[I 2023-01-30 02:31:15,096][0m Trial 9 finished with value: 0.42046482078070163 and parameters: {'num_leaves': 110}. Best is trial 8 with value: 0.41370116784190597.[0m


[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds




[A[A

[A[A[32m[I 2023-01-30 02:31:17,038][0m Trial 10 finished with value: 0.4234147037040568 and parameters: {'num_leaves': 217}. Best is trial 8 with value: 0.41370116784190597.[0m


[A[A

Early stopping, best iteration is:
[18]	valid_0's binary_logloss: 0.281877	valid_1's binary_logloss: 0.423415
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:18,676][0m Trial 11 finished with value: 0.40427724000964876 and parameters: {'num_leaves': 3}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[59]	valid_0's binary_logloss: 0.397834	valid_1's binary_logloss: 0.404277
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:20,351][0m Trial 12 finished with value: 0.4086854393814434 and parameters: {'num_leaves': 33}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[31]	valid_0's binary_logloss: 0.348969	valid_1's binary_logloss: 0.408685
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds




[A[A

[A[A[32m[I 2023-01-30 02:31:22,211][0m Trial 13 finished with value: 0.42130830574089967 and parameters: {'num_leaves': 173}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

Early stopping, best iteration is:
[20]	valid_0's binary_logloss: 0.27914	valid_1's binary_logloss: 0.421308
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds




[A[A

[A[A[32m[I 2023-01-30 02:31:24,131][0m Trial 14 finished with value: 0.4235990069066105 and parameters: {'num_leaves': 212}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

Early stopping, best iteration is:
[19]	valid_0's binary_logloss: 0.274296	valid_1's binary_logloss: 0.423599
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:25,797][0m Trial 15 finished with value: 0.40955654189253127 and parameters: {'num_leaves': 36}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[25]	valid_0's binary_logloss: 0.355917	valid_1's binary_logloss: 0.409557
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds




[A[A

[A[A[32m[I 2023-01-30 02:31:27,630][0m Trial 16 finished with value: 0.4220176854940224 and parameters: {'num_leaves': 148}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.284406	valid_1's binary_logloss: 0.422018
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:29,253][0m Trial 17 finished with value: 0.40458806520423757 and parameters: {'num_leaves': 2}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[82]	valid_0's binary_logloss: 0.39974	valid_1's binary_logloss: 0.404588
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:30,900][0m Trial 18 finished with value: 0.4058835749101154 and parameters: {'num_leaves': 9}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[33]	valid_0's binary_logloss: 0.389379	valid_1's binary_logloss: 0.405884
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:32,531][0m Trial 19 finished with value: 0.40458806520423757 and parameters: {'num_leaves': 2}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[82]	valid_0's binary_logloss: 0.39974	valid_1's binary_logloss: 0.404588
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:34,300][0m Trial 20 finished with value: 0.4160681218312039 and parameters: {'num_leaves': 75}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[24]	valid_0's binary_logloss: 0.318783	valid_1's binary_logloss: 0.416068
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:36,020][0m Trial 21 finished with value: 0.4151508216112874 and parameters: {'num_leaves': 71}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[24]	valid_0's binary_logloss: 0.320997	valid_1's binary_logloss: 0.415151
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds




[A[A

[A[A[32m[I 2023-01-30 02:31:37,975][0m Trial 22 finished with value: 0.4248898471734256 and parameters: {'num_leaves': 254}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

Early stopping, best iteration is:
[18]	valid_0's binary_logloss: 0.279658	valid_1's binary_logloss: 0.42489
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:39,664][0m Trial 23 finished with value: 0.411280961704476 and parameters: {'num_leaves': 40}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[28]	valid_0's binary_logloss: 0.344844	valid_1's binary_logloss: 0.411281
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds




[A[A

[A[A[32m[I 2023-01-30 02:31:41,491][0m Trial 24 finished with value: 0.4232994356645677 and parameters: {'num_leaves': 113}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

Early stopping, best iteration is:
[20]	valid_0's binary_logloss: 0.308757	valid_1's binary_logloss: 0.423299
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:43,209][0m Trial 25 finished with value: 0.41181461955021165 and parameters: {'num_leaves': 54}. Best is trial 11 with value: 0.40427724000964876.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[25]	valid_0's binary_logloss: 0.337055	valid_1's binary_logloss: 0.411815
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:44,885][0m Trial 26 finished with value: 0.40657831801615074 and parameters: {'num_leaves': 15}. Best is trial 11 with value: 0.40427724000964876.[0m
num_leaves, val_score: 0.404277: 100%|##########| 20/20 [00:35<00:00,  1.76s/it]


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[29]	valid_0's binary_logloss: 0.379534	valid_1's binary_logloss: 0.406578




[A[A

[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:46,541][0m Trial 27 finished with value: 0.4032287161560884 and parameters: {'bagging_fraction': 0.7260429650751228, 'bagging_freq': 2}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[63]	valid_0's binary_logloss: 0.397377	valid_1's binary_logloss: 0.403229
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:48,192][0m Trial 28 finished with value: 0.40491940171796975 and parameters: {'bagging_fraction': 0.6547105544499044, 'bagging_freq': 6}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[54]	valid_0's binary_logloss: 0.39852	valid_1's binary_logloss: 0.404919
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:49,833][0m Trial 29 finished with value: 0.4040084012939048 and parameters: {'bagging_fraction': 0.4028313137145883, 'bagging_freq': 1}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[46]	valid_0's binary_logloss: 0.399364	valid_1's binary_logloss: 0.404008
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:51,457][0m Trial 30 finished with value: 0.4039963082416649 and parameters: {'bagging_fraction': 0.802449450836738, 'bagging_freq': 6}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[85]	valid_0's binary_logloss: 0.395403	valid_1's binary_logloss: 0.403996
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:53,102][0m Trial 31 finished with value: 0.40397558502916886 and parameters: {'bagging_fraction': 0.4820239538111085, 'bagging_freq': 5}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
[100]	valid_0's binary_logloss: 0.394557	valid_1's binary_logloss: 0.404719
Early stopping, best iteration is:
[90]	valid_0's binary_logloss: 0.395214	valid_1's binary_logloss: 0.403976
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:54,741][0m Trial 32 finished with value: 0.40396391201526227 and parameters: {'bagging_fraction': 0.9347931725882498, 'bagging_freq': 2}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[60]	valid_0's binary_logloss: 0.397607	valid_1's binary_logloss: 0.403964
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:56,350][0m Trial 33 finished with value: 0.4039407294630518 and parameters: {'bagging_fraction': 0.5111969317302304, 'bagging_freq': 1}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[56]	valid_0's binary_logloss: 0.398045	valid_1's binary_logloss: 0.403941
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:57,964][0m Trial 34 finished with value: 0.4058541582286716 and parameters: {'bagging_fraction': 0.531818495575215, 'bagging_freq': 7}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[51]	valid_0's binary_logloss: 0.399359	valid_1's binary_logloss: 0.405854
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:31:59,580][0m Trial 35 finished with value: 0.4042130328577899 and parameters: {'bagging_fraction': 0.8870098894544057, 'bagging_freq': 2}. Best is trial 27 with value: 0.4032287161560884.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[46]	valid_0's binary_logloss: 0.399298	valid_1's binary_logloss: 0.404213
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:01,188][0m Trial 36 finished with value: 0.40440980982895147 and parameters: {'bagging_fraction': 0.8897348492363203, 'bagging_freq': 2}. Best is trial 27 with value: 0.4032287161560884.[0m
bagging, val_score: 0.403229: 100%|##########| 10/10 [00:16<00:00,  1.63s/it]


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[60]	valid_0's binary_logloss: 0.397347	valid_1's binary_logloss: 0.40441




[A[A

[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:02,813][0m Trial 37 finished with value: 0.4031668551223441 and parameters: {'feature_fraction': 0.9520000000000001}. Best is trial 37 with value: 0.4031668551223441.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397271	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:04,428][0m Trial 38 finished with value: 0.4032287161560884 and parameters: {'feature_fraction': 0.9840000000000001}. Best is trial 37 with value: 0.4031668551223441.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[63]	valid_0's binary_logloss: 0.397377	valid_1's binary_logloss: 0.403229
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:06,052][0m Trial 39 finished with value: 0.40333638145035194 and parameters: {'feature_fraction': 0.92}. Best is trial 37 with value: 0.4031668551223441.[0m
feature_fraction_stage2, val_score: 0.403167: 100%|##########| 3/3 [00:04<00:00,  1.62s/it]


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397132	valid_1's binary_logloss: 0.403336




[A[A

[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:07,679][0m Trial 40 finished with value: 0.4031668842744717 and parameters: {'lambda_l1': 0.0007773998922821829, 'lambda_l2': 3.2012859298995277e-06}. Best is trial 40 with value: 0.4031668842744717.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397271	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:09,305][0m Trial 41 finished with value: 0.40320733820165333 and parameters: {'lambda_l1': 6.616957066014342e-05, 'lambda_l2': 0.400853048601546}. Best is trial 40 with value: 0.4031668842744717.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[60]	valid_0's binary_logloss: 0.397812	valid_1's binary_logloss: 0.403207
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:10,936][0m Trial 42 finished with value: 0.4031668551220663 and parameters: {'lambda_l1': 1.1027313099672533e-08, 'lambda_l2': 1.242001404761155e-07}. Best is trial 42 with value: 0.4031668551220663.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397271	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:12,576][0m Trial 43 finished with value: 0.4031143198287146 and parameters: {'lambda_l1': 0.010882827930218712, 'lambda_l2': 0.2708162972907513}. Best is trial 43 with value: 0.4031143198287146.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[61]	valid_0's binary_logloss: 0.397658	valid_1's binary_logloss: 0.403114
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:14,192][0m Trial 44 finished with value: 0.4031668167575042 and parameters: {'lambda_l1': 1.6996492507894156e-07, 'lambda_l2': 0.0014991323116035308}. Best is trial 43 with value: 0.4031143198287146.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397271	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:15,815][0m Trial 45 finished with value: 0.40290604003923475 and parameters: {'lambda_l1': 1.0517138394360073, 'lambda_l2': 7.635176818135586e-07}. Best is trial 45 with value: 0.40290604003923475.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.39774	valid_1's binary_logloss: 0.402906
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:17,449][0m Trial 46 finished with value: 0.40316685513779 and parameters: {'lambda_l1': 4.655367559816141e-07, 'lambda_l2': 9.449134137745608e-08}. Best is trial 45 with value: 0.40290604003923475.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397271	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:19,088][0m Trial 47 finished with value: 0.40269799389611555 and parameters: {'lambda_l1': 9.490245203532942e-07, 'lambda_l2': 6.421168438428032}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.396845	valid_1's binary_logloss: 0.402698
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:20,723][0m Trial 48 finished with value: 0.4031545721139141 and parameters: {'lambda_l1': 0.2019055894080857, 'lambda_l2': 3.5275169933928286e-07}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397436	valid_1's binary_logloss: 0.403155
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:22,358][0m Trial 49 finished with value: 0.4031551976074371 and parameters: {'lambda_l1': 0.22183125618514202, 'lambda_l2': 2.9286247167445133e-06}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397445	valid_1's binary_logloss: 0.403155
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:23,995][0m Trial 50 finished with value: 0.40274412024357975 and parameters: {'lambda_l1': 3.0057843641607915e-05, 'lambda_l2': 8.38297710342227}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.397269	valid_1's binary_logloss: 0.402744
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:25,622][0m Trial 51 finished with value: 0.40284670517681836 and parameters: {'lambda_l1': 1.313258865327316e-05, 'lambda_l2': 7.718638255265963}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[65]	valid_0's binary_logloss: 0.39874	valid_1's binary_logloss: 0.402847
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:27,269][0m Trial 52 finished with value: 0.4027443781855442 and parameters: {'lambda_l1': 4.6023489639462855e-06, 'lambda_l2': 8.389123215314347}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.397269	valid_1's binary_logloss: 0.402744
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:28,904][0m Trial 53 finished with value: 0.40323158862810166 and parameters: {'lambda_l1': 0.0004367319305426777, 'lambda_l2': 0.08533075355103018}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397292	valid_1's binary_logloss: 0.403232
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:30,546][0m Trial 54 finished with value: 0.4031666468373704 and parameters: {'lambda_l1': 4.147618544306146e-06, 'lambda_l2': 0.008214974969922708}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397275	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:32,169][0m Trial 55 finished with value: 0.4029156100906453 and parameters: {'lambda_l1': 7.274122678542945e-05, 'lambda_l2': 6.9292223015182755}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[65]	valid_0's binary_logloss: 0.398483	valid_1's binary_logloss: 0.402916
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:33,828][0m Trial 56 finished with value: 0.4031669928424635 and parameters: {'lambda_l1': 0.0036866969189136233, 'lambda_l2': 4.40010699274024e-05}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397272	valid_1's binary_logloss: 0.403167
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:35,470][0m Trial 57 finished with value: 0.4037787289143046 and parameters: {'lambda_l1': 8.923803146534746, 'lambda_l2': 0.020503066366016717}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.398833	valid_1's binary_logloss: 0.403779
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:37,116][0m Trial 58 finished with value: 0.4032638301998521 and parameters: {'lambda_l1': 4.416301392171742e-07, 'lambda_l2': 0.7812505014923614}. Best is trial 47 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[60]	valid_0's binary_logloss: 0.398069	valid_1's binary_logloss: 0.403264
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:38,772][0m Trial 59 finished with value: 0.4031668470587231 and parameters: {'lambda_l1': 3.109980300965093e-05, 'lambda_l2': 0.00036012195933527775}. Best is trial 47 with value: 0.40269799389611555.[0m
regularization_factors, val_score: 0.402698: 100%|##########| 20/20 [00:32<00:00,  1.64s/it]


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[64]	valid_0's binary_logloss: 0.397271	valid_1's binary_logloss: 0.403167




[A[A

[A[A

[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:40,695][0m Trial 60 finished with value: 0.4027594676181243 and parameters: {'min_child_samples': 50}. Best is trial 60 with value: 0.4027594676181243.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
[100]	valid_0's binary_logloss: 0.396422	valid_1's binary_logloss: 0.403264
Early stopping, best iteration is:
[90]	valid_0's binary_logloss: 0.396917	valid_1's binary_logloss: 0.402759
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:42,353][0m Trial 61 finished with value: 0.40269799389611555 and parameters: {'min_child_samples': 25}. Best is trial 61 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.396845	valid_1's binary_logloss: 0.402698
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:43,997][0m Trial 62 finished with value: 0.4027566027693828 and parameters: {'min_child_samples': 100}. Best is trial 61 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[66]	valid_0's binary_logloss: 0.398489	valid_1's binary_logloss: 0.402757
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:45,741][0m Trial 63 finished with value: 0.40269799389611555 and parameters: {'min_child_samples': 5}. Best is trial 61 with value: 0.40269799389611555.[0m


[A[A

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.396845	valid_1's binary_logloss: 0.402698
[LightGBM] [Info] Number of positive: 1662, number of negative: 6071
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11638
[LightGBM] [Info] Number of data points in the train set: 7733, number of used features: 5576




[A[A

[A[A[32m[I 2023-01-30 02:32:47,407][0m Trial 64 finished with value: 0.40269799389611555 and parameters: {'min_child_samples': 10}. Best is trial 61 with value: 0.40269799389611555.[0m
min_data_in_leaf, val_score: 0.402698: 100%|##########| 5/5 [00:08<00:00,  1.73s/it]

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.214923 -> initscore=-1.295502
[LightGBM] [Info] Start training from score -1.295502
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.396845	valid_1's binary_logloss: 0.402698





In [240]:
params = {'objective': 'binary',
 'random_state': 100,
 'feature_pre_filter': False,
 'lambda_l1': 9.490245203532942e-07,
 'lambda_l2': 6.421168438428032,
 'num_leaves': 3,
 'feature_fraction': 0.9520000000000001,
 'bagging_fraction': 0.7260429650751228,
 'bagging_freq': 2,
 'min_child_samples': 20,
 'num_iterations': 1000,}

In [241]:
lgb_clf = lgb.LGBMClassifier(**params)
lgb_clf.fit(X_train.values, y_train.values)





In [245]:
# モデルの保存
import pickle
file = 'trained_model.pkl'
pickle.dump(lgb_clf, open(file, 'wb'))

In [246]:
y_pred_train = lgb_clf.predict_proba(X_train)[:,1]

In [247]:
y_pred_train

array([0.8032649 , 0.18645396, 0.21127822, ..., 0.13426204, 0.06753806,
       0.05473325])

In [248]:
y_pred = lgb_clf.predict_proba(X_valid)[:,1]

In [302]:
X_train

Unnamed: 0,枠番,馬番,斤量,単勝,人気,course_len,age,weight,change_weight,n_horses,着順_allR,賞金_allR,着順/頭数_allR,着順_3R,賞金_3R,着順/頭数_3R,着順_5R,賞金_5R,着順/頭数_5R,着順_7R,賞金_7R,着順/頭数_7R,weather_小雨,weather_小雪,weather_晴,...,jockey_id_05622,jockey_id_05623,jockey_id_05624,jockey_id_05625,jockey_id_05626,jockey_id_05627,jockey_id_05628,jockey_id_05629,jockey_id_05630,jockey_id_05631,jockey_id_05632,jockey_id_05633,gender_セ,gender_牝,gender_牡,place_01,place_02,place_03,place_04,place_05,place_06,place_07,place_08,place_09,place_10
201908010105,7,14,56.0,1.5,1.0,18.0,3,490,0,16,,,,,,,,,,,,,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
201908010105,2,4,54.0,18.9,5.0,18.0,3,474,0,16,,,,,,,,,,,,,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
201908010105,1,2,53.0,18.1,4.0,18.0,3,464,0,16,,,,,,,,,,,,,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
201908010105,4,7,56.0,5.5,2.0,18.0,3,456,0,16,,,,,,,,,,,,,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
201908010105,8,16,54.0,115.6,12.0,18.0,3,398,0,16,,,,,,,,,,,,,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202109050102,6,7,55.0,3.4,1.0,12.0,2,498,8,11,3.0,180.0,62.500000,3.0,180.0,62.500000,3.0,180.0,62.500000,3.0,180.0,62.500000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050102,1,1,55.0,308.6,11.0,12.0,2,476,4,11,9.4,0.0,11.914530,9.0,0.0,16.153846,9.4,0.0,11.914530,9.4,0.0,11.914530,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050102,3,3,54.0,33.6,7.0,12.0,2,446,4,11,11.5,0.0,8.823529,11.5,0.0,8.823529,11.5,0.0,8.823529,11.5,0.0,8.823529,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
202109050102,8,10,55.0,56.6,8.0,12.0,2,468,0,11,15.0,0.0,5.882353,15.0,0.0,5.882353,15.0,0.0,5.882353,15.0,0.0,5.882353,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0


In [301]:
X_valid

Unnamed: 0,枠番,馬番,斤量,単勝,人気,course_len,age,weight,change_weight,n_horses,着順_allR,賞金_allR,着順/頭数_allR,着順_3R,賞金_3R,着順/頭数_3R,着順_5R,賞金_5R,着順/頭数_5R,着順_7R,賞金_7R,着順/頭数_7R,weather_小雨,weather_小雪,weather_晴,...,jockey_id_05622,jockey_id_05623,jockey_id_05624,jockey_id_05625,jockey_id_05626,jockey_id_05627,jockey_id_05628,jockey_id_05629,jockey_id_05630,jockey_id_05631,jockey_id_05632,jockey_id_05633,gender_セ,gender_牝,gender_牡,place_01,place_02,place_03,place_04,place_05,place_06,place_07,place_08,place_09,place_10
202109050103,2,4,55.0,5.7,3.0,16.0,2,472,-4,17,7.500000,90.000000,47.500000,7.500000,90.000000,47.500000,7.50,90.0,47.500000,7.500000,90.000000,47.500000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,6,11,55.0,18.7,5.0,16.0,2,522,2,17,5.000000,70.000000,68.750000,5.000000,70.000000,68.750000,5.00,70.0,68.750000,5.000000,70.000000,68.750000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,1,2,55.0,12.9,4.0,16.0,2,452,4,17,4.000000,110.000000,75.000000,4.000000,110.000000,75.000000,4.00,110.0,75.000000,4.000000,110.000000,75.000000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,7,13,54.0,2.0,1.0,16.0,2,496,-8,17,2.250000,202.500000,76.515152,2.333333,176.666667,74.242424,2.25,202.5,76.515152,2.250000,202.500000,76.515152,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
202109050103,6,12,55.0,448.1,16.0,16.0,2,442,2,17,10.000000,0.000000,33.333333,10.000000,0.000000,33.333333,10.00,0.0,33.333333,10.000000,0.000000,33.333333,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202306010812,6,6,57.0,8.2,5.0,16.0,4,512,-4,9,6.000000,226.714286,53.350221,10.000000,0.000000,23.232323,7.00,266.0,46.356976,6.000000,226.714286,53.350221,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,7,7,56.0,4.3,2.0,16.0,5,476,12,9,5.083333,241.366667,64.518782,6.666667,202.000000,53.212670,5.40,275.2,57.344268,4.571429,285.200000,64.812810,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
202306010812,5,5,58.0,74.2,9.0,16.0,5,478,2,9,9.105263,110.105263,29.573774,9.000000,0.000000,36.698718,9.40,0.0,26.463675,10.571429,0.000000,21.680403,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,8,8,56.0,47.3,8.0,16.0,5,476,2,9,4.571429,206.700000,71.237245,8.666667,37.000000,42.592593,7.60,78.2,50.019841,6.000000,219.542857,61.125283,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [250]:
result_data = lgb_clf.predict_proba(X_valid)[:, 1]

In [303]:
result_data

array([0.43207796, 0.20913978, 0.28305063, ..., 0.06539425, 0.10492492,
       0.09392313])

In [251]:
X_valid

Unnamed: 0,枠番,馬番,斤量,単勝,人気,course_len,age,weight,change_weight,n_horses,着順_allR,賞金_allR,着順/頭数_allR,着順_3R,賞金_3R,着順/頭数_3R,着順_5R,賞金_5R,着順/頭数_5R,着順_7R,賞金_7R,着順/頭数_7R,weather_小雨,weather_小雪,weather_晴,...,jockey_id_05622,jockey_id_05623,jockey_id_05624,jockey_id_05625,jockey_id_05626,jockey_id_05627,jockey_id_05628,jockey_id_05629,jockey_id_05630,jockey_id_05631,jockey_id_05632,jockey_id_05633,gender_セ,gender_牝,gender_牡,place_01,place_02,place_03,place_04,place_05,place_06,place_07,place_08,place_09,place_10
202109050103,2,4,55.0,5.7,3.0,16.0,2,472,-4,17,7.500000,90.000000,47.500000,7.500000,90.000000,47.500000,7.50,90.0,47.500000,7.500000,90.000000,47.500000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,6,11,55.0,18.7,5.0,16.0,2,522,2,17,5.000000,70.000000,68.750000,5.000000,70.000000,68.750000,5.00,70.0,68.750000,5.000000,70.000000,68.750000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,1,2,55.0,12.9,4.0,16.0,2,452,4,17,4.000000,110.000000,75.000000,4.000000,110.000000,75.000000,4.00,110.0,75.000000,4.000000,110.000000,75.000000,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,7,13,54.0,2.0,1.0,16.0,2,496,-8,17,2.250000,202.500000,76.515152,2.333333,176.666667,74.242424,2.25,202.5,76.515152,2.250000,202.500000,76.515152,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
202109050103,6,12,55.0,448.1,16.0,16.0,2,442,2,17,10.000000,0.000000,33.333333,10.000000,0.000000,33.333333,10.00,0.0,33.333333,10.000000,0.000000,33.333333,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202306010812,6,6,57.0,8.2,5.0,16.0,4,512,-4,9,6.000000,226.714286,53.350221,10.000000,0.000000,23.232323,7.00,266.0,46.356976,6.000000,226.714286,53.350221,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,7,7,56.0,4.3,2.0,16.0,5,476,12,9,5.083333,241.366667,64.518782,6.666667,202.000000,53.212670,5.40,275.2,57.344268,4.571429,285.200000,64.812810,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
202306010812,5,5,58.0,74.2,9.0,16.0,5,478,2,9,9.105263,110.105263,29.573774,9.000000,0.000000,36.698718,9.40,0.0,26.463675,10.571429,0.000000,21.680403,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,8,8,56.0,47.3,8.0,16.0,5,476,2,9,4.571429,206.700000,71.237245,8.666667,37.000000,42.592593,7.60,78.2,50.019841,6.000000,219.542857,61.125283,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [252]:
len(result_data)

57533

In [253]:
result_data

array([0.43207796, 0.20913978, 0.28305063, ..., 0.06539425, 0.10492492,
       0.09392313])

In [254]:
valid

Unnamed: 0,着順,枠番,馬番,斤量,単勝,人気,course_len,date,rank,age,weight,change_weight,n_horses,着順_allR,賞金_allR,着順/頭数_allR,着順_3R,賞金_3R,着順/頭数_3R,着順_5R,賞金_5R,着順/頭数_5R,着順_7R,賞金_7R,着順/頭数_7R,...,jockey_id_05622,jockey_id_05623,jockey_id_05624,jockey_id_05625,jockey_id_05626,jockey_id_05627,jockey_id_05628,jockey_id_05629,jockey_id_05630,jockey_id_05631,jockey_id_05632,jockey_id_05633,gender_セ,gender_牝,gender_牡,place_01,place_02,place_03,place_04,place_05,place_06,place_07,place_08,place_09,place_10
202109050103,1,2,4,55.0,5.7,3.0,16.0,2021-11-06,1,2,472,-4,17,7.500000,90.000000,47.500000,7.500000,90.000000,47.500000,7.50,90.0,47.500000,7.500000,90.000000,47.500000,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,2,6,11,55.0,18.7,5.0,16.0,2021-11-06,1,2,522,2,17,5.000000,70.000000,68.750000,5.000000,70.000000,68.750000,5.00,70.0,68.750000,5.000000,70.000000,68.750000,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,3,1,2,55.0,12.9,4.0,16.0,2021-11-06,1,2,452,4,17,4.000000,110.000000,75.000000,4.000000,110.000000,75.000000,4.00,110.0,75.000000,4.000000,110.000000,75.000000,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
202109050103,4,7,13,54.0,2.0,1.0,16.0,2021-11-06,0,2,496,-8,17,2.250000,202.500000,76.515152,2.333333,176.666667,74.242424,2.25,202.5,76.515152,2.250000,202.500000,76.515152,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
202109050103,5,6,12,55.0,448.1,16.0,16.0,2021-11-06,0,2,442,2,17,10.000000,0.000000,33.333333,10.000000,0.000000,33.333333,10.00,0.0,33.333333,10.000000,0.000000,33.333333,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202306010812,5,6,6,57.0,8.2,5.0,16.0,2023-01-22,0,4,512,-4,9,6.000000,226.714286,53.350221,10.000000,0.000000,23.232323,7.00,266.0,46.356976,6.000000,226.714286,53.350221,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,6,7,7,56.0,4.3,2.0,16.0,2023-01-22,0,5,476,12,9,5.083333,241.366667,64.518782,6.666667,202.000000,53.212670,5.40,275.2,57.344268,4.571429,285.200000,64.812810,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
202306010812,7,5,5,58.0,74.2,9.0,16.0,2023-01-22,0,5,478,2,9,9.105263,110.105263,29.573774,9.000000,0.000000,36.698718,9.40,0.0,26.463675,10.571429,0.000000,21.680403,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
202306010812,8,8,8,56.0,47.3,8.0,16.0,2023-01-22,0,5,476,2,9,4.571429,206.700000,71.237245,8.666667,37.000000,42.592593,7.60,78.2,50.019841,6.000000,219.542857,61.125283,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [255]:
result_data

array([0.43207796, 0.20913978, 0.28305063, ..., 0.06539425, 0.10492492,
       0.09392313])

In [256]:
y_results_2 = [0 if p<0.5 else 1 for p in result_data]

In [257]:
pd.Series(y_results_2).value_counts()

0    50914
1     6619
dtype: int64

In [267]:
pred_table = valid[["着順","rank","人気"]]
pred_table

Unnamed: 0,着順,rank,人気
202109050103,1,1,3.0
202109050103,2,1,5.0
202109050103,3,1,4.0
202109050103,4,0,1.0
202109050103,5,0,16.0
...,...,...,...
202306010812,5,0,5.0
202306010812,6,0,2.0
202306010812,7,0,9.0
202306010812,8,0,8.0


In [268]:
pred_table["pred"] = y_results_2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_table["pred"] = y_results_2


In [269]:
pred_table

Unnamed: 0,着順,rank,人気,pred
202109050103,1,1,3.0,0
202109050103,2,1,5.0,0
202109050103,3,1,4.0,0
202109050103,4,0,1.0,1
202109050103,5,0,16.0,0
...,...,...,...,...
202306010812,5,0,5.0,0
202306010812,6,0,2.0,1
202306010812,7,0,9.0,0
202306010812,8,0,8.0,0


In [270]:
atari = pred_table[(pred_table["pred"] == 1) & (pred_table["rank"] == pred_table["pred"])]

In [271]:
kounyu =  pred_table[pred_table["pred"] == 1]

In [272]:
atari

Unnamed: 0,着順,rank,人気,pred
202109050106,1,1,1.0,1
202109050106,2,1,2.0,1
202109050107,1,1,2.0,1
202109050104,1,1,1.0,1
202109050105,1,1,1.0,1
...,...,...,...,...
202307010805,2,1,3.0,1
202307010804,2,1,3.0,1
202307010804,3,1,1.0,1
202306010812,1,1,3.0,1


In [273]:
atari["人気"].value_counts()

1.0    2508
2.0    1321
3.0     329
4.0      34
5.0       2
Name: 人気, dtype: int64

In [274]:
return_df = pd.read_pickle("../data/original_data/return_tables.pickle")

In [275]:
return_df

Unnamed: 0,0,1,2,3
202205050101,単勝,2,290,2
202205050101,複勝,2br8br3,150br810br210,2br7br3
202205050101,枠連,1 - 4,310,1
202205050101,馬連,2 - 8,9730,22
202205050101,ワイド,2 - 8br2 - 3br3 - 8,"2,010br420br3,560",19br4br28
...,...,...,...,...
202210040812,馬連,10 - 15,1150,2
202210040812,ワイド,10 - 15br3 - 15br3 - 10,"490br1,940br1,470",2br21br19
202210040812,馬単,15 → 10,2010,3
202210040812,三連複,3 - 10 - 15,7020,21


In [276]:
def tansho(df):
    tansho = df[df[0]=='単勝'][[1,2]]
    tansho.columns = ['win', 'return']
    
    for column in tansho.columns:
        tansho[column] = pd.to_numeric(tansho[column], errors='coerce')
        
    return tansho

In [277]:
p_return_df = tansho(return_df)

In [278]:
p_return_df

Unnamed: 0,win,return
202205050101,2.0,290.0
202205050102,10.0,770.0
202205050103,10.0,13120.0
202205050104,6.0,220.0
202205050105,16.0,1510.0
...,...,...
202210040808,8.0,280.0
202210040809,18.0,600.0
202210040810,6.0,1290.0
202210040811,1.0,740.0


In [279]:
tansho_kekka = atari[atari["着順"] == 1]

In [280]:
tansho_kekka

Unnamed: 0,着順,rank,人気,pred
202109050106,1,1,1.0,1
202109050107,1,1,2.0,1
202109050104,1,1,1.0,1
202109050105,1,1,1.0,1
202105050211,1,1,1.0,1
...,...,...,...,...
202306010803,1,1,1.0,1
202307010802,1,1,1.0,1
202307010801,1,1,1.0,1
202307010805,1,1,2.0,1


In [281]:
df_m = pd.merge(tansho_kekka, p_return_df, how="inner", left_index=True, right_index=True)

In [282]:
df_m

Unnamed: 0,着順,rank,人気,pred,win,return
202109050106,1,1,1.0,1,1.0,230.0
202109050107,1,1,2.0,1,5.0,420.0
202109050104,1,1,1.0,1,2.0,170.0
202109050105,1,1,1.0,1,5.0,250.0
202105050211,1,1,1.0,1,10.0,300.0
...,...,...,...,...,...,...
202306010803,1,1,1.0,1,4.0,190.0
202307010802,1,1,1.0,1,1.0,190.0
202307010801,1,1,1.0,1,9.0,120.0
202307010805,1,1,2.0,1,11.0,280.0


In [283]:
df_m["return"].sum()

547650.0

In [284]:
kounyu

Unnamed: 0,着順,rank,人気,pred
202109050103,4,0,1.0,1
202109050103,15,0,2.0,1
202105050112,4,0,1.0,1
202109050106,1,1,1.0,1
202109050106,2,1,2.0,1
...,...,...,...,...
202307010804,4,0,2.0,1
202306010811,5,0,1.0,1
202306010812,1,1,3.0,1
202306010812,3,1,1.0,1


In [286]:
kounyu[kounyu["人気"] != 1]

Unnamed: 0,着順,rank,人気,pred
202109050103,15,0,2.0,1
202109050106,2,1,2.0,1
202109050107,1,1,2.0,1
202109050104,4,0,2.0,1
202109050105,2,1,3.0,1
...,...,...,...,...
202307010805,2,1,3.0,1
202307010804,2,1,3.0,1
202307010804,4,0,2.0,1
202306010812,1,1,3.0,1


In [293]:
kounyu["人気"].value_counts()

1.0    3669
2.0    2265
3.0     634
4.0      49
5.0       2
Name: 人気, dtype: int64

In [None]:
21400

In [294]:
tansho_without_1 = kounyu[(kounyu["人気"] != 1) & (kounyu["人気"] != 2) & (kounyu["rank"] == kounyu["pred"]) & (kounyu["着順"] == 1)]

In [295]:
df_m = pd.merge(tansho_without_1, p_return_df, how="inner", left_index=True, right_index=True)

In [296]:
df_m

Unnamed: 0,着順,rank,人気,pred,win,return
202105050205,1,1,3.0,1,1.0,480.0
202109050306,1,1,3.0,1,8.0,470.0
202105050410,1,1,3.0,1,7.0,440.0
202105050406,1,1,3.0,1,4.0,650.0
202105050506,1,1,3.0,1,9.0,390.0
...,...,...,...,...,...,...
202307010506,1,1,3.0,1,3.0,380.0
202307010701,1,1,3.0,1,13.0,430.0
202310010403,1,1,3.0,1,12.0,480.0
202310010407,1,1,3.0,1,4.0,420.0


In [299]:
kounyu[(kounyu["人気"] != 1) & (kounyu["人気"] != 2)]

Unnamed: 0,着順,rank,人気,pred
202109050105,2,1,3.0,1
202105050209,4,0,3.0,1
202103020201,8,0,3.0,1
202109050203,6,0,3.0,1
202105050205,1,1,3.0,1
...,...,...,...,...
202310010407,1,1,3.0,1
202306010807,5,0,3.0,1
202307010805,2,1,3.0,1
202307010804,2,1,3.0,1


In [297]:
df_m["return"].sum()

60530.0

In [300]:
68500 - 60530

7970

In [290]:
21400 - 17280

4120

In [291]:
247380 - 295000

-47620