![image.png](attachment:37cd9521-4a03-4bc0-b4fa-8013f6fce265.png)

### 初めての日本オーナーのコンペ。また未来予測系のコンペですね。
### 毎日少しずつ解いていきます。
### コードが競争力のあるものになったら、いつものようにprivateにします。

### First Japanese owner's competition. It's another future prediction type competition.
### I will solve it a little every day.
### Once the code is competitive, I will make it PRIVATE as usual.

<span style="color: orange; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">Import</span>

In [None]:
import os
import pickle
import sys
import warnings
from glob import glob

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lightgbm import LGBMRegressor
import shap
import xgboost
from scipy.stats import spearmanr
from sklearn.ensemble import (
    ExtraTreesRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm.notebook import tqdm

<span style="color: orange; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">data_specifications</span>

In [None]:
# Load every data-specification CSV into a dict of DataFrames.
dataset_dir = "../input/jpx-tokyo-stock-exchange-prediction/data_specifications/"
# Files to read. os.path.join avoids the doubled "//" that the previous
# f"{dataset_dir}/..." form produced with a trailing-slash directory.
inputs = {
    "options_spec": os.path.join(dataset_dir, "options_spec.csv"),
    "stock_fin_spec": os.path.join(dataset_dir, "stock_fin_spec.csv"),
    # NOTE(review): this key carries a ".csv" suffix unlike its siblings;
    # kept unchanged so any existing lookups keep working.
    "stock_list_spec.csv": os.path.join(dataset_dir, "stock_list_spec.csv"),
    "stock_price_spec": os.path.join(dataset_dir, "stock_price_spec.csv"),
    "trades_spec": os.path.join(dataset_dir, "trades_spec.csv"),
}

# Read the files, printing each key as it is loaded.
data_specifications = {}
for k, v in inputs.items():
    print(k)
    data_specifications[k] = pd.read_csv(v)

<span style="color: orange; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">example_test_files</span>

### APIから提供されるものと同じデータ

### Same data as provided by the API

In [None]:
# Load the example test CSVs into a dict of DataFrames.
dataset_dir = "../input/jpx-tokyo-stock-exchange-prediction/example_test_files/"
# Files to read. All paths are built the same way with os.path.join; the
# previous version mixed f"{dataset_dir}/x" and f"{dataset_dir}x".
inputs = {
    "financials": os.path.join(dataset_dir, "financials.csv"),
    "options": os.path.join(dataset_dir, "options.csv"),
    "secondary_stock_prices": os.path.join(dataset_dir, "secondary_stock_prices.csv"),
    "stock_prices": os.path.join(dataset_dir, "stock_prices.csv"),
    "trades": os.path.join(dataset_dir, "trades.csv"),
}

# Read the files, printing each key as it is loaded.
example_test_files = {}
for k, v in inputs.items():
    print(k)
    example_test_files[k] = pd.read_csv(v)

### これが提出物になる。
### Rankを予想することになる。
### This will be the submission.
### You will be expected to predict Rank.

In [None]:
# Read the sample submission CSV and display it as the cell output.
sample_submission = pd.read_csv('../input/jpx-tokyo-stock-exchange-prediction/example_test_files/sample_submission.csv')
sample_submission

<span style="color: orange; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">supplemental_files</span>

補足トレーニングデータの動的ウィンドウを含むデータフォルダ

Data folder containing dynamic window of supplemental training data

In [None]:
# Load the supplemental CSVs into a dict of DataFrames.
dataset_dir = "../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/"
# Files to read. All paths are built the same way with os.path.join; the
# previous version mixed f"{dataset_dir}/x" and f"{dataset_dir}x".
inputs = {
    "financials": os.path.join(dataset_dir, "financials.csv"),
    "options": os.path.join(dataset_dir, "options.csv"),
    "secondary_stock_prices": os.path.join(dataset_dir, "secondary_stock_prices.csv"),
    "stock_prices": os.path.join(dataset_dir, "stock_prices.csv"),
    "trades": os.path.join(dataset_dir, "trades.csv"),
}

# Read the files, printing each key as it is loaded.
supplemental_files = {}
for k, v in inputs.items():
    print(k)
    supplemental_files[k] = pd.read_csv(v)

<span style="color: orange; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">train_files</span>

### 四半期決算報告の結果
### Results of Quarterly Financial Reporting

In [None]:
# Load the training CSVs into a dict of DataFrames.
dataset_dir = "../input/jpx-tokyo-stock-exchange-prediction/train_files/"
# Files to read. All paths are built the same way with os.path.join; the
# previous version mixed f"{dataset_dir}/x" and f"{dataset_dir}x".
inputs = {
    "financials": os.path.join(dataset_dir, "financials.csv"),
    "options": os.path.join(dataset_dir, "options.csv"),
    "secondary_stock_prices": os.path.join(dataset_dir, "secondary_stock_prices.csv"),
    "stock_prices": os.path.join(dataset_dir, "stock_prices.csv"),
    "trades": os.path.join(dataset_dir, "trades.csv"),
}

# Read the files, printing each key as it is loaded.
train = {}
for k, v in inputs.items():
    print(k)
    train[k] = pd.read_csv(v)

### 試しに1301の銘柄コードを分析してみる
### Let's try analyzing the stock with securities code 1301.

In [None]:
# Take the stock price table and keep only the rows of one securities code.
price = train["stock_prices"]

code = 1301
is_selected_code = price["SecuritiesCode"] == code
price_data = price[is_selected_code]
price_data

In [None]:
price = train["stock_prices"]

# Narrow down to a single securities code.
# .copy() makes price_data an independent frame, so the later cells that add
# columns to it do not write into a slice of `price` (SettingWithCopyWarning).
code = 1301
price_data = price[price["SecuritiesCode"] == code].copy()

# Plot Open / High / Low / Close against the date.
fig, ax = plt.subplots(figsize=(20, 8))

# Each line gets its own label; previously all four shared the same text, so
# the legend could not tell them apart.
for column in ("Open", "High", "Low", "Close"):
    ax.plot(
        price_data['Date'],
        price_data[column],
        label=f"securities code : {code}.T {column}",
    )
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.set_ylabel("stock_price")
ax.set_xlabel("datetime")
ax.grid(True)
ax.legend()

### Target

In [None]:
# Plot the Target column of the selected security against the date.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(price_data['Date'],price_data["Target"], label=f"securities code : {code}.T")
plt.xticks([0, 200,400,600,800, 1000,1200])
# The series plotted is Target, not a price; the axis was labelled "stock_price".
ax.set_ylabel("target")
ax.set_xlabel("datetime")
ax.grid(True)
ax.legend()

# 移動平均
# moving average

​移​動​平​均​に​も​さ​ま​ざ​ま​な​種​類​が​あ​り​ま​す​が、​こ​こ​で​は​単​純​移​動​平​均​線​を​用​い​ま​す。​単​純​移​動​平​均​線​と​い​う​の​は、​例​え​ば、​5​日​線​で​あ​れ​ば、​直​近​5​営​業​日​の​価​格​の​平​均​値​で​す。​こ​れ​を​1​つ​ず​つ​期​間​を​ス​ラ​イ​ド​し​な​が​ら​計​算​し​た​も​の​に​な​り​ま​す。

There are many kinds of moving averages, but here we use the simple moving average (SMA). For example, the 5-day SMA is the mean of the prices over the most recent 5 business days, recomputed while sliding that window forward one day at a time.

In [None]:

# Compute the 5-, 25- and 75-row simple moving averages of the Open price.
periods = [5, 25, 75]
cols = [f"{period} windows simple moving average" for period in periods]
for period, col in zip(periods, cols):
    # min_periods=1 yields a value from the first row on.
    price_data[col] = price_data["Open"].rolling(period, min_periods=1).mean()

# Plot one line per moving-average column.
fig, ax = plt.subplots(figsize=(20, 8))
ax.set_xlabel("datetime")
ax.set_ylabel("stock_price")
for col in cols:
    ax.plot(price_data['Date'], price_data[col], label=col)
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

### 

# 価格変化率
# Price change ratio

価​格​変​化​率​は、​価​格​が​そ​の​期​間​で​ど​れ​く​ら​い​変​化​し​た​か​を​(%)​で​表​現​し​た​も​の​で​す。​相​場​の​勢​い​や​方​向​性​等​を​判​断​す​る​際​に​よ​く​使​わ​れ​ま​す

The price change rate expresses, as a percentage, how much the price has changed over a given period. It is often used to judge the momentum and direction of the market.

In [None]:
# Compute the 5-, 25- and 75-row percentage change of the Open price.
periods = [5, 25, 75]
cols = [f"{period} windows rate of return" for period in periods]
for period, col in zip(periods, cols):
    # pct_change returns a fraction; multiply by 100 to express it in percent.
    price_data[col] = price_data["Open"].pct_change(period) * 100

# Plot one line per rate-of-return column.
fig, ax = plt.subplots(figsize=(20, 8))
ax.set_xlabel("datetime")
ax.set_ylabel("rate of return (%)")
for col in cols:
    ax.plot(price_data['Date'], price_data[col], label=col)
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

# ヒストリカル・ボラティリティ
# Historical volatility

​ヒ​ス​ト​リ​カ​ル・​ボ​ラ​ティ​リ​ティ​を​計​算​し​ま​す。​こ​こ​で​計​算​す​る​ヒ​ス​ト​リ​カ​ル・​ボ​ラ​ティ​リ​ティ​は、​5​日、​25​日、​75​日​の​対​数​リ​ター​ン​の​標​準​偏​差​で​す。​ヒ​ス​ト​リ​カ​ル・​ボ​ラ​ティ​リ​ティ​は​リ​ス​ク​指​標​の​一​つ​で、​価​格​が​ど​の​程​度​激​し​く​変​動​し​た​か​を​把​握​す​る​た​め​に​利​用​し​ま​す。​一​般​的​に​ヒ​ス​ト​リ​カ​ル・​ボ​ラ​ティ​リ​ティ​が​大​き​い​銘​柄​は、​小​さ​い​銘​柄​よ​り​も​資​産​と​し​て​保​持​す​る​リ​ス​ク​が​相​対​的​に​高​い​と​考​え​ら​れ​ま​す。

Here we calculate historical volatility, defined as the standard deviation of log returns over 5-, 25-, and 75-day windows. Historical volatility is a risk indicator used to understand how sharply prices have fluctuated. In general, stocks with high historical volatility are considered relatively riskier to hold as assets than stocks with low historical volatility.

In [None]:

# Compute the 5-, 25- and 75-row historical volatility of the Open price:
# the rolling standard deviation of the log-price differences.
periods = [5, 25, 75]
cols = [f"{period} windows volatility" for period in periods]
for period, col in zip(periods, cols):
    log_return = np.log(price_data["Open"]).diff()
    price_data[col] = log_return.rolling(period).std()

# Plot one line per volatility column.
fig, ax = plt.subplots(figsize=(20, 8))
ax.set_xlabel("datetime")
ax.set_ylabel("volatility")
for col in cols:
    ax.plot(price_data['Date'], price_data[col], label=col)
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

## 欠損値処理
## Missing value processing

結構な欠損値があります。

There are quite a few missing values.

In [None]:
# Store the first four characters of the Date string as a 'year' column.
price_data["year"] = price_data["Date"].str.slice(0, 4)

In [None]:
# Print the number of missing values in each column of price_data.
print(price_data.isna().sum())

### データの欠損値をプロット
### Plot missing data values

In [None]:
# Heatmap of the number of missing values per year (rows) and column (columns).
fig, ax = plt.subplots(figsize=(20, 5))
# Count NaNs per year. The previous version summed the raw column values
# (string columns included), which does not show missing data.
missing_by_year = (
    price_data.drop(columns="year").isna().groupby(price_data["year"]).sum()
)
sns.heatmap(missing_by_year, ax=ax)

In [None]:
# Restrict to float64 columns (currently disabled)
#price_data = price_data.select_dtypes(include=["float64"])

# Fill missing values with 0 (currently disabled)
#price_data = price_data.fillna(0)

## 特徴量の生成
## Generate features

### 定常性を意識した特徴量設計
### Stationarity-aware feature design

時系列データを扱う際には、定常性を意識して特徴量を設計することが重要です。

株価をそのまま学習させたケースと定常性がある特徴量を利用するケースについて考えてみます。

株価をそのまま学習させたケース: 例えば、モデルの訓練期間における株価が、100円〜110円の範囲で動いたとします。もし、この数値をそのままモデルに投入すると、モデルは株価が100円〜110円近辺で動くことを暗黙に学習します。しかし、この暗黙の仮定は実際のマーケットでは成立しておらず、テスト期間で株価が高騰すると、モデルがうまく動かないことがあります。他にも株価に特有の例としては株式分割や株式併合により株価のレンジが大きく変動する場合があります。

定常性がある特徴量を利用するケース: 例えば、20日の価格変化率を考えると、これは正規分布ではありませんが、一部のマーケットの混乱期を除けばほぼ0を中心とした正規分布に近い分布になります。特徴量は、2%の上昇や4%の下落といった0を中心とした時系列となっており、将来に渡っても似たような分布になることが期待でき、株価範囲に対する暗黙の仮定を学ぶ恐れがなくなります。このように将来に渡っても似たような分布を期待できる特徴量は定常性があるといえます。

定常性を意識すると、正規化処理における様々な注意点が見えてきます。たとえば、最小値と最大値を-1から1などにマッピングするMinMax正規化を株価に適用したとしても定常性を期待することはできません。株価をMinMax正規化したときにその最大値・最小値が未来に対しても適用できる保証ができないためです。このように時系列の特徴量の設計をするとき、定常性を意識しながら特徴量を設計していくことが重要です。

When dealing with time series data, it is important to design features with stationarity in mind.

Let us consider the case of learning stock prices as they are and the case of using features with stationarity.

Case where stock prices are learned as they are: Suppose, for example, that the stock prices during the training period of the model move in the range of 100-110 yen. If this value is input directly into the model, the model implicitly learns that the stock price moves in the range of 100-110 yen. However, this implicit assumption is not valid in the actual market, and the model may not work well if the stock price soars during the test period. Another example specific to stock prices is when the range of stock prices fluctuates significantly due to stock splits or reverse stock splits.

Cases that use features that are stationary: For example, considering a 20-day price change rate, this is not a normal distribution, but it is close to a normal distribution centered around approximately 0, except during some periods of market turmoil. The characteristics are time series centered around 0, such as a 2% rise or a 4% decline, and can be expected to have a similar distribution in the future, eliminating the fear of learning implicit assumptions about the stock price range. Thus, a feature that can be expected to have a similar distribution in the future can be said to be stationary.

With stationarity in mind, various caveats in the normalization process become apparent. For example, if we apply MinMax normalization, which maps the minimum and maximum values from -1 to 1, to stock prices, we cannot expect stationarity. This is because there is no guarantee that the maximum and minimum values of MinMax normalization for stock prices will be applicable in the future. Thus, when designing time series features, it is important to design features with stationarity in mind.

In [None]:
# Build Close-based features on a copy of price_data.
feats = price_data.copy()
close = feats["Close"]
# Row-to-row difference of the log closing price, used for the volatilities.
log_return = np.log(close).diff()
# Feature-name suffix -> window length in rows (business days).
horizons = {"1month": 20, "2month": 40, "3month": 60}

# 20 / 40 / 60 business-day return of the closing price.
for tag, window in horizons.items():
    feats[f"return_{tag}"] = close.pct_change(window)
# 20 / 40 / 60 business-day volatility of the closing price.
for tag, window in horizons.items():
    feats[f"volatility_{tag}"] = log_return.rolling(window).std()
# Ratio of the closing price to its 20 / 40 / 60 business-day simple moving average.
for tag, window in horizons.items():
    feats[f"MA_gap_{tag}"] = close / close.rolling(window).mean()

# Replace missing values with 0, then drop the original Close column.
feats = feats.fillna(0)
feats = feats.drop(columns=["Close"])

###  終値の20営業日リターン
### 20 business day return of closing price

In [None]:
# Plot the 20-business-day return of the closing price.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["return_1month"], label="return_1month")
# This series is a return; the axis was previously labelled "volatility".
ax.set_ylabel("return")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

###  終値の40営業日リターン
### 40 business day return of closing price

In [None]:
# Plot the 40-business-day return of the closing price.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["return_2month"], label="return_2month")
# This series is a return; the axis was previously labelled "volatility".
ax.set_ylabel("return")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

###  終値の60営業日リターン
### 60 business day return of closing price

In [None]:
# Plot the 60-business-day return of the closing price.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["return_3month"], label="return_3month")
# This series is a return; the axis was previously labelled "volatility".
ax.set_ylabel("return")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

## 終値の20営業日ボラティリティ
## 20 business day volatility of closing price

In [None]:
# Plot the 20-business-day volatility of the closing price.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["volatility_1month"], label="volatility_1month")
ax.set_ylabel("volatility")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

## 終値の40営業日ボラティリティ
## 40 business day volatility of closing price

In [None]:
# Plot the 40-business-day volatility of the closing price.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["volatility_2month"], label="volatility_2month")
ax.set_ylabel("volatility")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

## 終値の60営業日ボラティリティ
## 60 business day volatility of closing price

In [None]:
# Plot the 60-business-day volatility of the closing price.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["volatility_3month"], label="volatility_3month")
ax.set_ylabel("volatility")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

### 終値と20営業日の単純移動平均線の乖離
### Deviation between closing price and 20 business day simple moving average

In [None]:
# Plot the ratio of the closing price to its 20-business-day simple moving average.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["MA_gap_1month"], label="MA_gap_1month")
# This series is a price / moving-average ratio; the axis was labelled "volatility".
ax.set_ylabel("close / moving average")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

### 終値と40営業日の単純移動平均線の乖離
### Deviation between closing price and 40 business day simple moving average

In [None]:
# Plot the ratio of the closing price to its 40-business-day simple moving average.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["MA_gap_2month"], label="MA_gap_2month")
# This series is a price / moving-average ratio; the axis was labelled "volatility".
ax.set_ylabel("close / moving average")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

### 終値と60営業日の単純移動平均線の乖離
### Deviation between closing price and 60 business day simple moving average

In [None]:
# Plot the ratio of the closing price to its 60-business-day simple moving average.
fig, ax = plt.subplots(figsize=(20, 8))

# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(feats['Date'],feats["MA_gap_3month"], label="MA_gap_3month")
# This series is a price / moving-average ratio; the axis was labelled "volatility".
ax.set_ylabel("close / moving average")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

### secondary_stock_prices

In [None]:
secondary_stock_prices = train['secondary_stock_prices']

# Narrow down to a single securities code.
code = 1305
secondary_stock_prices_data = secondary_stock_prices[secondary_stock_prices["SecuritiesCode"] == code]

# Plot Open / Close / High / Low against the date.
fig, ax = plt.subplots(figsize=(20, 8))
# Label every line; without labels ax.legend() was empty and the four
# series could not be told apart.
for column in ("Open", "Close", "High", "Low"):
    ax.plot(
        secondary_stock_prices_data['Date'],
        secondary_stock_prices_data[column],
        label=column,
    )
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.set_ylabel("price")
ax.set_xlabel("datetime")
ax.grid(True)
ax.legend()

In [None]:
# Plot the traded volume of the selected secondary security against the date.
fig, ax = plt.subplots(figsize=(20, 8))
# The line needs a label, otherwise ax.legend() has nothing to show.
ax.plot(secondary_stock_prices_data['Date'],secondary_stock_prices_data["Volume"], label="Volume")
ax.set_ylabel("volume")
ax.set_xlabel("datetime")
plt.xticks([0, 200,400,600,800, 1000,1200])
ax.grid(True)
ax.legend()

In [None]:
# Joint plot of securities code (x) against the mean Open price of that code (y).
# Both axes are taken from the same groupby result so that every code is paired
# with its own mean. Previously x came from unique() (first-appearance order)
# and y from groupby (sorted by code), which only line up if the codes first
# appear in sorted order.
mean_open_by_code = price.groupby(['SecuritiesCode'])['Open'].mean()
qx = sns.jointplot(x=mean_open_by_code.index.to_numpy(), y=mean_open_by_code.to_numpy(), kind="reg",
                   height=8, joint_kws={'line_kws':{'color':'red'}})

In [None]:
# Distribution of the Open price: density histogram with a KDE overlay.
# sns.distplot is deprecated in seaborn; histplot(kde=True, stat="density")
# is its documented replacement.
sns.histplot(price['Open'], kde=True, stat="density", label='Open')

In [None]:
#sns.boxplot(x='SecuritiesCode',y='Open',data=price)

## モデルの構築
## Build the model

In [None]:
# Build an integer 'date' column by joining three character slices of the Date
# string ([0:4], [5:7], [8:10]) and casting the result to int.
year_part = price["Date"].str.slice(0, 4)
month_part = price["Date"].str.slice(5, 7)
day_part = price["Date"].str.slice(8, 10)
price["date"] = (year_part + month_part + day_part).astype(int)
price

In [None]:
def get_features_for_predict(price, code, start_dt=20170301):
    """Build Close-based return, volatility and moving-average-gap features for one security.

    Parameters
    ----------
    price : pandas.DataFrame
        Price table. It must contain ``SecuritiesCode``, ``Close`` and a
        numeric ``date`` column (compared against ``start_dt``). It is not modified.
    code : int
        Securities code whose rows are selected.
    start_dt : int, default 20170301
        Only rows whose ``date`` is strictly greater than this value are returned.

    Returns
    -------
    pandas.DataFrame
        The float64/int64 columns of the selected rows plus nine feature
        columns (``return_*``, ``volatility_*``, ``MA_gap_*`` for 1/2/3 months),
        with every missing value replaced by 0.
    """
    # Filter to the requested security first and copy only that small subset.
    # The previous version copied the whole price table on every call, i.e.
    # once per security when called in a loop.
    feats = price.loc[price["SecuritiesCode"] == code]
    feats = feats.select_dtypes(include=["float64", "int64"]).copy()

    close = feats["Close"]
    # Row-to-row difference of the log closing price, used for the volatilities.
    log_return = np.log(close).diff()
    # Feature-name suffix -> window length in rows (business days).
    horizons = {"1month": 20, "2month": 40, "3month": 60}

    # The three loops are kept separate so the output column order stays
    # returns, then volatilities, then moving-average gaps.
    # 20 / 40 / 60 business-day return of the closing price.
    for tag, window in horizons.items():
        feats[f"return_{tag}"] = close.pct_change(window)
    # 20 / 40 / 60 business-day volatility of the closing price.
    for tag, window in horizons.items():
        feats[f"volatility_{tag}"] = log_return.rolling(window).std()
    # Ratio of the closing price to its 20 / 40 / 60 business-day simple moving average.
    for tag, window in horizons.items():
        feats[f"MA_gap_{tag}"] = close / close.rolling(window).mean()

    # Drop rows at or before the start date.
    feats = feats.loc[feats["date"] > start_dt]
    # Replace missing values with 0.
    # NOTE(review): this fills every numeric column, including a Target column
    # if one is present, so a later dropna on Target would find nothing to
    # drop - confirm that is intended.
    feats = feats.fillna(0)
    return feats

In [None]:
# Build the feature table for securities code 1301 and display it transposed.
df = get_features_for_predict(price, 1301)
df.T

### 目的変数の対応付け及び訓練データ、評価データ、テストデータの分割
### Mapping of objective variables and division of training data, evaluation data, and test data

次に目的変数を定義します。目的変数は、データセットの stock_labels 内にあり、利用する際は先ほど定義した特徴量のデータセットに対して、行（日付）を一致させる必要があります。
データセットの訓練期間、評価期間、テスト期間への分割処理も合わせて実施します。

Next, define the objective variable. The objective variable is located in the stock_labels of the dataset and must match the rows (dates) against the dataset of features defined earlier when used.
The splitting of the dataset into training, evaluation, and test periods is also performed in conjunction with this process.

In [None]:
# Define the target variable(s).
labels = {"Target"}
# Containers for storing data per target variable.
train_X, val_X, test_X = {}, {}, {}
train_y, val_y, test_y = {}, {}, {}

# Securities codes to build features for.
codes = price["SecuritiesCode"].unique()

# Build the features of every security and stack them into one frame.
buff = [get_features_for_predict(price, code) for code in tqdm(codes)]
feature = pd.concat(buff)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
from tqdm import tqdm
import re
import joblib
import gc
from scipy import stats

import lightgbm as lgb
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

import sys

import warnings
warnings.simplefilter('ignore')

In [None]:
# Display the feature table.
feature

In [None]:
from sklearn.model_selection import train_test_split

# Per-security mean of Target, assigned to the feature rows by index alignment.
# NOTE(review): the mean is taken over all rows of `price`, so it also uses the
# targets of rows that later land in the validation split - confirm this is acceptable.
target_mean = price.groupby(['SecuritiesCode'])['Target'].transform("mean")
feature['target_enc'] = target_mean
# Drop rows without a target, then separate the label from the inputs.
feature = feature.dropna(subset=['Target'])
X = feature.drop(columns=['Target']).fillna(0)
y = feature['Target']



In [None]:
    %%capture
    # Train a LightGBM regression model through Optuna's LightGBM integration
    # on a random 80/20 split of X / y.
    import optuna 
    import optuna.integration.lightgbm as lgbo
    import lightgbm as lgb
    # Regression objective, evaluated with RMSE.
    params = {'objective': 'regression',  'metric': 'rmse' } #'objective': 'mean_squared_error','regression'

    # NOTE(review): the split is random (random_state=42), not chronological -
    # confirm this is intended for time-series data.
    x_train, x_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
    lgb_train = lgb.Dataset(x_train, y_train)
    lgb_valid = lgb.Dataset(x_valid, y_valid)
    # Turn off Optuna's default log handler.
    optuna.logging.disable_default_handler()
    # NOTE(review): verbose_eval / early_stopping_rounds are passed as keyword
    # arguments here; check they are still accepted by the installed
    # lightgbm / optuna versions.
    model = lgbo.train(params, lgb_train, valid_sets=[lgb_valid], verbose_eval=False, num_boost_round=100, early_stopping_rounds=5)

In [None]:
# Display the params attribute of the model trained above.
model.params

In [None]:
      # Retrain with the parameters stored on the previous model, for up to
      # 50000 rounds with early stopping after 100 rounds; both the training
      # and validation datasets are passed as valid_sets.
      model = lgb.train(params=model.params,
                    train_set=lgb_train,
                    valid_sets=[lgb_train, lgb_valid],
                    num_boost_round=50000, 
                    early_stopping_rounds=100,        
                    verbose_eval=1000)#,
                    #categorical_feature=categorical_features,)

In [None]:
import jpx_tokyo_market_prediction

env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test files

# Mean training Target per security, indexed by SecuritiesCode. It is looked up
# by code below. The previous version assigned
# price.groupby(...).transform(...) straight onto the test frame, which aligns
# on the row index and so attached the means of unrelated training rows.
target_mean_by_code = price.groupby("SecuritiesCode")["Target"].mean()

for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # Integer date column, built the same way as for the training table.
    prices['date'] = (
        prices.Date.str[:4] + prices.Date.str[5:7] + prices.Date.str[8:10]
    ).astype(int)

    # Build the features of every security in this batch.
    # NOTE(review): features are computed from this batch alone; if a batch
    # holds a single day, every rolling / pct_change feature is NaN and gets
    # filled with 0. Consider prepending recent history before computing them.
    codes = prices["SecuritiesCode"].unique()
    buff = [get_features_for_predict(prices, code) for code in tqdm(codes)]
    feature = pd.concat(buff)
    # Codes not seen in training get 0, mirroring the fillna(0) applied to the training inputs.
    feature['target_enc'] = feature['SecuritiesCode'].map(target_mean_by_code).fillna(0)

    # Key the predictions by securities code so they can be matched to the
    # rows of sample_prediction regardless of row order.
    pred_by_code = pd.Series(
        model.predict(feature), index=feature['SecuritiesCode'].to_numpy()
    )
    # Series.map needs a unique index; keep the last prediction per code.
    pred_by_code = pred_by_code[~pred_by_code.index.duplicated(keep='last')]
    scores = sample_prediction['SecuritiesCode'].map(pred_by_code)

    # Convert scores to ranks. np.argsort(pred) returns the indices that would
    # sort the array, not each element's rank, and sorts ascending.
    # Rank 0 is given to the highest predicted Target - TODO confirm against
    # the competition's evaluation rules. method='first' keeps ranks unique;
    # securities without a prediction are ranked last.
    sample_prediction['Rank'] = (
        scores.rank(ascending=False, method='first', na_option='bottom').astype(int) - 1
    )
    env.predict(sample_prediction)   # register your predictions