In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation
from sklearn.metrics import accuracy_score, log_loss, f1_score, classification_report
from IPython.display import display

import pickle
import gc
import os
import datetime as dt
from datetime import datetime
import json

In [2]:
# 最大列数を None（制限なし）に
pd.set_option('display.max_columns', None)

# 最大行数も None に
pd.set_option('display.max_rows', None)

In [3]:
import os

# Notebook用にスクリプトの場所を手動で設定
BASE_DIR = r"C:\Users\image\Portfolio\Soccer analysis\soccer_pipeline\notebooks"

# プロジェクトのルート（soccer_pipeline）
ROOT_DIR = os.path.abspath(os.path.join(BASE_DIR, ".."))

# モデル保存ディレクトリへのパス
MODEL_DIR = os.path.join(ROOT_DIR, "models")

data_file = os.path.join(ROOT_DIR,"data","processed_data.csv")

# データ読み込み

In [4]:
#データの読み込み
df = pd.read_csv(data_file)

df.head()

Unnamed: 0,fixture_id,date,season,home_team,away_team,home_score,away_score,status,home_shots_on_goal,home_possession,home_passes,home_passes_accuracy,home_fouls,home_corners,home_yellow_cards,home_red_cards,away_shots_on_goal,away_possession,away_passes,away_passes_accuracy,away_fouls,away_corners,away_yellow_cards,away_red_cards,home_last_position,home_last_won,home_last_drawn,home_last_lost,home_last_gf,home_last_ga,home_last_gd,home_last_points,away_last_position,away_last_won,away_last_drawn,away_last_lost,away_last_gf,away_last_ga,away_last_gd,away_last_points
0,710556,2021-08-13 19:00:00+00:00,2021,Brentford,Arsenal,2.0,0.0,FT,3.0,35.0,309.0,201.0,12.0,2.0,0.0,0.0,4.0,65.0,568.0,488.0,8.0,5.0,0.0,0.0,17,10,9,19,33,55,-22,39,8,18,7,13,55,39,16,61
1,710557,2021-08-14 14:00:00+00:00,2021,Burnley,Brighton,1.0,2.0,FT,3.0,36.0,259.0,181.0,10.0,7.0,2.0,0.0,8.0,64.0,518.0,424.0,7.0,6.0,1.0,0.0,17,10,9,19,33,55,-22,39,16,9,14,15,40,46,-6,41
2,710558,2021-08-14 14:00:00+00:00,2021,Chelsea,Crystal Palace,3.0,0.0,FT,6.0,62.0,678.0,623.0,15.0,5.0,0.0,0.0,1.0,38.0,423.0,363.0,11.0,2.0,0.0,0.0,4,19,10,9,58,36,22,67,14,12,8,18,41,66,-25,44
3,710559,2021-08-14 14:00:00+00:00,2021,Everton,Southampton,3.0,1.0,FT,6.0,48.0,337.0,235.0,13.0,6.0,2.0,0.0,3.0,52.0,370.0,256.0,15.0,8.0,0.0,0.0,10,17,8,13,47,48,-1,59,15,12,7,19,47,68,-21,43
4,710560,2021-08-14 14:00:00+00:00,2021,Leicester,Wolves,1.0,0.0,FT,5.0,56.0,584.0,505.0,6.0,5.0,1.0,0.0,3.0,44.0,443.0,366.0,10.0,4.0,2.0,0.0,5,20,6,12,68,50,18,66,13,12,9,17,36,52,-16,45


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1900 entries, 0 to 1899
Data columns (total 40 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixture_id            1900 non-null   int64  
 1   date                  1900 non-null   object 
 2   season                1900 non-null   int64  
 3   home_team             1900 non-null   object 
 4   away_team             1900 non-null   object 
 5   home_score            1650 non-null   float64
 6   away_score            1650 non-null   float64
 7   status                1900 non-null   object 
 8   home_shots_on_goal    1650 non-null   float64
 9   home_possession       1650 non-null   float64
 10  home_passes           1650 non-null   float64
 11  home_passes_accuracy  1650 non-null   float64
 12  home_fouls            1650 non-null   float64
 13  home_corners          1650 non-null   float64
 14  home_yellow_cards     1650 non-null   float64
 15  home_red_cards       

In [6]:
# データを日付とfixture_idでソート (時系列順に並べるため)
df = df.sort_values(by=['date', 'fixture_id']).reset_index(drop=True)

In [7]:
df["status"].unique()

array(['FT', 'NS'], dtype=object)

# 特徴量エンジニアリング

## ターゲットカラム作成

In [8]:
#targetカラム作成(試合の勝敗カラム)　H:home win, A:away win, D:draw
def target_create(row):
    if row["home_score"] > row["away_score"]:
        return  "H"    
    
    elif row["home_score"] < row["away_score"]:
        return  "A"  
    
    else:
        return  "D"  

df["target"] = df.apply(target_create,axis = 1)

In [9]:
# objectからcategoryに変換したいカラム
columns = ["home_team","away_team","status","target"]

# タイプ変換関数
def change_type(columns):
    for col in columns:
        df[col] = df[col].astype("category")
    return df

df = change_type(columns) 

In [10]:
df['date'] = pd.to_datetime(df['date'], errors='coerce')
# タイムゾーン情報を削除
df['date'] = df['date'].dt.tz_localize(None)

## 得失点差カラム作成

In [11]:
# ホーム勝利を示す一時的な列を作成
# 'target'が 'H' の場合に1、それ以外は0
df['is_home_win'] = (df['target'] == 'H').astype(int)
# アウェイ勝利を示す一時的な列を作成
# 'target'が 'Ａ' の場合に1、それ以外は0
df['is_away_win'] = (df['target'] == 'A').astype(int)


In [12]:
# 得失点差カラム作成
df["home_goal_difference"] = df["home_score"] - df["away_score"]
df["away_goal_difference"] = df["away_score"] - df["home_score"]

## 直近N試合の勝利数/スコア/得失点差のローリング計算関数作成

In [13]:
# --- 直近N試合の勝利数/スコア/得失点差のローリング計算関数 ---
def calculate_rolling_feature(df, group_col, target_col, window_size, new_col_name, agg_func='sum'):
    """
    グループごとにローリング集計を行い、1つシフトした結果を新しいカラムとして追加する。
    group_col: グループ化するカラム ('home_team' or 'away_team')
    target_col: 集計対象のカラム ('is_home_win', 'home_score', 'home_goal_difference' など)
    window_size: ローリングウィンドウサイズ (直近N試合なら N+1)
    new_col_name: 新しい特徴量のカラム名
    agg_func: 集計関数 ('sum' or 'mean')
    """
    # ローリング計算。window_sizeは現在の行を含むため、直近N試合を見る場合は N+1
    # shift(1)で現在の試合結果を除外し、fillna(0)で試合前のNaNを埋める
    if agg_func == 'sum':
        new_feature = df.groupby(group_col, observed=False)[target_col].transform(
            lambda x: x.rolling(window=window_size, min_periods=1).sum().shift(1).fillna(0)
        ).astype(int)
    elif agg_func == 'mean':
        # 勝率の場合、結果をパーセンテージにし、小数点第2位まで丸める
        new_feature = df.groupby(group_col, observed=False)[target_col].transform(
            lambda x: (x.rolling(window=window_size, min_periods=1).mean().shift(1) * 100).round(2).fillna(0)
        )
    else:
        raise ValueError("Unsupported agg_func")
        
    df[new_col_name] = new_feature
    return df

In [14]:
# 直近5試合 (window=6) の計算
df = calculate_rolling_feature(df, 'home_team', 'is_home_win', 6, 'home_team_recent_5_wins')
df = calculate_rolling_feature(df, 'away_team', 'is_away_win', 6, 'away_team_recent_5_wins')

df = calculate_rolling_feature(df, 'home_team', 'home_score', 6, 'home_recent_5_scores')
df = calculate_rolling_feature(df, 'away_team', 'away_score', 6, 'away_recent_5_scores')

df = calculate_rolling_feature(df, 'home_team', 'away_score', 6, 'home_recent_5_goal_against')
df = calculate_rolling_feature(df, 'away_team', 'home_score', 6, 'away_recent_5_goal_against')

df = calculate_rolling_feature(df, 'home_team', 'home_goal_difference', 6, 'home_recent_5_goal_diff')
df = calculate_rolling_feature(df, 'away_team', 'away_goal_difference', 6, 'away_recent_5_goal_diff')


# 直近10試合 (window=11) の計算
df = calculate_rolling_feature(df, 'home_team', 'home_score', 11, 'home_recent_10_scores')
df = calculate_rolling_feature(df, 'away_team', 'away_score', 11, 'away_recent_10_scores')

df = calculate_rolling_feature(df, 'home_team', 'away_score', 11, 'home_recent_10_goal_against')
df = calculate_rolling_feature(df, 'away_team', 'home_score', 11, 'away_recent_10_goal_against')

df = calculate_rolling_feature(df, 'home_team', 'home_goal_difference', 11, 'home_recent_10_goal_diff')
df = calculate_rolling_feature(df, 'away_team', 'away_goal_difference', 11, 'away_recent_10_goal_diff')


# 直近20試合 (window=21) の計算
df = calculate_rolling_feature(df, 'home_team', 'home_score', 21, 'home_recent_20_scores')
df = calculate_rolling_feature(df, 'away_team', 'away_score', 21, 'away_recent_20_scores')

df = calculate_rolling_feature(df, 'home_team', 'away_score', 21, 'home_recent_20_goal_against')
df = calculate_rolling_feature(df, 'away_team', 'home_score', 21, 'away_recent_20_goal_against')

df = calculate_rolling_feature(df, 'home_team', 'home_goal_difference', 21, 'home_recent_20_goal_diff')
df = calculate_rolling_feature(df, 'away_team', 'away_goal_difference', 21, 'away_recent_20_goal_diff')

## 勝ち点カラム作成

In [15]:
#------------------勝ち点カラム作成 (ホーム/アウェイ区別なしの全体成績)--------------------
# 
# 各試合に一意のIDを付与 (後のマージのために利用)
df['match_id'] = df.index 

# 2. チーム視点でのデータ作成 (スタッキング)
# -----------------------------------------------

# ① ホームチーム視点のデータフレームを作成
df_home = df[['match_id','season','date', 'home_team', 'target']].copy()
df_home.rename(columns={'home_team': 'team'}, inplace=True)
# 勝ち点計算
df_home['points'] = df_home['target'].map({"H":3,"D":1,"A":0})
# 勝利フラグ (勝率計算用)
df_home['is_win'] = (df_home['target'] == 'H').astype(int)

# # ② アウェイチーム視点のデータフレームを作成
df_away = df[['match_id','season', 'date', 'away_team', 'target']].copy()
df_away.rename(columns={'away_team': 'team'}, inplace=True)

# 勝ち点計算
df_away['points'] = df_away['target'].map({"H":0,"D":1,"A":3})

# 勝利フラグ (勝率計算用)
df_away['is_win'] = (df_away['target'] == 'A').astype(int)

# # ③ 2つの視点のデータを結合し、時系列順にソート
df_stacked = pd.concat([df_home, df_away],ignore_index=True)
df_stacked = df_stacked.sort_values(by=['date', 'match_id']).reset_index(drop=True)


# seasonとteamでグループ化し、各シーズン内での合計勝ち点 (試合前まで) を計算
# transform を利用して累積勝ち点を計算
# transform は、計算結果を元のdf_stackedと同じインデックス・行数で返すため、applyよりも高速
df_stacked['total_points'] = df_stacked.groupby(["season","team"], observed=False)["points"].transform(
    # 勝ち点の累積和を計算し、1つシフト (現在の試合結果を除く)、NaNを0で埋める
    lambda x: x.rolling(window = 38,min_periods = 1).sum().shift(1).fillna(0)
).astype(int) 


In [16]:
# ホームチームの総合成績フィーチャーを抽出 (match_id と team で紐づけ)
df_home_points = df_stacked[['match_id','team','total_points']].copy()
df_home_points.rename(columns={'team': 'home_team', 'total_points': 'home_total_points'}, inplace=True)

# マージ
df = pd.merge(
    df,
    df_home_points,
    on = ['match_id','home_team'],
    how = 'left'
)
# アウェイチームの総合成績フィーチャーを抽出
df_away_points = df_stacked[['match_id','team','total_points']].copy()
df_away_points.rename(columns={'team': 'away_team', 'total_points': 'away_total_points'}, inplace=True)

# マージ
df = pd.merge(
    df,
    df_away_points,
    on = ['match_id','away_team'],
    how = 'left'
)

df['home_total_points'] = df['home_total_points'].astype(int)
df['away_total_points'] = df['away_total_points'].astype(int)


# home teamとaway teamの勝ち点差カラムを作成
df["points_difference"] = df['home_total_points'] - df['away_total_points']

In [17]:
# --------------各チームの直近5試合での勝利数カラム作成(home、アウェイの区別なし)-----------------

# 総合成績のローリング計算 (home, away区別なし)

# チーム視点での直近5試合の勝率
df_stacked['recent_5_wins_overall'] = df_stacked.groupby(['season','team'], observed=False)['is_win'].transform(
    lambda x: x.rolling(window=6, min_periods=1).mean().shift(1).fillna(0)
).astype(int)

# チーム視点でのシーズン勝率
df_stacked['season_wins_ave_overall_temp'] = df_stacked.groupby(['season','team'], observed=False)['is_win'].transform(
    lambda x: (x.rolling(window=38, min_periods=1).mean().shift(1) * 100).round(2).fillna(0)
)


# -------------------home teamへのマージ------------------------

# ホームチームの総合成績フィーチャーを抽出
df_home_feature = df_stacked[['match_id', 'team', 'recent_5_wins_overall', 'season_wins_ave_overall_temp']].copy()
df_home_feature.rename(columns={
    'team': 'home_team',
    'recent_5_wins_overall': 'home_team_recent_5_wins_overall',
    'season_wins_ave_overall_temp': 'home_season_wins_ave_overall'
}, inplace=True)

# 元のデータフレームにマージ
df = pd.merge(
    df, 
    df_home_feature, 
    on=['match_id', 'home_team'], 
    how='left'
)


# ------------------away teamへのマージ--------------------

# アウェイチームの総合成績フィーチャーを抽出
df_away_feature = df_stacked[['match_id', 'team', 'recent_5_wins_overall', 'season_wins_ave_overall_temp']].copy()
df_away_feature.rename(columns={
    'team': 'away_team',
    'recent_5_wins_overall': 'away_team_recent_5_wins_overall',
    'season_wins_ave_overall_temp': 'away_season_wins_ave_overall'
}, inplace=True)

# 元のデータフレームにマージ
df = pd.merge(
    df, 
    df_away_feature, 
    on=['match_id', 'away_team'], 
    how='left'
)


## NSの試合データの置き換え

In [18]:
# 2025年シーズンのまだ行われていない試合(status="NS")に対し、直前の "FT" (Full Time) の試合結果でデータを補完する。
# NSのデータの中でも1か月後、2か月後の試合のデータに関しては、rollingfunctionを使って計算した、直近の試合の勝率や得点などの合計数や割合のカラムは、
# 直近の試合もおこなわれていないため、勝率がホームアウェイともに0などの数字に入ってしまい、適切な予測ができなくなってしまう。
# そのため、最新試合の時点でのデータで、そのシーズンの残りの全試合のデータを置き換えていく

fill_features_home = ['home_team_recent_5_wins',
                      'home_recent_5_scores', 
                      'home_recent_5_goal_diff', 
                      'home_recent_5_goal_against', 
                      'home_recent_10_scores', 
                      'home_recent_10_goal_diff', 
                      'home_recent_10_goal_against', 
                      'home_recent_20_scores', 
                      'home_recent_20_goal_diff', 
                      'home_recent_20_goal_against']


fill_features_away = ['away_team_recent_5_wins',
                      'away_recent_5_scores',
                      'away_recent_5_goal_diff',
                      'away_recent_5_goal_against',
                      'away_recent_10_scores',
                      'away_recent_10_goal_diff',
                      'away_recent_10_goal_against',
                      'away_recent_20_scores',
                      'away_recent_20_goal_diff',
                      'away_recent_20_goal_against']

fill_features_home_overall = ['home_total_points','home_team_recent_5_wins_overall','home_season_wins_ave_overall']

fill_features_away_overall = ['away_total_points','away_team_recent_5_wins_overall','away_season_wins_ave_overall']

# 2025年シーズンのチームリストを取得
teams = df[df["season"] == 2025]["home_team"].unique()

# ホーム限定データ (homeでの試合のみでカウントしているデータ) の補充
for team in teams:
    # 置き換え元（FTのそのチームの最終ホーム試合）の fill_features を取得
    source_df = df[(df["status"] == "FT") & (df["home_team"] == team)].sort_values("date")
    if not source_df.empty:
        source_vals = source_df.iloc[-1][fill_features_home]
    
        # 置き換え先（NSのそのチームのホーム試合）に代入
        df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home] = source_vals.values
    
# アウェイ限定データ (awayでの試合のみでカウントしているデータ) の補充
for team in teams:
    # 置き換え元（FTのそのチームの最終アウェイ試合）の fill_features を取得
    source_df = df[(df["status"] == "FT") & (df["away_team"] == team)].sort_values("date")
    if not source_df.empty:
        source_vals = source_df.iloc[-1][fill_features_away]
    
        # 置き換え先（NSのそのチームのアウェイ試合）に代入
        df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away] = source_vals.values

for team in teams:
    # 1. 最新試合を日付順で取得
    source_df = df[(df["status"] == "FT") & ((df["home_team"] == team) | (df["away_team"] == team))].sort_values("date")
    if source_df.empty:
        continue
    
    last_row = source_df.iloc[-1]
    
    # 2. 対象チームが最新試合でホームだったかアウェイだったかを判別
    team_was_home = last_row["home_team"] == team
    
    # 3. 最新試合の行から、対象チーム自身の統計値のみを1セット取得
    if team_was_home:
        # チームがホームの場合、統計値は home_overall カラムにある
        latest_team_vals = last_row[fill_features_home_overall].values
    else:
        # チームがアウェイの場合、統計値は away_overall カラムにある
        latest_team_vals = last_row[fill_features_away_overall].values
        
    # 4. NS の行ごとに index を取得
    ns_home_idx = df[(df["status"] == "NS") & (df["home_team"] == team)].index
    ns_away_idx = df[(df["status"] == "NS") & (df["away_team"] == team)].index
    
    # 5. ホームチームとして NS の行に、対象チームの統計値で代入
    if len(ns_home_idx) > 0:
        # 未来のホーム試合の「ホーム側」カラムに、対象チームの統計値を埋める
        df.loc[ns_home_idx, fill_features_home_overall] = [latest_team_vals] * len(ns_home_idx)
    
    # 6. アウェイチームとして NS の行に、対象チームの統計値で代入
    if len(ns_away_idx) > 0:
        # 未来のアウェイ試合の「アウェイ側」カラムに、対象チームの統計値を埋める
        df.loc[ns_away_idx, fill_features_away_overall] = [latest_team_vals] * len(ns_away_idx)


In [19]:
# 置き換えがうまくいっているかを確認
teams_2025 = df[df["season"] == 2025]["home_team"].unique()

for team in teams_2025:
    # 検証対象チーム
    test_team = team 
    
    # 1. チームAの最新の完了した試合 (FT) の統計値を取得
    latest_ft_row = df[(df["status"] == "FT") & ((df["home_team"] == test_team) | (df["away_team"] == test_team))].sort_values("date").iloc[-1]
    
    # 2. チームA自身の最新のoverall統計値を取得
    team_was_home_ft = latest_ft_row["home_team"] == test_team
    if team_was_home_ft:
        latest_overall_vals = latest_ft_row[fill_features_home_overall].values[0]
    else:
        latest_overall_vals = latest_ft_row[fill_features_away_overall].values[0]
    
    # 3. チームAの最新のhome/away限定統計値を取得
    latest_home_only_vals = df[(df["status"] == "FT") & (df["home_team"] == test_team)].sort_values("date").iloc[-1][fill_features_home].values[0]
    latest_away_only_vals = df[(df["status"] == "FT") & (df["away_team"] == test_team)].sort_values("date").iloc[-1][fill_features_away].values[0]
    
    # 4. チームAがホームのNS試合の最初の行を取得
    ns_home_row = df[(df["status"] == "NS") & (df["home_team"] == test_team)].iloc[0]
    
    # 5. 値の比較と検証
    print(f"--- {test_team} ホームNS試合の検証 ---")
    
    # (A) ホーム限定データ: 最終ホームFT試合と一致するか
    home_only_match = (ns_home_row[fill_features_home].values[0] == latest_home_only_vals).all()
    print(f"ホーム限定データ一致: {home_only_match}") 
    
    # (B) Overallデータ（ホーム側カラム）: 最終FT試合のOverallと一致するか
    overall_match = (ns_home_row[fill_features_home_overall].values[0] == latest_overall_vals).all()
    print(f"Overallデータ一致: {overall_match}") 
    
    # 6. チームAがアウェイのNS試合の最初の行を取得
    ns_away_row = df[(df["status"] == "NS") & (df["away_team"] == test_team)].iloc[0]
    
    print(f"\n--- {test_team} アウェイNS試合の検証 ---")
    
    # (C) アウェイ限定データ: 最終アウェイFT試合と一致するか
    away_only_match = (ns_away_row[fill_features_away].values[0] == latest_away_only_vals).all()
    print(f"アウェイ限定データ一致: {away_only_match}") 
    
    # (D) Overallデータ（アウェイ側カラム）: 最終FT試合のOverallと一致するか
    overall_match_away = (ns_away_row[fill_features_away_overall].values[0] == latest_overall_vals).all()
    print(f"Overallデータ一致: {overall_match_away}")

--- Liverpool ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Liverpool アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Aston Villa ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Aston Villa アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Brighton ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Brighton アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Sunderland ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Sunderland アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Tottenham ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Tottenham アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Wolves ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Wolves アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Nottingham Forest ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Nottingham Forest アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Chelsea ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一

In [20]:
for team in teams_2025:
    print("team",team)
    display(df.loc[(df["status"] == "FT") & (df["home_team"] == team), fill_features_home].tail(1))
    display(df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home].head(2))
    print()
    display(df.loc[(df["status"] == "FT") & (df["away_team"] == team), fill_features_away].tail(1))
    display(df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away].head(2))
    print()
    
    source_df = df[(df["status"] == "FT") & ((df["home_team"] == team) | (df["away_team"] == team))].sort_values("date")
    if source_df.empty:
        continue
    
    last_row = source_df.iloc[-1]

    # 2. 対象チームが最新試合でホームだったかアウェイだったかを判別
    team_was_home = last_row["home_team"] == team
    
    # 3. 最新試合の行から、対象チーム自身の統計値のみを1セット取得
    if team_was_home:
        # チームがホームの場合、統計値は home_overall カラムにある
        display(last_row[fill_features_home_overall])
        display(df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home_overall].head(2))
        display(df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away_overall].head(2))
    else:
        display(last_row[fill_features_away_overall])
        display(df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home_overall].head(2))
        display(df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away_overall].head(2))

team Liverpool


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1634,4,11,5,6,24,13,11,47,27,20


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1658,4,11,5,6,24,13,11,47,27,20
1671,4,11,5,6,24,13,11,47,27,20





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1648,2,8,-4,12,16,-5,21,44,8,36


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1666,2,8,-4,12,16,-5,21,44,8,36
1685,2,8,-4,12,16,-5,21,44,8,36





away_total_points                    18
away_team_recent_5_wins_overall       0
away_season_wins_ave_overall       50.0
Name: 1648, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1658,18,0,50.0
1671,18,0,50.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1666,18,0,50.0
1685,18,0,50.0


team Aston Villa


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1646,4,10,5,5,21,13,8,38,18,20


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1660,4,10,5,5,21,13,8,38,18,20
1688,4,10,5,5,21,13,8,38,18,20





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1637,1,3,-4,7,12,3,9,21,-11,32


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1654,1,3,-4,7,12,3,9,21,-11,32
1677,1,3,-4,7,12,3,9,21,-11,32





home_total_points                    21
home_team_recent_5_wins_overall       0
home_season_wins_ave_overall       50.0
Name: 1646, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1660,21,0,50.0
1688,21,0,50.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1654,21,0,50.0
1677,21,0,50.0


team Brighton


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1632,4,13,6,7,21,5,16,36,8,28


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1654,4,13,6,7,21,5,16,36,8,28
1667,4,13,6,7,21,5,16,36,8,28





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1647,1,7,-3,10,18,-1,19,36,-2,38


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1671,1,7,-3,10,18,-1,19,36,-2,38
1692,1,7,-3,10,18,-1,19,36,-2,38





away_total_points                     19
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       41.67
Name: 1647, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1654,19,0,41.67
1667,19,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1671,19,0,41.67
1692,19,0,41.67


team Sunderland


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1642,3,11,6,5,11,6,5,11,6,5


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1676,3,11,6,5,11,6,5,11,6,5
1698,3,11,6,5,11,6,5,11,6,5





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1633,2,3,-2,5,3,-2,5,3,-2,5


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1658,2,3,-2,5,3,-2,5,3,-2,5
1663,2,3,-2,5,3,-2,5,3,-2,5





home_total_points                     19
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       41.67
Name: 1642, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1676,19,0,41.67
1698,19,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1658,19,0,41.67
1663,19,0,41.67


team Tottenham


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1644,1,7,0,7,14,-4,18,31,-8,39


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1665,1,7,0,7,14,-4,18,31,-8,39
1685,1,7,0,7,14,-4,18,31,-8,39





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1638,4,12,7,5,16,-2,18,36,6,30


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1652,4,12,7,5,16,-2,18,36,6,30
1675,4,12,7,5,16,-2,18,36,6,30





home_total_points                     18
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       41.67
Name: 1644, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1665,18,0,41.67
1685,18,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1652,18,0,41.67
1675,18,0,41.67


team Wolves


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1635,0,7,-8,15,16,-4,20,29,-7,36


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1656,0,7,-8,15,16,-4,20,29,-7,36
1669,0,7,-8,15,16,-4,20,29,-7,36





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1646,0,1,-10,11,8,-10,18,23,-14,37


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1673,0,1,-10,11,8,-10,18,23,-14,37
1695,0,1,-10,11,8,-10,18,23,-14,37





away_total_points                    2
away_team_recent_5_wins_overall      0
away_season_wins_ave_overall       0.0
Name: 1646, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1656,2,0,0.0
1669,2,0,0.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1673,2,0,0.0
1695,2,0,0.0


team Nottingham Forest


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1647,2,8,-3,11,11,-6,17,31,7,24


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1675,2,8,-3,11,11,-6,17,31,7,24
1691,2,8,-3,11,11,-6,17,31,7,24





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1634,1,4,-6,10,15,-5,20,30,-7,37


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1656,1,4,-6,10,15,-5,20,30,-7,37
1662,1,4,-6,10,15,-5,20,30,-7,37





home_total_points                    12
home_team_recent_5_wins_overall       0
home_season_wins_ave_overall       25.0
Name: 1647, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1675,12,0,25.0
1691,12,0,25.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1656,12,0,25.0
1662,12,0,25.0


team Chelsea


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1649,3,9,3,6,17,8,9,38,20,18


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1670,3,9,3,6,17,8,9,38,20,18
1697,3,9,3,6,17,8,9,38,20,18





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1630,4,13,8,5,16,5,11,31,3,28


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1657,4,13,8,5,16,5,11,31,3,28
1661,4,13,8,5,16,5,11,31,3,28





home_total_points                     23
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       58.33
Name: 1649, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1670,23,0,58.33
1697,23,0,58.33


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1657,23,0,58.33
1661,23,0,58.33


team Manchester United


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1639,5,13,7,6,17,5,12,33,5,28


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1659,5,13,7,6,17,5,12,33,5,28
1679,5,13,7,6,17,5,12,33,5,28





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1645,1,8,-4,12,13,-10,23,25,-11,36


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1669,1,8,-4,12,13,-10,23,25,-11,36
1688,1,8,-4,12,13,-10,23,25,-11,36





away_total_points                     18
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       41.67
Name: 1645, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1659,18,0,41.67
1679,18,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1669,18,0,41.67
1688,18,0,41.67


team Leeds


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1637,2,7,-2,9,14,-10,24,26,-14,40


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1657,2,7,-2,9,14,-10,24,26,-14,40
1666,2,7,-2,9,14,-10,24,26,-14,40





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1641,1,4,-11,15,9,-21,30,21,-25,46


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1678,1,4,-11,15,9,-21,30,21,-25,46
1698,1,4,-11,15,9,-21,30,21,-25,46





away_total_points                    11
away_team_recent_5_wins_overall       0
away_season_wins_ave_overall       25.0
Name: 1641, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1657,11,0,25.0
1666,11,0,25.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1678,11,0,25.0
1698,11,0,25.0


team West Ham


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1648,2,8,-7,15,13,-9,22,25,-15,40


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1677,2,8,-7,15,13,-9,22,25,-15,40
1696,2,8,-7,15,13,-9,22,25,-15,40





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1631,2,8,-1,9,14,-2,16,24,-10,34


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1659,2,8,-1,9,14,-2,16,24,-10,34
1667,2,8,-1,9,14,-2,16,24,-10,34





home_total_points                    11
home_team_recent_5_wins_overall       0
home_season_wins_ave_overall       25.0
Name: 1648, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1677,11,0,25.0
1696,11,0,25.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1659,11,0,25.0
1667,11,0,25.0


team Manchester City


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1641,5,16,12,4,29,21,8,48,27,21


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1663,5,16,12,4,29,21,8,48,27,21
1683,5,16,12,4,29,21,8,48,27,21





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1636,3,9,5,4,12,7,5,30,7,23


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1651,3,9,5,4,12,7,5,30,7,23
1674,3,9,5,4,12,7,5,30,7,23





home_total_points                     22
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       58.33
Name: 1641, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1663,22,0,58.33
1683,22,0,58.33


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1651,22,0,58.33
1674,22,0,58.33


team Bournemouth


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1631,5,10,8,2,14,6,8,27,12,15


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1650,5,10,8,2,14,6,8,27,12,15
1661,5,10,8,2,14,6,8,27,12,15





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1642,1,9,-7,16,16,-8,24,40,1,39


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1679,1,9,-7,16,16,-8,24,40,1,39
1693,1,9,-7,16,16,-8,24,40,1,39





away_total_points                     19
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       41.67
Name: 1642, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1650,19,0,41.67
1661,19,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1679,19,0,41.67
1693,19,0,41.67


team Brentford


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1640,4,12,5,7,22,6,16,41,5,36


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1678,4,12,5,7,22,6,16,41,5,36
1693,4,12,5,7,22,6,16,41,5,36





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1632,1,6,-5,11,13,-2,15,29,4,25


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1653,1,6,-5,11,13,-2,15,29,4,25
1665,1,6,-5,11,13,-2,15,29,4,25





home_total_points                     16
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       41.67
Name: 1640, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1678,16,0,41.67
1693,16,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1653,16,0,41.67
1665,16,0,41.67


team Burnley


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1630,2,6,0,6,11,-4,15,21,-15,36


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1655,2,6,0,6,11,-4,15,21,-15,36
1672,2,6,0,6,11,-4,15,21,-15,36





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1640,1,9,-9,18,17,-8,25,28,-18,46


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1664,1,9,-9,18,17,-8,25,28,-18,46
1681,1,9,-9,18,17,-8,25,28,-18,46





away_total_points                    10
away_team_recent_5_wins_overall       0
away_season_wins_ave_overall       25.0
Name: 1640, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1655,10,0,25.0
1672,10,0,25.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1664,10,0,25.0
1681,10,0,25.0


team Arsenal


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1638,5,13,12,1,20,13,7,40,25,15


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1653,5,13,12,1,20,13,7,40,25,15
1673,5,13,12,1,20,13,7,40,25,15





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1649,4,8,4,4,18,9,9,37,20,17


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1660,4,8,4,4,18,9,9,37,20,17
1686,4,8,4,4,18,9,9,37,20,17





away_total_points                    29
away_team_recent_5_wins_overall       0
away_season_wins_ave_overall       75.0
Name: 1649, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1653,29,0,75.0
1673,29,0,75.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1660,29,0,75.0
1686,29,0,75.0


team Crystal Palace


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1645,2,8,3,5,16,7,9,30,4,26


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1674,2,8,3,5,16,7,9,30,4,26
1699,2,8,3,5,16,7,9,30,4,26





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1635,2,7,2,5,14,-4,18,30,6,24


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1655,2,7,2,5,14,-4,18,30,6,24
1668,2,7,2,5,14,-4,18,30,6,24





home_total_points                     20
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       41.67
Name: 1645, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1674,20,0,41.67
1699,20,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1655,20,0,41.67
1668,20,0,41.67


team Everton


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1643,3,7,2,5,13,2,11,29,8,21


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1662,3,7,2,5,13,2,11,29,8,21
1686,3,7,2,5,13,2,11,29,8,21





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1639,2,6,-2,8,11,-1,12,18,-3,21


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1650,2,6,-2,8,11,-1,12,18,-3,21
1670,2,6,-2,8,11,-1,12,18,-3,21





home_total_points                     18
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       41.67
Name: 1643, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1662,18,0,41.67
1686,18,0,41.67


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1650,18,0,41.67
1670,18,0,41.67


team Fulham


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1633,3,8,3,5,15,1,14,29,-1,30


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1651,3,8,3,5,15,1,14,29,-1,30
1668,3,8,3,5,15,1,14,29,-1,30





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1644,0,4,-9,13,10,-10,20,27,-5,32


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1672,0,4,-9,13,10,-10,20,27,-5,32
1696,0,4,-9,13,10,-10,20,27,-5,32





away_total_points                     14
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       33.33
Name: 1644, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1651,14,0,33.33
1668,14,0,33.33


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1672,14,0,33.33
1696,14,0,33.33


team Newcastle


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1636,3,8,1,7,24,15,9,44,20,24


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1652,3,8,1,7,24,15,9,44,20,24
1664,3,8,1,7,24,15,9,44,20,24





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1643,0,3,-5,8,9,-5,14,27,-3,30


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1676,0,3,-5,8,9,-5,14,27,-3,30
1690,0,3,-5,8,9,-5,14,27,-3,30





away_total_points                     15
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       33.33
Name: 1643, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1652,15,0,33.33
1664,15,0,33.33


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1676,15,0,33.33
1690,15,0,33.33


In [21]:
# 一時的なカラムを削除
df.drop(columns=['home_goal_difference', 'away_goal_difference', 'is_home_win', 'is_away_win', 'match_id'], inplace=True, errors='ignore')


#　データセット作成

In [22]:
# statusがFTの試合を除外して学習データを作成
train_df = df[df["status"] == "FT"].copy().reset_index(drop=True)
# 予測対象データ（statusがNSの試合）を抽出
predict_df = df[df["status"] == "NS"].copy().reset_index(drop=True)

# モデル学習に使用する特徴量の設定 & 説明変数と目的変数にデータを分割

In [23]:
#現在保有している特徴量名を確認
train_df.columns

Index(['fixture_id', 'date', 'season', 'home_team', 'away_team', 'home_score',
       'away_score', 'status', 'home_shots_on_goal', 'home_possession',
       'home_passes', 'home_passes_accuracy', 'home_fouls', 'home_corners',
       'home_yellow_cards', 'home_red_cards', 'away_shots_on_goal',
       'away_possession', 'away_passes', 'away_passes_accuracy', 'away_fouls',
       'away_corners', 'away_yellow_cards', 'away_red_cards',
       'home_last_position', 'home_last_won', 'home_last_drawn',
       'home_last_lost', 'home_last_gf', 'home_last_ga', 'home_last_gd',
       'home_last_points', 'away_last_position', 'away_last_won',
       'away_last_drawn', 'away_last_lost', 'away_last_gf', 'away_last_ga',
       'away_last_gd', 'away_last_points', 'target', 'home_team_recent_5_wins',
       'away_team_recent_5_wins', 'home_recent_5_scores',
       'away_recent_5_scores', 'home_recent_5_goal_against',
       'away_recent_5_goal_against', 'home_recent_5_goal_diff',
       'away_recent_5

In [24]:
# features = ["home_team","away_team",'home_season_wins_ave_overall',
#            'away_season_wins_ave_overall','home_last_points','away_last_points']
features = ["home_team","away_team",'home_season_wins_ave_overall',
            'away_season_wins_ave_overall','home_last_points','away_last_points',
            'home_last_gd','away_last_gd', 'home_recent_10_goal_diff','away_recent_10_goal_diff','points_difference']
target = "target"

In [25]:
x_all = train_df[features]
y_all = train_df[target]

print(x_all.shape)
print(y_all.shape)

(1650, 11)
(1650,)


# 学習データと検証データの期間設定


In [26]:
# --------------------------------------------------------------------------------
# 動的folds生成関数 
# --------------------------------------------------------------------------------
def generate_dynamic_folds(end_date_str, n_folds=3, val_period_days=30, gap_days=10):
    """
    現在の実行日を基準に、バックテスティング用のfoldsを動的に生成する
    """
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    folds = []
    
    for i in range(n_folds):
        val_end = end_date - dt.timedelta(days=i * gap_days)
        val_start = val_end - dt.timedelta(days=val_period_days)
        train_end = val_start - dt.timedelta(days=1)
        
        folds.append({
            "train_end": train_end.strftime('%Y-%m-%d'),
            "val_start": val_start.strftime('%Y-%m-%d'),
            "val_end": val_end.strftime('%Y-%m-%d'),
        })
        
    print("動的に生成されたfolds:")
    for fold in folds:
        print(f"  Train End: {fold['train_end']}, Val Period: {fold['val_start']} ~ {fold['val_end']}")
        
    return folds

In [27]:
# 4. 動的foldsの生成
# 最新の結果が出ている試合の日付を基準にする
latest_match_date = train_df['date'].max()
print(latest_match_date)
folds = generate_dynamic_folds(
    end_date_str=latest_match_date.strftime('%Y-%m-%d'), 
    n_folds=3, 
    val_period_days=30,
    gap_days=10
)


2025-11-30 16:30:00
動的に生成されたfolds:
  Train End: 2025-10-30, Val Period: 2025-10-31 ~ 2025-11-30
  Train End: 2025-10-20, Val Period: 2025-10-21 ~ 2025-11-20
  Train End: 2025-10-10, Val Period: 2025-10-11 ~ 2025-11-10


# モデル学習&評価関数

In [28]:
#ハイパーパラメータの設定
# params = {
#     "n_estimators":1000,
#     "learning_rate":0.05,
#     "num_leaves":32
# }

In [29]:
#訓練データと検証データのindex作成
# foldsの中から、今回設定した範囲を取り出し、その範囲に入っているかどうかを判断し、その範囲内のデータのみを訓練データと検証データとしていく。
#これを3周する


def train_lgb(original_df,
              input_x,
              input_y,
              params,
              list_nfold=[0,1,2],
              folds=folds,
              save_model=True,
              ):

    #推論値を格納する変数の作成
    df_valid_pred = pd.DataFrame()
    #評価値を入れる変数の作成
    metrics_tr = [] #学習データ用
    metrics_val = [] #検証データ用
    #重要度を格納するデータフレームの作成
    df_imp = pd.DataFrame()
    #レポートを入れる変数の作成
    report_tr_list = []
    report_val_list = []

    # 'H', 'D', 'A' のラベルを数値 (0, 1, 2) に変換
    input_y_factorized, target_labels = pd.factorize(input_y)

    print(f"ターゲットラベルの順序: {target_labels}")

    

    for i,nfold in enumerate(list_nfold):
        print("fold:",i)
        # 学習用
        train_idx = original_df["date"] <= folds[nfold]["train_end"]
        x_tr = input_x[train_idx]
        y_tr = input_y_factorized[train_idx]
        
        # 検証用
        val_idx = (original_df["date"] >= folds[nfold]["val_start"]) & (original_df["date"] <= folds[nfold]["val_end"])
        x_val = input_x[val_idx]
        y_val = input_y_factorized[val_idx]

        # LightGBM モデル
        model = lgb.LGBMClassifier(**params)
        
        
        # モデルの訓練
        model.fit(
            x_tr, y_tr,
            eval_set=[(x_val, y_val)],
            eval_metric="multi_logloss",
            callbacks=[
            early_stopping(stopping_rounds=50),  # 早期停止
            log_evaluation(50)                   # 50回ごとにログ表示
            ]
            )

        if save_model:
            #保存するモデルのファイル名
            filename = "model_lgb_fold{}.pickle".format(nfold)
    
            # モデルの保存
            final_model_path = os.path.join(MODEL_DIR, filename)
            os.makedirs(MODEL_DIR, exist_ok=True)
            with open(final_model_path, 'wb') as f:
                pickle.dump(model, f)
                
            print(f"最終モデルを {final_model_path} に保存しました。")




#-------------------学習データの評価のコード-----------------------

        acc_tr,ll_tr,f1_macro_tr,f1_weighted_tr,report_tr,y_pred_tr,y_pred_proba_tr = evaluate_model(model,x_tr,y_tr,target_labels)    
        
        print(f"Fold {i+1} ACC: {acc_tr:.4f}, LogLoss: {ll_tr:.4f}, F1(macro): {f1_macro_tr:.4f}, F1(weighted): {f1_weighted_tr:.4f}")

        #検証スコアを各foldごとに格納 #あとでデータフレーム型に変更
        metrics_tr.append({
            "nfold": nfold,
            "accuracy": acc_tr,
            "log_loss": ll_tr,
            "f1_macro": f1_macro_tr,
            "f1_weighted": f1_weighted_tr,
            # per-class f1 を追加
            "f1_A": report_tr["A"]["f1-score"] if "A" in report_tr else None,
            "f1_D": report_tr["D"]["f1-score"] if "D" in report_tr else None,
            "f1_H": report_tr["H"]["f1-score"] if "H" in report_tr else None,
        })

        report_tr_list.append({
            "fold": nfold,
            "report": report_tr
            })


        
#----------------------以下、検証データの評価のコード---------------------------------

        acc_val,ll_val,f1_macro_val,f1_weighted_val,report_val,y_pred_val,y_pred_proba_val = evaluate_model(model,x_val,y_val,target_labels)    

        print(f"Fold {i+1} ACC: {acc_val:.4f}, LogLoss: {ll_val:.4f}, F1(macro): {f1_macro_val:.4f}, F1(weighted): {f1_weighted_val:.4f}")

        #検証スコアを各foldごとに格納 #あとでデータフレーム型に変更
        metrics_val.append({
            "nfold": nfold,
            "accuracy": acc_val,
            "log_loss": ll_val,
            "f1_macro": f1_macro_val,
            "f1_weighted": f1_weighted_val,
            # per-class f1 を追加
            "f1_A": report_val["A"]["f1-score"] if "A" in report_val else None,
            "f1_D": report_val["D"]["f1-score"] if "D" in report_val else None,
            "f1_H": report_val["H"]["f1-score"] if "H" in report_val else None,
        })

        report_val_list.append({
            "fold": nfold,
            "report": report_val
            })



#----------------検証データの予測値の格納と特徴量重要度の格納---------------------



        #各foldごとに実際の値と予測値を格納したDataFrameを作成
        
        df_pred = pd.DataFrame({
            "nfold": [nfold] * len(y_val),
            "true": y_val,
            "pred": y_pred_val,
            "prob_A": y_pred_proba_val[:, 0],
            "prob_D": y_pred_proba_val[:, 1],
            "prob_H": y_pred_proba_val[:, 2]
        })


        # df_pred = pd.DataFrame({"nfold":nfold,"true":y_val,"pred_proba":y_pred_proba,"pred":y_pred})
        df_valid_pred = pd.concat([df_valid_pred,df_pred],axis = 0,ignore_index=True)
        
        #各foldごとに各変数の重要度を格納したデータフレームを作成
        tmp_imp = pd.DataFrame({"nfold":nfold,"col":x_tr.columns,"imp":model.feature_importances_})
        df_imp = pd.concat([df_imp,tmp_imp])

#------------------------最終処理---------------------------
    
    print("-"*10,"result","-"*10)
    
    #各foldごとの評価値をデータフレームに格納
    df_metrics_tr = pd.DataFrame(metrics_tr,columns = ["nfold","accuracy","log_loss","f1_macro","f1_weighted","f1_A","f1_D","f1_H"]) #学習データ評価値
    df_metrics_val = pd.DataFrame(metrics_val,columns = ["nfold","accuracy","log_loss","f1_macro","f1_weighted","f1_A","f1_D","f1_H"]) #検証データ評価値

    print("-"*10,"学習データ","-"*10)
    print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_tr['accuracy'].mean()))
    print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_tr['log_loss'].mean()))
    print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_tr['f1_macro'].mean()))
    print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_tr['f1_weighted'].mean()))
    print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_tr['f1_A'].mean()))
    print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_tr['f1_D'].mean()))
    print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_tr['f1_H'].mean()))
    print()
    print("-"*10,"検証データ","-"*10)
    print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_val['accuracy'].mean()))
    print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_val['log_loss'].mean()))
    print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_val['f1_macro'].mean()))
    print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_val['f1_weighted'].mean()))
    print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_val['f1_A'].mean()))
    print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_val['f1_D'].mean()))
    print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_val['f1_H'].mean()))


    return df_valid_pred,df_imp,df_metrics_tr,df_metrics_val,report_tr_list,report_val_list,target_labels

In [30]:
def evaluate_model(model, X, y,target_labels):
    y_pred_proba = model.predict_proba(X)
    pred_idx = np.argmax(y_pred_proba, axis=1)
    # y_pred = model.classes_[pred_idx]
    y_pred = pred_idx

    # ロス計算 (ラベルが factorize された数値であることを前提)
    try:
        # yのユニークな値の順序をlabelsとして指定
        unique_y = np.unique(y)
        ll = log_loss(y, y_pred_proba, labels=unique_y) 
    except ValueError:
        ll = np.nan
    
    acc = accuracy_score(y, y_pred)
    # ll = log_loss(y, y_pred_proba, labels=model.classes_)
    f1_macro = f1_score(y, y_pred, average="macro")
    f1_weighted = f1_score(y, y_pred, average="weighted")
    report = classification_report(
        y, y_pred, 
        labels=[0,1,2],
        target_names=target_labels,
        output_dict=True
    )

    return acc, ll, f1_macro, f1_weighted, report, y_pred, y_pred_proba

In [31]:
import optuna

def objective(trial):

    # Optuna が探索するパラメータ
    params = {
      "objective": "multiclass",
      "num_class": 3,
      "class_weight": "balanced",
      "n_estimators": trial.suggest_int("n_estimators", 300, 1500),
      "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),

      # 木構造
      "num_leaves": trial.suggest_int("num_leaves", 16, 200),
      "max_depth": trial.suggest_int("max_depth", -1, 16),

      # 正則化
      "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 200),
      "lambda_l1": trial.suggest_float("lambda_l1", 0.0, 5.0),
      "lambda_l2": trial.suggest_float("lambda_l2", 0.0, 5.0),

      # ランダム性（過学習抑制）
      "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
      "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
      "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),

      # 追加（高速化・安定性）
      "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0.0, 1.5),
  }


    # 1回の trial で全 fold を実行
    df_valid_pred, df_imp, df_metrics_tr, df_metrics_val, _, _, target_labels = train_lgb(
        train_df,
        x_all,
        y_all,
        params,
        list_nfold=[0,1,2],     # 時系列 fold 全て使う
        folds=folds,
        save_model=False
    )

    # 検証スコア（例：f1_macro）で最適化
    score = df_metrics_val['f1_macro'].mean()

    return score


  from .autonotebook import tqdm as notebook_tqdm


In [32]:
study = optuna.create_study(direction="maximize")  # f1 を最大化
study.optimize(objective, n_trials=30)  # 試行回数は適宜調整

[I 2025-12-04 13:17:04,949] A new study created in memory with name: no-name-d7dba2c5-4edf-49c1-b2c2-2774ed1af833


ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Training until validation scores don't improve for 50 rounds
[50]	valid_0's multi_logloss: 1.00239
[100]	valid_0's multi_logloss: 0.971622
[150]	valid_0's multi_logloss: 0.957253
[200]	valid_0's multi_logloss: 0.93571
[250]	valid_0's multi_logloss: 0.918764
[300]	valid_0's multi_logloss: 0.909857
[350]	valid_0's multi_logloss: 0.906064
[400]	valid_0's multi_logloss: 0.

[I 2025-12-04 13:17:17,509] Trial 0 finished with value: 0.5919074642740854 and parameters: {'n_estimators': 933, 'learning_rate': 0.07481036315398612, 'num_leaves': 59, 'max_depth': 1, 'min_data_in_leaf': 129, 'lambda_l1': 1.838044207778804, 'lambda_l2': 0.23140190125003957, 'feature_fraction': 0.8118715395013658, 'bagging_fraction': 0.8954738984182926, 'bagging_freq': 1, 'min_gain_to_split': 0.013725200230663037}. Best is trial 0 with value: 0.5919074642740854.


[300]	valid_0's multi_logloss: 0.987496
Early stopping, best iteration is:
[287]	valid_0's multi_logloss: 0.98549
Fold 3 ACC: 0.5654, LogLoss: 0.9294, F1(macro): 0.5429, F1(weighted): 0.5693
Fold 3 ACC: 0.5750, LogLoss: 0.9855, F1(macro): 0.5406, F1(weighted): 0.5863
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.56
各foldのlog_lossの平均:0.93
各foldのf1_macroの平均:0.54
各foldのf1_weightedの平均:0.57
各foldのf1_Hの平均:0.59
各foldのf1_Aの平均:0.39
各foldのf1_Dの平均:0.64

---------- 検証データ ----------
各foldのaccuracyの平均:0.64
各foldのlog_lossの平均:0.96
各foldのf1_macroの平均:0.59
各foldのf1_weightedの平均:0.65
各foldのf1_Hの平均:0.57
各foldのf1_Aの平均:0.48
各foldのf1_Dの平均:0.73
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of dat

[I 2025-12-04 13:17:18,037] Trial 1 finished with value: 0.5250773917440584 and parameters: {'n_estimators': 995, 'learning_rate': 0.09457612531144871, 'num_leaves': 68, 'max_depth': 13, 'min_data_in_leaf': 30, 'lambda_l1': 3.397827639121631, 'lambda_l2': 3.92786840980049, 'feature_fraction': 0.6112896355401433, 'bagging_fraction': 0.7232455946939622, 'bagging_freq': 1, 'min_gain_to_split': 0.9387777405306414}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6000, LogLoss: 1.0098, F1(macro): 0.5633, F1(weighted): 0.6230
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.63
各foldのlog_lossの平均:0.86
各foldのf1_macroの平均:0.62
各foldのf1_weightedの平均:0.64
各foldのf1_Hの平均:0.66
各foldのf1_Aの平均:0.52
各foldのf1_Dの平均:0.68

---------- 検証データ ----------
各foldのaccuracyの平均:0.59
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.49
各foldのf1_Aの平均:0.37
各foldのf1_Dの平均:0.71
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star

[I 2025-12-04 13:17:18,461] Trial 2 finished with value: 0.47701409953551943 and parameters: {'n_estimators': 338, 'learning_rate': 0.1786223647842561, 'num_leaves': 53, 'max_depth': 1, 'min_data_in_leaf': 171, 'lambda_l1': 2.660923126725827, 'lambda_l2': 0.6347084446084744, 'feature_fraction': 0.9295454869579313, 'bagging_fraction': 0.9956751123140872, 'bagging_freq': 7, 'min_gain_to_split': 0.7670533681195453}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5560, LogLoss: 0.9474, F1(macro): 0.5306, F1(weighted): 0.5591
Fold 3 ACC: 0.5500, LogLoss: 0.9944, F1(macro): 0.5019, F1(weighted): 0.5643
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.56
各foldのlog_lossの平均:0.95
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.56
各foldのf1_Hの平均:0.60
各foldのf1_Aの平均:0.36
各foldのf1_Dの平均:0.64

---------- 検証データ ----------
各foldのaccuracyの平均:0.54
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.48
各foldのf1_weightedの平均:0.56
各foldのf1_Hの平均:0.43
各foldのf1_Aの平均:0.34
各foldのf1_Dの平均:0.66
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:19,022] Trial 3 finished with value: 0.518207263821299 and parameters: {'n_estimators': 716, 'learning_rate': 0.15936714484161693, 'num_leaves': 30, 'max_depth': 4, 'min_data_in_leaf': 141, 'lambda_l1': 0.34451697608897824, 'lambda_l2': 3.6756311731320364, 'feature_fraction': 0.76556267848845, 'bagging_fraction': 0.8471999418601522, 'bagging_freq': 3, 'min_gain_to_split': 0.8384547399919018}. Best is trial 0 with value: 0.5919074642740854.


Fold 2 ACC: 0.6000, LogLoss: 0.9799, F1(macro): 0.4830, F1(weighted): 0.6304
fold: 2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1590, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Training until validation scores don't improve for 50 rounds
[50]	valid_0's multi_logloss: 1.03544
Early stopping, best iteration is:
[32]	valid_0's multi_logloss: 1.01875
Fold 3 ACC: 0.6226, LogLoss: 0.8626, F1(macro): 0.6075, F1(weighted): 0.6255
Fold 3 ACC: 0.5500, LogLoss: 1.0188, F1(macro): 0.5273, F1(weighted): 0.5688
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.66
各foldのlog_lossの平均:0.81
各foldのf1_macroの平均:0.65
各foldのf1_weigh

[I 2025-12-04 13:17:21,076] Trial 4 finished with value: 0.5005792571902971 and parameters: {'n_estimators': 615, 'learning_rate': 0.018729265317845842, 'num_leaves': 38, 'max_depth': 9, 'min_data_in_leaf': 162, 'lambda_l1': 0.28363728605173943, 'lambda_l2': 0.6818632122053842, 'feature_fraction': 0.7075214095469469, 'bagging_fraction': 0.847193045806928, 'bagging_freq': 5, 'min_gain_to_split': 0.7289557036520999}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6201, LogLoss: 0.8839, F1(macro): 0.6045, F1(weighted): 0.6234
Fold 3 ACC: 0.5500, LogLoss: 1.0170, F1(macro): 0.4975, F1(weighted): 0.5788
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.64
各foldのlog_lossの平均:0.85
各foldのf1_macroの平均:0.63
各foldのf1_weightedの平均:0.65
各foldのf1_Hの平均:0.67
各foldのf1_Aの平均:0.53
各foldのf1_Dの平均:0.69

---------- 検証データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.50
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.44
各foldのf1_Aの平均:0.33
各foldのf1_Dの平均:0.73
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000837 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:21,776] Trial 5 finished with value: 0.5007150437692907 and parameters: {'n_estimators': 780, 'learning_rate': 0.13629262154380387, 'num_leaves': 45, 'max_depth': 13, 'min_data_in_leaf': 101, 'lambda_l1': 4.421183678745673, 'lambda_l2': 3.6777180968799605, 'feature_fraction': 0.627840855658992, 'bagging_fraction': 0.7396372640837051, 'bagging_freq': 4, 'min_gain_to_split': 1.018814029758019}. Best is trial 0 with value: 0.5919074642740854.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000297 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1590, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Training until validation scores don't improve for 50 rounds
[50]	valid_0's multi_logloss: 1.04849
Early stopping, best iteration is:
[19]	valid_0's multi_logloss: 1.02978
Fold 3 ACC: 0.5698, LogLoss: 0.9430, F1(macro): 0.5452, F1(weighted): 0.5710
Fold 3 ACC: 0.5500, LogLoss: 1.0298, F1(macro): 0.5082, F1(weighted): 0.5734
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.59
各foldのlog_lossの平均:0.90
各foldのf1_macroの平均:0.58
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.63
各foldのf1_Aの平均:0.45
各foldのf1_Dの平均:0.65

---------- 検証デー

[I 2025-12-04 13:17:22,317] Trial 6 finished with value: 0.4923300464967131 and parameters: {'n_estimators': 817, 'learning_rate': 0.1458625787746288, 'num_leaves': 125, 'max_depth': 16, 'min_data_in_leaf': 159, 'lambda_l1': 3.725898252773927, 'lambda_l2': 4.763524473917006, 'feature_fraction': 0.941360603765993, 'bagging_fraction': 0.6707145894199307, 'bagging_freq': 2, 'min_gain_to_split': 1.194171883516662}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5774, LogLoss: 0.9432, F1(macro): 0.5541, F1(weighted): 0.5796
Fold 3 ACC: 0.5750, LogLoss: 1.0235, F1(macro): 0.5466, F1(weighted): 0.5930
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.93
各foldのf1_macroの平均:0.56
各foldのf1_weightedの平均:0.58
各foldのf1_Hの平均:0.61
各foldのf1_Aの平均:0.42
各foldのf1_Dの平均:0.65

---------- 検証データ ----------
各foldのaccuracyの平均:0.54
各foldのlog_lossの平均:1.01
各foldのf1_macroの平均:0.49
各foldのf1_weightedの平均:0.57
各foldのf1_Hの平均:0.47
各foldのf1_Aの平均:0.35
各foldのf1_Dの平均:0.66
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000510 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:23,099] Trial 7 finished with value: 0.5387017378887298 and parameters: {'n_estimators': 831, 'learning_rate': 0.08129626224820281, 'num_leaves': 30, 'max_depth': 13, 'min_data_in_leaf': 15, 'lambda_l1': 1.8391774017097884, 'lambda_l2': 4.169426485606023, 'feature_fraction': 0.7349133621345694, 'bagging_fraction': 0.6964953154121584, 'bagging_freq': 6, 'min_gain_to_split': 0.9245414560474159}. Best is trial 0 with value: 0.5919074642740854.


---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.64
各foldのlog_lossの平均:0.86
各foldのf1_macroの平均:0.63
各foldのf1_weightedの平均:0.64
各foldのf1_Hの平均:0.66
各foldのf1_Aの平均:0.53
各foldのf1_Dの平均:0.69

---------- 検証データ ----------
各foldのaccuracyの平均:0.59
各foldのlog_lossの平均:0.97
各foldのf1_macroの平均:0.54
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.47
各foldのf1_Aの平均:0.45
各foldのf1_Dの平均:0.70
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000405 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Training until validation scores don't improv

[I 2025-12-04 13:17:24,224] Trial 8 finished with value: 0.4999495538969223 and parameters: {'n_estimators': 858, 'learning_rate': 0.07849415371743157, 'num_leaves': 112, 'max_depth': 5, 'min_data_in_leaf': 21, 'lambda_l1': 4.838983966355478, 'lambda_l2': 1.4350813413796932, 'feature_fraction': 0.8254123585053612, 'bagging_fraction': 0.6710369486812138, 'bagging_freq': 1, 'min_gain_to_split': 0.650308760036945}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5956, LogLoss: 0.9148, F1(macro): 0.5777, F1(weighted): 0.5978
Fold 3 ACC: 0.5500, LogLoss: 1.0132, F1(macro): 0.5273, F1(weighted): 0.5688
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.63
各foldのlog_lossの平均:0.87
各foldのf1_macroの平均:0.61
各foldのf1_weightedの平均:0.63
各foldのf1_Hの平均:0.65
各foldのf1_Aの平均:0.51
各foldのf1_Dの平均:0.68

---------- 検証データ ----------
各foldのaccuracyの平均:0.55
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.50
各foldのf1_weightedの平均:0.57
各foldのf1_Hの平均:0.45
各foldのf1_Aの平均:0.38
各foldのf1_Dの平均:0.67
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:26,540] Trial 9 finished with value: 0.5014440086553656 and parameters: {'n_estimators': 1366, 'learning_rate': 0.018075487935226887, 'num_leaves': 142, 'max_depth': 0, 'min_data_in_leaf': 48, 'lambda_l1': 1.801242871317228, 'lambda_l2': 3.099181161648546, 'feature_fraction': 0.9953124975068728, 'bagging_fraction': 0.7478418641048276, 'bagging_freq': 6, 'min_gain_to_split': 0.5660474989881483}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6610, LogLoss: 0.8483, F1(macro): 0.6491, F1(weighted): 0.6629
Fold 3 ACC: 0.5500, LogLoss: 1.0013, F1(macro): 0.5184, F1(weighted): 0.5686
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.68
各foldのlog_lossの平均:0.81
各foldのf1_macroの平均:0.67
各foldのf1_weightedの平均:0.69
各foldのf1_Hの平均:0.70
各foldのf1_Aの平均:0.60
各foldのf1_Dの平均:0.72

---------- 検証データ ----------
各foldのaccuracyの平均:0.57
各foldのlog_lossの平均:0.97
各foldのf1_macroの平均:0.50
各foldのf1_weightedの平均:0.59
各foldのf1_Hの平均:0.44
各foldのf1_Aの平均:0.36
各foldのf1_Dの平均:0.70
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000716 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:29,244] Trial 10 finished with value: 0.4989621442002823 and parameters: {'n_estimators': 1189, 'learning_rate': 0.036173536225817655, 'num_leaves': 189, 'max_depth': -1, 'min_data_in_leaf': 94, 'lambda_l1': 1.3198156343787832, 'lambda_l2': 2.1018903039446455, 'feature_fraction': 0.8473088537784217, 'bagging_fraction': 0.9648923734725549, 'bagging_freq': 3, 'min_gain_to_split': 0.042975592796927996}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.8006, LogLoss: 0.6490, F1(macro): 0.7976, F1(weighted): 0.8012
Fold 3 ACC: 0.5500, LogLoss: 0.9767, F1(macro): 0.5014, F1(weighted): 0.5686
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.74
各foldのlog_lossの平均:0.75
各foldのf1_macroの平均:0.73
各foldのf1_weightedの平均:0.74
各foldのf1_Hの平均:0.74
各foldのf1_Aの平均:0.69
各foldのf1_Dの平均:0.77

---------- 検証データ ----------
各foldのaccuracyの平均:0.56
各foldのlog_lossの平均:0.96
各foldのf1_macroの平均:0.50
各foldのf1_weightedの平均:0.59
各foldのf1_Hの平均:0.44
各foldのf1_Aの平均:0.36
各foldのf1_Dの平均:0.69
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000595 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:30,751] Trial 11 finished with value: 0.4862983503334381 and parameters: {'n_estimators': 1053, 'learning_rate': 0.04929720717866405, 'num_leaves': 81, 'max_depth': 9, 'min_data_in_leaf': 68, 'lambda_l1': 1.8316549073977342, 'lambda_l2': 0.009510747268166142, 'feature_fraction': 0.7229736951841054, 'bagging_fraction': 0.9076080364983357, 'bagging_freq': 7, 'min_gain_to_split': 1.493902037253525}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6226, LogLoss: 0.8818, F1(macro): 0.6043, F1(weighted): 0.6247
Fold 3 ACC: 0.4750, LogLoss: 1.0137, F1(macro): 0.4625, F1(weighted): 0.4866
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.62
各foldのlog_lossの平均:0.90
各foldのf1_macroの平均:0.60
各foldのf1_weightedの平均:0.62
各foldのf1_Hの平均:0.64
各foldのf1_Aの平均:0.48
各foldのf1_Dの平均:0.67

---------- 検証データ ----------
各foldのaccuracyの平均:0.54
各foldのlog_lossの平均:0.99
各foldのf1_macroの平均:0.49
各foldのf1_weightedの平均:0.56
各foldのf1_Hの平均:0.47
各foldのf1_Aの平均:0.34
各foldのf1_Dの平均:0.65
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:33,072] Trial 12 finished with value: 0.528143792592361 and parameters: {'n_estimators': 516, 'learning_rate': 0.05348988097996933, 'num_leaves': 21, 'max_depth': 6, 'min_data_in_leaf': 108, 'lambda_l1': 2.5435815681336416, 'lambda_l2': 4.843471723424033, 'feature_fraction': 0.7723976703713757, 'bagging_fraction': 0.6033432656954117, 'bagging_freq': 5, 'min_gain_to_split': 0.23654729317665402}. Best is trial 0 with value: 0.5919074642740854.


ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000534 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Training until validation scores don't improve for 50 rounds
[50]	valid_0's multi_logloss: 0.985426
[100]	valid_0's multi_logloss: 0.964801
[150]	valid_0's multi_logloss: 0.948827
[200]	valid_0's multi_logloss: 0.944571
[250]	valid_0's multi_logloss: 0.925252
Early stopping, best iteration is:
[247]	valid_0's multi_logloss: 0.923503
Fold 1 ACC: 0.6329, LogLoss: 0.8541, F1(macro): 0.6183, F1(weighted): 0.6353
Fold 1 ACC: 0.6286, Log

[I 2025-12-04 13:17:34,856] Trial 13 finished with value: 0.5337390798238038 and parameters: {'n_estimators': 1178, 'learning_rate': 0.030583628023402566, 'num_leaves': 87, 'max_depth': 12, 'min_data_in_leaf': 197, 'lambda_l1': 1.1841887678803953, 'lambda_l2': 2.379656903334682, 'feature_fraction': 0.6962785876324793, 'bagging_fraction': 0.8312926058564298, 'bagging_freq': 6, 'min_gain_to_split': 0.38878675648272243}. Best is trial 0 with value: 0.5919074642740854.


Early stopping, best iteration is:
[97]	valid_0's multi_logloss: 1.00871
Fold 3 ACC: 0.5868, LogLoss: 0.9224, F1(macro): 0.5652, F1(weighted): 0.5890
Fold 3 ACC: 0.6000, LogLoss: 1.0087, F1(macro): 0.5663, F1(weighted): 0.6193
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.61
各foldのlog_lossの平均:0.89
各foldのf1_macroの平均:0.59
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.63
各foldのf1_Aの平均:0.47
各foldのf1_Dの平均:0.67

---------- 検証データ ----------
各foldのaccuracyの平均:0.60
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.62
各foldのf1_Hの平均:0.49
各foldのf1_Aの平均:0.39
各foldのf1_Dの平均:0.72
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000422 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number o

[I 2025-12-04 13:17:35,590] Trial 14 finished with value: 0.4660642914038499 and parameters: {'n_estimators': 1002, 'learning_rate': 0.0825857367378062, 'num_leaves': 17, 'max_depth': 3, 'min_data_in_leaf': 128, 'lambda_l1': 2.0445411980571206, 'lambda_l2': 1.5902499359029427, 'feature_fraction': 0.870847955872316, 'bagging_fraction': 0.9107407146961758, 'bagging_freq': 4, 'min_gain_to_split': 1.208355059285207}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.4750, LogLoss: 1.0386, F1(macro): 0.4521, F1(weighted): 0.4967
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.59
各foldのlog_lossの平均:0.92
各foldのf1_macroの平均:0.58
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.63
各foldのf1_Aの平均:0.45
各foldのf1_Dの平均:0.65

---------- 検証データ ----------
各foldのaccuracyの平均:0.52
各foldのlog_lossの平均:1.02
各foldのf1_macroの平均:0.47
各foldのf1_weightedの平均:0.54
各foldのf1_Hの平均:0.46
各foldのf1_Aの平均:0.31
各foldのf1_Dの平均:0.63
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000532 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star

[I 2025-12-04 13:17:41,045] Trial 15 finished with value: 0.5202266702266702 and parameters: {'n_estimators': 1481, 'learning_rate': 0.010267807175573447, 'num_leaves': 62, 'max_depth': 8, 'min_data_in_leaf': 66, 'lambda_l1': 0.982308040639561, 'lambda_l2': 2.9602895133820777, 'feature_fraction': 0.7907917045568261, 'bagging_fraction': 0.7814878726052104, 'bagging_freq': 2, 'min_gain_to_split': 0.0261053286451469}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5750, LogLoss: 1.0144, F1(macro): 0.5466, F1(weighted): 0.5930
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.71
各foldのlog_lossの平均:0.79
各foldのf1_macroの平均:0.70
各foldのf1_weightedの平均:0.71
各foldのf1_Hの平均:0.72
各foldのf1_Aの平均:0.65
各foldのf1_Dの平均:0.74

---------- 検証データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.97
各foldのf1_macroの平均:0.52
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.47
各foldのf1_Aの平均:0.39
各foldのf1_Dの平均:0.70
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000465 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star

[I 2025-12-04 13:17:42,749] Trial 16 finished with value: 0.5332704744343598 and parameters: {'n_estimators': 496, 'learning_rate': 0.06413679093812895, 'num_leaves': 90, 'max_depth': 16, 'min_data_in_leaf': 11, 'lambda_l1': 3.1908133237139102, 'lambda_l2': 4.301020714461723, 'feature_fraction': 0.6674109231845994, 'bagging_fraction': 0.9117535005450262, 'bagging_freq': 5, 'min_gain_to_split': 0.4147712045824272}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5500, LogLoss: 1.0036, F1(macro): 0.5183, F1(weighted): 0.5662
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.70
各foldのlog_lossの平均:0.78
各foldのf1_macroの平均:0.69
各foldのf1_weightedの平均:0.70
各foldのf1_Hの平均:0.72
各foldのf1_Aの平均:0.63
各foldのf1_Dの平均:0.72

---------- 検証データ ----------
各foldのaccuracyの平均:0.60
各foldのlog_lossの平均:0.97
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.62
各foldのf1_Hの平均:0.44
各foldのf1_Aの平均:0.44
各foldのf1_Dの平均:0.73
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000638 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star

[I 2025-12-04 13:17:44,122] Trial 17 finished with value: 0.47990511036950667 and parameters: {'n_estimators': 1180, 'learning_rate': 0.11248816950528072, 'num_leaves': 154, 'max_depth': 11, 'min_data_in_leaf': 81, 'lambda_l1': 0.7783075874595726, 'lambda_l2': 2.9476086851854646, 'feature_fraction': 0.8785546156350919, 'bagging_fraction': 0.604957054453599, 'bagging_freq': 6, 'min_gain_to_split': 1.154285633932624}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6421, LogLoss: 0.8497, F1(macro): 0.6289, F1(weighted): 0.6452
Fold 3 ACC: 0.5250, LogLoss: 0.9948, F1(macro): 0.5098, F1(weighted): 0.5380
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.65
各foldのlog_lossの平均:0.83
各foldのf1_macroの平均:0.64
各foldのf1_weightedの平均:0.65
各foldのf1_Hの平均:0.66
各foldのf1_Aの平均:0.55
各foldのf1_Dの平均:0.70

---------- 検証データ ----------
各foldのaccuracyの平均:0.53
各foldのlog_lossの平均:0.94
各foldのf1_macroの平均:0.48
各foldのf1_weightedの平均:0.55
各foldのf1_Hの平均:0.41
各foldのf1_Aの平均:0.39
各foldのf1_Dの平均:0.64
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:45,715] Trial 18 finished with value: 0.5250835179089318 and parameters: {'n_estimators': 932, 'learning_rate': 0.03445155840508387, 'num_leaves': 70, 'max_depth': 2, 'min_data_in_leaf': 119, 'lambda_l1': 2.0453205979278546, 'lambda_l2': 1.6386543810579546, 'feature_fraction': 0.7402097294877503, 'bagging_fraction': 0.6721893506552206, 'bagging_freq': 3, 'min_gain_to_split': 0.4536857479737987}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5610, LogLoss: 0.9702, F1(macro): 0.5382, F1(weighted): 0.5638
Fold 3 ACC: 0.5500, LogLoss: 1.0202, F1(macro): 0.5205, F1(weighted): 0.5651
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.59
各foldのlog_lossの平均:0.92
各foldのf1_macroの平均:0.57
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.63
各foldのf1_Aの平均:0.44
各foldのf1_Dの平均:0.66

---------- 検証データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.55
各foldのf1_Aの平均:0.34
各foldのf1_Dの平均:0.69
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000444 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:46,591] Trial 19 finished with value: 0.5288652948740667 and parameters: {'n_estimators': 668, 'learning_rate': 0.0685757736098831, 'num_leaves': 48, 'max_depth': 6, 'min_data_in_leaf': 142, 'lambda_l1': 2.873333455041771, 'lambda_l2': 0.933431359301252, 'feature_fraction': 0.6663180465370253, 'bagging_fraction': 0.811327669161455, 'bagging_freq': 4, 'min_gain_to_split': 1.4456507244222294}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5698, LogLoss: 0.9380, F1(macro): 0.5478, F1(weighted): 0.5728
Fold 3 ACC: 0.5750, LogLoss: 1.0149, F1(macro): 0.5466, F1(weighted): 0.5930
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.93
各foldのf1_macroの平均:0.56
各foldのf1_weightedの平均:0.58
各foldのf1_Hの平均:0.62
各foldのf1_Aの平均:0.41
各foldのf1_Dの平均:0.64

---------- 検証データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.99
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.49
各foldのf1_Aの平均:0.40
各foldのf1_Dの平均:0.69
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:49,439] Trial 20 finished with value: 0.5280392518764612 and parameters: {'n_estimators': 1109, 'learning_rate': 0.023767094513413693, 'num_leaves': 98, 'max_depth': 14, 'min_data_in_leaf': 49, 'lambda_l1': 1.5505311736330176, 'lambda_l2': 0.06161419270652324, 'feature_fraction': 0.8181502299417919, 'bagging_fraction': 0.881904705725288, 'bagging_freq': 2, 'min_gain_to_split': 0.21676658244386615}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.7780, LogLoss: 0.7095, F1(macro): 0.7743, F1(weighted): 0.7787
Fold 3 ACC: 0.5500, LogLoss: 0.9745, F1(macro): 0.5008, F1(weighted): 0.5660
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.80
各foldのlog_lossの平均:0.68
各foldのf1_macroの平均:0.80
各foldのf1_weightedの平均:0.80
各foldのf1_Hの平均:0.79
各foldのf1_Aの平均:0.78
各foldのf1_Dの平均:0.81

---------- 検証データ ----------
各foldのaccuracyの平均:0.60
各foldのlog_lossの平均:0.94
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.62
各foldのf1_Hの平均:0.46
各foldのf1_Aの平均:0.40
各foldのf1_Dの平均:0.73
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000660 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:50,712] Trial 21 finished with value: 0.528377567851252 and parameters: {'n_estimators': 1291, 'learning_rate': 0.035568097971165176, 'num_leaves': 78, 'max_depth': 12, 'min_data_in_leaf': 194, 'lambda_l1': 1.0751582787527405, 'lambda_l2': 2.4769796810415436, 'feature_fraction': 0.6899408954549553, 'bagging_fraction': 0.808541947899229, 'bagging_freq': 6, 'min_gain_to_split': 0.22515014147508922}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5750, LogLoss: 1.0048, F1(macro): 0.5500, F1(weighted): 0.6000
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.61
各foldのlog_lossの平均:0.89
各foldのf1_macroの平均:0.59
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.64
各foldのf1_Aの平均:0.47
各foldのf1_Dの平均:0.67

---------- 検証データ ----------
各foldのaccuracyの平均:0.57
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.59
各foldのf1_Hの平均:0.54
各foldのf1_Aの平均:0.38
各foldのf1_Dの平均:0.67
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000345 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star

[I 2025-12-04 13:17:51,873] Trial 22 finished with value: 0.49805353840441563 and parameters: {'n_estimators': 911, 'learning_rate': 0.0296252217475937, 'num_leaves': 111, 'max_depth': 10, 'min_data_in_leaf': 191, 'lambda_l1': 2.295062167387344, 'lambda_l2': 2.1072880604366144, 'feature_fraction': 0.7400589250508057, 'bagging_fraction': 0.8490619450977863, 'bagging_freq': 7, 'min_gain_to_split': 0.369605347310769}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5912, LogLoss: 0.9204, F1(macro): 0.5714, F1(weighted): 0.5941
Fold 3 ACC: 0.5000, LogLoss: 1.0200, F1(macro): 0.4817, F1(weighted): 0.5206
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.60
各foldのlog_lossの平均:0.90
各foldのf1_macroの平均:0.59
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.63
各foldのf1_Aの平均:0.46
各foldのf1_Dの平均:0.66

---------- 検証データ ----------
各foldのaccuracyの平均:0.56
各foldのlog_lossの平均:0.99
各foldのf1_macroの平均:0.50
各foldのf1_weightedの平均:0.58
各foldのf1_Hの平均:0.45
各foldのf1_Aの平均:0.37
各foldのf1_Dの平均:0.67
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000458 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:52,715] Trial 23 finished with value: 0.5718772993912061 and parameters: {'n_estimators': 1314, 'learning_rate': 0.10518751849717825, 'num_leaves': 59, 'max_depth': 14, 'min_data_in_leaf': 174, 'lambda_l1': 0.7358847962425203, 'lambda_l2': 4.222580081051698, 'feature_fraction': 0.651249720172428, 'bagging_fraction': 0.9541702757001186, 'bagging_freq': 6, 'min_gain_to_split': 0.5796954120305311}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6616, LogLoss: 0.8198, F1(macro): 0.6498, F1(weighted): 0.6642
Fold 3 ACC: 0.6000, LogLoss: 1.0029, F1(macro): 0.5386, F1(weighted): 0.6100
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.66
各foldのlog_lossの平均:0.82
各foldのf1_macroの平均:0.65
各foldのf1_weightedの平均:0.66
各foldのf1_Hの平均:0.68
各foldのf1_Aの平均:0.56
各foldのf1_Dの平均:0.70

---------- 検証データ ----------
各foldのaccuracyの平均:0.66
各foldのlog_lossの平均:0.96
各foldのf1_macroの平均:0.57
各foldのf1_weightedの平均:0.67
各foldのf1_Hの平均:0.45
各foldのf1_Aの平均:0.48
各foldのf1_Dの平均:0.79
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:53,312] Trial 24 finished with value: 0.5363798176527675 and parameters: {'n_estimators': 1479, 'learning_rate': 0.1091411216370433, 'num_leaves': 34, 'max_depth': 15, 'min_data_in_leaf': 178, 'lambda_l1': 0.6480464278385459, 'lambda_l2': 4.083182527307688, 'feature_fraction': 0.6441394022579654, 'bagging_fraction': 0.9688584442311949, 'bagging_freq': 5, 'min_gain_to_split': 0.9509482111301981}. Best is trial 0 with value: 0.5919074642740854.


[50]	valid_0's multi_logloss: 1.03349
[100]	valid_0's multi_logloss: 1.02964
Early stopping, best iteration is:
[57]	valid_0's multi_logloss: 1.02812
Fold 3 ACC: 0.6113, LogLoss: 0.8811, F1(macro): 0.5936, F1(weighted): 0.6143
Fold 3 ACC: 0.5750, LogLoss: 1.0294, F1(macro): 0.5211, F1(weighted): 0.5905
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.61
各foldのlog_lossの平均:0.88
各foldのf1_macroの平均:0.59
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.64
各foldのf1_Aの平均:0.48
各foldのf1_Dの平均:0.67

---------- 検証データ ----------
各foldのaccuracyの平均:0.60
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.54
各foldのf1_weightedの平均:0.61
各foldのf1_Hの平均:0.45
各foldのf1_Aの平均:0.44
各foldのf1_Dの平均:0.72
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000319 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins

[I 2025-12-04 13:17:54,615] Trial 25 finished with value: 0.4938770432614481 and parameters: {'n_estimators': 1303, 'learning_rate': 0.05763777002746301, 'num_leaves': 56, 'max_depth': 14, 'min_data_in_leaf': 148, 'lambda_l1': 1.3764548169686286, 'lambda_l2': 3.3039685998751867, 'feature_fraction': 0.7829504891818114, 'bagging_fraction': 0.9449367433962873, 'bagging_freq': 6, 'min_gain_to_split': 0.5710436452032346}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.5750, LogLoss: 0.9982, F1(macro): 0.5169, F1(weighted): 0.5965
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.67
各foldのlog_lossの平均:0.80
各foldのf1_macroの平均:0.66
各foldのf1_weightedの平均:0.67
各foldのf1_Hの平均:0.69
各foldのf1_Aの平均:0.58
各foldのf1_Dの平均:0.71

---------- 検証データ ----------
各foldのaccuracyの平均:0.57
各foldのlog_lossの平均:0.98
各foldのf1_macroの平均:0.49
各foldのf1_weightedの平均:0.59
各foldのf1_Hの平均:0.44
各foldのf1_Aの平均:0.32
各foldのf1_Dの平均:0.71
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000329 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Star

[I 2025-12-04 13:17:55,522] Trial 26 finished with value: 0.5255284275408114 and parameters: {'n_estimators': 753, 'learning_rate': 0.11607324687100297, 'num_leaves': 16, 'max_depth': 7, 'min_data_in_leaf': 123, 'lambda_l1': 0.16664815180590953, 'lambda_l2': 4.4527077560913435, 'feature_fraction': 0.7537237670783964, 'bagging_fraction': 0.8797910405112178, 'bagging_freq': 7, 'min_gain_to_split': 1.066418336552727}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6390, LogLoss: 0.8419, F1(macro): 0.6274, F1(weighted): 0.6421
Fold 3 ACC: 0.4500, LogLoss: 1.0259, F1(macro): 0.4156, F1(weighted): 0.4726
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.65
各foldのlog_lossの平均:0.84
各foldのf1_macroの平均:0.64
各foldのf1_weightedの平均:0.65
各foldのf1_Hの平均:0.67
各foldのf1_Aの平均:0.55
各foldのf1_Dの平均:0.69

---------- 検証データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.96
各foldのf1_macroの平均:0.53
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.42
各foldのf1_Aの平均:0.46
各foldのf1_Dの平均:0.69
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:56,442] Trial 27 finished with value: 0.51315147712238 and parameters: {'n_estimators': 581, 'learning_rate': 0.08562817854051945, 'num_leaves': 33, 'max_depth': 11, 'min_data_in_leaf': 133, 'lambda_l1': 0.546759640730428, 'lambda_l2': 4.468594260036096, 'feature_fraction': 0.657398364737966, 'bagging_fraction': 0.946409470372991, 'bagging_freq': 5, 'min_gain_to_split': 1.3109741513438609}. Best is trial 0 with value: 0.5919074642740854.


Fold 3 ACC: 0.6101, LogLoss: 0.8853, F1(macro): 0.5947, F1(weighted): 0.6137
Fold 3 ACC: 0.5500, LogLoss: 1.0433, F1(macro): 0.5183, F1(weighted): 0.5662
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.61
各foldのlog_lossの平均:0.89
各foldのf1_macroの平均:0.60
各foldのf1_weightedの平均:0.62
各foldのf1_Hの平均:0.65
各foldのf1_Aの平均:0.48
各foldのf1_Dの平均:0.67

---------- 検証データ ----------
各foldのaccuracyの平均:0.57
各foldのlog_lossの平均:1.00
各foldのf1_macroの平均:0.51
各foldのf1_weightedの平均:0.59
各foldのf1_Hの平均:0.43
各foldのf1_Aの平均:0.43
各foldのf1_Dの平均:0.69
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612

[I 2025-12-04 13:17:57,457] Trial 28 finished with value: 0.5990357979046562 and parameters: {'n_estimators': 365, 'learning_rate': 0.18976879170580419, 'num_leaves': 66, 'max_depth': 15, 'min_data_in_leaf': 178, 'lambda_l1': 1.6393955038383417, 'lambda_l2': 3.4116231365393865, 'feature_fraction': 0.601522164670825, 'bagging_fraction': 0.7103139883195798, 'bagging_freq': 1, 'min_gain_to_split': 0.1172017501201085}. Best is trial 28 with value: 0.5990357979046562.


---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.62
各foldのlog_lossの平均:0.86
各foldのf1_macroの平均:0.61
各foldのf1_weightedの平均:0.63
各foldのf1_Hの平均:0.65
各foldのf1_Aの平均:0.49
各foldのf1_Dの平均:0.68

---------- 検証データ ----------
各foldのaccuracyの平均:0.65
各foldのlog_lossの平均:0.94
各foldのf1_macroの平均:0.60
各foldのf1_weightedの平均:0.66
各foldのf1_Hの平均:0.53
各foldのf1_Aの平均:0.53
各foldのf1_Dの平均:0.74
ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000404 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
Training until validation scores don't improv

[I 2025-12-04 13:17:59,107] Trial 29 finished with value: 0.5153106076738688 and parameters: {'n_estimators': 424, 'learning_rate': 0.1888046952198824, 'num_leaves': 70, 'max_depth': 15, 'min_data_in_leaf': 176, 'lambda_l1': 0.02597632105325587, 'lambda_l2': 3.6227630967031716, 'feature_fraction': 0.6114026634926046, 'bagging_fraction': 0.774994356081383, 'bagging_freq': 1, 'min_gain_to_split': 0.08065258184122887}. Best is trial 28 with value: 0.5990357979046562.


Fold 3 ACC: 0.6000, LogLoss: 0.9957, F1(macro): 0.5809, F1(weighted): 0.6123
---------- result ----------
---------- 学習データ ----------
各foldのaccuracyの平均:0.70
各foldのlog_lossの平均:0.77
各foldのf1_macroの平均:0.69
各foldのf1_weightedの平均:0.70
各foldのf1_Hの平均:0.71
各foldのf1_Aの平均:0.62
各foldのf1_Dの平均:0.74

---------- 検証データ ----------
各foldのaccuracyの平均:0.58
各foldのlog_lossの平均:0.97
各foldのf1_macroの平均:0.52
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.48
各foldのf1_Aの平均:0.37
各foldのf1_Dの平均:0.69


In [33]:
print("Best Score:", study.best_value)
print("Best Params:", study.best_params)

Best Score: 0.5990357979046562
Best Params: {'n_estimators': 365, 'learning_rate': 0.18976879170580419, 'num_leaves': 66, 'max_depth': 15, 'min_data_in_leaf': 178, 'lambda_l1': 1.6393955038383417, 'lambda_l2': 3.4116231365393865, 'feature_fraction': 0.601522164670825, 'bagging_fraction': 0.7103139883195798, 'bagging_freq': 1, 'min_gain_to_split': 0.1172017501201085}


In [34]:
best_params = study.best_params

df_valid_pred, df_imp, df_metrics_tr, df_metrics_val, report_tr, report_val, target_labels = train_lgb(train_df,
                                                                                                       x_all,
                                                                                                       y_all,
                                                                                                       best_params,
                                                                                                       list_nfold=[0,1,2],
                                                                                                       folds=folds,
                                                                                                       save_model=True
                                                                                                       )

ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000740 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1610, number of used features: 11
[LightGBM] [Info] Start training from score -0.801964
[LightGBM] [Info] Start training from score -1.132086
[LightGBM] [Info] Start training from score -1.473193
Training until validation scores don't improve for 50 rounds
[50]	valid_0's multi_logloss: 0.918439
Early stopping, best iteration is:
[20]	valid_0's multi_logloss: 0.873352
最終モデルを C:\Users\image\Portfolio\Soccer analysis\soccer_pipeline\models\model_lgb_fold0.pickle に保存しました。
Fold 1 ACC: 0.5913, LogLoss: 0.8945, F1(macro): 0.4833, F1(weighted): 0.5413
Fold 1 ACC: 0.6857, LogLoss: 0.8734, F1(macro): 0.5471, F1(weighted): 0.6490
fold

#　モデルの評価

In [35]:
#各変数ごとに3回のfoldでの重要度の平均を算出
print("モデルの特徴量の重要度")
df_imp_mean = df_imp.groupby("col")["imp"].agg(["mean","std"]).sort_values("mean",ascending=False)
df_imp_mean

モデルの特徴量の重要度


Unnamed: 0_level_0,mean,std
col,Unnamed: 1_level_1,Unnamed: 2_level_1
home_team,62.0,16.462078
away_team,60.666667,9.609024
home_last_points,19.333333,3.05505
away_season_wins_ave_overall,19.0,4.582576
home_season_wins_ave_overall,15.0,5.291503
points_difference,13.333333,4.041452
away_last_gd,11.666667,6.429101
home_recent_10_goal_diff,10.333333,1.527525
away_last_points,10.0,2.645751
away_recent_10_goal_diff,10.0,4.0


In [36]:
print("学習データスコア表")
display(df_metrics_tr)
print()
print("検証データスコア表")
display(df_metrics_val)

print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_tr['accuracy'].mean()))
print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_tr['log_loss'].mean()))
print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_tr['f1_macro'].mean()))
print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_tr['f1_weighted'].mean()))
print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_tr['f1_A'].mean()))
print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_tr['f1_D'].mean()))
print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_tr['f1_H'].mean()))
print()
print("-"*10,"検証データ","-"*10)
print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_val['accuracy'].mean()))
print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_val['log_loss'].mean()))
print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_val['f1_macro'].mean()))
print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_val['f1_weighted'].mean()))
print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_val['f1_A'].mean()))
print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_val['f1_D'].mean()))
print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_val['f1_H'].mean()))

学習データスコア表


Unnamed: 0,nfold,accuracy,log_loss,f1_macro,f1_weighted,f1_A,f1_D,f1_H
0,0,0.591304,0.894477,0.483292,0.541266,0.606884,0.147806,0.695187
1,1,0.58474,0.900355,0.479593,0.535637,0.607273,0.146789,0.684717
2,2,0.603145,0.875475,0.508006,0.55971,0.624426,0.200445,0.699147



検証データスコア表


Unnamed: 0,nfold,accuracy,log_loss,f1_macro,f1_weighted,f1_A,f1_D,f1_H
0,0,0.685714,0.873352,0.54709,0.64898,0.555556,0.285714,0.8
1,1,0.7,0.851684,0.58153,0.695527,0.363636,0.571429,0.809524
2,2,0.65,0.890711,0.560212,0.641714,0.47619,0.444444,0.76


各foldのaccuracyの平均:0.59
各foldのlog_lossの平均:0.89
各foldのf1_macroの平均:0.49
各foldのf1_weightedの平均:0.55
各foldのf1_Hの平均:0.61
各foldのf1_Aの平均:0.17
各foldのf1_Dの平均:0.69

---------- 検証データ ----------
各foldのaccuracyの平均:0.68
各foldのlog_lossの平均:0.87
各foldのf1_macroの平均:0.56
各foldのf1_weightedの平均:0.66
各foldのf1_Hの平均:0.47
各foldのf1_Aの平均:0.43
各foldのf1_Dの平均:0.79


In [37]:
df_metrics_mean = df_metrics_val.mean()

df_metrics_mean = df_metrics_mean.to_frame().T
df_metrics_mean = df_metrics_mean.drop("nfold",axis = 1)

df_metrics_mean

Unnamed: 0,accuracy,log_loss,f1_macro,f1_weighted,f1_A,f1_D,f1_H
0,0.678571,0.871916,0.562944,0.662074,0.465127,0.433862,0.789841


In [38]:
import json  

# 定数
LOG_FILE_PATH = os.path.join(ROOT_DIR,"data","evaluation_results_log.csv")

# 複合的な一意なIDを格納するカラム名
COMPOSITE_ID_COLUMN = 'Composite_ID'

def create_feature_id(feature_list):
    """
    特徴量リストをソートし、カンマ区切りの一意の文字列IDに変換する。
    """
    sorted_features = sorted(feature_list)
    return ",".join(sorted_features)

def create_composite_id(feature_list: list, params: dict) -> str:
    """
    特徴量リストIDとハイパーパラメータを組み合わせて一意の複合IDを作成する。
    """
    # 1. 特徴量IDを作成
    feature_id = create_feature_id(feature_list)
    
    # 2. パラメータをソートし、JSON文字列に変換
    sorted_params = dict(sorted(params.items()))
    params_str = json.dumps(sorted_params, sort_keys=True)
    
    # 3. 複合IDを作成
    # 区切り文字に '|' を使用して結合
    composite_id = f"{feature_id}|{params_str}"
    return composite_id


def update_evaluation_log(feature_list: list, metrics_df: pd.DataFrame, params: dict):
    """
    モデルの評価結果とパラメータをログファイルに追加する。
    特徴量とパラメータの組み合わせが既に存在する場合は、スキップする。
    
    Args:
        feature_list (list): モデルに使用した特徴量のリスト。
        metrics_df (pd.DataFrame): 1行7列の評価指標データフレーム。
        params (dict): モデルに使用したハイパーパラメータの辞書。
    """
    # 1. 複合IDの作成
    composite_id = create_composite_id(feature_list, params)



    # 2. メタデータ（複合ID、特徴量、タイムスタンプ）データフレームの作成
    metadata_df = pd.DataFrame({
        COMPOSITE_ID_COLUMN: [composite_id], # 重複チェックに使用するID
        'Features': [str(feature_list)],     # 特徴量リスト (視認性のため)
        'Params_JSON': [json.dumps(params)], # パラメータ全体をJSON文字列として保存
        'Timestamp': [datetime.now().strftime('%Y-%m-%d %H:%M:%S')] 
    })
    
    # 3. すべてのデータを結合 (メタデータ + パラメータ詳細 + 評価指標)
    new_result_df = pd.concat([metadata_df, metrics_df], axis=1)
    
    # 4. ログファイルが存在するかチェック
    if not os.path.exists(LOG_FILE_PATH):
        print("新しいログファイルを作成します。")
        new_result_df.to_csv(LOG_FILE_PATH, index=False)
        return

    # 5. ログファイルが存在する場合: 読み込みと重複チェック
    existing_log_df = pd.read_csv(LOG_FILE_PATH)
    
    # 複合IDが既存のログに含まれているかを確認
    is_duplicate = composite_id in existing_log_df[COMPOSITE_ID_COLUMN].values
    
    if is_duplicate:
        # 重複がある場合はスキップ
        print(f"✅ スキップ: 特徴量とパラメータの組み合わせ '{composite_id}' は既に存在します。")
    else:
        # 重複がない場合は追記
        new_result_df.to_csv(LOG_FILE_PATH, mode='a', header=False, index=False)
        print(f"➡️ 追加: 新しい評価結果をログに追加しました。 ({composite_id})")

In [39]:
update_evaluation_log(features,df_metrics_mean,best_params)

新しいログファイルを作成します。
