In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation
from sklearn.metrics import accuracy_score, log_loss, f1_score, classification_report
from IPython.display import display

import pickle
import gc
import os
import datetime as dt
from datetime import datetime
import json

In [2]:
# 最大列数を None（制限なし）に
pd.set_option('display.max_columns', None)

# 最大行数も None に
pd.set_option('display.max_rows', None)

In [3]:
import os

# Notebook用にスクリプトの場所を手動で設定
BASE_DIR = r"C:\Users\image\Portfolio\Soccer analysis\soccer_pipeline\notebooks"

# プロジェクトのルート（soccer_pipeline）
ROOT_DIR = os.path.abspath(os.path.join(BASE_DIR, ".."))

# モデル保存ディレクトリへのパス
MODEL_DIR = os.path.join(ROOT_DIR, "models")

data_file = os.path.join(ROOT_DIR,"data","processed_data.csv")

# データ読み込み

In [4]:
#データの読み込み
df = pd.read_csv(data_file)

df.head()

Unnamed: 0,fixture_id,date,season,home_team,away_team,home_score,away_score,status,home_shots_on_goal,home_possession,home_passes,home_passes_accuracy,home_fouls,home_corners,home_yellow_cards,home_red_cards,away_shots_on_goal,away_possession,away_passes,away_passes_accuracy,away_fouls,away_corners,away_yellow_cards,away_red_cards,home_last_position,home_last_won,home_last_drawn,home_last_lost,home_last_gf,home_last_ga,home_last_gd,home_last_points,away_last_position,away_last_won,away_last_drawn,away_last_lost,away_last_gf,away_last_ga,away_last_gd,away_last_points
0,710556,2021-08-13 19:00:00+00:00,2021,Brentford,Arsenal,2.0,0.0,FT,3.0,35.0,309.0,201.0,12.0,2.0,0.0,0.0,4.0,65.0,568.0,488.0,8.0,5.0,0.0,0.0,17,10,9,19,33,55,-22,39,8,18,7,13,55,39,16,61
1,710557,2021-08-14 14:00:00+00:00,2021,Burnley,Brighton,1.0,2.0,FT,3.0,36.0,259.0,181.0,10.0,7.0,2.0,0.0,8.0,64.0,518.0,424.0,7.0,6.0,1.0,0.0,17,10,9,19,33,55,-22,39,16,9,14,15,40,46,-6,41
2,710558,2021-08-14 14:00:00+00:00,2021,Chelsea,Crystal Palace,3.0,0.0,FT,6.0,62.0,678.0,623.0,15.0,5.0,0.0,0.0,1.0,38.0,423.0,363.0,11.0,2.0,0.0,0.0,4,19,10,9,58,36,22,67,14,12,8,18,41,66,-25,44
3,710559,2021-08-14 14:00:00+00:00,2021,Everton,Southampton,3.0,1.0,FT,6.0,48.0,337.0,235.0,13.0,6.0,2.0,0.0,3.0,52.0,370.0,256.0,15.0,8.0,0.0,0.0,10,17,8,13,47,48,-1,59,15,12,7,19,47,68,-21,43
4,710560,2021-08-14 14:00:00+00:00,2021,Leicester,Wolves,1.0,0.0,FT,5.0,56.0,584.0,505.0,6.0,5.0,1.0,0.0,3.0,44.0,443.0,366.0,10.0,4.0,2.0,0.0,5,20,6,12,68,50,18,66,13,12,9,17,36,52,-16,45


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1900 entries, 0 to 1899
Data columns (total 40 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixture_id            1900 non-null   int64  
 1   date                  1900 non-null   object 
 2   season                1900 non-null   int64  
 3   home_team             1900 non-null   object 
 4   away_team             1900 non-null   object 
 5   home_score            1639 non-null   float64
 6   away_score            1639 non-null   float64
 7   status                1900 non-null   object 
 8   home_shots_on_goal    1639 non-null   float64
 9   home_possession       1639 non-null   float64
 10  home_passes           1639 non-null   float64
 11  home_passes_accuracy  1639 non-null   float64
 12  home_fouls            1639 non-null   float64
 13  home_corners          1639 non-null   float64
 14  home_yellow_cards     1639 non-null   float64
 15  home_red_cards       

In [6]:
# データを日付とfixture_idでソート (時系列順に並べるため)
df = df.sort_values(by=['date', 'fixture_id']).reset_index(drop=True)

In [7]:
df["status"].unique()

array(['FT', 'NS'], dtype=object)

# 特徴量エンジニアリング

## ターゲットカラム作成

In [8]:
#targetカラム作成(試合の勝敗カラム)　H:home win, A:away win, D:draw
def target_create(row):
    if row["home_score"] > row["away_score"]:
        return  "H"    
    
    elif row["home_score"] < row["away_score"]:
        return  "A"  
    
    else:
        return  "D"  

df["target"] = df.apply(target_create,axis = 1)

In [9]:
# objectからcategoryに変換したいカラム
columns = ["home_team","away_team","status","target"]

# タイプ変換関数
def change_type(columns):
    for col in columns:
        df[col] = df[col].astype("category")
    return df

df = change_type(columns) 

In [10]:
df['date'] = pd.to_datetime(df['date'], errors='coerce')
# タイムゾーン情報を削除
df['date'] = df['date'].dt.tz_localize(None)

## 得失点差カラム作成

In [11]:
# ホーム勝利を示す一時的な列を作成
# 'target'が 'H' の場合に1、それ以外は0
df['is_home_win'] = (df['target'] == 'H').astype(int)
# アウェイ勝利を示す一時的な列を作成
# 'target'が 'Ａ' の場合に1、それ以外は0
df['is_away_win'] = (df['target'] == 'A').astype(int)


In [12]:
# 得失点差カラム作成
df["home_goal_difference"] = df["home_score"] - df["away_score"]
df["away_goal_difference"] = df["away_score"] - df["home_score"]

## 直近N試合の勝利数/スコア/得失点差のローリング計算関数作成

In [13]:
# --- 直近N試合の勝利数/スコア/得失点差のローリング計算関数 ---
def calculate_rolling_feature(df, group_col, target_col, window_size, new_col_name, agg_func='sum'):
    """
    グループごとにローリング集計を行い、1つシフトした結果を新しいカラムとして追加する。
    group_col: グループ化するカラム ('home_team' or 'away_team')
    target_col: 集計対象のカラム ('is_home_win', 'home_score', 'home_goal_difference' など)
    window_size: ローリングウィンドウサイズ (直近N試合なら N+1)
    new_col_name: 新しい特徴量のカラム名
    agg_func: 集計関数 ('sum' or 'mean')
    """
    # ローリング計算。window_sizeは現在の行を含むため、直近N試合を見る場合は N+1
    # shift(1)で現在の試合結果を除外し、fillna(0)で試合前のNaNを埋める
    if agg_func == 'sum':
        new_feature = df.groupby(group_col, observed=False)[target_col].transform(
            lambda x: x.rolling(window=window_size, min_periods=1).sum().shift(1).fillna(0)
        ).astype(int)
    elif agg_func == 'mean':
        # 勝率の場合、結果をパーセンテージにし、小数点第2位まで丸める
        new_feature = df.groupby(group_col, observed=False)[target_col].transform(
            lambda x: (x.rolling(window=window_size, min_periods=1).mean().shift(1) * 100).round(2).fillna(0)
        )
    else:
        raise ValueError("Unsupported agg_func")
        
    df[new_col_name] = new_feature
    return df

In [14]:
# 直近5試合 (window=6) の計算
df = calculate_rolling_feature(df, 'home_team', 'is_home_win', 6, 'home_team_recent_5_wins')
df = calculate_rolling_feature(df, 'away_team', 'is_away_win', 6, 'away_team_recent_5_wins')

df = calculate_rolling_feature(df, 'home_team', 'home_score', 6, 'home_recent_5_scores')
df = calculate_rolling_feature(df, 'away_team', 'away_score', 6, 'away_recent_5_scores')

df = calculate_rolling_feature(df, 'home_team', 'away_score', 6, 'home_recent_5_goal_against')
df = calculate_rolling_feature(df, 'away_team', 'home_score', 6, 'away_recent_5_goal_against')

df = calculate_rolling_feature(df, 'home_team', 'home_goal_difference', 6, 'home_recent_5_goal_diff')
df = calculate_rolling_feature(df, 'away_team', 'away_goal_difference', 6, 'away_recent_5_goal_diff')


# 直近10試合 (window=11) の計算
df = calculate_rolling_feature(df, 'home_team', 'home_score', 11, 'home_recent_10_scores')
df = calculate_rolling_feature(df, 'away_team', 'away_score', 11, 'away_recent_10_scores')

df = calculate_rolling_feature(df, 'home_team', 'away_score', 11, 'home_recent_10_goal_against')
df = calculate_rolling_feature(df, 'away_team', 'home_score', 11, 'away_recent_10_goal_against')

df = calculate_rolling_feature(df, 'home_team', 'home_goal_difference', 11, 'home_recent_10_goal_diff')
df = calculate_rolling_feature(df, 'away_team', 'away_goal_difference', 11, 'away_recent_10_goal_diff')


# 直近20試合 (window=21) の計算
df = calculate_rolling_feature(df, 'home_team', 'home_score', 21, 'home_recent_20_scores')
df = calculate_rolling_feature(df, 'away_team', 'away_score', 21, 'away_recent_20_scores')

df = calculate_rolling_feature(df, 'home_team', 'away_score', 21, 'home_recent_20_goal_against')
df = calculate_rolling_feature(df, 'away_team', 'home_score', 21, 'away_recent_20_goal_against')

df = calculate_rolling_feature(df, 'home_team', 'home_goal_difference', 21, 'home_recent_20_goal_diff')
df = calculate_rolling_feature(df, 'away_team', 'away_goal_difference', 21, 'away_recent_20_goal_diff')

## 勝ち点カラム作成

In [15]:
#------------------勝ち点カラム作成 (ホーム/アウェイ区別なしの全体成績)--------------------
# 
# 各試合に一意のIDを付与 (後のマージのために利用)
df['match_id'] = df.index 

# 2. チーム視点でのデータ作成 (スタッキング)
# -----------------------------------------------

# ① ホームチーム視点のデータフレームを作成
df_home = df[['match_id','season','date', 'home_team', 'target']].copy()
df_home.rename(columns={'home_team': 'team'}, inplace=True)
# 勝ち点計算
df_home['points'] = df_home['target'].map({"H":3,"D":1,"A":0})
# 勝利フラグ (勝率計算用)
df_home['is_win'] = (df_home['target'] == 'H').astype(int)

# # ② アウェイチーム視点のデータフレームを作成
df_away = df[['match_id','season', 'date', 'away_team', 'target']].copy()
df_away.rename(columns={'away_team': 'team'}, inplace=True)

# 勝ち点計算
df_away['points'] = df_away['target'].map({"H":0,"D":1,"A":3})

# 勝利フラグ (勝率計算用)
df_away['is_win'] = (df_away['target'] == 'A').astype(int)

# # ③ 2つの視点のデータを結合し、時系列順にソート
df_stacked = pd.concat([df_home, df_away],ignore_index=True)
df_stacked = df_stacked.sort_values(by=['date', 'match_id']).reset_index(drop=True)


# seasonとteamでグループ化し、各シーズン内での合計勝ち点 (試合前まで) を計算
# transform を利用して累積勝ち点を計算
# transform は、計算結果を元のdf_stackedと同じインデックス・行数で返すため、applyよりも高速
df_stacked['total_points'] = df_stacked.groupby(["season","team"], observed=False)["points"].transform(
    # 勝ち点の累積和を計算し、1つシフト (現在の試合結果を除く)、NaNを0で埋める
    lambda x: x.rolling(window = 38,min_periods = 1).sum().shift(1).fillna(0)
).astype(int) 


In [16]:
# ホームチームの総合成績フィーチャーを抽出 (match_id と team で紐づけ)
df_home_points = df_stacked[['match_id','team','total_points']].copy()
df_home_points.rename(columns={'team': 'home_team', 'total_points': 'home_total_points'}, inplace=True)

# マージ
df = pd.merge(
    df,
    df_home_points,
    on = ['match_id','home_team'],
    how = 'left'
)
# アウェイチームの総合成績フィーチャーを抽出
df_away_points = df_stacked[['match_id','team','total_points']].copy()
df_away_points.rename(columns={'team': 'away_team', 'total_points': 'away_total_points'}, inplace=True)

# マージ
df = pd.merge(
    df,
    df_away_points,
    on = ['match_id','away_team'],
    how = 'left'
)

df['home_total_points'] = df['home_total_points'].astype(int)
df['away_total_points'] = df['away_total_points'].astype(int)


# home teamとaway teamの勝ち点差カラムを作成
df["points_difference"] = df['home_total_points'] - df['away_total_points']

In [17]:
# --------------各チームの直近5試合での勝利数カラム作成(home、アウェイの区別なし)-----------------

# 総合成績のローリング計算 (home, away区別なし)

# チーム視点での直近5試合の勝率
df_stacked['recent_5_wins_overall'] = df_stacked.groupby(['season','team'], observed=False)['is_win'].transform(
    lambda x: x.rolling(window=6, min_periods=1).mean().shift(1).fillna(0)
).astype(int)

# チーム視点でのシーズン勝率
df_stacked['season_wins_ave_overall_temp'] = df_stacked.groupby(['season','team'], observed=False)['is_win'].transform(
    lambda x: (x.rolling(window=38, min_periods=1).mean().shift(1) * 100).round(2).fillna(0)
)


# -------------------home teamへのマージ------------------------

# ホームチームの総合成績フィーチャーを抽出
df_home_feature = df_stacked[['match_id', 'team', 'recent_5_wins_overall', 'season_wins_ave_overall_temp']].copy()
df_home_feature.rename(columns={
    'team': 'home_team',
    'recent_5_wins_overall': 'home_team_recent_5_wins_overall',
    'season_wins_ave_overall_temp': 'home_season_wins_ave_overall'
}, inplace=True)

# 元のデータフレームにマージ
df = pd.merge(
    df, 
    df_home_feature, 
    on=['match_id', 'home_team'], 
    how='left'
)


# ------------------away teamへのマージ--------------------

# アウェイチームの総合成績フィーチャーを抽出
df_away_feature = df_stacked[['match_id', 'team', 'recent_5_wins_overall', 'season_wins_ave_overall_temp']].copy()
df_away_feature.rename(columns={
    'team': 'away_team',
    'recent_5_wins_overall': 'away_team_recent_5_wins_overall',
    'season_wins_ave_overall_temp': 'away_season_wins_ave_overall'
}, inplace=True)

# 元のデータフレームにマージ
df = pd.merge(
    df, 
    df_away_feature, 
    on=['match_id', 'away_team'], 
    how='left'
)


## NSの試合データの置き換え

In [18]:
# 2025年シーズンのまだ行われていない試合(status="NS")に対し、直前の "FT" (Full Time) の試合結果でデータを補完する。
# NSのデータの中でも1か月後、2か月後の試合のデータに関しては、rollingfunctionを使って計算した、直近の試合の勝率や得点などの合計数や割合のカラムは、
# 直近の試合もおこなわれていないため、勝率がホームアウェイともに0などの数字に入ってしまい、適切な予測ができなくなってしまう。
# そのため、最新試合の時点でのデータで、そのシーズンの残りの全試合のデータを置き換えていく

fill_features_home = ['home_team_recent_5_wins',
                      'home_recent_5_scores', 
                      'home_recent_5_goal_diff', 
                      'home_recent_5_goal_against', 
                      'home_recent_10_scores', 
                      'home_recent_10_goal_diff', 
                      'home_recent_10_goal_against', 
                      'home_recent_20_scores', 
                      'home_recent_20_goal_diff', 
                      'home_recent_20_goal_against']


fill_features_away = ['away_team_recent_5_wins',
                      'away_recent_5_scores',
                      'away_recent_5_goal_diff',
                      'away_recent_5_goal_against',
                      'away_recent_10_scores',
                      'away_recent_10_goal_diff',
                      'away_recent_10_goal_against',
                      'away_recent_20_scores',
                      'away_recent_20_goal_diff',
                      'away_recent_20_goal_against']

fill_features_home_overall = ['home_total_points','home_team_recent_5_wins_overall','home_season_wins_ave_overall']

fill_features_away_overall = ['away_total_points','away_team_recent_5_wins_overall','away_season_wins_ave_overall']

# 2025年シーズンのチームリストを取得
teams = df[df["season"] == 2025]["home_team"].unique()

# ホーム限定データ (homeでの試合のみでカウントしているデータ) の補充
for team in teams:
    # 置き換え元（FTのそのチームの最終ホーム試合）の fill_features を取得
    source_df = df[(df["status"] == "FT") & (df["home_team"] == team)].sort_values("date")
    if not source_df.empty:
        source_vals = source_df.iloc[-1][fill_features_home]
    
        # 置き換え先（NSのそのチームのホーム試合）に代入
        df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home] = source_vals.values
    
# アウェイ限定データ (awayでの試合のみでカウントしているデータ) の補充
for team in teams:
    # 置き換え元（FTのそのチームの最終アウェイ試合）の fill_features を取得
    source_df = df[(df["status"] == "FT") & (df["away_team"] == team)].sort_values("date")
    if not source_df.empty:
        source_vals = source_df.iloc[-1][fill_features_away]
    
        # 置き換え先（NSのそのチームのアウェイ試合）に代入
        df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away] = source_vals.values

for team in teams:
    # 1. 最新試合を日付順で取得
    source_df = df[(df["status"] == "FT") & ((df["home_team"] == team) | (df["away_team"] == team))].sort_values("date")
    if source_df.empty:
        continue
    
    last_row = source_df.iloc[-1]
    
    # 2. 対象チームが最新試合でホームだったかアウェイだったかを判別
    team_was_home = last_row["home_team"] == team
    
    # 3. 最新試合の行から、対象チーム自身の統計値のみを1セット取得
    if team_was_home:
        # チームがホームの場合、統計値は home_overall カラムにある
        latest_team_vals = last_row[fill_features_home_overall].values
    else:
        # チームがアウェイの場合、統計値は away_overall カラムにある
        latest_team_vals = last_row[fill_features_away_overall].values
        
    # 4. NS の行ごとに index を取得
    ns_home_idx = df[(df["status"] == "NS") & (df["home_team"] == team)].index
    ns_away_idx = df[(df["status"] == "NS") & (df["away_team"] == team)].index
    
    # 5. ホームチームとして NS の行に、対象チームの統計値で代入
    if len(ns_home_idx) > 0:
        # 未来のホーム試合の「ホーム側」カラムに、対象チームの統計値を埋める
        df.loc[ns_home_idx, fill_features_home_overall] = [latest_team_vals] * len(ns_home_idx)
    
    # 6. アウェイチームとして NS の行に、対象チームの統計値で代入
    if len(ns_away_idx) > 0:
        # 未来のアウェイ試合の「アウェイ側」カラムに、対象チームの統計値を埋める
        df.loc[ns_away_idx, fill_features_away_overall] = [latest_team_vals] * len(ns_away_idx)


In [19]:
# 置き換えがうまくいっているかを確認
teams_2025 = df[df["season"] == 2025]["home_team"].unique()

for team in teams_2025:
    # 検証対象チーム
    test_team = team 
    
    # 1. チームAの最新の完了した試合 (FT) の統計値を取得
    latest_ft_row = df[(df["status"] == "FT") & ((df["home_team"] == test_team) | (df["away_team"] == test_team))].sort_values("date").iloc[-1]
    
    # 2. チームA自身の最新のoverall統計値を取得
    team_was_home_ft = latest_ft_row["home_team"] == test_team
    if team_was_home_ft:
        latest_overall_vals = latest_ft_row[fill_features_home_overall].values[0]
    else:
        latest_overall_vals = latest_ft_row[fill_features_away_overall].values[0]
    
    # 3. チームAの最新のhome/away限定統計値を取得
    latest_home_only_vals = df[(df["status"] == "FT") & (df["home_team"] == test_team)].sort_values("date").iloc[-1][fill_features_home].values[0]
    latest_away_only_vals = df[(df["status"] == "FT") & (df["away_team"] == test_team)].sort_values("date").iloc[-1][fill_features_away].values[0]
    
    # 4. チームAがホームのNS試合の最初の行を取得
    ns_home_row = df[(df["status"] == "NS") & (df["home_team"] == test_team)].iloc[0]
    
    # 5. 値の比較と検証
    print(f"--- {test_team} ホームNS試合の検証 ---")
    
    # (A) ホーム限定データ: 最終ホームFT試合と一致するか
    home_only_match = (ns_home_row[fill_features_home].values[0] == latest_home_only_vals).all()
    print(f"ホーム限定データ一致: {home_only_match}") 
    
    # (B) Overallデータ（ホーム側カラム）: 最終FT試合のOverallと一致するか
    overall_match = (ns_home_row[fill_features_home_overall].values[0] == latest_overall_vals).all()
    print(f"Overallデータ一致: {overall_match}") 
    
    # 6. チームAがアウェイのNS試合の最初の行を取得
    ns_away_row = df[(df["status"] == "NS") & (df["away_team"] == test_team)].iloc[0]
    
    print(f"\n--- {test_team} アウェイNS試合の検証 ---")
    
    # (C) アウェイ限定データ: 最終アウェイFT試合と一致するか
    away_only_match = (ns_away_row[fill_features_away].values[0] == latest_away_only_vals).all()
    print(f"アウェイ限定データ一致: {away_only_match}") 
    
    # (D) Overallデータ（アウェイ側カラム）: 最終FT試合のOverallと一致するか
    overall_match_away = (ns_away_row[fill_features_away_overall].values[0] == latest_overall_vals).all()
    print(f"Overallデータ一致: {overall_match_away}")

--- Liverpool ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Liverpool アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Aston Villa ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Aston Villa アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Brighton ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Brighton アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Sunderland ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Sunderland アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Tottenham ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Tottenham アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Wolves ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Wolves アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Nottingham Forest ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一致: True

--- Nottingham Forest アウェイNS試合の検証 ---
アウェイ限定データ一致: True
Overallデータ一致: True
--- Chelsea ホームNS試合の検証 ---
ホーム限定データ一致: True
Overallデータ一

In [20]:
for team in teams_2025:
    print("team",team)
    display(df.loc[(df["status"] == "FT") & (df["home_team"] == team), fill_features_home].tail(1))
    display(df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home].head(2))
    print()
    display(df.loc[(df["status"] == "FT") & (df["away_team"] == team), fill_features_away].tail(1))
    display(df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away].head(2))
    print()
    
    source_df = df[(df["status"] == "FT") & ((df["home_team"] == team) | (df["away_team"] == team))].sort_values("date")
    if source_df.empty:
        continue
    
    last_row = source_df.iloc[-1]

    # 2. 対象チームが最新試合でホームだったかアウェイだったかを判別
    team_was_home = last_row["home_team"] == team
    
    # 3. 最新試合の行から、対象チーム自身の統計値のみを1セット取得
    if team_was_home:
        # チームがホームの場合、統計値は home_overall カラムにある
        display(last_row[fill_features_home_overall])
        display(df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home_overall].head(2))
        display(df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away_overall].head(2))
    else:
        display(last_row[fill_features_away_overall])
        display(df.loc[(df["status"] == "NS") & (df["home_team"] == team), fill_features_home_overall].head(2))
        display(df.loc[(df["status"] == "NS") & (df["away_team"] == team), fill_features_away_overall].head(2))

team Liverpool


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1634,4,11,5,6,24,13,11,47,27,20


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1658,4,11,5,6,24,13,11,47,27,20
1671,4,11,5,6,24,13,11,47,27,20





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1629,2,10,-2,12,18,-2,20,45,12,33


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1648,2,10,-2,12,18,-2,20,45,12,33
1666,2,10,-2,12,18,-2,20,45,12,33





home_total_points                     18
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       54.55
Name: 1634, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1658,18,0,54.55
1671,18,0,54.55


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1648,18,0,54.55
1666,18,0,54.55


team Aston Villa


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1625,4,8,3,5,19,9,10,34,14,20


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1646,4,8,3,5,19,9,10,34,14,20
1660,4,8,3,5,19,9,10,34,14,20





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1637,1,3,-4,7,12,3,9,21,-11,32


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1654,1,3,-4,7,12,3,9,21,-11,32
1677,1,3,-4,7,12,3,9,21,-11,32





away_total_points                     18
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       45.45
Name: 1637, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1646,18,0,45.45
1660,18,0,45.45


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1654,18,0,45.45
1677,18,0,45.45


team Brighton


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1632,4,13,6,7,21,5,16,36,8,28


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1654,4,13,6,7,21,5,16,36,8,28
1667,4,13,6,7,21,5,16,36,8,28





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1627,2,11,0,11,22,3,19,37,-1,38


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1647,2,11,0,11,22,3,19,37,-1,38
1671,2,11,0,11,22,3,19,37,-1,38





home_total_points                     16
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       36.36
Name: 1632, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1654,16,0,36.36
1667,16,0,36.36


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1647,16,0,36.36
1671,16,0,36.36


team Sunderland


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1623,3,9,6,3,9,6,3,9,6,3


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1642,3,9,6,3,9,6,3,9,6,3
1676,3,9,6,3,9,6,3,9,6,3





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1633,2,3,-2,5,3,-2,5,3,-2,5


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1658,2,3,-2,5,3,-2,5,3,-2,5
1663,2,3,-2,5,3,-2,5,3,-2,5





away_total_points                     19
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       45.45
Name: 1633, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1642,19,0,45.45
1676,19,0,45.45


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1658,19,0,45.45
1663,19,0,45.45


team Tottenham


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1620,1,6,-3,9,12,-5,17,33,-5,38


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1644,1,6,-3,9,12,-5,17,33,-5,38
1665,1,6,-3,9,12,-5,17,33,-5,38





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1638,4,12,7,5,16,-2,18,36,6,30


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1652,4,12,7,5,16,-2,18,36,6,30
1675,4,12,7,5,16,-2,18,36,6,30





away_total_points                     18
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       45.45
Name: 1638, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1644,18,0,45.45
1665,18,0,45.45


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1652,18,0,45.45
1675,18,0,45.45


team Wolves


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1635,0,7,-8,15,16,-4,20,29,-7,36


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1656,0,7,-8,15,16,-4,20,29,-7,36
1669,0,7,-8,15,16,-4,20,29,-7,36





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1624,0,3,-9,12,9,-6,15,26,-13,39


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1646,0,3,-9,12,9,-6,15,26,-13,39
1673,0,3,-9,12,9,-6,15,26,-13,39





home_total_points                    2
home_team_recent_5_wins_overall      0
home_season_wins_ave_overall       0.0
Name: 1635, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1656,2,0,0.0
1669,2,0,0.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1646,2,0,0.0
1673,2,0,0.0


team Nottingham Forest


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1628,1,5,-6,11,9,-7,16,29,6,23


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1647,1,5,-6,11,9,-7,16,29,6,23
1675,1,5,-6,11,9,-7,16,29,6,23





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1634,1,4,-6,10,15,-5,20,30,-7,37


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1656,1,4,-6,10,15,-5,20,30,-7,37
1662,1,4,-6,10,15,-5,20,30,-7,37





away_total_points                      9
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       18.18
Name: 1634, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1647,9,0,18.18
1675,9,0,18.18


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1656,9,0,18.18
1662,9,0,18.18


team Chelsea


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1624,3,7,1,6,15,6,9,36,17,19


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1649,3,7,1,6,15,6,9,36,17,19
1670,3,7,1,6,15,6,9,36,17,19





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1630,4,13,8,5,16,5,11,31,3,28


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1657,4,13,8,5,16,5,11,31,3,28
1661,4,13,8,5,16,5,11,31,3,28





away_total_points                     20
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       54.55
Name: 1630, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1649,20,0,54.55
1670,20,0,54.55


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1657,20,0,54.55
1661,20,0,54.55


team Manchester United


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1603,4,9,3,6,13,1,12,29,0,29


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1639,4,9,3,6,13,1,12,29,0,29
1659,4,9,3,6,13,1,12,29,0,29





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1620,1,6,-5,11,14,-7,21,23,-11,34


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1645,1,6,-5,11,14,-7,21,23,-11,34
1669,1,6,-5,11,14,-7,21,23,-11,34





away_total_points                    17
away_team_recent_5_wins_overall       0
away_season_wins_ave_overall       50.0
Name: 1620, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1639,17,0,50.0
1659,17,0,50.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1645,17,0,50.0
1669,17,0,50.0


team Leeds


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1637,2,7,-2,9,14,-10,24,26,-14,40


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1657,2,7,-2,9,14,-10,24,26,-14,40
1666,2,7,-2,9,14,-10,24,26,-14,40





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1628,1,4,-11,15,12,-17,29,21,-24,45


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1641,1,4,-11,15,12,-17,29,21,-24,45
1678,1,4,-11,15,12,-17,29,21,-24,45





home_total_points                     11
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       27.27
Name: 1637, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1657,11,0,27.27
1666,11,0,27.27


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1641,11,0,27.27
1678,11,0,27.27


team West Ham


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1622,1,6,-9,15,12,-8,20,26,-13,39


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1648,1,6,-9,15,12,-8,20,26,-13,39
1677,1,6,-9,15,12,-8,20,26,-13,39





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1631,2,8,-1,9,14,-2,16,24,-10,34


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1659,2,8,-1,9,14,-2,16,24,-10,34
1667,2,8,-1,9,14,-2,16,24,-10,34





away_total_points                     10
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       27.27
Name: 1631, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1648,10,0,27.27
1677,10,0,27.27


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1659,10,0,27.27
1667,10,0,27.27


team Manchester City


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1629,5,16,11,5,28,18,10,48,25,23


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1641,5,16,11,5,28,18,10,48,25,23
1663,5,16,11,5,28,18,10,48,25,23





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1636,3,9,5,4,12,7,5,30,7,23


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1651,3,9,5,4,12,7,5,30,7,23
1674,3,9,5,4,12,7,5,30,7,23





away_total_points                     22
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       63.64
Name: 1636, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1641,22,0,63.64
1663,22,0,63.64


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1651,22,0,63.64
1674,22,0,63.64


team Bournemouth


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1631,5,10,8,2,14,6,8,27,12,15


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1650,5,10,8,2,14,6,8,27,12,15
1661,5,10,8,2,14,6,8,27,12,15





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1625,1,10,-5,15,17,-5,22,40,4,36


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1642,1,10,-5,15,17,-5,22,40,4,36
1679,1,10,-5,15,17,-5,22,40,4,36





home_total_points                     18
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       45.45
Name: 1631, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1650,18,0,45.45
1661,18,0,45.45


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1642,18,0,45.45
1679,18,0,45.45


team Brentford


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1626,3,11,2,9,20,4,16,43,5,38


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1640,3,11,2,9,20,4,16,43,5,38
1678,3,11,2,9,20,4,16,43,5,38





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1632,1,6,-5,11,13,-2,15,29,4,25


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1653,1,6,-5,11,13,-2,15,29,4,25
1665,1,6,-5,11,13,-2,15,29,4,25





away_total_points                     16
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       45.45
Name: 1632, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1640,16,0,45.45
1678,16,0,45.45


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1653,16,0,45.45
1665,16,0,45.45


team Burnley


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1630,2,6,0,6,11,-4,15,21,-15,36


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1655,2,6,0,6,11,-4,15,21,-15,36
1672,2,6,0,6,11,-4,15,21,-15,36





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1622,1,8,-9,17,17,-7,24,26,-20,46


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1640,1,8,-9,17,17,-7,24,26,-20,46
1664,1,8,-9,17,17,-7,24,26,-20,46





home_total_points                     10
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       27.27
Name: 1630, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1655,10,0,27.27
1672,10,0,27.27


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1640,10,0,27.27
1664,10,0,27.27


team Arsenal


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1638,5,13,12,1,20,13,7,40,25,15


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1653,5,13,12,1,20,13,7,40,25,15
1673,5,13,12,1,20,13,7,40,25,15





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1623,5,8,5,3,16,9,7,35,18,17


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1649,5,8,5,3,16,9,7,35,18,17
1660,5,8,5,3,16,9,7,35,18,17





home_total_points                     26
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       72.73
Name: 1638, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1653,26,0,72.73
1673,26,0,72.73


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1649,26,0,72.73
1660,26,0,72.73


team Crystal Palace


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1627,3,12,5,7,20,10,10,30,3,27


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1645,3,12,5,7,20,10,10,30,3,27
1674,3,12,5,7,20,10,10,30,3,27





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1635,2,7,2,5,14,-4,18,30,6,24


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1655,2,7,2,5,14,-4,18,30,6,24
1668,2,7,2,5,14,-4,18,30,6,24





away_total_points                     17
away_team_recent_5_wins_overall        0
away_season_wins_ave_overall       36.36
Name: 1635, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1645,17,0,36.36
1674,17,0,36.36


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1655,17,0,36.36
1668,17,0,36.36


team Everton


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1621,3,7,2,5,13,0,13,27,6,21


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1643,3,7,2,5,13,0,13,27,6,21
1662,3,7,2,5,13,0,13,27,6,21





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1619,3,8,0,8,11,-1,12,18,-3,21


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1639,3,8,0,8,11,-1,12,18,-3,21
1650,3,8,0,8,11,-1,12,18,-3,21





home_total_points                    12
home_team_recent_5_wins_overall       0
home_season_wins_ave_overall       30.0
Name: 1621, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1643,12,0,30.0
1662,12,0,30.0


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1639,12,0,30.0
1650,12,0,30.0


team Fulham


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1633,3,8,3,5,15,1,14,29,-1,30


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1651,3,8,3,5,15,1,14,29,-1,30
1668,3,8,3,5,15,1,14,29,-1,30





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1621,1,7,-6,13,11,-9,20,29,-4,33


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1644,1,7,-6,13,11,-9,20,29,-4,33
1672,1,7,-6,13,11,-9,20,29,-4,33





home_total_points                     11
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       27.27
Name: 1633, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1651,11,0,27.27
1668,11,0,27.27


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1644,11,0,27.27
1672,11,0,27.27


team Newcastle


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1636,3,8,1,7,24,15,9,44,20,24


Unnamed: 0,home_team_recent_5_wins,home_recent_5_scores,home_recent_5_goal_diff,home_recent_5_goal_against,home_recent_10_scores,home_recent_10_goal_diff,home_recent_10_goal_against,home_recent_20_scores,home_recent_20_goal_diff,home_recent_20_goal_against
1652,3,8,1,7,24,15,9,44,20,24
1664,3,8,1,7,24,15,9,44,20,24





Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1626,0,2,-4,6,8,-5,13,26,-1,27


Unnamed: 0,away_team_recent_5_wins,away_recent_5_scores,away_recent_5_goal_diff,away_recent_5_goal_against,away_recent_10_scores,away_recent_10_goal_diff,away_recent_10_goal_against,away_recent_20_scores,away_recent_20_goal_diff,away_recent_20_goal_against
1643,0,2,-4,6,8,-5,13,26,-1,27
1676,0,2,-4,6,8,-5,13,26,-1,27





home_total_points                     12
home_team_recent_5_wins_overall        0
home_season_wins_ave_overall       27.27
Name: 1636, dtype: object

Unnamed: 0,home_total_points,home_team_recent_5_wins_overall,home_season_wins_ave_overall
1652,12,0,27.27
1664,12,0,27.27


Unnamed: 0,away_total_points,away_team_recent_5_wins_overall,away_season_wins_ave_overall
1643,12,0,27.27
1676,12,0,27.27


In [21]:
# 一時的なカラムを削除
df.drop(columns=['home_goal_difference', 'away_goal_difference', 'is_home_win', 'is_away_win', 'match_id'], inplace=True, errors='ignore')


#　データセット作成

In [22]:
# statusがFTの試合を除外して学習データを作成
train_df = df[df["status"] == "FT"].copy().reset_index(drop=True)
# 予測対象データ（statusがNSの試合）を抽出
predict_df = df[df["status"] == "NS"].copy().reset_index(drop=True)

# モデル学習に使用する特徴量の設定 & 説明変数と目的変数にデータを分割

In [23]:
#現在保有している特徴量名を確認
train_df.columns

Index(['fixture_id', 'date', 'season', 'home_team', 'away_team', 'home_score',
       'away_score', 'status', 'home_shots_on_goal', 'home_possession',
       'home_passes', 'home_passes_accuracy', 'home_fouls', 'home_corners',
       'home_yellow_cards', 'home_red_cards', 'away_shots_on_goal',
       'away_possession', 'away_passes', 'away_passes_accuracy', 'away_fouls',
       'away_corners', 'away_yellow_cards', 'away_red_cards',
       'home_last_position', 'home_last_won', 'home_last_drawn',
       'home_last_lost', 'home_last_gf', 'home_last_ga', 'home_last_gd',
       'home_last_points', 'away_last_position', 'away_last_won',
       'away_last_drawn', 'away_last_lost', 'away_last_gf', 'away_last_ga',
       'away_last_gd', 'away_last_points', 'target', 'home_team_recent_5_wins',
       'away_team_recent_5_wins', 'home_recent_5_scores',
       'away_recent_5_scores', 'home_recent_5_goal_against',
       'away_recent_5_goal_against', 'home_recent_5_goal_diff',
       'away_recent_5

In [24]:
# features = ["home_team","away_team",'home_season_wins_ave_overall',
#            'away_season_wins_ave_overall','home_last_points','away_last_points']
features = ["home_team","away_team",'home_season_wins_ave_overall',
            'away_season_wins_ave_overall','home_last_points','away_last_points',
            'home_last_gd','away_last_gd', 'home_recent_10_goal_diff','away_recent_10_goal_diff','points_difference']
target = "target"

In [25]:
x_all = train_df[features]
y_all = train_df[target]

print(x_all.shape)
print(y_all.shape)

(1639, 11)
(1639,)


# 学習データと検証データの期間設定


In [26]:
# --------------------------------------------------------------------------------
# 動的folds生成関数 
# --------------------------------------------------------------------------------
def generate_dynamic_folds(end_date_str, n_folds=3, val_period_days=30, gap_days=10):
    """
    現在の実行日を基準に、バックテスティング用のfoldsを動的に生成する
    """
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    folds = []
    
    for i in range(n_folds):
        val_end = end_date - dt.timedelta(days=i * gap_days)
        val_start = val_end - dt.timedelta(days=val_period_days)
        train_end = val_start - dt.timedelta(days=1)
        
        folds.append({
            "train_end": train_end.strftime('%Y-%m-%d'),
            "val_start": val_start.strftime('%Y-%m-%d'),
            "val_end": val_end.strftime('%Y-%m-%d'),
        })
        
    print("動的に生成されたfolds:")
    for fold in folds:
        print(f"  Train End: {fold['train_end']}, Val Period: {fold['val_start']} ~ {fold['val_end']}")
        
    return folds

In [27]:
# 4. 動的foldsの生成
# 最新の結果が出ている試合の日付を基準にする
latest_match_date = train_df['date'].max()
print(latest_match_date)
folds = generate_dynamic_folds(
    end_date_str=latest_match_date.strftime('%Y-%m-%d'), 
    n_folds=3, 
    val_period_days=30,
    gap_days=10
)


2025-11-23 16:30:00
動的に生成されたfolds:
  Train End: 2025-10-23, Val Period: 2025-10-24 ~ 2025-11-23
  Train End: 2025-10-13, Val Period: 2025-10-14 ~ 2025-11-13
  Train End: 2025-10-03, Val Period: 2025-10-04 ~ 2025-11-03


# モデル学習&評価関数

In [28]:
#ハイパーパラメータの設定
params = {
    "n_estimators":1000,
    "learning_rate":0.05,
    "num_leaves":32
}

In [29]:
#訓練データと検証データのindex作成
# foldsの中から、今回設定した範囲を取り出し、その範囲に入っているかどうかを判断し、その範囲内のデータのみを訓練データと検証データとしていく。
#これを3周する


def train_lgb(original_df,
              input_x,
              input_y,
              list_nfold=[0,1,2],
              params=params,
              folds=folds
              ):

    #推論値を格納する変数の作成
    df_valid_pred = pd.DataFrame()
    #評価値を入れる変数の作成
    metrics_tr = [] #学習データ用
    metrics_val = [] #検証データ用
    #重要度を格納するデータフレームの作成
    df_imp = pd.DataFrame()
    #レポートを入れる変数の作成
    report_tr_list = []
    report_val_list = []

    # 'H', 'D', 'A' のラベルを数値 (0, 1, 2) に変換
    input_y_factorized, target_labels = pd.factorize(input_y)

    print(f"ターゲットラベルの順序: {target_labels}")

    

    for i,nfold in enumerate(list_nfold):
        print("fold:",i)
        # 学習用
        train_idx = original_df["date"] <= folds[nfold]["train_end"]
        x_tr = input_x[train_idx]
        y_tr = input_y_factorized[train_idx]
        
        # 検証用
        val_idx = (original_df["date"] >= folds[nfold]["val_start"]) & (original_df["date"] <= folds[nfold]["val_end"])
        x_val = input_x[val_idx]
        y_val = input_y_factorized[val_idx]

        # LightGBM モデル
        model = lgb.LGBMClassifier(**params)
        
        
        # モデルの訓練
        model.fit(
            x_tr, y_tr,
            eval_set=[(x_val, y_val)],
            eval_metric="multi_logloss",
            callbacks=[
            early_stopping(stopping_rounds=50),  # 早期停止
            log_evaluation(50)                   # 50回ごとにログ表示
            ]
            )
        #保存するモデルのファイル名
        filename = "model_lgb_fold{}.pickle".format(nfold)

        # モデルの保存
        final_model_path = os.path.join(MODEL_DIR, filename)
        os.makedirs(MODEL_DIR, exist_ok=True)
        with open(final_model_path, 'wb') as f:
            pickle.dump(model, f)
            
        print(f"最終モデルを {final_model_path} に保存しました。")




#-------------------学習データの評価のコード-----------------------

        acc_tr,ll_tr,f1_macro_tr,f1_weighted_tr,report_tr,y_pred_tr,y_pred_proba_tr = evaluate_model(model,x_tr,y_tr,target_labels)    
        
        print(f"Fold {i+1} ACC: {acc_tr:.4f}, LogLoss: {ll_tr:.4f}, F1(macro): {f1_macro_tr:.4f}, F1(weighted): {f1_weighted_tr:.4f}")

        #検証スコアを各foldごとに格納 #あとでデータフレーム型に変更
        metrics_tr.append({
            "nfold": nfold,
            "accuracy": acc_tr,
            "log_loss": ll_tr,
            "f1_macro": f1_macro_tr,
            "f1_weighted": f1_weighted_tr,
            # per-class f1 を追加
            "f1_A": report_tr["A"]["f1-score"] if "A" in report_tr else None,
            "f1_D": report_tr["D"]["f1-score"] if "D" in report_tr else None,
            "f1_H": report_tr["H"]["f1-score"] if "H" in report_tr else None,
        })

        report_tr_list.append({
            "fold": nfold,
            "report": report_tr
            })


        
#----------------------以下、検証データの評価のコード---------------------------------

        acc_val,ll_val,f1_macro_val,f1_weighted_val,report_val,y_pred_val,y_pred_proba_val = evaluate_model(model,x_val,y_val,target_labels)    

        print(f"Fold {i+1} ACC: {acc_val:.4f}, LogLoss: {ll_val:.4f}, F1(macro): {f1_macro_val:.4f}, F1(weighted): {f1_weighted_val:.4f}")

        #検証スコアを各foldごとに格納 #あとでデータフレーム型に変更
        metrics_val.append({
            "nfold": nfold,
            "accuracy": acc_val,
            "log_loss": ll_val,
            "f1_macro": f1_macro_val,
            "f1_weighted": f1_weighted_val,
            # per-class f1 を追加
            "f1_A": report_val["A"]["f1-score"] if "A" in report_val else None,
            "f1_D": report_val["D"]["f1-score"] if "D" in report_val else None,
            "f1_H": report_val["H"]["f1-score"] if "H" in report_val else None,
        })

        report_val_list.append({
            "fold": nfold,
            "report": report_val
            })



#----------------検証データの予測値の格納と特徴量重要度の格納---------------------



        #各foldごとに実際の値と予測値を格納したDataFrameを作成
        
        df_pred = pd.DataFrame({
            "nfold": [nfold] * len(y_val),
            "true": y_val,
            "pred": y_pred_val,
            "prob_A": y_pred_proba_val[:, 0],
            "prob_D": y_pred_proba_val[:, 1],
            "prob_H": y_pred_proba_val[:, 2]
        })


        # df_pred = pd.DataFrame({"nfold":nfold,"true":y_val,"pred_proba":y_pred_proba,"pred":y_pred})
        df_valid_pred = pd.concat([df_valid_pred,df_pred],axis = 0,ignore_index=True)
        
        #各foldごとに各変数の重要度を格納したデータフレームを作成
        tmp_imp = pd.DataFrame({"nfold":nfold,"col":x_tr.columns,"imp":model.feature_importances_})
        df_imp = pd.concat([df_imp,tmp_imp])

#------------------------最終処理---------------------------
    
    print("-"*10,"result","-"*10)
    
    #各foldごとの評価値をデータフレームに格納
    df_metrics_tr = pd.DataFrame(metrics_tr,columns = ["nfold","accuracy","log_loss","f1_macro","f1_weighted","f1_A","f1_D","f1_H"]) #学習データ評価値
    df_metrics_val = pd.DataFrame(metrics_val,columns = ["nfold","accuracy","log_loss","f1_macro","f1_weighted","f1_A","f1_D","f1_H"]) #検証データ評価値

    print("-"*10,"学習データ","-"*10)
    print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_tr['accuracy'].mean()))
    print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_tr['log_loss'].mean()))
    print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_tr['f1_macro'].mean()))
    print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_tr['f1_weighted'].mean()))
    print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_tr['f1_A'].mean()))
    print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_tr['f1_D'].mean()))
    print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_tr['f1_H'].mean()))
    print()
    print("-"*10,"検証データ","-"*10)
    print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_val['accuracy'].mean()))
    print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_val['log_loss'].mean()))
    print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_val['f1_macro'].mean()))
    print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_val['f1_weighted'].mean()))
    print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_val['f1_A'].mean()))
    print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_val['f1_D'].mean()))
    print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_val['f1_H'].mean()))


    return df_valid_pred,df_imp,df_metrics_tr,df_metrics_val,report_tr_list,report_val_list,target_labels

In [30]:
def evaluate_model(model, X, y,target_labels):
    y_pred_proba = model.predict_proba(X)
    pred_idx = np.argmax(y_pred_proba, axis=1)
    # y_pred = model.classes_[pred_idx]
    y_pred = pred_idx

    # ロス計算 (ラベルが factorize された数値であることを前提)
    try:
        # yのユニークな値の順序をlabelsとして指定
        unique_y = np.unique(y)
        ll = log_loss(y, y_pred_proba, labels=unique_y) 
    except ValueError:
        ll = np.nan
    
    acc = accuracy_score(y, y_pred)
    # ll = log_loss(y, y_pred_proba, labels=model.classes_)
    f1_macro = f1_score(y, y_pred, average="macro")
    f1_weighted = f1_score(y, y_pred, average="weighted")
    report = classification_report(
        y, y_pred, 
        labels=[0,1,2],
        target_names=target_labels,
        output_dict=True
    )

    return acc, ll, f1_macro, f1_weighted, report, y_pred, y_pred_proba

In [31]:
df_valid_pred,df_imp,df_metrics_tr,df_metrics_val,report_tr,report_val,target_labels = train_lgb(train_df,
                                            x_all,
                                            y_all,
                                            list_nfold=[0,1,2],
                                            params=params,
                                            folds=folds
                                            )

ターゲットラベルの順序: CategoricalIndex(['H', 'A', 'D'], categories=['A', 'D', 'H'], ordered=False, dtype='category')
fold: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000429 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 888
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 11
[LightGBM] [Info] Start training from score -0.805476
[LightGBM] [Info] Start training from score -1.131652
[LightGBM] [Info] Start training from score -1.466962
Training until validation scores don't improve for 50 rounds
[50]	valid_0's multi_logloss: 0.985105
Early stopping, best iteration is:
[21]	valid_0's multi_logloss: 0.961494
最終モデルを C:\Users\image\Portfolio\Soccer analysis\soccer_pipeline\models\model_lgb_fold0.pickle に保存しました。
Fold 1 ACC: 0.7119, LogLoss: 0.7810, F1(macro): 0.6485, F1(weighted): 0.6839
Fold 1 ACC: 0.6216, LogLoss: 0.9615, F1(macro): 0.4220, F1(weighted): 0.5842
fold

#　モデルの評価

In [32]:
#各変数ごとに3回のfoldでの重要度の平均を算出
print("モデルの特徴量の重要度")
df_imp_mean = df_imp.groupby("col")["imp"].agg(["mean","std"]).sort_values("mean",ascending=False)
df_imp_mean

モデルの特徴量の重要度


Unnamed: 0_level_0,mean,std
col,Unnamed: 1_level_1,Unnamed: 2_level_1
away_season_wins_ave_overall,343.666667,218.532225
points_difference,301.666667,175.04666
home_season_wins_ave_overall,294.0,182.748461
home_recent_10_goal_diff,273.666667,167.255294
away_recent_10_goal_diff,242.333333,147.3273
home_team,203.0,134.714513
away_last_gd,192.666667,89.968513
away_last_points,185.666667,106.208914
home_last_gd,181.0,104.484449
home_last_points,165.666667,100.286257


In [33]:
print("学習データスコア表")
display(df_metrics_tr)
print()
print("検証データスコア表")
display(df_metrics_val)

print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_tr['accuracy'].mean()))
print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_tr['log_loss'].mean()))
print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_tr['f1_macro'].mean()))
print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_tr['f1_weighted'].mean()))
print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_tr['f1_A'].mean()))
print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_tr['f1_D'].mean()))
print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_tr['f1_H'].mean()))
print()
print("-"*10,"検証データ","-"*10)
print("各foldのaccuracyの平均:{:.2f}".format(df_metrics_val['accuracy'].mean()))
print("各foldのlog_lossの平均:{:.2f}".format(df_metrics_val['log_loss'].mean()))
print("各foldのf1_macroの平均:{:.2f}".format(df_metrics_val['f1_macro'].mean()))
print("各foldのf1_weightedの平均:{:.2f}".format(df_metrics_val['f1_weighted'].mean()))
print("各foldのf1_Hの平均:{:.2f}".format(df_metrics_val['f1_A'].mean()))
print("各foldのf1_Aの平均:{:.2f}".format(df_metrics_val['f1_D'].mean()))
print("各foldのf1_Dの平均:{:.2f}".format(df_metrics_val['f1_H'].mean()))

学習データスコア表


Unnamed: 0,nfold,accuracy,log_loss,f1_macro,f1_weighted,f1_A,f1_D,f1_H
0,0,0.711875,0.781032,0.648492,0.683902,0.724239,0.441176,0.780059
1,1,0.849686,0.596769,0.836085,0.846012,0.851562,0.781046,0.875648
2,2,0.667722,0.841477,0.57299,0.621391,0.679592,0.284382,0.754997



検証データスコア表


Unnamed: 0,nfold,accuracy,log_loss,f1_macro,f1_weighted,f1_A,f1_D,f1_H
0,0,0.621622,0.961494,0.421986,0.584244,0.5,0.0,0.765957
1,1,0.6,0.915721,0.519171,0.595872,0.5,0.363636,0.693878
2,2,0.631579,0.897974,0.425864,0.617409,0.538462,0.0,0.73913


各foldのaccuracyの平均:0.74
各foldのlog_lossの平均:0.74
各foldのf1_macroの平均:0.69
各foldのf1_weightedの平均:0.72
各foldのf1_Hの平均:0.75
各foldのf1_Aの平均:0.50
各foldのf1_Dの平均:0.80

---------- 検証データ ----------
各foldのaccuracyの平均:0.62
各foldのlog_lossの平均:0.93
各foldのf1_macroの平均:0.46
各foldのf1_weightedの平均:0.60
各foldのf1_Hの平均:0.51
各foldのf1_Aの平均:0.12
各foldのf1_Dの平均:0.73


In [34]:
df_metrics_mean = df_metrics_val.mean()

df_metrics_mean = df_metrics_mean.to_frame().T
df_metrics_mean = df_metrics_mean.drop("nfold",axis = 1)

df_metrics_mean

Unnamed: 0,accuracy,log_loss,f1_macro,f1_weighted,f1_A,f1_D,f1_H
0,0.617734,0.925063,0.455674,0.599175,0.512821,0.121212,0.732988


In [35]:
import json  

# 定数
LOG_FILE_PATH = os.path.join(ROOT_DIR,"data","evaluation_results_log.csv")

# 複合的な一意なIDを格納するカラム名
COMPOSITE_ID_COLUMN = 'Composite_ID'

def create_feature_id(feature_list):
    """
    特徴量リストをソートし、カンマ区切りの一意の文字列IDに変換する。
    """
    sorted_features = sorted(feature_list)
    return ",".join(sorted_features)

def create_composite_id(feature_list: list, params: dict) -> str:
    """
    特徴量リストIDとハイパーパラメータを組み合わせて一意の複合IDを作成する。
    """
    # 1. 特徴量IDを作成
    feature_id = create_feature_id(feature_list)
    
    # 2. パラメータをソートし、JSON文字列に変換
    sorted_params = dict(sorted(params.items()))
    params_str = json.dumps(sorted_params, sort_keys=True)
    
    # 3. 複合IDを作成
    # 区切り文字に '|' を使用して結合
    composite_id = f"{feature_id}|{params_str}"
    return composite_id


def update_evaluation_log(feature_list: list, metrics_df: pd.DataFrame, params: dict):
    """
    モデルの評価結果とパラメータをログファイルに追加する。
    特徴量とパラメータの組み合わせが既に存在する場合は、スキップする。
    
    Args:
        feature_list (list): モデルに使用した特徴量のリスト。
        metrics_df (pd.DataFrame): 1行7列の評価指標データフレーム。
        params (dict): モデルに使用したハイパーパラメータの辞書。
    """
    # 1. 複合IDの作成
    composite_id = create_composite_id(feature_list, params)

    # 2. パラメータDataFrameの作成 (ログに含めるため)
    # 辞書を1行のデータフレームに変換
    params_df = pd.DataFrame([params])

    # 3. メタデータ（複合ID、特徴量、タイムスタンプ）データフレームの作成
    metadata_df = pd.DataFrame({
        COMPOSITE_ID_COLUMN: [composite_id], # 重複チェックに使用するID
        'Features': [str(feature_list)],     # 特徴量リスト (視認性のため)
        'Params_JSON': [json.dumps(params)], # パラメータ全体をJSON文字列として保存
        'Timestamp': [datetime.now().strftime('%Y-%m-%d %H:%M:%S')] 
    })
    
    # 4. すべてのデータを結合 (メタデータ + パラメータ詳細 + 評価指標)
    new_result_df = pd.concat([metadata_df, params_df, metrics_df], axis=1)
    
    # 5. ログファイルが存在するかチェック
    if not os.path.exists(LOG_FILE_PATH):
        print("新しいログファイルを作成します。")
        new_result_df.to_csv(LOG_FILE_PATH, index=False)
        return

    # 6. ログファイルが存在する場合: 読み込みと重複チェック
    existing_log_df = pd.read_csv(LOG_FILE_PATH)
    
    # 複合IDが既存のログに含まれているかを確認
    is_duplicate = composite_id in existing_log_df[COMPOSITE_ID_COLUMN].values
    
    if is_duplicate:
        # 重複がある場合はスキップ
        print(f"✅ スキップ: 特徴量とパラメータの組み合わせ '{composite_id}' は既に存在します。")
    else:
        # 重複がない場合は追記
        new_result_df.to_csv(LOG_FILE_PATH, mode='a', header=False, index=False)
        print(f"➡️ 追加: 新しい評価結果をログに追加しました。 ({composite_id})")

In [36]:
update_evaluation_log(features,df_metrics_mean,params)

✅ スキップ: 特徴量とパラメータの組み合わせ 'away_last_gd,away_last_points,away_recent_10_goal_diff,away_season_wins_ave_overall,away_team,home_last_gd,home_last_points,home_recent_10_goal_diff,home_season_wins_ave_overall,home_team,points_difference|{"learning_rate": 0.05, "n_estimators": 1000, "num_leaves": 32}' は既に存在します。
