# 条件分析

In [None]:
# 必要なインポート
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
import os

OWNDIR = os.getcwd()
PARENTDIR = os.path.dirname(OWNDIR)

def categorize_distance(d):
    if d == 1000:
        return "1000"
    elif 1000 < d < 1200:
        return "1000〜1200"
    elif d == 1200:
        return "1200"
    elif 1200 < d < 1600:
        return "1200〜1600"
    elif d == 1600:
        return "1600"
    elif 1600 < d < 2000:
        return "1600〜2000"
    elif d == 2000:
        return "2000"
    elif 2000 < d < 2400:
        return "2000〜2400"
    elif d == 2400:
        return "2400"
    elif 2400 < d < 2800:
        return "2400〜2800"
    elif d == 2800:
        return "2800"
    elif 2800 < d < 3200:
        return "2800〜3200"
    elif d == 3200:
        return "3200"
    elif d > 3200:
        return "3200〜"
    else:
        return "不明"

# データ読み込みと前処理
df = pd.read_csv(PARENTDIR + "/merge_df.csv")
df.replace(-1, np.nan, inplace=True)
df = df[df["distance"] > 100].copy()
df["date"] = pd.to_datetime(df["date"].astype(str), errors="coerce")
df["season"] = pd.cut(df["date"].dt.month, bins=[0, 3, 6, 9, 12], labels=["1~3月", "4~6月", "7~9月", "10~12月"])
df["distance_category"] = df["distance"].apply(categorize_distance)
df["headcount_category"] = pd.cut(
    df["headcount"],
    bins=[4, 7, 10, 14, 18],
    labels=["5〜7頭", "8〜10頭", "11〜14頭", "15〜18頭"],
    right=True  # 右端を含める（デフォルト）
)

# 血統データ読み込み
pedigree_df = pd.read_csv(PARENTDIR + "/horse_pedigree_extracted.csv")

# 血統ID → 名前 の辞書作成
id_to_name = {
    "parent_ml_id": dict(zip(pedigree_df["parent_ml_id"], pedigree_df["parent_ml_name"])),
    "parent_ml_ml_id": dict(zip(pedigree_df["parent_ml_ml_id"], pedigree_df["parent_ml_ml_name"])),
    "parent_ml_fml_id": dict(zip(pedigree_df["parent_ml_fml_id"], pedigree_df["parent_ml_fml_name"])),
    "parent_fml_id": dict(zip(pedigree_df["parent_fml_id"], pedigree_df["parent_fml_name"])),
    "parent_fml_ml_id": dict(zip(pedigree_df["parent_fml_ml_id"], pedigree_df["parent_fml_ml_name"])),
    "parent_fml_fml_id": dict(zip(pedigree_df["parent_fml_fml_id"], pedigree_df["parent_fml_fml_name"]))
}

# ラベル変換用マッピング
race_course_map = {"01": "札幌", "02": "函館", "03": "福島", "04": "新潟", "05": "東京", "06": "中山", "07": "中京", "08": "京都", "09": "阪神", "10": "小倉"}
direction_map = {-1: "障害", 0: "直線", 1: "右", 2: "左"}
weather_map = {0: "晴", 1: "曇", 2: "小雨", 3: "雨", 4: "小雪", 5: "雪"}
ground_status_map = {0: "良", 1: "稍重", 2: "重", 3: "不良"}
obstacle_map = {0: "平地", 1: "障害"}
surface_map = {0: "ダート", 1: "芝"}

# UI用の変換
df_ui = df.copy()

df_ui["race_course_id"] = df_ui["race_course_id"].astype(int).astype(str).str.zfill(2).map(race_course_map)
df_ui["direction"] = df_ui["direction"].map(direction_map)
df_ui["weather"] = df_ui["weather"].map(weather_map)
df_ui["ground_status"] = df_ui["ground_status"].map(ground_status_map)
df_ui["is_obstacle"] = df_ui["is_obstacle"].map(obstacle_map)
df_ui["surface"] = df_ui["surface"].map(surface_map)

# UIウィジェット定義
dropdowns = {
    "season": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["season"].dropna().unique()), description="季節"),
    "race_course_id": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["race_course_id"].dropna().unique()), description="開催地"),
    "surface": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["surface"].dropna().unique()), description="馬場"),
    "distance": widgets.Dropdown(options=["全条件"] + sorted(df["distance_category"].dropna().unique()),description="距離"),
    "direction": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["direction"].dropna().unique()), description="周回方向"),
    "headcount": widgets.Dropdown(options=["全条件"]+ sorted(df["headcount_category"].dropna().unique()), description="頭数"),
    "weather": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["weather"].dropna().unique()), description="天気"),
    "ground_status": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["ground_status"].dropna().unique()), description="馬場状態"),
    "is_obstacle": widgets.Dropdown(options=["全条件"]+ sorted(df_ui["is_obstacle"].dropna().unique()), description="競走形態")
}

# UI表示
ui_box = widgets.VBox([dropdowns[k] for k in dropdowns])
output = widgets.Output()
display(ui_box, output)

# 分析処理定義
def analyze_top3_features(_):
    with output:
        output.clear_output()
        print("🔎 分析中です...")

        # 選択値取得とマッピング復元
        selected = {k: dropdowns[k].value for k in dropdowns}
        reverse_maps = {
            "race_course_id": {v: k for k, v in race_course_map.items()},
            "direction": {v: k for k, v in direction_map.items()},
            "weather": {v: k for k, v in weather_map.items()},
            "ground_status": {v: k for k, v in ground_status_map.items()},
            "is_obstacle": {v: k for k, v in obstacle_map.items()}
        }

        # 条件フィルタリング
        df_filtered = df.copy()
        df_filtered["season"] = pd.cut(df_filtered["date"].dt.month, bins=[0, 3, 6, 9, 12], labels=["1~3月", "4~6月", "7~9月", "10~12月"])
        # 季節
        if selected["season"] != "全条件":
            df_filtered = df_filtered[df_filtered["season"] == selected["season"]]

        # 開催地（race_course_id）
        if selected["race_course_id"] != "全条件":
            df_filtered = df_filtered[
                df_filtered["race_course_id"].astype(int).astype(str).str.zfill(2) ==
                reverse_maps["race_course_id"][selected["race_course_id"]]
            ]

        # 馬場（surface）
        if selected["surface"] != "全条件":
            df_filtered = df_filtered[
                df_filtered["surface"].map(surface_map) == selected["surface"]
            ]

        # 距離（distance）
        if selected["distance"] != "全条件":
            df_filtered = df_filtered[df_filtered["distance_category"] == selected["distance"]]

        # 周回方向（direction）
        if selected["direction"] != "全条件":
            df_filtered = df_filtered[
                df_filtered["direction"].map(direction_map) == selected["direction"]
            ]

        # 出走頭数（headcount）
        if selected["headcount"] != "全条件":
            df_filtered["headcount_category"] = pd.cut(
                df_filtered["headcount"],
                bins=[4, 7, 10, 14, 18],
                labels=["5〜7頭", "8〜10頭", "11〜14頭", "15〜18頭"],
                right=True
            )
            df_filtered = df_filtered[df_filtered["headcount_category"] == selected["headcount"]]

        # 天気（weather）
        if selected["weather"] != "全条件":
            df_filtered = df_filtered[
                df_filtered["weather"].map(weather_map) == selected["weather"]
            ]

        # 馬場状態（ground_status）
        if selected["ground_status"] != "全条件":
            df_filtered = df_filtered[
                df_filtered["ground_status"].map(ground_status_map) == selected["ground_status"]
            ]

        # 競走形態（is_obstacle）
        if selected["is_obstacle"] != "全条件":
            df_filtered = df_filtered[
                df_filtered["is_obstacle"].map(obstacle_map) == selected["is_obstacle"]
            ]

        df_top3 = df_filtered[df_filtered["rank"] <= 3].copy()
        print(f"\n✅ 該当条件に一致した3着以内のレース数: {len(df_top3)}件")
        if df_top3.empty:
            print("⚠️ 該当条件に一致する3着以内データが存在しません。")
            return

        df_top3["standardized_horse_number"] = pd.cut(df_top3["horse_number"] / df_top3["headcount"],
            bins=[0, 0.3, 0.7, 1.0], labels=["内枠", "中枠", "外枠"], right=False)
        df_top3["standardized_half_way_dif"] = pd.cut(df_top3["half_way_dif"] / df_top3["headcount"],
            bins=[0, 0.1, 0.3, 0.7, 1.0], labels=["逃げ", "先行", "差し", "追い込み"], right=False)

        target_columns = {
            #"frame_number": "枠番",
            "standardized_horse_number": "馬番（内外分類）",
            "rider_id": "騎手ID",
            "tamer_id": "調教師ID",
            "standardized_half_way_dif": "脚質",
            "parent_ml_id": "父ID",
            "parent_ml_ml_id": "父父ID",
            "parent_ml_fml_id": "父母ID",
            "parent_fml_id": "母ID",
            "parent_fml_ml_id": "母父ID",
            "parent_fml_fml_id": "母母ID"
        }
        

        print("\n📊 特徴量別出現ランキング（上位3件）:")
        for col, label in target_columns.items():
            vc = df_top3[col].value_counts(normalize=True).head(3)
            if not vc.empty:
                print(f"\n🟢 {label}:")
                for idx, p in vc.items():
                    if col in ["rider_id", "tamer_id"] and pd.notna(idx):
                        idx_str = str(int(idx)).zfill(5)
                    elif col in id_to_name and pd.notna(idx):
                        idx_str = id_to_name[col].get(idx, str(idx))
                    else:
                        idx_str = str(idx)
                    print(f"  - {idx_str}: {round(p * 100, 2)}%")
            else:
                print(f"\n🟡 {label}: データなし")

# 分析ボタン
button = widgets.Button(description="分析実行")
button.on_click(analyze_top3_features)
display(button)

VBox(children=(Dropdown(description='季節', options=('全条件', '10~12月', '1~3月', '4~6月', '7~9月'), value='全条件'), Dro…

Output()

Button(description='分析実行', style=ButtonStyle())

# 機械学習回収率分析

In [1]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
import os

OWNDIR = os.getcwd()
PARENTDIR = os.path.dirname(OWNDIR)

# 推論結果ファイル読み込み（例：過去に出力したものを集約したCSV）
pred_df = pd.read_csv(PARENTDIR + "/cleaned_test_data.csv")

# ラベル変換用辞書
race_course_map = {"01": "札幌", "02": "函館", "03": "福島", "04": "新潟", "05": "東京", "06": "中山", "07": "中京", "08": "京都", "09": "阪神", "10": "小倉"}
ground_status_map = {0: "良", 1: "稍重", 2: "重", 3: "不良"}
surface_map = {0: "ダート", 1: "芝"}
obstacle_map = {0: "平地", 1: "障害"}
weather_map = {0: "晴", 1: "曇", 2: "小雨", 3: "雨", 4: "小雪", 5: "雪"}

# 変換
pred_df["date"] = pd.to_datetime(pred_df["date"].astype(str), errors='coerce')
pred_df["season"] = pd.cut(pred_df["date"].dt.month, bins=[0, 3, 6, 9, 12], labels=["1~3月", "4~6月", "7~9月", "10~12月"])
pred_df["race_course_id"] = pred_df["race_course_id"].astype(str).str.zfill(2).map(race_course_map)
pred_df["surface"] = pred_df["surface"].map(surface_map)
pred_df["ground_status"] = pred_df["ground_status"].map(ground_status_map)
pred_df["is_obstacle"] = pred_df["is_obstacle"].map(obstacle_map)
pred_df["weather"] = pred_df["weather"].map(weather_map)

pred_df["distance_category"] = pd.cut(pred_df["distance"], 
    bins=[0, 1000, 1200, 1600, 2000, 2400, 2800, 3200, 10000], 
    labels=["~1000", "1001-1200", "1201-1600", "1601-2000", "2001-2400", "2401-2800", "2801-3200", "3201~"])

pred_df["headcount_category"] = pd.cut(pred_df["headcount"], bins=[4, 7, 10, 14, 18], 
                                       labels=["5〜7頭", "8〜10頭", "11〜14頭", "15〜18頭"])

# UI定義
dropdowns = {
    "season": widgets.Dropdown(options=["全条件"] + list(pred_df["season"].dropna().unique()), description="季節"),
    "race_course_id": widgets.Dropdown(options=["全条件"] + sorted(pred_df["race_course_id"].dropna().unique()), description="開催地"),
    "surface": widgets.Dropdown(options=["全条件"] + sorted(pred_df["surface"].dropna().unique()), description="馬場"),
    "distance_category": widgets.Dropdown(options=["全条件"] + list(pred_df["distance_category"].dropna().unique()), description="距離"),
    "headcount_category": widgets.Dropdown(options=["全条件"] + list(pred_df["headcount_category"].dropna().unique()), description="頭数"),
    "weather": widgets.Dropdown(options=["全条件"] + list(pred_df["weather"].dropna().unique()), description="天気"),
    "ground_status": widgets.Dropdown(options=["全条件"] + list(pred_df["ground_status"].dropna().unique()), description="馬場状態"),
    "is_obstacle": widgets.Dropdown(options=["全条件"] + list(pred_df["is_obstacle"].dropna().unique()), description="競走形態"),
}

ui_box = widgets.VBox(list(dropdowns.values()))
output = widgets.Output()
display(ui_box, output)


def analyze_callback(_):
    with output:
        output.clear_output()
        df = pred_df.copy()
        
        # 🔽 ここに fukusho_return を計算する処理を追加
        def get_fukusho(row):
            num = row['horse_number']
            if num == row['horse_number_first']:
                return row['hukusyo_first']
            elif num == row['horse_number_second']:
                return row['hukusyo_second']
            elif num == row['horse_number_third']:
                return row['hukusyo_third']
            else:
                return 0

        df["fukusho_return"] = df.apply(get_fukusho, axis=1)
        df["fukusho_return"] = pd.to_numeric(df["fukusho_return"], errors="coerce").fillna(0) * 10

        # 🔽 以下は既存処理（フィルター処理）
        for k, widget in dropdowns.items():
            val = widget.value
            if val != "全条件":
                df = df[df[k] == val]

        if df.empty:
            print("⚠ 条件に一致するデータがありません")
            return

        df_selected = df[df["prob"] >= 0.6]
        buy_count = len(df_selected)
        hit_count = df_selected[df_selected["rank"] <= 3].shape[0]
        payout = df_selected["fukusho_return"].sum()
        investment = buy_count * 1000
        rate = round(payout / investment * 100, 2) if investment > 0 else 0

        print(f"✅ 購入数: {buy_count} 件")
        print(f"✅ 的中数: {hit_count} 件")
        print(f"💰 投資額: {investment:,} 円")
        print(f"💰 払戻額: {payout:,.0f} 円")
        print(f"📈 回収率: {rate} %")
        
# ボタン
button = widgets.Button(description="回収率を分析")
button.on_click(analyze_callback)
display(button)

VBox(children=(Dropdown(description='季節', options=('全条件', '7~9月', '4~6月', '10~12月', '1~3月'), value='全条件'), Dro…

Output()

Button(description='回収率を分析', style=ButtonStyle())

# 1000円ずつ買ったときの指定日の回収率

In [21]:
import os
import pandas as pd

# アップロードされたファイルの読み込み
test_data = pd.read_csv(os.path.dirname(os.getcwd())+ "/cleaned_test_data.csv")

# 日付をdatetime型に変換
test_data["date"] = pd.to_datetime(test_data["date"].astype(str), errors="coerce")

date_str = input("8桁の日付：")

target_day = pd.to_datetime(date_str, format="%Y%m%d")
day_df = test_data[test_data["date"] == target_day]

# 各レースごとに、確率上位6頭かつ閾値0.6以上を購入対象とする
purchased = (
    day_df[day_df["prob"] >= 0.6]
    .sort_values(["race_id", "prob"], ascending=[True, False])
    .groupby("race_id")
    .head(6)
)

# 複勝払い戻しを取得する関数
def get_fukusho(row):
    num = row['horse_number']
    if num == row.get('horse_number_first'):
        return row.get('hukusyo_first', 0)
    elif num == row.get('horse_number_second'):
        return row.get('hukusyo_second', 0)
    elif num == row.get('horse_number_third'):
        return row.get('hukusyo_third', 0)
    else:
        return 0

# 複勝払い戻しを列として追加（対象は全 test_data）
test_data["fukusho_return"] = test_data.apply(get_fukusho, axis=1)
test_data["fukusho_return"] = pd.to_numeric(test_data["fukusho_return"], errors='coerce').fillna(0)
test_data["fukusho_return"] = test_data["fukusho_return"].astype(int)

# 購入対象データに複勝情報をマージして表示
purchased = purchased.merge(
    test_data[["race_id", "horse_number", "fukusho_return"]],
    on=["race_id", "horse_number"],
    how="left"
)

# 開催地の辞書（race_course_id -> 名称）
race_course_map = {
    "01": "札幌", "02": "函館", "03": "福島", "04": "新潟", "05": "東京",
    "06": "中山", "07": "中京", "08": "京都", "09": "阪神", "10": "小倉"
}

# race_id から race_course_id と race_number を抽出して列追加
purchased["開催地コード"] = purchased["race_id"].astype(str).str.zfill(6).str[4:6]
purchased["レース番号"] = purchased["race_id"].astype(str).str.zfill(6).str[-2:].astype(int)
purchased["開催地"] = purchased["開催地コード"].map(race_course_map)

# 表示項目を変更
columns_to_show = ["開催地", "レース番号", "horse_number", "prob"]
if "rank" in purchased.columns:
    columns_to_show.append("rank")
if "fukusho_return" in purchased.columns:
    columns_to_show.append("fukusho_return")

# 表示
from IPython.display import display
display(purchased[columns_to_show].sort_values(["開催地", "レース番号"]))
# 投資額と払戻額を計算
bet_unit = 1000  # 1頭あたりの投資額
total_bets = len(purchased) * bet_unit
total_hits = purchased[purchased["rank"] <= 3].shape[0]
total_returns = purchased["fukusho_return"].sum() * 10  # 100円単位 → 1000円単位に変換

# 回収率を計算
recovery_rate = round(total_returns / total_bets * 100, 2) if total_bets > 0 else 0

# 結果を表示
print(f"✅ 購入頭数: {len(purchased)} 頭")
print(f"🎯 的中頭数: {total_hits} 頭")
print(f"💰 投資額: {total_bets:,} 円")
print(f"💰 払戻額: {total_returns:,} 円")
print(f"📈 回収率: {recovery_rate} %")

Unnamed: 0,開催地,レース番号,horse_number,prob,rank,fukusho_return


✅ 購入頭数: 0 頭
🎯 的中頭数: 0 頭
💰 投資額: 0 円
💰 払戻額: 0 円
📈 回収率: 0 %
