In [4]:
import pandas as pd
import numpy as np
import os

In [5]:
def normalize_strategy(strategy: list[str]) -> tuple:
        """
        Normalize strategy to canonical short form.
        """
        mapping = {
            "INTERMEDIATE":"I",
            "SOFT": "S",
            "MEDIUM": "M",
            "HARD": "H",
            "S": "S",
            "M": "M",
            "H": "H",
        }
        return tuple(mapping.get(c.upper(), c) for c in strategy)

In [6]:
def strategy_diversity(race_path):

    def strategy_entropy_metrics(strategy_counts: pd.Series) -> tuple[float, float]:
        """
        Calculate entropy and normalize it
        """
        counts = strategy_counts.values
        total = counts.sum()

        if total == 0 or len(counts) <= 1:
            return 0.0, 0.0

        probs = counts / total
        entropy = -np.sum(probs * np.log(probs))
        entropy_norm = entropy / np.log(len(counts))

        return entropy, entropy_norm

    session_result = pd.read_csv(f"{race_path}/session_result.csv")
    race_laps = session_result["number_of_laps"].max()
    session_key = session_result["session_key"].max()
    valid_drivers = session_result[session_result["number_of_laps"] >= 0.75 * race_laps]["driver_number"]


    stints = pd.read_csv(f"{race_path}/stints.csv")
    stints = stints[stints["driver_number"].isin(valid_drivers)]
    strategies = (
            stints.sort_values(["driver_number", "lap_start"])
            .groupby("driver_number")["compound"]
            .apply(list)
            .reset_index()
            .rename(columns={"compound": "strategy_signature"})
        )
    strategies["strategy"] = strategies["strategy_signature"].apply(normalize_strategy)
    strategy_counts = (strategies["strategy"].value_counts())
    entropy, entropy_norm = strategy_entropy_metrics(strategy_counts)
    strategy_diversity = {
        "session_key": session_key,
        #"strategies": strategy_counts.index.to_list(),
        "n_strategies": len(strategy_counts),
        "entropy": entropy,
        "entropy_normalized": entropy_norm,
    }
    return strategy_diversity

In [7]:
strategy_diversity("data/raw/2025/MON - Monte Carlo")

{'session_key': np.int64(9979),
 'n_strategies': 11,
 'entropy': np.float64(2.351257284127319),
 'entropy_normalized': np.float64(0.9805504480532944)}

In [8]:
def timing_spread_first_pit(race_path):
    
    session_result = pd.read_csv(f"{race_path}/session_result.csv")
    race_laps = session_result["number_of_laps"].max()
    session_key = session_result["session_key"].max()

    pits = pd.read_csv(f"{race_path}/pit.csv")
    first_pits = (
        pits
        .sort_values(["driver_number", "lap_number"])
        .groupby("driver_number", as_index=False)
        .first()
    )

    laps = first_pits["lap_number"]

    q25 = laps.quantile(0.25)
    q75 = laps.quantile(0.75)

    spread = float(q75 - q25)
    spread_norm = spread / race_laps
    pit_spread = {
        "session_key": session_key,
        "spread": spread,
        "spread_normalized": spread_norm,
    }
    return pit_spread

In [9]:
timing_spread_first_pit("data/raw/2025/MON - Monte Carlo")

{'session_key': np.int64(9979),
 'spread': 16.0,
 'spread_normalized': np.float64(0.20512820512820512)}

In [14]:
def outcome_sensitivity(race_path): 
    stint_df = pd.read_csv(f"{race_path}/stints.csv")
    drivers = pd.read_csv(f"{race_path}/drivers.csv")
    stint_df = stint_df.merge(drivers, on="driver_number", how="inner")
    results = pd.read_csv(f"{race_path}/session_result.csv")
    session_key = results["session_key"].max()
    race_laps = results["number_of_laps"].max()
    valid_drivers = results[results["number_of_laps"] >= 0.75 * race_laps]["driver_number"]
    stint_df = stint_df[stint_df["driver_number"].isin(valid_drivers)].copy()

    stint_df["node"] = "S" + stint_df["stint_number"].astype(str) + "-" + stint_df["compound"]

    flows = (
        stint_df.groupby(["driver_number", "name_acronym"])["node"]
        .apply(list)
        .reset_index()
        .rename(columns={"node": "strategy"})
    )

    flows = flows.merge(results, on="driver_number", how="inner")
    N = len(flows)
    flows["position"] = results["position"].fillna(N + 1)
    flows["strategy"] = flows["strategy"].apply(tuple)

    mu = flows["position"].mean()
    between_var = 0.0
    for strat, group in flows.groupby("strategy"):
        n_s = len(group)
        mu_s = group["position"].mean()
        between_var += n_s * (mu_s - mu) ** 2

    between_var /= N

    # Общая дисперсия
    total_var = ((flows["position"] - mu) ** 2).sum() / N
    
    if total_var == 0:
        return 0.0

    outcome_sens = {
        'between_var': between_var,
        "total_var": total_var,
        "session_key": session_key,
        "outcome_sens": between_var / total_var,
    }
    return  outcome_sens

In [15]:
outcome_sensitivity("data/raw/2025/MON - Monte Carlo") 


{'between_var': np.float64(10.222222222222221),
 'total_var': np.float64(26.916666666666668),
 'session_key': np.int64(9979),
 'outcome_sens': np.float64(0.37977296181630543)}

In [18]:
def collect_strategic_metrics(base_path) :
    results = []

    # Проходим по всем подкаталогам
    for gp_folder in os.listdir(base_path):
        race_path = os.path.join(base_path, gp_folder)
        if not os.path.isdir(race_path):
            continue

        try:
            # Outcome Sensitivity
            outcome = outcome_sensitivity(race_path)
            outcome_dict = {"session_key": outcome.get("session_key", gp_folder),
                            "outcome_sens": outcome.get("outcome_sens", None)}

            # Strategy Diversity
            diversity = strategy_diversity(race_path)
            diversity_dict = {"n_strategies": diversity.get("n_strategies", None),
                              "strategy_entropy_normalized": diversity.get("entropy_normalized", None)}

            # Timing Spread
            timing = timing_spread_first_pit(race_path)  
            timing_dict = {"first_pit_spread_normalized": timing.get("spread_normalized", None)} 


            # Объединяем все
            combined = {**outcome_dict, **diversity_dict, **timing_dict}

            results.append(combined)

        except Exception as e:
            print(f"Ошибка при обработке {gp_folder}: {e}")
            continue

    # Собираем в DataFrame
    df_metrics = pd.DataFrame(results)
    df_metrics['strategy_score'] = 1 * df_metrics['outcome_sens'] + 0.3 * df_metrics['strategy_entropy_normalized'] + 0.5 * df_metrics['first_pit_spread_normalized']
    df_metrics.to_csv("data/processed/2025/strategic_metrics.csv", index=False)
    return df_metrics


In [19]:
collect_strategic_metrics("data/raw/2025")

Unnamed: 0,session_key,outcome_sens,n_strategies,strategy_entropy_normalized,first_pit_spread_normalized,strategy_score
0,10022,0.159324,3,0.664332,0.17,0.443623
1,10033,0.03615,2,0.896038,0.070175,0.340049
2,9987,0.289614,5,0.741838,0.285714,0.655023
3,9955,0.053676,9,0.946395,0.171429,0.423309
4,10014,0.483881,10,0.924417,0.070175,0.796294
5,10006,0.129073,5,0.679994,0.174528,0.420335
6,9912,0.048985,5,0.791595,0.367925,0.470426
7,9877,0.560376,5,0.902206,0.161972,0.912023
8,9858,0.242708,5,0.799579,0.34,0.652582
9,9888,0.203171,6,0.623137,0.071429,0.425827
