In [7]:
import pandas as pd

def chaos_extended(race_path, weights=(0.5, 0.3, 0.2)):
    """Compute a composite "chaos score" for one race folder.

    Reads ``session_result.csv`` (columns used: ``session_key``, ``dnf``,
    ``dns``, ``dsq``, ``number_of_laps``) and, when present,
    ``race_control.csv`` (columns used: ``message``, ``lap_number``) from
    ``race_path``.

    Three ratios are combined:

    * DDR - share of result rows flagged ``dnf``, ``dns`` or ``dsq``.
    * SCF - distinct laps carrying a safety-car / VSC deployment message,
      divided by the largest ``number_of_laps`` in the result.
    * IF  - distinct laps carrying an "UNDER INVESTIGATION" or "NOTED"
      message, divided by the same lap count.

    Parameters
    ----------
    race_path : str or path-like
        Folder containing the CSV files of a single race.
    weights : sequence of three numbers, optional
        Weights applied to ``(DDR, SCF, IF)`` in that order. The default
        ``(0.5, 0.3, 0.2)`` reproduces the original hard-coded formula.

    Returns
    -------
    dict
        Keys ``session_key``, ``chaos_score``, ``dnf``, ``safety_car`` and
        ``investigation``.

    Raises
    ------
    FileNotFoundError
        If ``session_result.csv`` is missing.
    ValueError
        If ``session_result.csv`` has no rows, or ``weights`` does not
        contain exactly three items.
    """
    w_dnf, w_sc, w_inv = weights

    session_result = pd.read_csv(f"{race_path}/session_result.csv")
    total_drivers = len(session_result)
    if total_drivers == 0:
        # Without rows there is neither a session key nor a denominator;
        # fail with a clear message instead of an opaque IndexError.
        raise ValueError(f"session_result.csv in {race_path!r} contains no rows")

    session_key = session_result["session_key"].iloc[0]

    # --- DNF / DNS / DSQ ---
    dnf_drivers = session_result[["dnf", "dns", "dsq"]].any(axis=1).sum()
    DDR = dnf_drivers / total_drivers

    # --- Events ---
    try:
        race_control = pd.read_csv(f"{race_path}/race_control.csv")
    except FileNotFoundError:
        # No race-control log for this race: treat it as having no events.
        SCF = 0.0
        IF = 0.0
    else:
        total_laps = session_result["number_of_laps"].max()

        def lap_share(pattern):
            # Fraction of race laps with at least one message matching pattern.
            mask = race_control["message"].str.contains(pattern, case=False, na=False)
            laps_hit = race_control.loc[mask, "lap_number"].nunique()
            # A NaN or zero lap count fails the comparison and yields 0.0.
            return laps_hit / total_laps if total_laps > 0 else 0.0

        # Safety Car / VSC
        SCF = lap_share(
            "SAFETY CAR DEPLOYED|VIRTUAL SAFETY CAR DEPLOYED|RACE WILL START BEHIND THE SAFETY CAR"
        )
        # Investigations
        IF = lap_share("UNDER INVESTIGATION|NOTED")

    # --- Chaos Score ---
    chaos_score = w_dnf * DDR + w_sc * SCF + w_inv * IF

    return {
        "session_key": session_key,
        "chaos_score": chaos_score,
        "dnf": DDR,
        "safety_car": SCF,
        "investigation": IF
    }

In [8]:
# Try the metric on a single race folder ("AUS - Melbourne", 2025 data).
chaos_extended("data/raw/2025/AUS - Melbourne")

{'session_key': np.int64(9693),
 'chaos_score': np.float64(0.18333333333333332),
 'dnf': np.float64(0.3),
 'safety_car': np.float64(0.05263157894736842),
 'investigation': np.float64(0.08771929824561403)}

In [8]:
import os
def collect_chaos(base_path, output_path="data/processed/2025/chaos_metrics.csv"):
    """Compute chaos metrics for every race folder under ``base_path``.

    Each sub-directory of ``base_path`` is passed to ``chaos_extended``.
    Folders that fail are reported with a printed message and skipped, so
    one broken race does not abort the whole run.

    Parameters
    ----------
    base_path : str
        Directory whose sub-directories each hold the data of one race.
    output_path : str or None, optional
        CSV file the collected table is written to. Its parent directory
        is created when missing. Pass ``None`` to skip writing. The default
        keeps the original hard-coded location.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed race, with columns
        ``session_key``, ``chaos_score``, ``dnf``, ``safety_car`` and
        ``investigation``.
    """
    columns = ["session_key", "chaos_score", "dnf", "safety_car", "investigation"]
    results = []

    # Walk every sub-directory; sorting makes the row order reproducible,
    # because os.listdir returns entries in arbitrary order.
    for gp_folder in sorted(os.listdir(base_path)):
        race_path = os.path.join(base_path, gp_folder)
        if not os.path.isdir(race_path):
            continue

        try:
            chaos = chaos_extended(race_path)
        except Exception as e:
            # Deliberate best effort: report the failing folder and move on.
            print(f"Ошибка при обработке {gp_folder}: {e}")
            continue

        row = {name: chaos.get(name) for name in columns}
        # Fall back to the folder name when no session key was returned.
        row["session_key"] = chaos.get("session_key", gp_folder)
        results.append(row)

    # Assemble the DataFrame; explicit columns keep the schema stable even
    # when no race could be processed.
    df_metrics = pd.DataFrame(results, columns=columns)

    if output_path is not None:
        out_dir = os.path.dirname(output_path)
        if out_dir:
            # to_csv does not create missing directories on its own.
            os.makedirs(out_dir, exist_ok=True)
        df_metrics.to_csv(output_path, index=False)

    return df_metrics

In [9]:
# Build the metrics table for every race folder under data/raw/2025 and display it.
collect_chaos("data/raw/2025")

Unnamed: 0,session_key,chaos_score,dnf,safety_car,investigation
0,10022,0.084,0.1,0.02,0.14
1,10033,0.140351,0.2,0.052632,0.122807
2,9987,0.075397,0.1,0.031746,0.079365
3,9955,0.135714,0.2,0.014286,0.157143
4,10014,0.107895,0.1,0.017544,0.263158
5,10006,0.011321,0.0,0.0,0.056604
6,9912,0.083962,0.1,0.0,0.169811
7,9877,0.129577,0.2,0.014085,0.126761
8,9858,0.177,0.25,0.04,0.2
9,9888,0.041071,0.05,0.017857,0.053571
