In [16]:
import pandas as pd
import numpy as np
import os

def compute_positions(laps_path):
    laps = pd.read_csv(f"{laps_path}/laps.csv")
    session_result = pd.read_csv(f"{laps_path}/session_result.csv")

    race_laps = session_result["number_of_laps"].max()
    session_key = laps["session_key"].iloc[0]

    # valid drivers >= 75%
    valid_drivers = session_result[
        session_result["number_of_laps"] >= 0.75 * race_laps
    ]["driver_number"]

    laps = laps[laps["driver_number"].isin(valid_drivers)].copy()

    # накопленное время
    laps["cumulative_time"] = laps.groupby("driver_number")["lap_duration"].cumsum()

    # функция для вычисления позиции на каждом круге
    def assign_position(df):
        df = df.copy()
        # заполняем NaN большими числами
        df["cumulative_time_filled"] = df["cumulative_time"].fillna(1e9)
        df["position"] = df.groupby("lap_number")["cumulative_time_filled"] \
                            .rank(method="min").astype(int)
        return df

    laps = assign_position(laps)

    return laps

def race_dynamics(laps_path, laps_df):
    laps = laps_df
    session_result = pd.read_csv(f"{laps_path}/session_result.csv")

    session_key = laps["session_key"].iloc[0]
    race_laps = session_result["number_of_laps"].max()

    # valid drivers (>=75%)
    valid_drivers = session_result[
        session_result["number_of_laps"] >= 0.75 * race_laps
    ]["driver_number"]

    laps = laps[laps["driver_number"].isin(valid_drivers)]

    N = laps["driver_number"].nunique()
    L = laps["lap_number"].nunique()

    if N == 0 or L <= 1:
        return {
            "session_key": session_key,
            "race_dynamics": 0.0
        }

    total_change = 0.0

    for _, d_laps in laps.sort_values(["driver_number", "lap_number"]) \
                          .groupby("driver_number"):

        pos = d_laps["position"].values
        total_change += (abs(pos[1:] - pos[:-1])).mean()

    race_dyn = total_change / N

    return {
        "session_key": session_key,
        "race_dynamics": race_dyn
    }

In [15]:
#race_dynamics("data/raw/2025/MON - Monte Carlo") 

laps_with_pos = compute_positions("data/raw/2025/AUS - Melbourne")
rd = race_dynamics("data/raw/2025/AUS - Melbourne", laps_with_pos)

In [None]:
def collect_race_dynamics(base_path) :
    results = []

    # Проходим по всем подкаталогам
    for gp_folder in os.listdir(base_path):
        race_path = os.path.join(base_path, gp_folder)
        if not os.path.isdir(race_path):
            continue

        try:
            laps_with_pos = compute_positions(race_path)
            rd = race_dynamics_v2(race_path, laps_with_pos)
            outcome_dict = {"session_key": rd.get("session_key", gp_folder),
                            "race_dynamics_score": rd.get("race_dynamics", None)}

            


            results.append(outcome_dict)

        except Exception as e:
            print(f"Ошибка при обработке {gp_folder}: {e}")
            continue

    # Собираем в DataFrame
    df_metrics = pd.DataFrame(results)
    df_metrics.to_csv("data/processed/2025/race_dynamics_metrics.csv", index=False)
    return df_metrics

In [22]:
collect_race_dynamics("data/raw/2025")

Unnamed: 0,session_key,race_dynamics_score
0,10022,0.234812
1,10033,0.451764
2,9987,0.299283
3,9955,0.293299
4,10014,0.50378
5,10006,0.236557
6,9912,0.277107
7,9877,0.243893
8,9858,0.210289
9,9888,0.246039


In [33]:
def race_dynamics_v2(laps_path):
    laps = pd.read_csv(f"{laps_path}/laps_v2.csv")
    session_result = pd.read_csv(f"{laps_path}/session_result.csv")

    session_key = session_result["session_key"].iloc[0]

    total_change = 0.0
    valid_drivers = 0

    for driver, d_laps in (
        laps.sort_values(["DriverNumber", "LapNumber"])
            .groupby("DriverNumber")
    ):
        # Берём только валидные позиции
        pos = (
            d_laps["Position"]
            .dropna()
            .astype(int)
            .values
        )

        # ❗ КЛЮЧЕВОЕ ИСПРАВЛЕНИЕ
        if len(pos) < 2:
            continue

        mean_change = np.abs(pos[1:] - pos[:-1]).mean()

        if not np.isnan(mean_change):
            total_change += mean_change
            valid_drivers += 1

    race_dyn = total_change / valid_drivers if valid_drivers > 0 else np.nan

    return {
        "session_key": session_key,
        "race_dynamics": race_dyn,
    }

In [32]:
race_dynamics_v2("data/raw/2025/AUS - Melbourne")

{'session_key': np.int64(9693),
 'race_dynamics': np.float64(0.19059927001103474),
 'drivers_used': 17}

In [34]:
def collect_race_dynamics(base_path) :
    results = []

    # Проходим по всем подкаталогам
    for gp_folder in os.listdir(base_path):
        race_path = os.path.join(base_path, gp_folder)
        if not os.path.isdir(race_path):
            continue

        try:
            rd = race_dynamics_v2(race_path)
            outcome_dict = {"session_key": rd.get("session_key", gp_folder),
                            "race_dynamics_score": rd.get("race_dynamics", None)}

            


            results.append(outcome_dict)

        except Exception as e:
            print(f"Ошибка при обработке {gp_folder}: {e}")
            continue

    # Собираем в DataFrame
    df_metrics = pd.DataFrame(results)
    df_metrics.to_csv("data/processed/2025/race_dynamics_metrics.csv", index=False)
    return df_metrics

In [35]:
collect_race_dynamics("data/raw/2025")

Unnamed: 0,session_key,race_dynamics_score
0,10022,0.239347
1,10033,0.167078
2,9987,0.317831
3,9955,0.330915
4,10014,0.521429
5,10006,0.236557
6,9912,0.307533
7,9877,0.335641
8,9858,0.233708
9,9888,0.259924
