Temp data

In [1]:
import pandas as pd
import numpy as np

# Mock feature table for four drivers, used to exercise the pipeline without
# real data. The random feature columns come from a seeded generator so that
# Restart Kernel -> Run All reproduces exactly the same values every time.
MOCK_SEED = 42
mock_rng = np.random.default_rng(MOCK_SEED)

mock_drivers = ['VER', 'HAM', 'GAS', 'OCO']
n_mock = len(mock_drivers)

mock_data = pd.DataFrame({
    'driver': mock_drivers,
    'team': ['Red Bull', 'Mercedes', 'Alpine', 'Alpine'],
    # Uniform values in [0, 1), one per driver.
    'tire_degradation': mock_rng.random(n_mock),
    'braking_intensity': mock_rng.random(n_mock),
    'throttle_aggression': mock_rng.random(n_mock),
    'consistency_score': mock_rng.random(n_mock),
    'driver_form': mock_rng.random(n_mock),
    'driver_is_french': [0, 0, 1, 1],
    'qual_pos': [1, 2, 5, 7],
    'fp2_pace': mock_rng.random(n_mock),
    'target_win': [0, 1, 0, 0]
})

mock_data.head()


Unnamed: 0,driver,team,tire_degradation,braking_intensity,throttle_aggression,consistency_score,driver_form,driver_is_french,qual_pos,fp2_pace,target_win
0,VER,Red Bull,0.231876,0.307988,0.315284,0.934723,0.765043,0,1,0.153914,0
1,HAM,Mercedes,0.875395,0.589598,0.452818,0.745226,0.531188,0,2,0.45839,1
2,GAS,Alpine,0.693245,0.812953,0.921217,0.373046,0.120153,1,5,0.000284,0
3,OCO,Alpine,0.738691,0.447669,0.322088,0.624767,0.235794,1,7,0.271069,0


Functions: feature engineering

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression


def compute_consistency(lap_times):
    """Return the standard deviation of the positive lap times.

    Entries that are not greater than zero are ignored. When fewer than
    three entries remain, ``None`` is returned instead of a spread value.
    """
    valid_laps = []
    for lap in lap_times:
        if lap > 0:
            valid_laps.append(lap)

    if len(valid_laps) >= 3:
        return np.std(valid_laps)
    return None


def compute_driver_form(results_last_n):
    """Return the mean of ``results_last_n``, or ``None`` when it is empty."""
    if len(results_last_n) == 0:
        return None
    return np.mean(results_last_n)


def compute_tire_degradation(lap_numbers, lap_times):
    """Estimate tire degradation as the least-squares slope of lap time vs. lap number.

    Parameters
    ----------
    lap_numbers : sequence of numbers
        Lap indices, paired element-wise with ``lap_times``.
    lap_times : sequence of numbers
        Lap times. Non-positive or non-finite entries (and their lap numbers)
        are ignored, matching how the other lap-time features in this
        notebook filter their input.

    Returns
    -------
    float or None
        Slope of the fitted line (change in lap time per lap). ``None`` when
        fewer than three usable laps are available or when all usable lap
        numbers are identical (the slope is then undefined).

    Raises
    ------
    ValueError
        If both inputs have at least three entries but different lengths.
    """
    if len(lap_numbers) < 3 or len(lap_times) < 3:
        return None

    # dtype=float turns None entries into NaN so they can be masked out below.
    laps = np.asarray(lap_numbers, dtype=float).ravel()
    times = np.asarray(lap_times, dtype=float).ravel()
    if laps.size != times.size:
        raise ValueError(
            "lap_numbers and lap_times must have the same length "
            f"(got {laps.size} and {times.size})"
        )

    # Keep only pairs with a finite lap number and a finite, positive lap time.
    usable = np.isfinite(laps) & np.isfinite(times) & (times > 0)
    laps, times = laps[usable], times[usable]
    if laps.size < 3:
        return None

    # Closed-form simple linear regression: slope = cov(x, y) / var(x).
    lap_dev = laps - laps.mean()
    lap_spread = float(np.dot(lap_dev, lap_dev))
    if lap_spread == 0.0:
        return None

    return float(np.dot(lap_dev, times - times.mean()) / lap_spread)


def compute_braking_intensity(brake_data):
    """Return the mean of ``brake_data`` as a float, or ``None`` if it is empty."""
    has_samples = len(brake_data) > 0
    return float(np.mean(brake_data)) if has_samples else None


def compute_throttle_aggression(throttle_data):
    """Return the mean of ``throttle_data`` as a float, or ``None`` if it is empty."""
    sample_count = len(throttle_data)
    if sample_count != 0:
        return float(np.mean(throttle_data))
    return None


def compute_race_pace(lap_times):
    """Return the median of the positive lap times as a float.

    Entries that are not greater than zero are ignored; ``None`` is returned
    when nothing remains after filtering.
    """
    positive_laps = list(filter(lambda lap: lap > 0, lap_times))
    if not positive_laps:
        return None
    return float(np.median(positive_laps))


Data merging

In [None]:
def compute_my_features(df):
    """Add the engineered feature columns to ``df`` and return it.

    Reads the ``lap_times``, ``results_last_n``, ``lap_numbers``,
    ``brake_data`` and ``throttle_data`` columns and writes ``consistency``,
    ``driver_form``, ``tire_degradation``, ``braking_intensity``,
    ``throttle_aggression`` and ``race_pace``. The columns are assigned on the
    frame that was passed in, so the caller's object is modified.
    """
    def _degradation_for_row(row):
        # Tire degradation needs two columns, so it is computed row by row.
        return compute_tire_degradation(row["lap_numbers"], row["lap_times"])

    # (target column, source column, per-cell function), in assignment order.
    before_degradation = (
        ("consistency", "lap_times", compute_consistency),
        ("driver_form", "results_last_n", compute_driver_form),
    )
    after_degradation = (
        ("braking_intensity", "brake_data", compute_braking_intensity),
        ("throttle_aggression", "throttle_data", compute_throttle_aggression),
        ("race_pace", "lap_times", compute_race_pace),
    )

    for target, source, feature_fn in before_degradation:
        df[target] = df[source].apply(feature_fn)

    df["tire_degradation"] = df.apply(_degradation_for_row, axis=1)

    for target, source, feature_fn in after_degradation:
        df[target] = df[source].apply(feature_fn)

    return df

def save_my_features(df, path="my_features.csv"):
    """Compute the engineered features for ``df`` and write the result to ``path`` as CSV.

    The row index is not written to the file.
    """
    compute_my_features(df).to_csv(path, index=False)


ML-basis

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# 1. Load the prepared tables. These are CSV files (my_features.csv is written
#    with DataFrame.to_csv), so they have to be read with read_csv;
#    read_parquet cannot parse CSV content.
df_fastf1 = pd.read_csv("cleaned_fastf1.csv")
df_openf1 = pd.read_csv("cleaned_openf1.csv")
df_my = pd.read_csv("my_features.csv")

# 2. Join the three sources on the driver column.
df_all = (
    df_fastf1
    .merge(df_openf1, on="driver", how="inner")
    .merge(df_my, on="driver", how="inner")
)

# 3. Target variable: whether the driver won the race or not
df_all["target_win"] = (df_all["position"] == 1).astype(int)

# 4. Build X and y. Identifier columns and the label (plus the column it is
#    derived from) are dropped. Only numeric/bool columns are kept: the CSV
#    written by save_my_features still contains the raw list columns as text,
#    which the classifier cannot consume.
X = (
    df_all
    .drop(columns=["driver", "team", "position", "target_win"])
    .select_dtypes(include=["number", "bool"])
)
y = df_all["target_win"]

# 5. Split into train/test. Stratify on the target when every class has at
#    least two rows, so both splits contain winners and non-winners.
can_stratify = len(y) > 0 and y.value_counts().min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42,
    stratify=y if can_stratify else None
)

# 6. Create the model
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=12,
    random_state=42
)

# 7. Train the model
model.fit(X_train, y_train)

# 8. Predict
y_pred = model.predict(X_test)

# 9. Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
# ROC-AUC needs both classes in y_test, and predict_proba only has a second
# column when the model saw both classes during training.
if y_test.nunique() > 1 and len(model.classes_) == 2:
    print("ROC-AUC:", roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))
else:
    print("ROC-AUC: undefined (only one class present in train or test split)")

# 10. Show the predictions
print("\nPredictions:", y_pred)


Accuracy: 0.0
F1: 0.0
[1 1]
