# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# ==========================================================
# Data loading and season-based train/test split
# ==========================================================
# Absolute location of the dataset CSV (machine-specific path).
csv_path = r"C:\Users\aishw\OneDrive\Desktop\F1predictoncap\Data\f1_data.csv"
df = pd.read_csv(csv_path)

# Columns used as model inputs (the original note says these match app.py).
feature_columns = [
    "grid",
    "qual_position",
    "driver_points_to_date",
    "constructor_points_to_date",
    "year",
    "round",
]
X = df[feature_columns]
y = df["win"].astype(int)  # target: the "win" column cast to int

# Hold out the most recent season as the test set; every earlier season is
# used for training, so no row from the test season is seen while fitting.
last_season = df["year"].max()
is_earlier_season = df["year"] < last_season
is_last_season = df["year"] == last_season

X_train, y_train = X[is_earlier_season], y[is_earlier_season]
X_test, y_test = X[is_last_season], y[is_last_season]

# ==========================================================
# Model training functions
# ==========================================================
def get_roc(model, X_train, y_train, X_test, y_test):
    """Fit *model* on the training data and compute its ROC curve on the test data.

    The model is fitted in place. The score used for the curve is the second
    column of ``model.predict_proba(X_test)``.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict_proba(X)``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features to score.
        y_test: Held-out labels the scores are compared against.

    Returns:
        A ``(fpr, tpr, roc_auc)`` tuple: the false-positive rates and
        true-positive rates from ``roc_curve``, and the area under that
        curve from ``auc``.
    """
    model.fit(X_train, y_train)

    # Column 1 of predict_proba is taken as the positive-class score.
    positive_scores = model.predict_proba(X_test)[:, 1]

    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_test, positive_scores)
    area_under_curve = auc(false_pos_rate, true_pos_rate)
    return false_pos_rate, true_pos_rate, area_under_curve

# ==========================================================
# Build the three classifiers and compute their ROC curves
# ==========================================================
log_reg = LogisticRegression(class_weight="balanced", max_iter=1000)

rf = RandomForestClassifier(
    n_estimators=300,
    class_weight="balanced_subsample",
    random_state=42,
)

xgb_params = {
    "n_estimators": 400,
    "max_depth": 4,
    "learning_rate": 0.05,
    "subsample": 0.9,
    "colsample_bytree": 0.9,
    "random_state": 42,
}
xgb = XGBClassifier(**xgb_params)

# Fit each model on the training seasons and score it on the held-out season.
# The models are processed in the listed order: logistic, forest, XGBoost.
roc_results = {
    key: get_roc(estimator, X_train, y_train, X_test, y_test)
    for key, estimator in (("log", log_reg), ("rf", rf), ("xgb", xgb))
}

# Unpack into the per-model names used by the plotting code.
fpr_log, tpr_log, auc_log = roc_results["log"]
fpr_rf, tpr_rf, auc_rf = roc_results["rf"]
fpr_xgb, tpr_xgb, auc_xgb = roc_results["xgb"]

# ==========================================================
# PLOT 1 → All Models ROC
# ==========================================================
# One entry per curve: (fpr, tpr, legend label, extra line styling).
roc_curves = (
    (fpr_log, tpr_log, f"Logistic Regression (AUC={auc_log:.3f})", {"color": "blue"}),
    (fpr_rf, tpr_rf, f"Random Forest (AUC={auc_rf:.3f})", {"color": "green"}),
    # XGBoost is drawn with a thicker line than the other two curves.
    (fpr_xgb, tpr_xgb, f"XGBoost (AUC={auc_xgb:.3f})", {"color": "orange", "linewidth": 2.5}),
)

plt.figure(figsize=(6, 4))
for curve_fpr, curve_tpr, curve_label, line_style in roc_curves:
    plt.plot(curve_fpr, curve_tpr, label=curve_label, **line_style)

# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], "k--", alpha=0.5)

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve — Logistic vs RF vs XGBoost")
plt.legend()
plt.tight_layout()

# Save to the current working directory before displaying the figure.
plt.savefig("roc_all_models.png", dpi=300)
plt.show()

# ==========================================================
# PLOT 2 → XGBoost Only ROC
# ==========================================================
# Single-curve ROC figure for the XGBoost model, drawn through explicit
# Figure/Axes handles.
xgb_fig, xgb_ax = plt.subplots(figsize=(6, 4))

xgb_ax.plot(
    fpr_xgb,
    tpr_xgb,
    label=f"XGBoost (AUC={auc_xgb:.3f})",
    color="orange",
    linewidth=2.5,
)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
xgb_ax.plot([0, 1], [0, 1], "k--", alpha=0.5)

xgb_ax.set_xlabel("False Positive Rate")
xgb_ax.set_ylabel("True Positive Rate")
xgb_ax.set_title("ROC Curve — XGBoost Only")
xgb_ax.legend()

xgb_fig.tight_layout()
# Save to the current working directory before displaying the figure.
xgb_fig.savefig("roc_xgboost_only.png", dpi=300)
plt.show()


# In [2]:
import os
# Display the current working directory. Relative file names (such as the
# PNG names passed to plt.savefig) are resolved against this directory.
# Kept as a bare expression so a notebook shows its value as the cell output.
os.getcwd()


# Out[2]: 'C:\\Users\\aishw\\OneDrive\\Desktop\\F1predictoncap'