In [22]:
# ── 1. Imports ─────────────────────────────────────────────
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import re

# ── 2. Set Dynamic Project Paths ───────────────────────────
# The project root is taken to be the parent of the current working directory.
# When the notebook is run from its own folder, that parent is the project
# folder (F1_Analysis in the original author's layout).
PROJECT_ROOT = Path().resolve().parent

# Raw inputs and cleaned outputs both live under <root>/data.
RAW_DIR = PROJECT_ROOT.joinpath("data", "raw")
CLEAN_DIR = PROJECT_ROOT.joinpath("data", "cleaned")

# Only the output directory is created here; the raw directory is left alone.
CLEAN_DIR.mkdir(parents=True, exist_ok=True)

print(f"📁 RAW_DIR: {RAW_DIR}")
print(f"📁 CLEAN_DIR: {CLEAN_DIR}")

# ── 3. Combine CSVs Function ───────────────────────────────
def combine_csvs(input_folder: Path, output_file: Path, variable: str, has_driver: bool = True) -> None:
    """Merge the per-session CSV files of one dataset into a single CSV.

    Every ``*.csv`` path under ``input_folder`` (searched recursively, visited
    in sorted order) is considered. A file is used only when its *whole* name
    is ``{variable}_{session}_driver{number}.csv`` (``has_driver=True``) or
    ``{variable}_{session}.csv`` (``has_driver=False``); the comparison is
    case-insensitive. Files that do not match, or that cannot be read, are
    reported on stdout and skipped.

    Each accepted file gets a ``session_key`` column, plus a ``driver_number``
    column when ``has_driver`` is true. Both hold the digit strings captured
    from the file name and overwrite any same-named column already present.

    Args:
        input_folder: Directory holding the raw CSV files for ``variable``.
        output_file: Destination of the combined CSV. Its parent directory is
            created if needed. Nothing is written when no file was usable.
        variable: Dataset name used as the file-name prefix. It is matched
            literally, so regex metacharacters in it have no special meaning.
        has_driver: Whether file names carry a ``_driver{number}`` part.
    """
    # Escape the prefix so a name such as "car.data" cannot match "carXdata".
    prefix = re.escape(variable)
    if has_driver:
        name_pattern = re.compile(rf"{prefix}_(\d+)_driver(\d+)\.csv", re.IGNORECASE)
    else:
        name_pattern = re.compile(rf"{prefix}_(\d+)\.csv", re.IGNORECASE)

    all_frames = []

    print(f"\n🔍 Searching in: {input_folder.resolve()}")
    for file in tqdm(sorted(input_folder.rglob("*.csv")), desc=f"Processing {variable}"):
        fname = file.name

        # fullmatch (not match) so trailing text is rejected: with a
        # start-anchored match, "weather_1.csv.bak.csv" would be accepted.
        match = name_pattern.fullmatch(fname)
        if not match:
            print(f"⚠️ Skipping unmatched file: {fname}")
            continue

        # Best-effort by design: one unreadable file must not abort the batch.
        try:
            df = pd.read_csv(file)
        except Exception as e:
            print(f"❌ Failed to read {fname}: {e}")
            continue

        df["session_key"] = match.group(1)
        if has_driver:
            df["driver_number"] = match.group(2)
        all_frames.append(df)

    if not all_frames:
        print(f"⚠️ No valid files found for {variable}")
        return

    # Make sure the destination directory exists before writing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    combined_df = pd.concat(all_frames, ignore_index=True)
    combined_df.to_csv(output_file, index=False)
    print(f"✅ Saved to {output_file}")

# ── 4. Process All Variables ───────────────────────────────
variables_all = ["laps", "position", "stints"]
verstappen_var = "car_data"
weather_var = "weather"
race_control_var = "race_control"

# Datasets whose file names carry a driver number:
# raw/<name>/ is merged into cleaned/<name>.csv.
for var in variables_all:
    combine_csvs(
        input_folder=RAW_DIR / var,
        output_file=CLEAN_DIR / f"{var}.csv",
        variable=var,
    )

# Car data (noted by the author as Verstappen-only); file names still carry a
# driver number, so it is parsed the same way as the datasets above.
combine_csvs(
    input_folder=RAW_DIR / verstappen_var,
    output_file=CLEAN_DIR / f"{verstappen_var}.csv",
    variable=verstappen_var,
    has_driver=True,
)

# Race control (no driver part in the file names)
combine_csvs(
    input_folder=RAW_DIR / race_control_var,
    output_file=CLEAN_DIR / f"{race_control_var}.csv",
    variable=race_control_var,
    has_driver=False,
)

# Weather (no driver part in the file names)
combine_csvs(
    input_folder=RAW_DIR / weather_var,
    output_file=CLEAN_DIR / f"{weather_var}.csv",
    variable=weather_var,
    has_driver=False,
)


📁 RAW_DIR: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw
📁 CLEAN_DIR: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned

🔍 Searching in: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw\laps


Processing laps: 100%|██████████| 605/605 [00:00<00:00, 844.99it/s]
Processing laps: 100%|██████████| 605/605 [00:00<00:00, 844.99it/s]


✅ Saved to C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned\laps.csv

🔍 Searching in: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw\position


Processing position: 100%|██████████| 607/607 [00:00<00:00, 1194.87it/s]



✅ Saved to C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned\position.csv

🔍 Searching in: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw\stints


Processing stints: 100%|██████████| 605/605 [00:00<00:00, 1004.44it/s]



✅ Saved to C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned\stints.csv

🔍 Searching in: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw\car_data


Processing car_data: 100%|██████████| 116/116 [00:01<00:00, 60.56it/s]



✅ Saved to C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned\car_data.csv

🔍 Searching in: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw\race_control


Processing race_control: 100%|██████████| 115/115 [00:00<00:00, 937.70it/s]



✅ Saved to C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned\race_control.csv

🔍 Searching in: C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\raw\weather


Processing weather: 100%|██████████| 123/123 [00:00<00:00, 983.93it/s]
Processing weather: 100%|██████████| 123/123 [00:00<00:00, 983.93it/s]


✅ Saved to C:\Users\John\Documents\Capstone Projects\Python Capstones\F1_Analysis\data\cleaned\weather.csv
