14. What is the most common type of surface used in tournaments? 


In [1]:
import pandas as pd
from pathlib import Path

def read_ground_types(parquet_files):
    """Load the ``ground_type`` column from each parquet file into one frame.

    Files that cannot be read are reported on stdout and skipped, so a single
    unreadable file does not abort the whole run.

    Parameters
    ----------
    parquet_files : iterable of path-like
        Parquet files to read.

    Returns
    -------
    pandas.DataFrame
        The concatenated ``ground_type`` data with a fresh 0..n-1 index.
        When nothing could be read, an empty frame that still carries the
        ``ground_type`` column is returned (``pd.concat`` would raise on an
        empty list).
    """
    frames = []
    for path in parquet_files:
        try:
            frames.append(pd.read_parquet(path, columns=["ground_type"]))
        except Exception as e:  # deliberate best-effort: report and continue
            print(f"Error reading {path}: {e}")

    if not frames:
        return pd.DataFrame({"ground_type": pd.Series(dtype="object")})
    return pd.concat(frames, ignore_index=True)


def summarize_surfaces(ground_types):
    """Count how often each surface occurs, most frequent first.

    Missing values are ignored (``value_counts`` drops them by default).
    Ties are broken alphabetically by surface name so the result does not
    depend on the order in which rows were loaded.

    Parameters
    ----------
    ground_types : pandas.Series
        Surface labels, one per row.

    Returns
    -------
    pandas.DataFrame
        Columns ``ground_type`` and ``count``; empty when there are no
        non-null labels.
    """
    surface_counts = (
        ground_types.value_counts()
        .rename_axis("ground_type")
        .reset_index(name="count")
    )
    return surface_counts.sort_values(
        ["count", "ground_type"], ascending=[False, True]
    ).reset_index(drop=True)


# Paths are resolved relative to the parent of the current working directory.
project_root = Path.cwd().parent
raw_dir = project_root / "data" / "raw" / "tennis_data"
answers_dir = project_root / "reports" / "answers"
output_path = answers_dir / "q14_surface.csv"

# Sorted so the load order (and therefore the printed order of unique values)
# is reproducible regardless of filesystem enumeration order.
files = sorted(raw_dir.glob("2024*/data/raw/raw_match_parquet/tournament_*.parquet"))
print(f"Found {len(files)} tournament parquet files")

df_all = read_ground_types(files)

# NOTE(review): this counts rows. If a tournament_*.parquet file can hold more
# than one row, the result is weighted by row count rather than by tournament
# -- confirm against the data before reporting it as a per-tournament answer.
common_surface = summarize_surfaces(df_all["ground_type"])

if common_surface.empty:
    # Nothing to report: say so explicitly instead of failing with an
    # IndexError on .iloc[0] (or a ValueError from concatenating nothing).
    unique_grounds = df_all["ground_type"].dropna().unique()
    most_common = None
    print(f"\nNo ground_type values found under {raw_dir}; nothing was saved.")
else:
    unique_grounds = df_all["ground_type"].dropna().unique()
    print("\nUnique ground types in dataset:")
    for g in unique_grounds:
        print("-", g)

    most_common = common_surface.iloc[0]

    # Create the output directory only once there is something to write.
    answers_dir.mkdir(parents=True, exist_ok=True)
    common_surface.to_csv(output_path, index=False, encoding="utf-8-sig")

    print(f"\nMost common surface: {most_common['ground_type']} ({most_common['count']} occurrences)")
    print(f"Full surface distribution saved to: {output_path}")


Found 35671 tournament parquet files

Unique ground types in dataset:
- Hardcourt indoor
- Hardcourt outdoor
- Red clay
- Carpet indoor
- Synthetic outdoor
- Red clay indoor
- Grass
- Green clay

Most common surface: Hardcourt outdoor (16959 occurrences)
Full surface distribution saved to: c:\Users\mit\Desktop\Final_Project\reports\answers\q14_surface.csv
