In [None]:
import pandas as pd
import os

def average_service_time(file_path, *, max_point_duration=10000):
    """
    Return a player's average total serve time per match as an "M:SS" string.

    The player name is taken from the folder that contains ``file_path``
    (e.g. "…/Rudy Quan/combined.xlsx" -> "rudy quan") and is compared
    case-insensitively with the "Player" column of the "Shots" sheet.

    For every match (distinct "__source_file__" value) in which the player hit
    the first shot of at least one point, the "Duration" values of all points
    whose "Match Server" equals the player's side are summed. The per-match
    totals are then averaged.

    Args:
        file_path: Path to the workbook. It must contain a "Points" sheet
            (columns "__source_file__", "Set", "Game", "Point",
            "Match Server", "Duration") and a "Shots" sheet (the same four
            key columns plus "Start Time" and "Player").
        max_point_duration: Points whose duration is not strictly below this
            value, or is not numeric, are ignored. Durations are treated as
            seconds when the result is formatted. The default of 10000
            presumably screens out corrupt rows -- confirm against the export.

    Returns:
        The average as "M:SS" (seconds zero-padded, minutes not wrapped into
        hours), or None when the player is never found as the server of a
        point.
    """
    point_keys = ["__source_file__", "Set", "Game", "Point"]

    # 1) Infer player name from the folder. abspath() makes a bare relative
    #    path such as "combined.xlsx" resolve to the current folder's name
    #    instead of an empty string.
    player_dir = os.path.dirname(os.path.abspath(file_path))
    player = os.path.basename(player_dir).lower()

    # 2) Load both sheets with a single pass over the workbook
    sheets = pd.read_excel(file_path, sheet_name=["Points", "Shots"])
    points_df = sheets["Points"]
    shots_df = sheets["Shots"]

    # 3) Determine first shot of each point (the server).
    #    drop_duplicates keeps the earliest *row* per point; groupby().first()
    #    would instead return the first non-null Player, silently promoting
    #    the second hitter whenever the opening shot has no player recorded.
    first_shots = (
        shots_df
        .dropna(subset=point_keys)  # shots without a full point key cannot be matched
        .sort_values("Start Time", kind="stable")
        .drop_duplicates(subset=point_keys, keep="first")
        .loc[:, point_keys + ["Player"]]
        .rename(columns={"Player": "Server"})
    )
    merged = points_df.merge(first_shots, on=point_keys, how="left")

    # 4) Figure out which side the player is on in each match: the
    #    "Match Server" value of the first point the player served.
    #    Casting to the nullable string dtype keeps .str usable even when the
    #    Server column is entirely missing; unknown servers count as "not the
    #    player".
    served_by_player = (
        merged["Server"].astype("string").str.lower() == player
    ).fillna(False)
    first_served = (
        merged.loc[served_by_player]
        .dropna(subset=["__source_file__"])
        .drop_duplicates(subset="__source_file__", keep="first")
    )
    side_by_match = dict(
        zip(first_served["__source_file__"], first_served["Match Server"])
    )
    if not side_by_match:
        return None

    # 5) Sum serve-point durations per match. Numeric conversion and the
    #    outlier filter do not depend on the match, so they run once.
    duration = pd.to_numeric(merged["Duration"], errors="coerce")
    valid = merged.assign(Duration=duration)[duration < max_point_duration]
    totals = [
        valid.loc[
            (valid["__source_file__"] == match_file)
            & (valid["Match Server"] == side),
            "Duration",
        ].sum()
        for match_file, side in side_by_match.items()
    ]

    # 6) Compute average and format
    avg_sec = sum(totals) / len(totals)
    mins, secs = divmod(int(round(avg_sec)), 60)
    return f"{mins}:{secs:02d}"




# Example run: average per-match serve time from Rudy Quan's combined workbook.
avg = average_service_time(
    "/Users/eshaan/Desktop/Tennis Scouting/consulting-spring2025/data/mens/Rudy Quan/combined.xlsx"
)
# Bare expression: shown as the cell result when this runs in a notebook.
avg


'11:01'