# Serve Statistics Notebook

### Load Packages

In [3]:
import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display

### Read Data

In [4]:
# Build the player picker. The first entry is a placeholder rather than a real
# player, so the widget starts out without a valid selection.
_dropdown_choices = ['-- Select --']
_dropdown_choices += [
    'Rudy Quan',
    'Emon Van Loben Sels',
    'Kaylan Bigun',
    'Alexander Hoogmartens',
    'Spencer Johnson',
    'Aadarsh Tripathi',
    'Giacomo Revelli',
    'Gianluca Ballotta',
]

choose_player_dropdown = widgets.Dropdown(
    description='Category:',
    value=_dropdown_choices[0],
    options=_dropdown_choices,
)

# Render the widget so a choice can be made before the next cell runs.
display(choose_player_dropdown)

Dropdown(description='Category:', options=('-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun',…

In [5]:
# Stop here while the dropdown still shows its placeholder entry.
_current_choice = choose_player_dropdown.value
if _current_choice == '-- Select --':
    raise ValueError("Please choose a valid category from the dropdown menu in the previous cell before proceeding.")

# A real entry was picked; expose it under the name the later cells use.
player_name = _current_choice

In [6]:
# Load every sheet of the selected player's combined workbook in one pass.
# Passing a list to `sheet_name` makes pandas return a {sheet name: DataFrame}
# dict, so the workbook is opened and parsed once instead of once per sheet,
# and the path is spelled out in a single place.
combined_workbook_path = f'../../data/mens/{player_name}/combined.xlsx'
combined_sheets = pd.read_excel(
    combined_workbook_path,
    sheet_name=['Shots', 'Points', 'Games', 'Sets', 'Stats'],
)

combined_data_shots = combined_sheets['Shots']
combined_data_points = combined_sheets['Points']
combined_data_games = combined_sheets['Games']
combined_data_sets = combined_sheets['Sets']
combined_data_stats = combined_sheets['Stats']

In [7]:
# Keep only the leading rows of the results file; per the original note these
# are the 2024-2025 season matches.
# NOTE(review): the cut-off is a fixed row count - confirm it still matches the file.
_SEASON_ROW_COUNT = 229

# Read, truncate, and convert the 'Date' column to datetimes in one chain.
mens_results = (
    pd.read_csv('../../data/mens/mens_results.csv')
    .iloc[:_SEASON_ROW_COUNT]
    .assign(Date=lambda results: pd.to_datetime(results['Date']))
)

# Function to Filter by Player and School Matches Only
def filter_player(data, player_name,
                  event_prefixes=('Dual Match', '2024 ITA', '2024-25 NCAA Division')):
    """Return the rows of `data` where `player_name` played in a school event.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Player1', 'Player2' and 'Event Name'.
    player_name : str
        Name matched exactly against 'Player1' and 'Player2'.
    event_prefixes : str or iterable of str, optional
        An event counts as a school event when its 'Event Name' starts with
        one of these prefixes. The default keeps the prefixes that were
        previously hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.
    """
    # A bare string would otherwise be split into single characters by tuple().
    if isinstance(event_prefixes, str):
        event_prefixes = (event_prefixes,)

    # Filter for player_name
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    data = data[involves_player]

    # Filter for only school events. na=False makes rows with a missing event
    # name count as "not a school event"; without it the mask contains NaN and
    # pandas refuses to index with it.
    is_school_event = data['Event Name'].str.startswith(tuple(event_prefixes), na=False)
    return data[is_school_event]


# Keep only the selected player's school-event matches from the results table.
mens_results_player = filter_player(mens_results, player_name)

In [8]:
# Display the filtered matches for the selected player.
mens_results_player

Unnamed: 0,Event Name,Date,Player1,Player2,Player1 UTR,Player2 UTR,Score
5,"Dual Match: University of California, Los Ange...",2025-05-15,Timo Legout,Rudy Quan,14.23,13.59,"7-5, 6-2"
6,Dual Match: University of Southern California ...,2025-05-08,Rudy Quan,Makk Peter,13.59,13.79,"7-6(4), 3-6, 3-3"
14,"Dual Match: University of California, Los Ange...",2025-05-02,Carl Overbeck,Rudy Quan,13.48,13.59,"5-7, 7-5, 6-3"
18,"Dual Match: University of California, Santa Ba...",2025-05-01,Rudy Quan,Gianluca Brunkow,13.59,13.0,"5-7, 6-2, 0-1"
27,"Dual Match: University of California, Los Ange...",2025-04-26,Aidan Kim,Rudy Quan,13.74,13.59,"7-6(3), 6-2"
33,Dual Match: Michigan State University vs Unive...,2025-04-25,Rudy Quan,Aristotelis Thanos,13.59,13.67,"4-6, 3-1"
36,Dual Match: University of Michigan vs Universi...,2025-04-24,Rudy Quan,William Cooksey,13.59,12.0,"6-3, 5-6"
46,"Dual Match: University of California, Los Ange...",2025-04-19,Rudy Quan,Calvin MUELLER,13.59,13.0,"3-6, 6-3, 6-1"
52,"Dual Match: University of California, Los Ange...",2025-04-17,Michael Minasyan,Rudy Quan,12.0,13.59,"2-6, 6-3"
55,Dual Match: Michigan State University vs Unive...,2025-04-12,Rudy Quan,Ozan Baris,13.59,13.66,"6-1, 6-2"


### Average Service Game Duration

In [9]:
def average_service_time(data):
    """Return the mean duration of the host's service games as an "M:SS" string.

    Parameters
    ----------
    data : pandas.DataFrame
        Game-level table (the notebook passes `combined_data_games`) with a
        'Server' column and a numeric 'Duration' column. The duration is
        treated as a number of seconds.

    Returns
    -------
    str
        Minutes and zero-padded seconds, e.g. "4:18".

    Raises
    ------
    ValueError
        If there is no usable duration for games served by 'host'.
    """
    # Per the original author's note, 'host' is always the UCLA player.
    host_durations = data.loc[data['Server'] == 'host', 'Duration']

    # mean() skips missing durations; it is NaN only when nothing is left.
    avg_seconds = host_durations.mean()
    if pd.isna(avg_seconds):
        # Fail with an explicit message instead of the opaque
        # "cannot convert float NaN to integer" raised by int(round(nan)).
        raise ValueError("No service-game durations found for server 'host'.")

    mins, secs = divmod(int(round(avg_seconds)), 60)
    return f"{mins}:{secs:02d}"

In [10]:
# Output Average Service Game Duration
# The result is an "M:SS" string computed from the host's rows of the Games sheet.
avg_service_game_duration = average_service_time(combined_data_games)
avg_service_game_duration

'4:18'

### Average Games Held Percentage

In [11]:
def service_games_won_percentage(df):
    """Return the share of host service games won by the host, as a whole percent.

    `df` needs the columns "Server" and "Game Winner". Only rows where the
    server is "host" are considered; the result is 0 when "host" never
    appears as the winner of such a game.
    """
    # Winners of the games in which the host served.
    host_serving = df["Server"] == "host"
    winner_shares = df.loc[host_serving, "Game Winner"].value_counts(normalize=True)

    # Fraction of those games credited to the host (0 if there are none).
    held_share = winner_shares.get('host', 0)

    # Scale to a percentage, round, and hand back a plain int.
    return int(round(held_share * 100, 0))

In [12]:
# Whole-number percentage of the host's service games that the host won.
average_games_held = service_games_won_percentage(combined_data_games)
average_games_held

69

### Average Breakpoints Saved

In [13]:
def breakpoints_saved_function(data):
    """Return the percentage of break points on the host's serve won by the host.

    Parameters
    ----------
    data : pandas.DataFrame
        Point-level table with the columns 'Match Server', 'Break Point' and
        'Point Winner'.

    Returns
    -------
    float
        Percentage rounded to two decimals. It is 0.0 when the host won none
        of the break points, including when there were no break points at all.
    """
    # Break points played while the host was serving. The explicit comparison
    # with True is kept so a non-boolean column is handled as before.
    faced = data[(data['Match Server'] == 'host') & (data['Break Point'] == True)]  # noqa: E712

    host_share = faced['Point Winner'].value_counts(normalize=True).get('host', 0.0)

    # Use the built-in round(): the fallback value is a plain number without a
    # .round() method, so calling .round(2) on it raised AttributeError
    # whenever 'host' was missing from the winners.
    return round(float(host_share) * 100, 2)

In [14]:
# Percentage of break points on the host's serve that the host won.
breakpoints_saved_percentage = breakpoints_saved_function(combined_data_points)
breakpoints_saved_percentage

50.0

### Average Aces

In [38]:
def average_aces(df):
    """Average the host's ace totals over the 'Aces' rows of a stats table.

    For every row whose 'Stat Name' is exactly 'Aces', the values of all
    columns whose name contains 'Host Set' are added up; the mean of those
    row totals is returned rounded to one decimal. When no such row exists a
    message is printed and None is returned.
    """
    ace_rows = df.loc[df['Stat Name'] == 'Aces']

    # Nothing to average: report it and bail out.
    if len(ace_rows) == 0:
        print("No 'Aces' row found.")
        return None

    # Per-set columns for the host.
    host_set_cols = [name for name in df.columns if 'Host Set' in name]

    # One total per 'Aces' row, then the mean of those totals.
    row_totals = ace_rows[host_set_cols].sum(axis=1)
    return round(row_totals.mean(), 1)

In [39]:
# Output Average Aces
# Store the result under its own name. Assigning it back to `average_aces`
# would replace the function with a number, so running this cell a second
# time would fail because the name is no longer callable.
average_aces_per_match = average_aces(combined_data_stats)
average_aces_per_match

0.5

### Average Double Faults

In [91]:
def average_doubleFaults(df):
    """Mean difference between the host's '2nd Serves' and '2nd Serves In' totals.

    Stat names are compared after stripping surrounding whitespace. For each
    of the two stats, the columns whose name contains 'Host Set' are summed
    row by row; the two resulting series are paired by position, subtracted,
    and the mean of the differences is returned (not rounded).
    """
    stat_labels = df['Stat Name'].str.strip()
    host_set_cols = [name for name in df.columns if 'Host Set' in name]

    def _row_totals(label):
        # Per-row host totals for one stat, re-indexed from 0 so the two
        # stats line up by position when subtracted.
        picked = df.loc[stat_labels == label, host_set_cols]
        return picked.sum(axis=1).reset_index(drop=True)

    faults = _row_totals('2nd Serves') - _row_totals('2nd Serves In')
    return faults.mean()

In [92]:
# Mean of ('2nd Serves' - '2nd Serves In') host totals from the Stats sheet.
average_double_faults = average_doubleFaults(combined_data_stats)
average_double_faults

1.3636363636363635

### Serve Zone Distribution

##### Helper Function

In [52]:
# Helper Function: Classify Zones borrowed from swingvison_transformation.ipynb

def classify_zone(df):
    """Label one serve bounce as '<side> <band>', e.g. 'Ad Wide' or 'Deuce T'.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing 'x_coord' and 'y_coord' (the function is used
        with `DataFrame.apply(..., axis=1)`).

    Returns
    -------
    str or float
        The side is 'Ad' when x * y is positive and 'Deuce' otherwise. The
        band depends on |x|: above 105 is 'Wide', from 52.5 up to and
        including 105 is 'Body', below 52.5 is 'T'. NaN is returned when
        either coordinate is missing.
    """
    x = df['x_coord']
    y = df['y_coord']

    # A missing y used to make x * y NaN, which fell through to the 'Deuce'
    # branch and produced a confident but meaningless label. Treat any
    # missing coordinate as "no zone".
    if pd.isna(x) or pd.isna(y):
        return np.nan

    # if the product is positive it's on the ad side, otherwise it's deuce
    side = 'Ad' if x * y > 0 else 'Deuce'

    distance = abs(x)
    if distance > 105:
        band = 'Wide'
    elif distance >= 52.5:
        band = 'Body'
    else:
        band = 'T'

    return f'{side} {band}'

In [59]:
def serve_zone_distribution(df_shots, df_points, output_path='serve_dist.json'):
    """Write, per serve zone, how many of the host's in serves the host won.

    Parameters
    ----------
    df_shots : pandas.DataFrame
        Shot-level table with 'Point', 'Game', 'Set', '__source_file__',
        'Stroke', 'Result', 'Bounce (x)' and 'Bounce (y)'.
    df_points : pandas.DataFrame
        Point-level table with 'Point', 'Game', 'Set', '__source_file__',
        'Point Winner' and 'Match Server'.
    output_path : str, optional
        Destination of the JSON file. Defaults to the previously hard-coded
        'serve_dist.json'.

    Returns
    -------
    None
        The result is written to `output_path` as a list of records with the
        keys 'Zone' and 'Win Proportion' ("<host wins>/<host + guest wins>").

    Raises
    ------
    ValueError
        If no host serve with Result == 'In' is available.
    """
    point_keys = ['Point', 'Game', 'Set', '__source_file__']

    # only use matches with complete data (marked as a temporary fix by the
    # original author)
    shots = df_shots[df_shots['__source_file__'].isin(df_points['__source_file__'])]

    # add the point winner and the server to every shot of the point
    combined = pd.merge(
        shots,
        df_points[point_keys + ['Point Winner', 'Match Server']],
        on=point_keys,
        how='left',
    )

    host_serves_in = (
        (combined['Stroke'] == 'Serve')
        & (combined['Match Server'] == 'host')
        & (combined['Result'] == 'In')
    )
    serves_in = combined[host_serves_in].copy()
    if serves_in.empty:
        # apply(axis=1) on an empty frame does not yield a label column, so
        # fail with a clear message before touching the output file.
        raise ValueError("No host serves with Result == 'In' found; cannot build the serve zone distribution.")

    # zone classification
    # NOTE(review): scale and offset are carried over unchanged. 11.8872 looks
    # like half a court length in metres (39 ft), which would centre y on the
    # net - confirm the units of the bounce columns.
    scale = 38.2764654418
    serves_in['x_coord'] = serves_in['Bounce (x)'] * scale
    serves_in['y_coord'] = (serves_in['Bounce (y)'] - 11.8872) * scale
    serves_in['Zone'] = serves_in.apply(classify_zone, axis=1)

    # Count winners per zone. fill_value=0 keeps the counts as integers when a
    # zone has no wins for one side (otherwise the text becomes "2.0/nan"),
    # and the reindex guarantees both columns exist even if one side never
    # won a point in any zone.
    counts = (
        serves_in.groupby('Zone')['Point Winner']
        .value_counts()
        .unstack(fill_value=0)
        .reindex(columns=['host', 'guest'], fill_value=0)
    )
    host_wins = counts['host']
    decided = host_wins + counts['guest']

    zones = (
        counts.assign(**{'Win Proportion': host_wins.astype(str) + '/' + decided.astype(str)})
        .rename_axis(columns=None)
        .reset_index()[['Zone', 'Win Proportion']]
    )
    zones.to_json(output_path, orient='records')

In [60]:
# Writes the per-zone win proportions to serve_dist.json; the call returns nothing to display.
serve_zone_distribution(combined_data_shots, combined_data_points)