# Serve Statistics Notebook

### Load Packages

In [43]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

### Read Data

In [44]:
# Player picker. The placeholder entry is the default value, so a real player
# has to be chosen explicitly before the selection is used.
choose_player_dropdown = widgets.Dropdown(
    options=['-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun', 'Alexander Hoogmartens',
             'Spencer Johnson', 'Aadarsh Tripathi', 'Giacomo Revelli', 'Gianluca Ballotta'],
    value='-- Select --',
    description='Player:'  # label shown next to the widget; the options are player names
)

display(choose_player_dropdown)

Dropdown(description='Category:', options=('-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun',…

In [45]:
# Stop here while the dropdown still shows its placeholder entry.
if choose_player_dropdown.value == '-- Select --':
    raise ValueError("Please choose a valid player from the dropdown menu in the previous cell before proceeding.")

# A real player was selected; later cells build the data path from this name.
player_name = choose_player_dropdown.value

In [46]:
# Open the player's workbook once and pull every sheet in a single call.
# Passing a list to `sheet_name` makes read_excel return a {sheet name: DataFrame}
# dict, instead of re-opening and re-parsing the same file for each sheet.
combined_workbook_path = f'../../data/mens/{player_name}/combined.xlsx'
combined_sheets = pd.read_excel(
    combined_workbook_path,
    sheet_name=['Shots', 'Points', 'Games', 'Sets', 'Stats'],
)

combined_data_shots = combined_sheets['Shots']
combined_data_points = combined_sheets['Points']
combined_data_games = combined_sheets['Games']
combined_data_sets = combined_sheets['Sets']
combined_data_stats = combined_sheets['Stats']

### Average Service Game Duration

In [47]:
def average_service_time(data):
    """Return the mean duration of the host's service games as an "M:SS" string.

    Parameters
    ----------
    data : pandas.DataFrame
        Games table with a 'Server' column and a 'Duration' column
        (the notebook passes combined_data_games; host is always the UCLA player).
        Durations are assumed to be in seconds -- TODO confirm against the export.

    Returns
    -------
    str
        The mean duration rounded to a whole number, split into
        minutes and a zero-padded two-digit remainder.

    Raises
    ------
    ValueError
        If no row served by 'host' has a non-missing 'Duration'.
    """
    host_durations = data.loc[data['Server'] == 'host', 'Duration']

    # Series.mean() skips missing values; the result is NaN only when there is
    # nothing left to average, which int() below cannot convert.
    avg_seconds = host_durations.mean()
    if pd.isna(avg_seconds):
        raise ValueError("No host service games with a recorded 'Duration' were found.")

    total = int(round(avg_seconds))
    mins, secs = divmod(total, 60)

    return f"{mins}:{secs:02d}"

In [48]:
# Output Average Service Game Duration
# (average_service_time formats the result as an "M:SS" string)
avg_service_game_duration = average_service_time(combined_data_games)
avg_service_game_duration

'4:18'

### Average Games Held Percentage

In [49]:
def service_games_won_percentage(df):
    """Return the percentage of the host's completed service games won by the host.

    Rows whose 'Server' is "host" are kept, games whose 'Game Winner' is "draw"
    are discarded, and the share of the remaining games won by "host" is
    returned as a whole-number percentage (0 when "host" never appears as winner).
    """
    host_is_serving = df["Server"] == "host"
    game_was_completed = df["Game Winner"] != "draw"
    completed_host_games = df[host_is_serving & game_was_completed]

    # Relative frequency of each winner label among the remaining games
    winner_shares = completed_host_games["Game Winner"].value_counts(normalize=True)
    host_share = winner_shares.get('host', 0)

    # Scale to a percentage, round to the nearest whole number, return as int
    return int(round(host_share * 100, 0))

In [50]:
# Whole-number percentage of the host's non-draw service games that the host won
average_games_held = service_games_won_percentage(combined_data_games)
average_games_held

71

### Average Breakpoints Saved

In [51]:
def breakpoints_saved_function(data):
    """Return the percentage of break points on the host's serve that the host won.

    Parameters
    ----------
    data : pandas.DataFrame
        Points table with 'Match Server', 'Break Point' and 'Point Winner' columns.

    Returns
    -------
    float
        Share of rows with 'Match Server' == 'host' and 'Break Point' == True
        whose 'Point Winner' is 'host', as a percentage rounded to 2 decimals.
        0.0 when there are no such rows or the host won none of them.
    """
    # Filter Data
    break_points_faced = data[(data['Match Server'] == 'host') &
                              (data['Break Point'] == True)]

    host_share = break_points_faced['Point Winner'].value_counts(normalize=True).get('host', 0)

    # `.get` falls back to a plain int 0 when 'host' never appears among the winners,
    # and ints have no `.round()` method, so use the built-in round() on a float.
    return round(float(host_share) * 100, 2)

In [52]:
# Percentage of break points on the host's serve that the host won
breakpoints_saved_percentage = breakpoints_saved_function(combined_data_points)
breakpoints_saved_percentage

50.0

### Average Aces

In [53]:
def average_aces(df):
    """Return the mean host ace total per 'Aces' row, rounded to one decimal.

    Each row whose 'Stat Name' is 'Aces' is treated as one match: its values in
    every column whose name contains 'Host Set' are added up, and the mean of
    those totals is returned. Prints a notice and returns None when there is
    no 'Aces' row.
    """
    aces_rows = df.loc[df['Stat Name'] == 'Aces']

    # Nothing to average without at least one 'Aces' row
    if aces_rows.shape[0] == 0:
        print("No 'Aces' row found.")
        return None

    # Per-set columns for the host
    host_set_cols = list(filter(lambda name: 'Host Set' in name, df.columns))

    # One total per 'Aces' row, then the mean across rows
    totals_per_match = aces_rows.loc[:, host_set_cols].sum(axis=1)
    return round(totals_per_match.mean(), 1)

In [54]:
# Output Average Aces
# Bind the result to its own name: assigning it to `average_aces` would replace
# the function with a number, and re-running this cell would then fail because
# a number is not callable.
average_aces_per_match = average_aces(combined_data_stats)
average_aces_per_match

0.5

### Average Double Faults

In [55]:
def average_doubleFaults(df):
    """Return the mean gap between '2nd Serves' and '2nd Serves In' host totals.

    'Stat Name' values are compared after stripping surrounding whitespace.
    For each stat, the columns whose name contains 'Host Set' are summed per
    row; the two resulting series are paired by position, subtracted, and the
    mean of the differences is returned.
    """
    stat_labels = df['Stat Name'].str.strip()
    host_set_cols = [name for name in df.columns if 'Host Set' in name]

    def _host_totals(label):
        # Row-wise host total for one stat, renumbered from 0 so that the two
        # stats line up by position rather than by their original row index.
        rows = df.loc[stat_labels == label, host_set_cols]
        return rows.sum(axis=1).reset_index(drop=True)

    missed_second_serves = _host_totals('2nd Serves') - _host_totals('2nd Serves In')

    return missed_second_serves.mean()

In [56]:
# Mean of ('2nd Serves' - '2nd Serves In') host totals; the value is not rounded
average_double_faults = average_doubleFaults(combined_data_stats)
average_double_faults

1.3636363636363635

### Serve In/Won Percentages

In [61]:
# Helper Function
def find_stat(df, stat_name):
    """Return the grand total of one statistic across the 'Host Set' columns.

    Rows whose 'Stat Name' equals ``stat_name`` exactly are selected, and every
    value in the columns whose name starts with 'Host Set' is added together.
    """
    is_requested_stat = df['Stat Name'] == stat_name

    # Keep only the columns whose name starts with 'Host Set'
    host_set_cols = df.columns[df.columns.str.startswith('Host Set')]
    matching_rows = df.loc[is_requested_stat, host_set_cols]

    # First sum: one total per column. Second sum: add the column totals together.
    per_column_totals = matching_rows.sum()
    return per_column_totals.sum()

In [71]:
# Total each serve statistic over the host's set columns.
# The names on the left are paired with the stat labels below by position.
(first_serves, first_serves_in, first_serves_won,
 second_serves, second_serves_in, second_serves_won) = [
    find_stat(combined_data_stats, stat_label)
    for stat_label in ('1st Serves', '1st Serves In', '1st Serves Won',
                       '2nd Serves', '2nd Serves In', '2nd Serves Won')
]

In [74]:
def _percentage(numerator, denominator):
    """Return numerator / denominator as a whole-number percentage (0 if the denominator is 0)."""
    # A zero denominator would otherwise produce inf or NaN, which int() cannot convert.
    if denominator == 0:
        return 0
    return int(round((numerator / denominator) * 100, 0))


# "In" rates are relative to serves attempted; "Won" rates are relative to serves that landed in.
first_serve_in_percentage = _percentage(first_serves_in, first_serves)
first_serve_won_percentage = _percentage(first_serves_won, first_serves_in)
second_serve_in_percentage = _percentage(second_serves_in, second_serves)
second_serve_won_percentage = _percentage(second_serves_won, second_serves_in)

print(f"Serve Performance Summary for {player_name}:\n")
print(f"  1st Serve In %:        {first_serve_in_percentage}%")
print(f"  1st Serve Won %:       {first_serve_won_percentage}%")
print(f"  2nd Serve In %:        {second_serve_in_percentage}%")
print(f"  2nd Serve Won %:       {second_serve_won_percentage}%")


Serve Performance Summary for Rudy Quan:

  1st Serve In %:        72%
  1st Serve Won %:       65%
  2nd Serve In %:        88%
  2nd Serve Won %:       55%


### Serve Zone Distribution

##### Helper Function

In [15]:
# Helper Function: Classify Zones borrowed from swingvison_transformation.ipynb

def classify_zone(df):
    """Classify one serve bounce into a zone label from its coordinates.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row exposing numeric 'x_coord' and 'y_coord' entries
        (the function is applied row-wise, despite the parameter name).

    Returns
    -------
    str or float
        '<Ad|Deuce> <Wide|Body|T>'. The side is 'Ad' when x * y is positive and
        'Deuce' otherwise; the band depends on |x|: above 105 is Wide,
        52.5 to 105 inclusive is Body, below 52.5 is T.
        NaN is returned when x matches no band (i.e. x is itself NaN).
    """
    x = df['x_coord']
    y = df['y_coord']

    # if the product is positive it's on the ad side, otherwise it's deuce
    side = 'Ad' if x * y > 0 else 'Deuce'

    distance_from_center = abs(x)
    if distance_from_center > 105:
        return f'{side} Wide'
    if 52.5 <= distance_from_center <= 105:
        return f'{side} Body'
    if distance_from_center < 52.5:
        return f'{side} T'

    # Only reachable when x is NaN (every comparison above is False).
    # float('nan') is used because numpy is not imported in this notebook,
    # so `np.nan` would raise NameError here.
    return float('nan')

In [16]:
def serve_zone_distribution(df_shots, df_points, output_path='serve_dist.json'):
    """Write the host's "points won / points played" tally per serve zone to JSON.

    Parameters
    ----------
    df_shots : pandas.DataFrame
        Shots table; must provide 'Point', 'Game', 'Set', '__source_file__',
        'Stroke', 'Result', 'Bounce (x)' and 'Bounce (y)'.
    df_points : pandas.DataFrame
        Points table; must provide 'Point', 'Game', 'Set', '__source_file__',
        'Point Winner' and 'Match Server'.
    output_path : str, optional
        Destination of the JSON file (records orientation). Defaults to
        'serve_dist.json', the path that was previously hard-coded.

    Returns
    -------
    None
        The result is written to ``output_path`` as a list of
        {'Zone', 'Win Proportion'} records, where 'Win Proportion' is the
        string '<host wins>/<host wins + guest wins>'.
    """
    # Scale factor and offset applied to the raw bounce coordinates before zoning.
    # NOTE(review): 11.8872 equals 39 ft expressed in metres, and the factor appears
    # to convert metres into the units used by classify_zone's thresholds -- confirm.
    coord_scale = 38.2764654418
    y_offset = 11.8872

    # only use matches with complete data
    df_shots = df_shots[df_shots['__source_file__'].isin(df_points['__source_file__'])]  # UPDATE: Temporary fix

    # add columns for the winner and the server of each point
    point_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_info = df_points[['Point', 'Game', 'Set', 'Point Winner', 'Match Server', '__source_file__']]
    combined = pd.merge(df_shots, point_info, on=point_keys, how='left')

    # serves hit by the host that landed in
    serves = combined[(combined['Stroke'] == 'Serve') & (combined['Match Server'] == 'host')]
    serves_in = serves[serves['Result'] == 'In'].copy()

    # zone classification
    serves_in['x_coord'] = serves_in['Bounce (x)'] * coord_scale
    serves_in['y_coord'] = (serves_in['Bounce (y)'] - y_offset) * coord_scale
    serves_in['Zone'] = serves_in.apply(classify_zone, axis=1)

    # Count winners per zone. fill_value=0 keeps the counts as integers when a zone
    # has no points for one of the players (otherwise NaN would turn the output
    # into strings like "3.0/nan"); reindex guarantees both columns exist even if
    # one player never won a point, avoiding a KeyError below.
    zones = (
        serves_in.groupby('Zone')['Point Winner']
        .value_counts()
        .unstack(fill_value=0)
        .reindex(columns=['host', 'guest'], fill_value=0)
    )

    # get win proportions and convert to json
    zones['Win Proportion'] = zones['host'].astype(str) + '/' + (zones['host'] + zones['guest']).astype(str)
    zones = zones.reset_index()
    zones.columns.name = None
    zones = zones[['Zone', 'Win Proportion']]
    zones.to_json(output_path, orient='records')

In [17]:
# Writes the per-zone win tallies to 'serve_dist.json' (a relative path)
serve_zone_distribution(combined_data_shots, combined_data_points)