# Serve Statistics Notebook

### Load Packages

In [1]:
import pandas as pd
import json
import ipywidgets as widgets
from IPython.display import display

### Read Data

In [2]:
# Dropdown listing the available players. The first entry is a placeholder
# (not a real player) so that an explicit choice has to be made before the
# validation cell that follows will let the notebook continue.
choose_player_dropdown = widgets.Dropdown(
    options=['-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun', 'Alexander Hoogmartens',
             'Spencer Johnson', 'Aadarsh Tripathi', 'Giacomo Revelli', 'Gianluca Ballotta'],
    value='-- Select --',
    # The widget selects a player, so label it accordingly.
    description='Player:'
)

display(choose_player_dropdown)

Dropdown(description='Category:', options=('-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun',…

In [3]:
# Stop here while the dropdown still shows its placeholder entry; every cell
# below needs a real player name to locate the data files.
if choose_player_dropdown.value == '-- Select --':
    raise ValueError("Please choose a valid player from the dropdown menu in the previous cell before proceeding.")

# A real player was selected: keep the name for building the data path
player_name = choose_player_dropdown.value

In [4]:
# Load every required sheet in a single pass over the workbook instead of
# re-opening and re-parsing the same file once per sheet. Passing a list to
# `sheet_name` makes read_excel return a dict keyed by sheet name.
combined_workbook_path = f'../../data/mens/{player_name}/combined.xlsx'
combined_sheets = pd.read_excel(
    combined_workbook_path,
    sheet_name=['Shots', 'Points', 'Games', 'Sets', 'Stats'],
)

combined_data_shots = combined_sheets['Shots']
combined_data_points = combined_sheets['Points']
combined_data_games = combined_sheets['Games']
combined_data_sets = combined_sheets['Sets']
combined_data_stats = combined_sheets['Stats']

### Average Service Game Duration

In [5]:
def average_service_time(data):
    """Return the mean duration of host service games formatted as "M:SS".

    Parameters
    ----------
    data : pandas.DataFrame
        Games table with a 'Server' column and a 'Duration' column; the
        duration is treated as a number of seconds.

    Returns
    -------
    str
        Average duration of the rows where 'Server' equals 'host', rounded
        to the nearest second and formatted as minutes:seconds.

    Raises
    ------
    ValueError
        If there is no host service game with a usable duration, so no
        average can be computed.
    """
    # Series.mean() skips missing durations; it is NaN only when nothing is left
    avg_seconds = data.loc[data['Server'] == 'host', 'Duration'].mean()

    if pd.isna(avg_seconds):
        # Previously this surfaced as an opaque "cannot convert float NaN to
        # integer" error raised from round(); fail with an explicit message.
        raise ValueError("No host service games with a recorded duration were found.")

    total = int(round(avg_seconds))
    mins, secs = divmod(total, 60)

    return f"{mins}:{secs:02d}"

In [6]:
# Output Average Service Game Duration
# The helper returns a "M:SS" string; split it into its minute and second parts.
avg_service_game_duration = average_service_time(combined_data_games)
# NOTE(review): `min` rebinds Python's built-in min() for the rest of the
# kernel session. Renaming it also requires updating the final output cell,
# which reads `min, sec`.
min, sec = avg_service_game_duration.split(':')
min, sec

('4', '18')

### Average Games Held Percentage

In [7]:
def service_games_won_percentage(df):
    """Return the whole-number percentage of host service games won by the host.

    Only rows where 'Server' is 'host' and 'Game Winner' is not 'draw' are
    counted. Returns 0 when no such row has 'host' as the winner.
    """
    host_is_serving = df["Server"] == "host"
    game_is_decided = df["Game Winner"] != "draw"

    # Winners of the decided games served by the host
    winners = df.loc[host_is_serving & game_is_decided, "Game Winner"]

    # Share of those games won by the host (0 if 'host' never appears)
    host_share = winners.value_counts(normalize=True).get('host', 0)

    return int(round(host_share * 100, 0))

In [8]:
# Format the held-games percentage as display text, e.g. '71%'
average_games_held = f"{service_games_won_percentage(combined_data_games)}%"
average_games_held

'71%'

### Average Breakpoints Saved

In [9]:
def breakpoints_saved_function(data):
    """Return the whole-number percentage of break points won by the host on serve.

    Considers rows where 'Match Server' is 'host' and 'Break Point' is True,
    and reports how often 'Point Winner' is 'host' among them. Returns 0
    when the host won none of them (or there are none).
    """
    host_serving = data['Match Server'] == 'host'
    is_break_point = data['Break Point'] == True

    # Winners of the break points faced on the host's serve
    winners = data.loc[host_serving & is_break_point, 'Point Winner']
    saved_share = winners.value_counts(normalize=True).get('host', 0)

    return int(round(saved_share * 100, 0))

In [10]:
# Format the break-points-saved percentage as display text, e.g. '50%'
breakpoints_saved_percentage = f"{breakpoints_saved_function(combined_data_points)}%"
breakpoints_saved_percentage

'50%'

### Average Aces

In [11]:
def average_aces(df):
    """Return the mean ace total per 'Aces' row, rounded to one decimal.

    Each row whose 'Stat Name' is 'Aces' is summed across every column whose
    name contains 'Host Set'; the mean of those row totals is returned.
    Prints a message and returns None when there is no 'Aces' row.
    """
    is_aces_row = df['Stat Name'] == 'Aces'

    if not is_aces_row.any():
        print("No 'Aces' row found.")
        return None

    # Per-set value columns for the host
    host_set_columns = [name for name in df.columns if 'Host Set' in name]

    # One total per 'Aces' row, then the average of those totals
    totals_per_row = df.loc[is_aces_row, host_set_columns].sum(axis=1)
    return round(totals_per_row.mean(), 1)

In [12]:
# Output Average Aces
# NOTE(review): this assignment replaces the `average_aces` function with its
# numeric result, so re-running this cell fails until the definition cell is
# run again. Renaming the result also requires updating the final output cell,
# which reads `average_aces`.
average_aces = average_aces(combined_data_stats)
average_aces

0.5

### Average Double Faults

In [13]:
def average_doubleFaults(df):
    """Return the mean of ('2nd Serves' - '2nd Serves In') per row pair, to one decimal.

    Rows are matched on 'Stat Name' after stripping surrounding whitespace.
    Each matching row is summed across the columns whose name contains
    'Host Set'. The two resulting series are paired by position (first
    '2nd Serves' row with first '2nd Serves In' row, and so on) before
    subtracting.
    """
    stat_names = df['Stat Name'].str.strip()
    host_set_columns = [name for name in df.columns if 'Host Set' in name]

    def totals_for(stat):
        # Row totals for one statistic, re-indexed 0..n-1 so that the two
        # statistics line up by position when subtracted.
        return df.loc[stat_names == stat, host_set_columns].sum(axis=1).reset_index(drop=True)

    missed_second_serves = totals_for('2nd Serves') - totals_for('2nd Serves In')

    return round(missed_second_serves.mean(), 1)

In [14]:
# Average double faults, derived from second serves attempted minus second serves in
average_double_faults = average_doubleFaults(combined_data_stats)
average_double_faults

1.4

### Serve In/Won Percentages

In [15]:
# Helper Function
def find_stat(df, stat_name):
    """Return the grand total of one statistic across all 'Host Set' columns.

    Selects the rows whose 'Stat Name' equals `stat_name`, keeps the columns
    whose name starts with 'Host Set', and adds up every remaining value.
    """
    matching_rows = df.loc[df['Stat Name'] == stat_name]

    # Columns holding the host's per-set values
    all_columns = matching_rows.columns
    host_set_columns = all_columns[all_columns.str.startswith('Host Set')]

    # Total per set column first, then the total over all set columns
    per_set_totals = matching_rows[host_set_columns].sum()
    return per_set_totals.sum()

In [16]:
# Totals over all 'Host Set' columns: serves attempted, serves in, points won
first_serves, first_serves_in, first_serves_won = (
    find_stat(combined_data_stats, stat)
    for stat in ('1st Serves', '1st Serves In', '1st Serves Won')
)

second_serves, second_serves_in, second_serves_won = (
    find_stat(combined_data_stats, stat)
    for stat in ('2nd Serves', '2nd Serves In', '2nd Serves Won')
)

In [17]:
def _to_percent(part, whole):
    # Ratio expressed as a whole-number percentage
    return int(round((part / whole) * 100, 0))


# "In" rates are relative to serves attempted; "Won" rates to serves that landed in
first_serve_in_percentage = _to_percent(first_serves_in, first_serves)
first_serve_won_percentage = _to_percent(first_serves_won, first_serves_in)
second_serve_in_percentage = _to_percent(second_serves_in, second_serves)
second_serve_won_percentage = _to_percent(second_serves_won, second_serves_in)

for summary_line in (
    f"Serve Performance Summary for {player_name}:\n",
    f"  1st Serve In %:        {first_serve_in_percentage}%",
    f"  1st Serve Won %:       {first_serve_won_percentage}%",
    f"  2nd Serve In %:        {second_serve_in_percentage}%",
    f"  2nd Serve Won %:       {second_serve_won_percentage}%",
):
    print(summary_line)


Serve Performance Summary for Rudy Quan:

  1st Serve In %:        72%
  1st Serve Won %:       65%
  2nd Serve In %:        88%
  2nd Serve Won %:       55%


### Total Serve Points Won

In [18]:
# Points played with the host serving
host_serve_points = combined_data_points[combined_data_points['Match Server'] == 'host']

total_serves = host_serve_points.shape[0]
# total_serves = find_stat(combined_data_stats, '1st Serves') # OR this too

# Number of those points won by the host. The lookup key is 'host' and the
# fallback is 0. The previous `.get(0, 'host')` had the arguments swapped, so
# it looked up the label 0 and fell back to the string 'host', which only
# produced a number through positional fallback on the sorted counts.
serve_points_won = host_serve_points['Point Winner'].value_counts().get('host', 0)

# Share of host service points won, as a whole-number percentage
total_serve_points_won = int(round((serve_points_won / total_serves) * 100, 0))
total_serve_points_won

60

##### Output JSON

In [19]:
# Raw counts behind the "total serve points won" figure
serve_json = dict(
    type="Total Serve Points Won",
    total=int(total_serves),
    won=int(serve_points_won),
)

# Persist the payload as pretty-printed JSON
with open("data/total_serves.json", "w") as f:
    json.dump(serve_json, f, indent=4)

print("Saved to data/total_serves.json")

Saved to data/total_serves.json


### Fastest Serve

In [20]:
def fastest_serve(data):
    """Return the highest serve speed in `data`, rounded to a whole number.

    Parameters
    ----------
    data : pandas.DataFrame
        Shots table with a 'Type' column and a 'Speed (MPH)' column.

    Returns
    -------
    int
        Maximum 'Speed (MPH)' among rows whose 'Type' is 'first_serve' or
        'second_serve'.

    Raises
    ------
    ValueError
        If no serve row has a recorded speed.
    """
    # Use the frame that was passed in; the function previously ignored its
    # argument and always read the global `combined_data_shots`.
    is_serve = data['Type'].isin(['first_serve', 'second_serve'])
    max_serve = data.loc[is_serve, 'Speed (MPH)'].max()

    if pd.isna(max_serve):
        # Same exception type that int(round(nan)) raised, with a clear message
        raise ValueError("No serve with a recorded speed was found.")

    return int(round(max_serve, 0))

In [21]:
# NOTE(review): this assignment replaces the `fastest_serve` function with its
# integer result, so re-running this cell fails until the definition cell is
# run again. Renaming the result also requires updating the final output cell,
# which reads `fastest_serve`.
fastest_serve = fastest_serve(combined_data_shots)
fastest_serve

131

### Favorite Serve

##### Helper Function

In [22]:
# Helper Function: Classify Zones borrowed from swingvison_transformation.ipynb

def classify_zone(df):
    """Classify one serve bounce into a court side and a serve zone.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing 'x_coord' and 'y_coord'.

    Returns
    -------
    pandas.Series
        Two entries, 'side' ('Ad' or 'Deuce') and 'Zone' ('Wide', 'Body' or
        'T'). Both are NaN when 'x_coord' cannot be placed in any band,
        which happens when it is NaN.
    """
    x = df['x_coord']
    y = df['y_coord']

    # Zone depends only on how far x is from the centre line
    if (x < -105) or (x > 105):
        zone = 'Wide'
    elif (x <= -52.5) or (x >= 52.5):
        zone = 'Body'
    elif -52.5 < x < 52.5:
        zone = 'T'
    else:
        # Only reachable when x is NaN (every comparison is False). Uses the
        # built-in float NaN because numpy is not imported in this notebook;
        # the previous `np.nan` raised NameError here.
        missing = float('nan')
        return pd.Series({'side': missing, 'Zone': missing})

    # If the product is positive the bounce is on the ad side, otherwise deuce
    side = 'Ad' if x * y > 0 else 'Deuce'

    return pd.Series({'side': side, 'Zone': zone})

In [23]:
def favorite_serve(df_shots, df_points):
    """Return the most frequent serve zone among the host's serves that landed in.

    Shots are limited to source files that also appear in `df_points`, joined
    to their point's winner and server, filtered to host serves with result
    'In', converted to court coordinates and classified with `classify_zone`.
    """
    # UPDATE: Temporary fix - drop shots from files that have no points data
    has_points_data = df_shots['__source_file__'].isin(df_points['__source_file__'])
    shots_with_points = df_shots[has_points_data]

    point_columns = ['Point', 'Game', 'Set', 'Point Winner', 'Match Server', '__source_file__']
    merged = shots_with_points.merge(
        df_points[point_columns],
        on=['Point', 'Game', 'Set', '__source_file__'],
        how='left',
    )

    # Host serves only, then only those that landed in
    is_host_serve = (merged['Stroke'] == 'Serve') & (merged['Match Server'] == 'host')
    host_serves = merged[is_host_serve]
    serves_in = host_serves[host_serves['Result'] == 'In'].copy()

    # Rescale bounce positions into the coordinate system classify_zone expects
    serves_in['x_coord'] = serves_in['Bounce (x)'] * 38.2764654418
    serves_in['y_coord'] = (serves_in['Bounce (y)'] - 11.8872) * 38.2764654418
    serves_in[['side', 'serve_zone']] = serves_in.apply(classify_zone, axis=1)

    zone_counts = serves_in['serve_zone'].value_counts()
    return zone_counts.idxmax()

In [24]:
# Most frequent landing zone among the host's serves that landed in
favorite_serve(combined_data_shots, combined_data_points)

'Body'

### Best Serve (WIP)

In [25]:
# Work-in-progress: per-serve side, zone and point winner for the host's
# serves that landed in. The preparation mirrors favorite_serve().
df_shots = combined_data_shots
df_points = combined_data_points


df_shots = df_shots[df_shots['__source_file__'].isin(df_points['__source_file__'])] # UPDATE: Temporary fix

combined = pd.merge(df_shots, df_points[['Point', 'Game', 'Set', 'Point Winner', 'Match Server', '__source_file__']], on=['Point', 'Game', 'Set', '__source_file__'], how='left')

serves = combined[(combined['Stroke'] == 'Serve') & (combined['Match Server'] == 'host')] # Added Player Name Filter
serves_in = serves[serves['Result'] == 'In'].copy()

serves_in.loc[:, 'x_coord'] = serves_in['Bounce (x)'] * 38.2764654418
serves_in.loc[:, 'y_coord'] = (serves_in['Bounce (y)'] - 11.8872) * 38.2764654418
serves_in[['side', 'serve_zone']] = serves_in.apply(classify_zone, axis=1)

# Stored under its own name: assigning this frame to `favorite_serve`
# overwrote the favorite_serve() function defined earlier in the notebook.
best_serve_outcomes = serves_in[['side', 'serve_zone', 'Point Winner']]


### Serve Zone Distribution Bars (Nathan??) (WIP)

##### Helper Function

In [26]:
def classify_zone(df):
    """Classify one serve bounce into a court side and a serve zone.

    NOTE(review): this repeats the classify_zone helper defined earlier in the
    notebook; the two copies should be kept in sync or merged.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing 'x_coord' and 'y_coord'.

    Returns
    -------
    pandas.Series
        Two entries, 'side' ('Ad' or 'Deuce') and 'Zone' ('Wide', 'Body' or
        'T'). Both are NaN when 'x_coord' cannot be placed in any band,
        which happens when it is NaN.
    """
    x = df['x_coord']
    y = df['y_coord']

    # Zone depends only on how far x is from the centre line
    if (x < -105) or (x > 105):
        zone = 'Wide'
    elif (x <= -52.5) or (x >= 52.5):
        zone = 'Body'
    elif -52.5 < x < 52.5:
        zone = 'T'
    else:
        # Only reachable when x is NaN (every comparison is False). Uses the
        # built-in float NaN because numpy is not imported in this notebook;
        # the previous `np.nan` raised NameError here.
        missing = float('nan')
        return pd.Series({'side': missing, 'Zone': missing})

    # If the product is positive the bounce is on the ad side, otherwise deuce
    side = 'Ad' if x * y > 0 else 'Deuce'

    return pd.Series({'side': side, 'Zone': zone})

In [29]:
def serve_zone_bar(df, player_name):
    """Count a player's in-play serves per type, side and zone and save them as JSON.

    Parameters
    ----------
    df : pandas.DataFrame
        Shots table with 'Player', 'Type', 'Result', 'Bounce (x)' and
        'Bounce (y)' columns.
    player_name : str
        Value of the 'Player' column to keep.

    Side effects
    ------------
    Writes 'data/serve_zones.json' containing one entry per serve type found,
    each holding counts for the deuce/ad sides and the wide/body/t zones
    (zones with no serve stay at 0). Returns None.
    """
    # Keep Player's Serve
    is_player = df['Player'] == player_name
    is_serve = df['Type'].isin(['first_serve', 'second_serve'])
    serves = df[is_player & is_serve]

    # Keep only Serves In
    serves_in = serves[serves['Result'] == 'In'].copy()

    # Transform Coordinates
    serves_in.loc[:, 'x_coord'] = serves_in['Bounce (x)'] * 38.2764654418
    serves_in.loc[:, 'y_coord'] = (serves_in['Bounce (y)'] - 11.8872) * 38.2764654418

    # Create "side" and "serve_zone" columns
    serves_in[['side', 'serve_zone']] = serves_in.apply(classify_zone, axis=1)

    # Step 1: Get the counts (computed once; a duplicate value_counts() call
    # whose result was discarded has been removed)
    counts = serves_in[['Type', 'side', 'serve_zone']].value_counts().reset_index(name='count')

    # Step 2: Initialize output structure
    output = {'serves': []}

    # Step 3: Iterate through serve types
    for serve_type in counts['Type'].unique():
        serve_dict = {
            'type': 'First Serve' if serve_type == 'first_serve' else 'Second Serve',
            'sides': {
                'deuce': {'wide': 0, 'body': 0, 't': 0},
                'ad': {'wide': 0, 'body': 0, 't': 0}
            }
        }

        # Filter for this serve type
        subset = counts[counts['Type'] == serve_type]

        for _, row in subset.iterrows():
            side = row['side'].lower()     # 'Deuce' -> 'deuce'
            zone = row['serve_zone'].lower()  # 'Wide' -> 'wide'
            serve_dict['sides'][side][zone] = int(row['count'])

        output['serves'].append(serve_dict)

    # Save relative to the current working directory
    with open('data/serve_zones.json', 'w') as f:
        json.dump(output, f, indent=4)

In [30]:
# Writes the selected player's serve counts per type/side/zone to data/serve_zones.json
serve_zone_bar(combined_data_shots, player_name)

### Serve Zone Distribution Court Visual (WIP)

##### Helper Function

In [None]:
# Helper Function: Classify Zones borrowed from swingvison_transformation.ipynb

def classify_zone(df):
    """Classify one serve bounce into a combined "<side> <zone>" label.

    NOTE(review): this redefines the `classify_zone` name used earlier in the
    notebook with a different return type (a single string instead of a
    two-entry Series). Cells that expect the earlier version must not be
    re-run after this one without re-running their own helper cell first.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing 'x_coord' and 'y_coord'.

    Returns
    -------
    str or float
        One of 'Ad Wide', 'Deuce Wide', 'Ad Body', 'Deuce Body', 'Ad T',
        'Deuce T'; NaN when 'x_coord' cannot be placed in any band, which
        happens when it is NaN.
    """
    x = df['x_coord']
    y = df['y_coord']

    # Zone depends only on how far x is from the centre line
    if (x < -105) or (x > 105):
        zone = 'Wide'
    elif (x <= -52.5) or (x >= 52.5):
        zone = 'Body'
    elif -52.5 < x < 52.5:
        zone = 'T'
    else:
        # Only reachable when x is NaN (every comparison is False). Uses the
        # built-in float NaN because numpy is not imported in this notebook;
        # the previous `np.nan` raised NameError here.
        return float('nan')

    # If the product is positive the bounce is on the ad side, otherwise deuce
    side = 'Ad' if x * y > 0 else 'Deuce'

    return f"{side} {zone}"

In [None]:
def serve_zone_distribution(df_shots, df_points):
    """Write the host's points-won / points-played ratio per serve zone to JSON.

    Parameters
    ----------
    df_shots : pandas.DataFrame
        Shots table; needs 'Point', 'Game', 'Set', '__source_file__',
        'Stroke', 'Result', 'Bounce (x)' and 'Bounce (y)'.
    df_points : pandas.DataFrame
        Points table; needs 'Point', 'Game', 'Set', '__source_file__',
        'Point Winner' and 'Match Server'.

    Side effects
    ------------
    Writes 'serve_dist.json' (records orientation) in the current working
    directory with one record per zone: 'Zone' and 'Win Proportion', the
    latter formatted as "<host wins>/<host wins + guest wins>". Returns None.
    """
    # only use matches with complete data
    df_shots = df_shots[df_shots['__source_file__'].isin(df_points['__source_file__'])] # UPDATE: Temporary fix

    # add column for winner of the point
    combined = pd.merge(df_shots, df_points[['Point', 'Game', 'Set', 'Point Winner', 'Match Server', '__source_file__']], on=['Point', 'Game', 'Set', '__source_file__'], how='left')

    serves = combined[(combined['Stroke'] == 'Serve') & (combined['Match Server'] == 'host')] # Added Player Name Filter
    serves_in = serves[serves['Result'] == 'In'].copy()

    # zone classification
    serves_in.loc[:, 'x_coord'] = serves_in['Bounce (x)'] * 38.2764654418
    serves_in.loc[:, 'y_coord'] = (serves_in['Bounce (y)'] - 11.8872) * 38.2764654418
    serves_in['Zone'] = serves_in.apply(classify_zone, axis=1)

    # Wins per zone and winner. Missing zone/winner combinations are filled
    # with 0 and both winner columns are forced to exist, so a zone in which
    # one side never won no longer yields NaN/float text (e.g. "3.0/nan") or
    # a KeyError when 'host' or 'guest' is absent altogether.
    zones = (
        serves_in.groupby('Zone')['Point Winner']
        .value_counts()
        .unstack(fill_value=0)
        .reindex(columns=['host', 'guest'], fill_value=0)
        .astype(int)
    )

    # get win proportions and convert to json
    zones['Win Proportion'] = zones['host'].astype(str) + '/' + (zones['host'] + zones['guest']).astype(str)
    zones = zones.reset_index()
    zones.columns.name = None
    zones = zones[['Zone', 'Win Proportion']]
    zones.to_json('serve_dist.json', orient='records')

In [None]:
# Writes the host's per-zone win proportions to serve_dist.json
serve_zone_distribution(combined_data_shots, combined_data_points)

### Serve Placement (WIP)

### Serve Ratings

##### Helper Functions

In [None]:
# Helper Function: Grabs total of Specified Stat
def get_total(df, stat_name):
    """Return the sum of one statistic over every 'Host Set' column.

    Rows are selected where 'Stat Name' equals `stat_name`. Values that
    cannot be parsed as numbers are coerced to NaN and therefore ignored by
    the sum. Returns 0 when no row matches.
    """
    matching_rows = df.loc[df['Stat Name'] == stat_name]
    if matching_rows.empty: # UPDATE Throw an error?
        return 0

    host_set_columns = [name for name in matching_rows.columns if name.startswith('Host Set')]

    # Column-by-column totals added up from 0; NaNs are skipped by .sum()
    # UPDATE: throw an error? because this means that the data is bad?
    return sum(
        pd.to_numeric(matching_rows[name], errors='coerce').sum()
        for name in host_set_columns
    )
    # return rows

In [None]:
# Helper Function: Calculate Service Games Won Percentage

def calculate_service_games_won(df):
    """Return the fraction (0.0-1.0) of decided host service games won by the host.

    Parameters
    ----------
    df : pandas.DataFrame
        Games table with 'Server' and 'Game Winner' columns.

    Returns
    -------
    float
        Games won by the host divided by the games where 'Server' is 'host'
        and 'Game Winner' is not 'draw'. Returns 0.0 when there is no such
        game, matching service_games_won_percentage (which reports 0 in that
        case) instead of raising ZeroDivisionError.
    """
    # Host service games that have a decided winner
    host_service_games = df[(df['Server'] == 'host') & (df['Game Winner'] != 'draw')]

    games_played = len(host_service_games)
    if games_played == 0:
        return 0.0

    # Of those, the games the host won
    games_won = len(host_service_games[host_service_games['Game Winner'] == 'host'])

    return games_won / games_played

In [None]:
# Helper Function: Calculate Double Faults

def calculate_double_faults(df):
    """Return the number of rows where the host served and 'Detail' is 'Double Fault'."""
    host_serving = df['Match Server'] == 'host'
    is_double_fault = df['Detail'] == 'Double Fault'
    return len(df[host_serving & is_double_fault])

##### Serve Rating Calculation

In [None]:
# Total matches in dataset (one group per source file)
total_matches = len(combined_data_stats.groupby('__source_file__'))

# The built-in round() is used throughout: get_total() returns a plain int 0
# when a statistic is missing, and the resulting Python float has no
# `.round()` method, so the previous `(...).round(1)` calls could raise
# AttributeError.

# 1st Serve In Percentage
first_serve_in_percentage_rating = round((get_total(combined_data_stats, '1st Serves In') / get_total(combined_data_stats, '1st Serves')) * 100, 1)

# 1st Serve Points Won Percentage
first_serve_won_percentage_rating = round((get_total(combined_data_stats, '1st Serves Won') / get_total(combined_data_stats, '1st Serves In')) * 100, 1)

# 2nd Serve Points Won Percentage
second_serve_won_percentage_rating = round((get_total(combined_data_stats, '2nd Serves Won') / get_total(combined_data_stats, '2nd Serves In')) * 100, 1)

# Service Games Won Percentage
service_games_won_percentage_rating = round(calculate_service_games_won(combined_data_games) * 100, 1)

# Average Aces per Match
aces_average_rating = round(get_total(combined_data_stats, 'Aces') / total_matches, 1)

# Average Double Faults per Match
doubleFaults_average_rating = round(calculate_double_faults(combined_data_points) / total_matches, 1)

In [None]:
# Serve rating: the four percentage components added together, minus the
# average double faults per match, rounded to one decimal
serve_rating = round(
    first_serve_in_percentage_rating
    + first_serve_won_percentage_rating
    + second_serve_won_percentage_rating
    + service_games_won_percentage_rating
    - doubleFaults_average_rating,
    1,
)

# Report every input next to the final rating
for report_line in (
    f"First Serve In %: {first_serve_in_percentage_rating}%",
    f"First Serve Points Won %: {first_serve_won_percentage_rating}%",
    f"Second Serve Points Won %: {second_serve_won_percentage_rating}%",
    f"Service Games Won %: {service_games_won_percentage_rating}%",
    f"Aces per Match: {aces_average_rating}",
    f"Double Faults per Match: {doubleFaults_average_rating}",
    f"Serve Rating: {serve_rating}",
):
    print(report_line)

### Output CSV

In [None]:
# Values gathered for the CSV export; nothing is written to disk by this cell.
# As bare expressions, only the last one (fastest_serve) is displayed.
# `min` and `sec` are the minute/second strings of the average service game
# duration computed earlier; `min` is not the built-in function here.
min, sec
average_games_held
breakpoints_saved_percentage
average_aces
average_double_faults
fastest_serve