# Serve Statistics Notebook

### Load Packages

In [10]:
import pandas as pd
import re
import os
import ipywidgets as widgets
from IPython.display import display

### Read Data

In [11]:
# Player picker. The first option is a placeholder (not a real player) and is
# the initial value, so a later check can tell that nothing was selected yet.
# NOTE(review): results below depend on this interactive choice, so a plain
# "Run All" will stop until a player is picked.
choose_player_dropdown = widgets.Dropdown(
    options=['-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun', 'Alexander Hoogmartens', 
             'Spencer Johnson', 'Aadarsh Tripathi', 'Giacomo Revelli', 'Gianluca Ballotta'],
    value='-- Select --',
    # The widget selects a player, so label it accordingly (was 'Category:').
    description='Player:'
)

display(choose_player_dropdown)

Dropdown(description='Category:', options=('-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun',…

In [18]:
# Refuse to continue while the dropdown still shows its placeholder entry:
# the value is used as the player's name in the cells that follow.
if choose_player_dropdown.value == '-- Select --':
    raise ValueError("Please choose a valid player from the dropdown menu in the previous cell before proceeding.")

# If valid, use the value
player_name = choose_player_dropdown.value

In [19]:
# Read all five sheets of the selected player's workbook in one pass.
# Passing a list to `sheet_name` makes read_excel return a dict keyed by sheet
# name, so the file is opened and parsed once instead of five times.
combined_workbook = pd.read_excel(
    f'../../data/mens/{player_name}/combined.xlsx',
    sheet_name=['Shots', 'Points', 'Games', 'Sets', 'Stats'],
)

combined_data_shots = combined_workbook['Shots']
combined_data_points = combined_workbook['Points']
combined_data_games = combined_workbook['Games']
combined_data_sets = combined_workbook['Sets']
combined_data_stats = combined_workbook['Stats']

In [20]:
# Subset 2024-2025 Season Matches!
# The season is selected by position: only the first rows of the results file
# are kept. NOTE(review): this count is tied to the current contents/order of
# mens_results.csv and must be revisited whenever the file changes.
SEASON_2024_25_ROW_COUNT = 229

# .copy() makes the subset an independent frame, so adding/replacing a column
# below does not write into a slice of the original (SettingWithCopyWarning).
mens_results = (
    pd.read_csv('../../data/mens/mens_results.csv')
    .iloc[:SEASON_2024_25_ROW_COUNT]
    .copy()
)

# Change Date Format
mens_results['Date'] = pd.to_datetime(mens_results['Date'])

# Function to Filter by Player and School Matches Only
def filter_player(data, player_name):
    """Return the rows of ``data`` that are school-event matches of one player.

    Parameters
    ----------
    data : pandas.DataFrame
        Match results with 'Player1', 'Player2' and 'Event Name' columns.
    player_name : str
        Name to look for in either the 'Player1' or the 'Player2' column.

    Returns
    -------
    pandas.DataFrame
        Rows where the player appears on either side AND the event name starts
        with 'Dual Match', '2024 ITA' or '2024-25 NCAA Division'. The original
        index labels are preserved.
    """
    # Filter for player_name (on either side of the match)
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)

    # Filter for only school events. na=False treats a missing event name as
    # "not a school event"; without it the mask contains NaN and boolean
    # indexing raises instead of filtering.
    is_school_event = data['Event Name'].str.startswith(
        ('Dual Match', '2024 ITA', '2024-25 NCAA Division'), na=False
    )

    return data[involves_player & is_school_event]


# Keep only the selected player's school-event matches
mens_results_player = filter_player(data=mens_results, player_name=player_name)

In [21]:
mens_results_player

Unnamed: 0,Event Name,Date,Player1,Player2,Player1 UTR,Player2 UTR,Score
8,Dual Match: University of Southern California ...,2025-05-08,Alexander Hoogmartens,Karl Lee,13.0,13.01,"4-6, 6-3, 6-3"
17,"Dual Match: University of California, Los Ange...",2025-05-02,Alexander Hoogmartens,Timofey Stepanov,13.0,13.02,"7-6(7), 1-6, 6-3"
20,"Dual Match: University of California, Santa Ba...",2025-05-01,Alexander Hoogmartens,Diogo Morais,13.0,12.79,"4-6, 6-3, 1-2"
25,"Dual Match: University of California, Los Ange...",2025-04-26,Alexander Hoogmartens,Jack Anthrop,13.0,13.57,"7-5, 0-6, 6-4"
34,Dual Match: Michigan State University vs Unive...,2025-04-25,Alexander Hoogmartens,Matthew Forbes,13.0,12.0,"3-6, 5-0"
41,Dual Match: University of Michigan vs Universi...,2025-04-24,Alexander Hoogmartens,Nicholas Steiglehner,13.0,12.71,"2-6, 6-3, 2-2"
47,"Dual Match: University of California, Los Ange...",2025-04-19,Alexander Hoogmartens,Lars Johann,13.0,12.82,"6-4, 6-4"
50,"Dual Match: University of California, Los Ange...",2025-04-17,Alexander Hoogmartens,Tomas Zlatohlavek,13.0,11.0,"6-1, 6-1"
59,Dual Match: Michigan State University vs Unive...,2025-04-12,Alexander Hoogmartens,Vuk Radjenovic,13.0,12.66,"3-6, 6-1, 6-1"
62,"Dual Match: University of California, Los Ange...",2025-04-10,Alexander Hoogmartens,Nicholas Steiglehner,13.0,12.71,"6-4, 6-3"


### Average Service Game Duration

In [22]:
def average_service_time(data):
    """Return the mean 'Duration' of host-served games as an ``M:SS`` string.

    Intended for the games table (``combined_data_games``); per the notebook's
    convention the 'host' server is always the UCLA player.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Server' column and a numeric 'Duration' column. The
        duration is split into minutes and seconds with ``divmod(..., 60)``,
        i.e. it is treated as a number of seconds.

    Returns
    -------
    str
        Rounded mean duration formatted as minutes and zero-padded seconds,
        e.g. ``'4:04'``.

    Raises
    ------
    ValueError
        If no row has ``Server == 'host'`` with a non-missing duration, so
        there is nothing to average.
    """
    host_durations = data.loc[data['Server'] == 'host', 'Duration']

    # mean() skips missing durations; it is NaN only when nothing is left.
    avg_seconds = host_durations.mean()

    # Fail with a descriptive message instead of the cryptic
    # "cannot convert float NaN to integer" that round() would raise.
    if pd.isna(avg_seconds):
        raise ValueError(
            "No host-served games with a recorded 'Duration' were found; "
            "cannot compute an average service game duration."
        )

    total = int(round(avg_seconds))
    mins, secs = divmod(total, 60)

    return f"{mins}:{secs:02d}"

In [23]:
# Average duration of host-served games, shown as M:SS
avg_service_game_duration = average_service_time(data=combined_data_games)
avg_service_game_duration

'4:04'

### Average Games Held Percentage

In [24]:
def service_games_won_percentage(df):
    """Return the share of host-served games won by the host, as a whole percent.

    Parameters
    ----------
    df : pandas.DataFrame
        Games table with 'Server' and 'Game Winner' columns.

    Returns
    -------
    int
        Percentage (0-100) of rows with ``Server == 'host'`` whose
        'Game Winner' is also 'host', rounded to the nearest integer.
        Returns 0 when 'host' never appears among those winners.
    """
    # Winners of the games in which the host (UCLA player) was serving
    host_served_winners = df.loc[df["Server"] == "host", "Game Winner"]

    # Relative frequency of each winner label; 'host' may be absent entirely
    winner_shares = host_served_winners.value_counts(normalize=True)
    host_share = winner_shares.get('host', 0)

    # Scale to percent, round, and hand back a plain integer
    return int(round(host_share * 100, 0))

In [25]:
# Whole-number percentage of host-served games that the host won
average_games_held = service_games_won_percentage(df=combined_data_games)
average_games_held

78

### Breakpoints Saved (WIP)