In [29]:
import pandas as pd

# Load all sheets into a dictionary
shot_ratings_file = pd.read_excel("../../data/mens/Rudy Quan/combined.xlsx", sheet_name=None, engine="openpyxl")

In [30]:
display(shot_ratings_file)

{'Settings':                                            Start Time  End Time  Location  \
 0                                            05:45:11  08:03:20       NaN   
 1                                                 NaN       NaN       NaN   
 2                Note: xyz coordinates are in meters.       NaN       NaN   
 3            X is positive to the right of the camera       NaN       NaN   
 4   Y is positive towards the opposite side of the...       NaN       NaN   
 5                   Z is positive up out of the court       NaN       NaN   
 6                                            22:48:10  00:02:45       NaN   
 7                                                 NaN       NaN       NaN   
 8                Note: xyz coordinates are in meters.       NaN       NaN   
 9            X is positive to the right of the camera       NaN       NaN   
 10  Y is positive towards the opposite side of the...       NaN       NaN   
 11                  Z is positive up out of the cou

In [31]:
stats_df = shot_ratings_file['Stats']
games_df = shot_ratings_file['Games'] 
points_df = shot_ratings_file['Points']

In [32]:
stats_df

Unnamed: 0,Stat Name,Host Set 1,Guest Set 1,Host Set 2,Guest Set 2,Host Set 3,Guest Set 3,Host Set 4,Guest Set 4,Host Set 5,Guest Set 5,__source_file__
0,1st Serves,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
1,1st Serves In,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
2,1st Serves Won,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
3,2nd Serves,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
4,2nd Serves In,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
...,...,...,...,...,...,...,...,...,...,...,...,...
265,Forehand Forced Errors,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
266,Backhand Forced Errors,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
267,Calories Burned (CAL),0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
268,Distance Run (MI),0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx


In [33]:
# Cj Question: Is this to filter out bad CAL data?
# Keep only the stat rows that have a value in 'Host Set 2'; rows where that
# column is NaN are dropped.
# NOTE(review): presumably this removes matches whose export has no real
# second-set stats (e.g. placeholder rows) — confirm with the data owner.
has_host_set_2 = stats_df['Host Set 2'].notna()
filtered_stats_df = stats_df.loc[has_host_set_2]
filtered_stats_df.head(40)

Unnamed: 0,Stat Name,Host Set 1,Guest Set 1,Host Set 2,Guest Set 2,Host Set 3,Guest Set 3,Host Set 4,Guest Set 4,Host Set 5,Guest Set 5,__source_file__
30,1st Serves,22.0,25.0,27.0,25.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
31,1st Serves In,18.0,14.0,18.0,12.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
32,1st Serves Won,13.0,6.0,12.0,8.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
33,2nd Serves,4.0,11.0,9.0,13.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
34,2nd Serves In,3.0,10.0,7.0,10.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
35,2nd Serves Won,1.0,4.0,4.0,3.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
36,Break Points,3.0,5.0,3.0,4.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
37,Break Points Saved,2.0,2.0,1.0,1.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
38,1st Returns,14.0,18.0,12.0,18.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
39,1st Returns Won,8.0,5.0,4.0,6.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx


<span style="font-size:24px;">Functions </span>

In [34]:
def get_total(stat_name):
    """Sum every 'Host Set ...' value recorded for one stat.

    Reads the notebook-level ``filtered_stats_df``. Rows are matched on the
    'Stat Name' column after stripping surrounding whitespace.

    Args:
        stat_name: Exact stat label to look up (e.g. '1st Serves').

    Returns:
        The sum over all columns whose name starts with 'Host Set' for the
        matching rows. Cells that cannot be parsed as numbers are coerced to
        NaN and skipped. Returns 0 when no row matches.
    """
    stripped_names = filtered_stats_df['Stat Name'].str.strip()
    matching = filtered_stats_df[stripped_names == stat_name]
    if matching.empty:
        return 0

    host_columns = [name for name in matching.columns if name.startswith('Host Set')]
    # Built-in sum starts at 0 and adds one column subtotal at a time;
    # Series.sum() skips NaN.
    return sum(
        pd.to_numeric(matching[name], errors='coerce').sum()
        for name in host_columns
    )


def calculate_service_games_won(games_df):
    """Fraction of the host's service games that the host won.

    Args:
        games_df: DataFrame with 'Server' and 'Game Winner' columns.

    Returns:
        Games won by 'host' divided by all games where 'Server' is 'host'
        and 'Game Winner' is not 'draw'; None when there are no such games.
    """
    on_host_serve = games_df['Server'] == 'host'
    has_winner = games_df['Game Winner'] != 'draw'
    served = games_df[on_host_serve & has_winner]

    total_served = len(served)
    if total_served == 0:
        return None  # Nothing to rate

    held = served[served['Game Winner'] == 'host']
    return len(held) / total_served

def calculate_return_games_won(games_df):
    """Fraction of the guest's service games that the host won.

    Args:
        games_df: DataFrame with 'Server' and 'Game Winner' columns.

    Returns:
        Games won by 'host' divided by all games where 'Server' is 'guest'
        and 'Game Winner' is not 'draw'; None when there are no such games.
    """
    on_guest_serve = games_df['Server'] == 'guest'
    has_winner = games_df['Game Winner'] != 'draw'
    returned = games_df[on_guest_serve & has_winner]

    total_returned = len(returned)
    if total_returned == 0:
        return None  # Nothing to rate

    # A return game is "won" when the host takes a game the guest served.
    broken = returned[returned['Game Winner'] == 'host']
    return len(broken) / total_returned

def calculate_transition_points(points_df, games_df):
    """Share of "transition" points that were won by the host.

    A row of ``points_df`` counts as a transition when its 'Set' value is not
    1 and the following row's 'Set' value is 1. The final row has no
    successor, so it is additionally marked as a transition when its
    'Point Winner' equals the 'Game Winner' of the final row of ``games_df``.

    NOTE(review): presumably each transition is the closing point of a
    multi-set match inside a sheet that concatenates several matches, so the
    result reads as "multi-set matches closed out by the host" — confirm this
    is the intended definition wherever it is reported as a set-level stat.

    Args:
        points_df: DataFrame with 'Set' and 'Point Winner' columns.
        games_df: DataFrame with a 'Game Winner' column.

    Returns:
        Host-won transitions divided by all transitions, or None when either
        frame has no rows or no transition is found.
    """
    # iloc[-1] below would raise IndexError on an empty frame; report
    # "nothing to calculate" the same way the other helpers do.
    if len(points_df) == 0 or len(games_df) == 0:
        return None

    # Identify the transitions from non-1 to 1 in the 'Set' column
    transitions = (points_df['Set'] != 1) & (points_df['Set'].shift(-1) == 1)

    # The last row can never satisfy the shift(-1) test, so decide it separately
    last_point_winner = points_df.iloc[-1]['Point Winner']
    last_game_winner = games_df.iloc[-1]['Game Winner']
    if last_point_winner == last_game_winner:
        transitions.iloc[-1] = True  # force last row as a transition

    # Denominator: all transitions
    transition_count = transitions.sum()
    if transition_count <= 0:
        return None

    # Numerator: transitions whose point was won by the host
    host_transition_points_count = ((points_df['Point Winner'] == 'host') & transitions).sum()

    return host_transition_points_count / transition_count

def calculate_tiebreaks_won_percent(games_df):
    """Fraction of 6-6 games that were won by the host.

    Rows where both 'Host Set Score' and 'Guest Set Score' equal 6 are
    treated as tiebreak games.

    Args:
        games_df: DataFrame with 'Host Set Score', 'Guest Set Score' and
            'Game Winner' columns.

    Returns:
        Host-won tiebreak rows divided by all tiebreak rows, or None when
        there are no tiebreak rows.
    """
    at_six_all = games_df['Host Set Score'].eq(6) & games_df['Guest Set Score'].eq(6)
    six_all_games = games_df[at_six_all]

    played = len(six_all_games)
    if played == 0:
        return None  # No tiebreaks, so no ratio to report

    won_by_host = six_all_games['Game Winner'].eq('host').sum()
    return won_by_host / played


### Serve Rating
- 1st Serve %
- 1st Serve Points Won %
- 2nd Serve Points Won %
- Service Games Won %
- Avg. Aces/Match
- Avg. Double Faults/Match

In [35]:
filtered_stats_df[filtered_stats_df['Stat Name'] == 'Aces']
# get_total('Aces')

Unnamed: 0,Stat Name,Host Set 1,Guest Set 1,Host Set 2,Guest Set 2,Host Set 3,Guest Set 3,Host Set 4,Guest Set 4,Host Set 5,Guest Set 5,__source_file__
49,Aces,0.0,1.0,0.0,0.0,,,,,,,RudyQuan_DeaconThomas_Indiana.xlsx
79,Aces,0.0,0.0,0.0,1.0,,,,,,,RudyQuan_FloridaState_9_23_24.xlsx
109,Aces,0.0,1.0,0.0,0.0,,,,,,,RudyQuan_Indiana_3_7_25.xlsx
139,Aces,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_MichiganState_4_13_25.xlsx
169,Aces,3.0,1.0,0.0,0.0,0.0,0.0,,,,,RudyQuan_Pepperdine_2_27_25.xlsx
229,Aces,0.0,2.0,0.0,3.0,,,,,,,RudyQuan_UMichigan_4_25_25.xlsx
259,Aces,0.0,1.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx


In [36]:
match_count = (filtered_stats_df['Stat Name'].str.strip() == '1st Serves').sum()
(filtered_stats_df['Stat Name'].str.strip() == '1st Serves')

30      True
31     False
32     False
33     False
34     False
       ...  
265    False
266    False
267    False
268    False
269    False
Name: Stat Name, Length: 210, dtype: bool

In [37]:
total_matches = len(filtered_stats_df.groupby('__source_file__'))

In [None]:
# Serve metrics for the host. A metric is None when its denominator is
# zero/empty; it is then printed as N/A and left out of the rating.
serve_results = {}

# Ratio metrics: label -> (numerator stat, denominator stat)
serve_ratio_stats = {
    '1st Serve %': ('1st Serves In', '1st Serves'),
    '1st Serve Points Won %': ('1st Serves Won', '1st Serves In'),
    '2nd Serve Points Won %': ('2nd Serves Won', '2nd Serves In'),
}
for label, (numerator_stat, denominator_stat) in serve_ratio_stats.items():
    denominator = get_total(denominator_stat)
    serve_results[label] = get_total(numerator_stat) / denominator if denominator else None

# Service Games Won %
serve_results['Service Games Won %'] = calculate_service_games_won(games_df)

# Avg. Aces/Match: total host aces divided by the number of 'Aces' rows
# (one row per match in the stats sheet)
aces_total = get_total('Aces')
aces_count = (filtered_stats_df['Stat Name'].str.strip() == 'Aces').sum()
serve_results['Avg. Aces/Match'] = float(aces_total) / aces_count if aces_count else None

# Avg. Double Faults/Match: double faults are counted from points_df,
# matches from the number of '1st Serves' rows in filtered_stats_df
host_double_faults = (points_df['Match Server'] == 'host') & (points_df['Detail'] == 'Double Fault')
double_fault_total = points_df[host_double_faults].shape[0]
match_count = (filtered_stats_df['Stat Name'].str.strip() == '1st Serves').sum()
serve_results['Avg. Double Faults/Match'] = double_fault_total / match_count if match_count else None

# Serve Rating: percentages on a 0-100 scale, plus aces, minus double faults
serve_rating = 0
for label in ('1st Serve %', '1st Serve Points Won %', '2nd Serve Points Won %', 'Service Games Won %'):
    if serve_results[label] is not None:
        serve_rating += serve_results[label] * 100
if serve_results['Avg. Aces/Match'] is not None:
    serve_rating += serve_results['Avg. Aces/Match']
if serve_results['Avg. Double Faults/Match'] is not None:
    serve_rating -= serve_results['Avg. Double Faults/Match']

# Report: per-match averages as plain floats, everything else as a percentage
per_match_metrics = ('Avg. Aces/Match', 'Avg. Double Faults/Match')
for label, value in serve_results.items():
    if value is None:
        print(f"{label}: N/A")
    elif label in per_match_metrics:
        print(f"{label}: {value:.2f}")
    else:
        print(f"{label}: {value:.2%}")

print(f"Serve Rating: {serve_rating:.2f}")


1st Serve %: 72.22%
1st Serve Points Won %: 66.15%
2nd Serve Points Won %: 58.82%
Service Games Won %: 72.31%
Avg. Aces/Match: 0.43
Avg. Double Faults/Match: 2.14
Serve Rating: 267.79


<span style="font-size:24px;">Return Rating </span> <br>
<span style="font-size:20px;"> 1st Serve Return Points Won %, 2nd Serve Return Points Won %, Return Games Won %, Break Points Converted % </span>


In [None]:
# Return metrics for the host. A metric is None when its denominator is
# zero/empty; it is then printed as N/A and left out of the rating.
return_results = {}

# 1st Serve Return Points Won %
first_returns_total = get_total('1st Returns')
return_results['1st Serve Return Points Won %'] = (
    get_total('1st Returns Won') / first_returns_total if first_returns_total else None
)

# 2nd Serve Return Points Won %
second_returns_total = get_total('2nd Returns')
return_results['2nd Serve Return Points Won %'] = (
    get_total('2nd Returns Won') / second_returns_total if second_returns_total else None
)

# Return Games Won %
return_results['Return Games Won %'] = calculate_return_games_won(games_df)

# Break Points Converted %
break_point_chances = get_total('Break Point Opportunities')
return_results['Break Points Converted %'] = (
    get_total('Break Points Won') / break_point_chances if break_point_chances else None
)

# Return Rating: sum of the available metrics on a 0-100 scale
return_rating = 0
for label in (
    '1st Serve Return Points Won %',
    '2nd Serve Return Points Won %',
    'Break Points Converted %',
    'Return Games Won %',
):
    if return_results[label] is not None:
        return_rating += return_results[label] * 100

# Print the return results; every metric here is a ratio, so all are shown
# as percentages
for label, value in return_results.items():
    if value is not None:
        print(f"{label}: {value:.2%}")
    else:
        print(f"{label}: N/A")

# Print Return Rating
print(f"Return Rating: {return_rating:.2f}")

<span style="font-size:24px;">Under Pressure Rating </span> <br>
<span style="font-size:20px;"> Break Points Converted %, Break Points Saved %, Tie Breaks Won %, Deciding Sets Won % </span>


In [None]:
# Under-pressure metrics for the host. A metric is None when there is nothing
# to divide by; it is then printed as N/A and left out of the rating.
pressure_results = {}

# Break Points Converted %
break_point_chances = get_total('Break Point Opportunities')
pressure_results['Break Points Converted %'] = (
    get_total('Break Points Won') / break_point_chances if break_point_chances else None
)

# Break Points Saved %
break_points_faced = get_total('Break Points')
pressure_results['Break Points Saved %'] = (
    get_total('Break Points Saved') / break_points_faced if break_points_faced else None
)

# Tie Breaks Won %
pressure_results['Tie Breaks Won %'] = calculate_tiebreaks_won_percent(games_df)

# Deciding Sets Won % (as computed by calculate_transition_points)
pressure_results['Deciding Sets Won %'] = calculate_transition_points(points_df, games_df)

# Pressure Rating: sum of the available metrics on a 0-100 scale
pressure_rating = 0
for label in (
    'Break Points Converted %',
    'Break Points Saved %',
    'Tie Breaks Won %',
    'Deciding Sets Won %',
):
    if pressure_results[label] is not None:
        pressure_rating += pressure_results[label] * 100

# Print the pressure results; every metric here is a ratio, so all are shown
# as percentages
for label, value in pressure_results.items():
    if value is not None:
        print(f"{label}: {value:.2%}")
    else:
        print(f"{label}: N/A")

# Print Pressure Rating
print(f"Pressure Rating: {pressure_rating:.2f}")

## Cj Work

### Load Packages

In [1]:
import pandas as pd
import re
import os

### Read Data

In [2]:
# USER INPUT: Specify Player!
player_name = 'Rudy Quan'
# player_name = 'Kaylan Bigun'
# # player_name = 'Emon Van Loben Sels'
# player_name = 'Alexander Hoogmartens'
# player_name = 'Spencer Johnson'
# player_name = 'Aadarsh Tripathi'
# player_name = 'Giacomo Revelli'
# player_name = 'Gianluca Ballotta'

In [3]:
# Open the player's combined workbook once and pull out the sheets used below.
# Passing a list to sheet_name returns a dict of {sheet name: DataFrame}.
combined_workbook_path = f'../../data/mens/{player_name}/combined.xlsx'
combined_sheets = pd.read_excel(
    combined_workbook_path,
    sheet_name=['Shots', 'Points', 'Sets', 'Stats'],
)

combined_data_shots = combined_sheets['Shots']
combined_data_points = combined_sheets['Points']
combined_data_sets = combined_sheets['Sets']
combined_data_stats = combined_sheets['Stats']

In [4]:
# Subset 2024-2025 Season Matches!
# NOTE(review): the first 229 rows are assumed to be the 2024-2025 season;
# the cut-off is positional, so it must be updated if the CSV is re-exported
# or re-sorted — confirm.
mens_results = (
    pd.read_csv('../../data/mens/mens_results.csv')
    .iloc[:229]
    # Change Date Format
    .assign(Date=lambda results: pd.to_datetime(results['Date']))
)


def filter_player(data, player_name, event_prefixes=('Dual Match', '2024 ITA', '2024-25 NCAA Division')):
    """Return the matches a player took part in at selected events.

    Args:
        data: DataFrame with 'Player1', 'Player2' and 'Event Name' columns.
        player_name: Name to look for in either player column (exact match).
        event_prefixes: Prefix, or iterable of prefixes, that 'Event Name'
            must start with. Defaults to the school-event prefixes used for
            the 2024-25 season.

    Returns:
        The rows of ``data`` where the player appears as 'Player1' or
        'Player2' and 'Event Name' starts with one of the prefixes. Rows with
        a missing 'Event Name' are excluded.
    """
    if isinstance(event_prefixes, str):
        event_prefixes = (event_prefixes,)

    # Filter for player_name
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)

    # Filter for only school events. na=False turns a missing event name into
    # False instead of NaN, which would make the boolean mask invalid.
    is_selected_event = data['Event Name'].str.startswith(tuple(event_prefixes), na=False)

    return data[involves_player & is_selected_event]

mens_results_player = filter_player(mens_results, player_name)

In [5]:
mens_results_player

Unnamed: 0,Event Name,Date,Player1,Player1 UTR,Player2,Player2 UTR,Score
2,"Dual Match: University of California, Los Ange...",2025-04-26,Aidan Kim,13.75,Rudy Quan,13.67,"7-6(3), 6-2"
10,Dual Match: Michigan State University vs Unive...,2025-04-25,Rudy Quan,13.67,Aristotelis THANOS,13.61,"4-6, 3-1"
17,Dual Match: University of Michigan vs Universi...,2025-04-24,Rudy Quan,13.67,William Cooksey,12.0,"6-3, 5-6"
23,"Dual Match: University of California, Los Ange...",2025-04-19,Rudy Quan,13.67,Calvin MUELLER,13.0,"3-6, 6-3, 6-1"
25,"Dual Match: University of California, Los Ange...",2025-04-17,Michael Minasyan,12.0,Rudy Quan,13.67,"2-6, 6-3"
32,Dual Match: Michigan State University vs Unive...,2025-04-12,Rudy Quan,13.67,Ozan Baris,13.83,"6-1, 6-2"
36,"Dual Match: University of California, Los Ange...",2025-04-10,Rudy Quan,13.67,William Cooksey,12.0,"6-3, 6-3"
44,"Dual Match: University of California, Los Ange...",2025-04-05,Rudy Quan,13.67,Charl Morgan,12.0,"6-3, 6-3"
48,Dual Match: Ohio State University vs Universit...,2025-04-03,Alexander Bernard,13.33,Rudy Quan,13.67,"7-6(5), 0-6"
52,"Dual Match: University of California, Los Ange...",2025-04-03,Alexander Bernard,13.33,Rudy Quan,13.67,"7-6(5), 0-6, 3-1"


In [10]:
combined_data_stats['Stat Name'].unique()

array(['1st Serves', '1st Serves In', '1st Serves Won', '2nd Serves',
       '2nd Serves In', '2nd Serves Won', 'Break Points',
       'Break Points Saved', '1st Returns', '1st Returns Won',
       '2nd Returns', '2nd Returns Won', 'Break Point Opportunities',
       'Break Points Won', 'Set Points Saved', 'Set Point Opportunities',
       'Set Points Won', 'Total Points', 'Total Points Won', 'Aces',
       'Service Winners', 'Forehand Winners', 'Backhand Winners',
       'Forehand Unforced Errors', 'Backhand Unforced Errors',
       'Forehand Forced Errors', 'Backhand Forced Errors',
       'Calories Burned (CAL)', 'Distance Run (MI)',
       'Average Heart Rate (BPM)', 'Calories Burned (kCAL)'], dtype=object)

In [13]:
combined_data_stats.columns

Index(['Stat Name', 'Host Set 1', 'Guest Set 1', 'Host Set 2', 'Guest Set 2',
       'Host Set 3', 'Guest Set 3', 'Host Set 4', 'Guest Set 4', 'Host Set 5',
       'Guest Set 5', '__source_file__'],
      dtype='object')

In [27]:
combined_data_stats


Unnamed: 0,Stat Name,Host Set 1,Guest Set 1,Host Set 2,Guest Set 2,Host Set 3,Guest Set 3,Host Set 4,Guest Set 4,Host Set 5,Guest Set 5,__source_file__
0,1st Serves,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
1,1st Serves In,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
2,1st Serves Won,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
3,2nd Serves,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
4,2nd Serves In,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
...,...,...,...,...,...,...,...,...,...,...,...,...
265,Forehand Forced Errors,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
266,Backhand Forced Errors,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
267,Calories Burned (CAL),0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
268,Distance Run (MI),0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx


### Rahul Work Modified by Cj

In [58]:
total_matches = len(filtered_stats_df.groupby('__source_file__'))
total_matches

7

##### Helper Functions

In [52]:
# Helper Function: Grabs total of Specified Stat
# NOTE: this two-argument version replaces the one-argument
# get_total(stat_name) defined in the "Functions" section once this cell has
# run; cells written against that signature must be called before this one
# or updated to pass a DataFrame.
def get_total(df, stat_name):
    """Sum every 'Host Set ...' value recorded for one stat in ``df``.

    Args:
        df: Stats DataFrame with a 'Stat Name' column and 'Host Set N' columns.
        stat_name: Stat label to look up (e.g. 'Aces'). Surrounding whitespace
            in the 'Stat Name' column is ignored, as in the one-argument
            helper.

    Returns:
        The sum over all columns whose name starts with 'Host Set' for the
        matching rows. Cells that cannot be parsed as numbers are coerced to
        NaN and skipped. Returns 0 when no row matches.
    """
    stat_names = df['Stat Name'].astype(str).str.strip()
    rows = df[stat_names == stat_name]
    if rows.empty:
        # TODO: decide whether an unknown stat name should raise instead.
        return 0

    # str() keeps the prefix test safe if a column label is not a string.
    host_columns = [col for col in rows.columns if str(col).startswith('Host Set')]

    # TODO: errors='coerce' silently drops unparsable cells; decide whether
    # that should raise because it indicates bad data.
    return sum(
        pd.to_numeric(rows[col], errors='coerce').sum()  # Series.sum() ignores NaNs
        for col in host_columns
    )

In [None]:
# Helper Function: 

### Serve Rating

In [68]:
get_total(combined_data_stats, 'Aces')

3.0

In [71]:
# Host totals from the combined stats sheet, each looked up once
host_first_serves = get_total(combined_data_stats, '1st Serves')
host_first_serves_in = get_total(combined_data_stats, '1st Serves In')
host_first_serves_won = get_total(combined_data_stats, '1st Serves Won')
host_second_serves_in = get_total(combined_data_stats, '2nd Serves In')
host_second_serves_won = get_total(combined_data_stats, '2nd Serves Won')
host_aces = get_total(combined_data_stats, 'Aces')

# 1st Serve In Percentage
first_serve_in_percentage = ((host_first_serves_in / host_first_serves) * 100).round(2)

# 1st Serve Points Won Percentage
first_serve_won_percentage = ((host_first_serves_won / host_first_serves_in) * 100).round(2)

# 2nd Serve Points Won Percentage
second_serve_won_percentage = ((host_second_serves_won / host_second_serves_in) * 100).round(2)

# Service Games Won Percentage
# (not implemented yet)

# Average Aces per Match
# NOTE(review): total_matches is counted from filtered_stats_df while the ace
# total comes from combined_data_stats — confirm both should cover the same
# set of matches.
aces_average = (host_aces / total_matches).round(2)

# Average Double Faults per Match
# (not implemented yet)

# Average Double Faults per Match



In [72]:
second_serve_won_percentage, aces_average

(58.82, 0.43)