In [2]:
import pandas as pd

# sheet_name=None makes read_excel return a dict of DataFrames keyed by sheet name
shot_ratings_file = pd.read_excel(
    "../../data/mens/Rudy Quan/combined.xlsx",
    sheet_name=None,
    engine="openpyxl",
)



In [5]:
# Render the whole dict of loaded sheets (one DataFrame per sheet name) for a quick look
display(shot_ratings_file)

{'Settings':                                            Start Time  End Time  Location  \
 0                                            05:45:11  08:03:20       NaN   
 1                                                 NaN       NaN       NaN   
 2                Note: xyz coordinates are in meters.       NaN       NaN   
 3            X is positive to the right of the camera       NaN       NaN   
 4   Y is positive towards the opposite side of the...       NaN       NaN   
 5                   Z is positive up out of the court       NaN       NaN   
 6                                            22:48:10  00:02:45       NaN   
 7                                                 NaN       NaN       NaN   
 8                Note: xyz coordinates are in meters.       NaN       NaN   
 9            X is positive to the right of the camera       NaN       NaN   
 10  Y is positive towards the opposite side of the...       NaN       NaN   
 11                  Z is positive up out of the cou

In [6]:
# Pull out the three sheets that the rating calculations read from
stats_df, games_df, points_df = (
    shot_ratings_file[sheet] for sheet in ('Stats', 'Games', 'Points')
)

In [8]:
# Preview the combined Stats sheet (one row per stat, one column per host/guest set)
stats_df

Unnamed: 0,Stat Name,Host Set 1,Guest Set 1,Host Set 2,Guest Set 2,Host Set 3,Guest Set 3,Host Set 4,Guest Set 4,Host Set 5,Guest Set 5,__source_file__
0,1st Serves,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
1,1st Serves In,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
2,1st Serves Won,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
3,2nd Serves,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
4,2nd Serves In,0.0,0.0,,,,,,,,,RudyQuan_Cal_1_25_25.xlsx
...,...,...,...,...,...,...,...,...,...,...,...,...
265,Forehand Forced Errors,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
266,Backhand Forced Errors,0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
267,Calories Burned (CAL),0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx
268,Distance Run (MI),0.0,0.0,0.0,0.0,,,,,,,RudyQuan_USC_2_22_25.xlsx


In [7]:
# Keep only the stat rows that have a value recorded for 'Host Set 2'
# NOTE(review): presumably this drops matches without a second set -- confirm intent
filtered_stats_df = stats_df.dropna(subset=['Host Set 2'])


<span style="font-size:24px;">Functions </span>

In [42]:
def get_total(stat_name, stats=None, column_prefix='Host Set'):
    """Sum one statistic over every column whose label starts with ``column_prefix``.

    Parameters
    ----------
    stat_name : str
        Value to look for in the 'Stat Name' column (compared after stripping
        surrounding whitespace from the column's values).
    stats : pandas.DataFrame, optional
        Table with a 'Stat Name' column and per-set value columns. When omitted,
        the notebook-level ``filtered_stats_df`` is used, so existing
        one-argument calls behave exactly as before.
    column_prefix : str, default 'Host Set'
        Only columns whose label starts with this prefix are summed
        (e.g. 'Guest Set' to total the guest side instead).

    Returns
    -------
    number
        The summed value, or 0 when no row matches ``stat_name``.
    """
    if stats is None:
        # Fall back to the notebook global so callers that pass only a stat name keep working.
        stats = filtered_stats_df

    matching_rows = stats[stats['Stat Name'].str.strip() == stat_name]
    if matching_rows.empty:
        return 0

    # str() keeps a non-string column label from raising on .startswith
    set_columns = [
        label for label in matching_rows.columns
        if str(label).startswith(column_prefix)
    ]

    # errors='coerce' turns non-numeric cells into NaN, which .sum() skips
    return sum(
        pd.to_numeric(matching_rows[label], errors='coerce').sum()
        for label in set_columns
    )


def calculate_service_games_won(games_df):
    """Return the fraction of decided host-served games that the host won.

    Rows count when 'Server' is 'host' and 'Game Winner' is anything other
    than 'draw'. Returns None when no such rows exist.
    """
    served_by_host = games_df['Server'] == 'host'
    has_winner = games_df['Game Winner'] != 'draw'
    decided_service_games = games_df[served_by_host & has_winner]

    total_games = len(decided_service_games)
    if total_games == 0:
        # Nothing to divide by
        return None

    held_games = decided_service_games[decided_service_games['Game Winner'] == 'host']
    return len(held_games) / total_games

def calculate_return_games_won(games_df):
    """Return the fraction of decided guest-served games that the host won.

    Rows count when 'Server' is 'guest' and 'Game Winner' is anything other
    than 'draw'; a host win on one of those rows is a return game won.
    Returns None when no such rows exist.
    """
    served_by_guest = games_df['Server'] == 'guest'
    has_winner = games_df['Game Winner'] != 'draw'
    decided_return_games = games_df[served_by_guest & has_winner]

    total_games = len(decided_return_games)
    if total_games == 0:
        # Nothing to divide by
        return None

    broken_games = decided_return_games[decided_return_games['Game Winner'] == 'host']
    return len(broken_games) / total_games

def calculate_transition_points(points_df, games_df):
    """Return the share of "transition" points whose 'Point Winner' is 'host'.

    A row of ``points_df`` is a transition when its 'Set' value is not 1 and the
    next row's 'Set' value is 1. The final row has no successor, so it is counted
    as a transition only when its 'Point Winner' equals the 'Game Winner' of the
    last row of ``games_df``.

    Parameters
    ----------
    points_df : pandas.DataFrame
        Needs 'Set' and 'Point Winner' columns.
    games_df : pandas.DataFrame
        Needs a 'Game Winner' column; only its last row is read.

    Returns
    -------
    float or None
        host transitions / all transitions, or None when ``points_df`` is empty
        or contains no transitions.
    """
    # Without this guard the last-row lookups below raise IndexError on empty input.
    if points_df.empty:
        return None

    # Rows outside set 1 that are immediately followed by a set-1 row.
    # NOTE(review): presumably these are the closing points of multi-set matches
    # stacked in the combined sheet -- confirm against the workbook layout.
    transitions = (points_df['Set'] != 1) & (points_df['Set'].shift(-1) == 1)

    # The last row can never satisfy the shift test, so it is forced in when the
    # final point and the final game share a winner.
    # NOTE(review): this does not check that the last row's 'Set' is not 1 -- confirm.
    if not games_df.empty:
        last_point_winner = points_df['Point Winner'].iloc[-1]
        last_game_winner = games_df['Game Winner'].iloc[-1]
        if last_point_winner == last_game_winner:
            transitions.iloc[-1] = True

    # Denominator: all transitions
    transition_count = transitions.sum()
    if transition_count == 0:
        return None

    # Numerator: transitions won by the host
    host_transition_points_count = ((points_df['Point Winner'] == 'host') & transitions).sum()
    return host_transition_points_count / transition_count

def calculate_tiebreaks_won_percent(games_df):
    """Return the fraction of 6-6 rows in ``games_df`` whose 'Game Winner' is 'host'.

    A row is treated as a tiebreak when both 'Host Set Score' and
    'Guest Set Score' equal 6. Returns None when there are no such rows.
    """
    host_at_six = games_df['Host Set Score'] == 6
    guest_at_six = games_df['Guest Set Score'] == 6
    tiebreak_rows = games_df.loc[host_at_six & guest_at_six]

    played = len(tiebreak_rows)
    if played == 0:
        # No tiebreaks, so there is no ratio to report
        return None

    won_by_host = (tiebreak_rows['Game Winner'] == 'host').sum()
    return won_by_host / played


<span style="font-size:24px;">Serve Rating </span> <br>
<span style="font-size:20px;">1st Serve %, 1st Serve Points Won %, 2nd Serve Points Won %, Service Games Won %, Avg. Aces/Match, Avg. Double Faults/Match </span>


In [38]:
serve_results = {}

# Look each stat total up once; the denominators double as zero-guards below.
first_serves = get_total('1st Serves')
first_serves_in = get_total('1st Serves In')
first_serves_won = get_total('1st Serves Won')
second_serves_in = get_total('2nd Serves In')
second_serves_won = get_total('2nd Serves Won')

# 1st Serve %
serve_results['1st Serve %'] = first_serves_in / first_serves if first_serves else None

# 1st Serve Points Won %
serve_results['1st Serve Points Won %'] = first_serves_won / first_serves_in if first_serves_in else None

# 2nd Serve Points Won %
serve_results['2nd Serve Points Won %'] = second_serves_won / second_serves_in if second_serves_in else None

# Service Games Won %
serve_results['Service Games Won %'] = calculate_service_games_won(games_df)

# Avg. Aces/Match: one 'Aces' row per match in the filtered stats, so the row count is the match count
aces_total = get_total('Aces')
aces_count = (filtered_stats_df['Stat Name'].str.strip() == 'Aces').sum()
serve_results['Avg. Aces/Match'] = float(aces_total) / aces_count if aces_count else None

# Avg. Double Faults/Match: host double faults counted point by point
# NOTE(review): points_df (and games_df above) are not restricted to the matches kept in
# filtered_stats_df, while match_count is -- confirm numerator and denominator cover the same matches.
host_double_faults = (points_df['Match Server'] == 'host') & (points_df['Detail'] == 'Double Fault')
double_fault_total = len(points_df[host_double_faults])
# Count matches by number of '1st Serves' entries in filtered_stats_df
match_count = (filtered_stats_df['Stat Name'].str.strip() == '1st Serves').sum()
serve_results['Avg. Double Faults/Match'] = double_fault_total / match_count if match_count else None

# Serve Rating = four percentages (on a 0-100 scale) + aces per match - double faults per match.
# Components that could not be computed (None) contribute nothing.
serve_rating = 0
for key in ('1st Serve %', '1st Serve Points Won %', '2nd Serve Points Won %', 'Service Games Won %'):
    if serve_results[key] is not None:
        serve_rating += serve_results[key] * 100

if serve_results['Avg. Aces/Match'] is not None:
    serve_rating += serve_results['Avg. Aces/Match']
if serve_results['Avg. Double Faults/Match'] is not None:
    serve_rating -= serve_results['Avg. Double Faults/Match']

# Print the serve results: per-match averages as plain floats, everything else as percentages
for k, v in serve_results.items():
    if v is None:
        print(f"{k}: N/A")
    elif k in ('Avg. Aces/Match', 'Avg. Double Faults/Match'):
        print(f"{k}: {v:.2f}")
    else:
        print(f"{k}: {v:.2%}")

# Print Serve Rating
print(f"Serve Rating: {serve_rating:.2f}")


1st Serve %: 72.22%
1st Serve Points Won %: 66.15%
2nd Serve Points Won %: 58.82%
Service Games Won %: 72.31%
Avg. Aces/Match: 0.43
Avg. Double Faults/Match: 2.14
Serve Rating: 267.79


<span style="font-size:24px;">Return Rating </span> <br>
<span style="font-size:20px;"> 1st Serve Return Points Won %, 2nd Serve Return Points Won %, Return Games Won %, Break Points Converted % </span>


In [39]:
return_results = {}

# Look each denominator up once; it doubles as the zero-guard.
first_returns = get_total('1st Returns')
second_returns = get_total('2nd Returns')
break_point_opportunities = get_total('Break Point Opportunities')

# 1st Serve Return Points Won %
return_results['1st Serve Return Points Won %'] = get_total('1st Returns Won') / first_returns if first_returns else None

# 2nd Serve Return Points Won %
return_results['2nd Serve Return Points Won %'] = get_total('2nd Returns Won') / second_returns if second_returns else None

# Return Games Won %
return_results['Return Games Won %'] = calculate_return_games_won(games_df)

# Break Points Converted %
return_results['Break Points Converted %'] = get_total('Break Points Won') / break_point_opportunities if break_point_opportunities else None

# Return Rating = sum of the four percentages on a 0-100 scale.
# Components that could not be computed (None) contribute nothing.
return_rating = 0
for key in (
    '1st Serve Return Points Won %',
    '2nd Serve Return Points Won %',
    'Break Points Converted %',
    'Return Games Won %',
):
    if return_results[key] is not None:
        return_rating += return_results[key] * 100

# Print the return results; every entry here is a ratio, so all are shown as percentages
for k, v in return_results.items():
    if v is not None:
        print(f"{k}: {v:.2%}")
    else:
        print(f"{k}: N/A")

# Print Return Rating
print(f"Return Rating: {return_rating:.2f}")

1st Serve Return Points Won %: 40.58%
2nd Serve Return Points Won %: 64.75%
Return Games Won %: 52.38%
Break Points Converted %: 60.38%
Return Rating: 218.09


<span style="font-size:24px;">Under Pressure Rating </span> <br>
<span style="font-size:20px;"> Break Points Converted %, Break Points Saved %, Tie Breaks Won %, Deciding Sets Won % </span>


In [46]:
pressure_results = {}

# Look each denominator up once; it doubles as the zero-guard.
break_point_opportunities = get_total('Break Point Opportunities')
break_points_faced = get_total('Break Points')

# Break Points Converted %
pressure_results['Break Points Converted %'] = get_total('Break Points Won') / break_point_opportunities if break_point_opportunities else None

# Break Points Saved %
pressure_results['Break Points Saved %'] = get_total('Break Points Saved') / break_points_faced if break_points_faced else None

# Tie Breaks Won %
pressure_results['Tie Breaks Won %'] = calculate_tiebreaks_won_percent(games_df)

# Deciding Sets Won %
pressure_results['Deciding Sets Won %'] = calculate_transition_points(points_df, games_df)

# Pressure Rating = sum of the four percentages on a 0-100 scale.
# Components that could not be computed (None) contribute nothing.
pressure_rating = 0
for key in (
    'Break Points Converted %',
    'Break Points Saved %',
    'Tie Breaks Won %',
    'Deciding Sets Won %',
):
    if pressure_results[key] is not None:
        pressure_rating += pressure_results[key] * 100

# Print the pressure results; every entry here is a ratio, so all are shown as percentages
for k, v in pressure_results.items():
    if v is not None:
        print(f"{k}: {v:.2%}")
    else:
        print(f"{k}: N/A")

# Print Pressure Rating
print(f"Pressure Rating: {pressure_rating:.2f}")

Break Points Converted %: 60.38%
Break Points Saved %: 56.10%
Tie Breaks Won %: N/A
Deciding Sets Won %: 85.71%
Pressure Rating: 202.19
