# Return Statistics Notebook

### Load Packages

In [1]:
import pandas as pd
import numpy as np
import os
import json
import ipywidgets as widgets
from IPython.display import display
from collections import defaultdict

### Create Data Folder

In [2]:
def create_data_folder():
    """Make sure the ``data`` output folder exists in the working directory.

    Creates the folder when it is missing and prints a one-line status
    message in either case. Returns nothing.
    """
    folder_name = "data"

    # Guard clause: nothing to create when the folder is already present.
    if os.path.exists(folder_name):
        print(f"Folder '{folder_name}' already exists.")
        return

    os.makedirs(folder_name)
    print(f"Folder '{folder_name}' created.")

In [3]:
# Make sure the relative "data" folder exists before later cells write
# JSON/CSV files into it.
create_data_folder()

Folder 'data' created.


### Read Data

In [4]:
# Player picker. The first option ('-- Select --') is a placeholder so the
# next cell can detect that no real choice was made.
# NOTE(review): the widget label reads 'Category:' although every option is a
# player name.
choose_player_dropdown = widgets.Dropdown(
    options=['-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun', 'Alexander Hoogmartens', 
             'Spencer Johnson', 'Aadarsh Tripathi', 'Giacomo Revelli', 'Gianluca Ballotta'],
    value='-- Select --',
    description='Category:'
)

display(choose_player_dropdown)

Dropdown(description='Category:', options=('-- Select --', 'Rudy Quan', 'Emon Van Loben Sels', 'Kaylan Bigun',…

In [5]:
# Stop here while the placeholder is still selected: the input and output
# paths built in later cells are derived from the chosen player name.
if choose_player_dropdown.value == '-- Select --':
    raise ValueError("Please choose a valid category from the dropdown menu in the previous cell before proceeding.")

# A real option was picked; keep it for building file paths below.
player_name = choose_player_dropdown.value

In [6]:
# Load the player's combined workbook. Passing a list of sheet names makes
# pandas open and parse the file once and return a {sheet name: DataFrame}
# dict, instead of re-opening the same workbook for every sheet.
combined_workbook_path = f'../../data/mens/{player_name}/combined.xlsx'
combined_sheets = pd.read_excel(
    combined_workbook_path,
    sheet_name=['Shots', 'Points', 'Games', 'Sets', 'Stats'],
)

# Keep the original per-sheet variable names used by the rest of the notebook.
combined_data_shots = combined_sheets['Shots']
combined_data_points = combined_sheets['Points']
combined_data_games = combined_sheets['Games']
combined_data_sets = combined_sheets['Sets']
combined_data_stats = combined_sheets['Stats']

### Coordinate Transformation

In [7]:
# Rescale the raw hit/bounce coordinates in place: x is multiplied by
# 38.2764654418; y is first shifted by 11.8872 and then multiplied by the
# same factor.
# NOTE(review): 11.8872 * 38.2764654418 is ~455, the same magnitude used as
# the 'inside' contact threshold later in the notebook — presumably the
# baseline distance from the net in plot units; confirm.
# NOTE: this cell overwrites its own inputs, so running it twice rescales the
# already-rescaled values. Re-run the data-loading cell first.
combined_data_shots["Hit (x)"] = combined_data_shots["Hit (x)"] * 38.2764654418
combined_data_shots["Hit (y)"] = (combined_data_shots["Hit (y)"] - 11.8872) * 38.2764654418

combined_data_shots["Bounce (x)"] = combined_data_shots["Bounce (x)"] * 38.2764654418
combined_data_shots["Bounce (y)"] = (combined_data_shots["Bounce (y)"] - 11.8872) * 38.2764654418


### Fastest Return

In [8]:
def fastest_return(data):
    """Return the highest return speed, rounded to a whole number.

    Only shots whose 'Type' is 'first_return' or 'second_return' are
    considered. Per the original author's note, 'return_serve' (a return
    after a serve that was classified as out) is deliberately left out.

    Parameters
    ----------
    data : pandas.DataFrame
        Shot table with 'Type' and 'Speed (MPH)' columns.

    Returns
    -------
    int
        Maximum 'Speed (MPH)' among the selected shots, rounded.
    """
    return_types = ['first_return', 'second_return']
    is_return = data['Type'].isin(return_types)
    top_speed = data.loc[is_return, 'Speed (MPH)'].max()
    return int(round(top_speed, 0))

In [9]:
# NOTE: this rebinds the name `fastest_return` from the function to its
# integer result. Running this cell a second time without re-running the
# definition cell fails because an int is not callable.
fastest_return = fastest_return(combined_data_shots)
fastest_return

90

### Favorite Return

##### Helper Function

In [10]:
# Exploratory expressions left in the cell. Neither one is the last statement
# of the cell, so neither value is displayed.
combined_data_shots['Hit Zone'].unique()


# Substring check of the kind the helper defined below relies on.
'deuce' in 'deuce_out'

def normalize_hit_zone(zone):
    """Collapse a raw hit-zone label to 'deuce' or 'ad' by substring match.

    A string containing "deuce" maps to 'deuce'; otherwise a string
    containing "ad" maps to 'ad'. Non-strings and strings matching neither
    are returned unchanged. Matching is case-sensitive.

    NOTE: `normalize_hit_zone` is defined again in later cells of this
    notebook; the most recently executed definition is the one in effect.
    """
    # Non-string values (e.g. missing data) pass straight through.
    if not isinstance(zone, str):
        return zone

    # "deuce" is tested first, mirroring the original if/elif order.
    for side in ("deuce", "ad"):
        if side in zone:
            return side
    return zone

In [11]:
def classify_return_direction(row, middle_half_width=52.5):
    """Label a return as "Middle", "Cross Court", "Down the Line" or "Unknown".

    The label is derived from the bounce x-coordinate, the side of the court
    the ball was hit from, and the service-box side of the hit zone.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "Bounce (x)", "Hit Side" ("far"/"near") and "Hit Zone".
        A string zone containing "deuce" or "ad" is collapsed to that side.
    middle_half_width : float, optional
        Bounces with ``-middle_half_width <= x <= middle_half_width`` count
        as "Middle". Defaults to 52.5, the value previously hard-coded.

    Returns
    -------
    str
        "Middle" for central bounces; otherwise "Cross Court" or
        "Down the Line" according to the side/zone table below; "Unknown"
        when the side or zone is unrecognised or x is NaN.
    """
    x = row["Bounce (x)"]
    side = row["Hit Side"]
    zone_raw = row["Hit Zone"]

    # Only strings can be substring-matched. A missing zone (NaN/None) used to
    # raise TypeError here; it is now kept as-is and ends up as "Unknown" for
    # wide bounces.
    zone = zone_raw
    if isinstance(zone_raw, str):
        if "deuce" in zone_raw:
            zone = "deuce"
        elif "ad" in zone_raw:
            zone = "ad"

    if -middle_half_width <= x <= middle_half_width:
        return "Middle"

    # (hit side, zone) -> (label for a bounce left of the middle band,
    #                      label for a bounce right of it)
    labels_by_position = {
        ("far", "ad"): ("Cross Court", "Down the Line"),
        ("far", "deuce"): ("Down the Line", "Cross Court"),
        ("near", "ad"): ("Down the Line", "Cross Court"),
        ("near", "deuce"): ("Cross Court", "Down the Line"),
    }

    labels = None
    # Skip the lookup for non-string values so an odd cell cannot break it.
    if isinstance(side, str) and isinstance(zone, str):
        labels = labels_by_position.get((side, zone))

    if labels is not None:
        if x < -middle_half_width:
            return labels[0]
        if x > middle_half_width:
            return labels[1]

    # Unrecognised side/zone, or x is NaN (every comparison above is False).
    return "Unknown"

In [12]:
def fav_return(shots_data, points_data):
    """Return the most frequent return direction as a short label.

    Shots are joined to their point on Point/Game/Set/__source_file__, then
    restricted to shot number 2 of type 'first_return'/'second_return' in
    points served by the guest. Each remaining shot is classified with
    `classify_return_direction`. Returns are counted regardless of their
    'Result' (no in/out filter is applied).

    Returns
    -------
    str
        'Cross', 'Middle' or 'Line' — whichever occurs most often. Shots
        classified as "Unknown" have no short label and are not counted.
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_context = points_data[join_keys + ['Match Server', 'Point Winner']]
    merged = shots_data.merge(point_context, on=join_keys, how='left')

    guest_serving = merged["Match Server"] == "guest"
    second_shot = merged["Shot"] == 2
    is_return = merged["Type"].isin(['first_return', 'second_return'])
    returns_df = merged[guest_serving & second_shot & is_return].copy()

    returns_df["return_zone"] = returns_df.apply(classify_return_direction, axis=1)

    # Shorten the labels; anything outside this mapping becomes NaN and is
    # therefore ignored by value_counts below.
    zone_labels = {'Cross Court': 'Cross', 'Middle': 'Middle', 'Down the Line': 'Line'}
    returns_df["return_zone"] = returns_df["return_zone"].map(zone_labels)

    zone_counts = returns_df["return_zone"].value_counts()
    return zone_counts.idxmax()

In [13]:
# Most frequent return direction ('Cross', 'Middle' or 'Line') for the selected player.
favorite_return = fav_return(combined_data_shots, combined_data_points)
favorite_return

'Middle'

### Favorite Return Stroke

In [14]:
def fav_stroke(shots_data, points_data):
    """Return the stroke used most often on returns, as 'FH' or 'BH'.

    Shots are joined to their point on Point/Game/Set/__source_file__, then
    restricted to shot number 2 of type 'first_return'/'second_return' in
    points served by the guest. No filter on 'Result' is applied.

    Returns
    -------
    str
        'FH' or 'BH'. Strokes other than 'Forehand'/'Backhand' have no short
        label and are not counted.
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_context = points_data[join_keys + ['Match Server', 'Point Winner']]
    merged = shots_data.merge(point_context, on=join_keys, how='left')

    keep = (
        (merged["Match Server"] == "guest")
        & (merged["Shot"] == 2)
        & merged["Type"].isin(['first_return', 'second_return'])
    )
    returns_df = merged[keep].copy()

    # Unmapped stroke names become NaN and drop out of the count.
    returns_df['Stroke'] = returns_df['Stroke'].map({'Forehand': 'FH', 'Backhand': 'BH'})

    return returns_df["Stroke"].value_counts().idxmax()

In [15]:
# Most frequently used return stroke ('FH' or 'BH') for the selected player.
favorite_stroke = fav_stroke(combined_data_shots, combined_data_points)
favorite_stroke

'BH'

### Average Return Game Duration

In [16]:
def average_service_time(data):
    """Average duration of games served by the guest, formatted as "M:SS".

    Despite the name, the rows selected are those with 'Server' == 'guest'.
    The original author's note says the host is always the UCLA player, which
    would make these the player's return games.

    Parameters
    ----------
    data : pandas.DataFrame
        Games table with 'Server' and 'Duration' columns; 'Duration' is
        divided by 60 to get minutes, so it is treated as seconds.

    Returns
    -------
    str
        Mean duration rounded to the nearest second, e.g. "4:48". Missing
        durations are skipped by the mean.
    """
    guest_durations = data.loc[data['Server'] == 'guest', 'Duration']
    avg_seconds = guest_durations.mean()

    total = int(round(avg_seconds))
    mins = total // 60
    secs = total % 60

    return f"{mins}:{secs:02d}"

In [17]:
# Average duration of guest-served games, split into minute and second strings.
# NOTE: the variable is called "service" game duration, but the helper
# averages games where the guest serves (see average_service_time).
# NOTE: assigning to `min` shadows Python's built-in min() for the rest of the
# session; the summary cell near the end reads this `min` variable.
avg_service_game_duration = average_service_time(combined_data_games)
min, sec = avg_service_game_duration.split(':')
min, sec

('4', '48')

### Return Rating

##### Helper Function

In [18]:
# Helper Function: totals a named stat across every 'Host Set...' column
def get_total(df, stat_name):
    """Sum one stat over all columns whose name starts with 'Host Set'.

    Parameters
    ----------
    df : pandas.DataFrame
        Stats table with a 'Stat Name' column plus per-set columns.
    stat_name : str
        Value of 'Stat Name' to select.

    Returns
    -------
    number
        The plain int 0 when no row matches (or no 'Host Set' column exists);
        otherwise the accumulated total of the per-column sums.
        Values that cannot be parsed as numbers are coerced to NaN and
        skipped by the sum.
    """
    matching_rows = df[df['Stat Name'] == stat_name]

    # TODO (from original author): should a missing stat raise instead?
    if matching_rows.empty:
        return 0

    host_set_columns = [
        column for column in matching_rows.columns if column.startswith('Host Set')
    ]

    # TODO (from original author): unparseable values are silently ignored
    # here; consider raising because they may indicate bad data.
    return sum(
        pd.to_numeric(matching_rows[column], errors='coerce').sum()
        for column in host_set_columns
    )

In [19]:
# Helper Function: Calculate Return Games Won

def calculate_return_games_won(df):
    """Fraction of guest-served, non-draw games that the host won.

    Parameters
    ----------
    df : pandas.DataFrame
        Games table with 'Server' and 'Game Winner' columns.

    Returns
    -------
    float
        Value between 0 and 1 (not a percentage).

    Raises
    ------
    ZeroDivisionError
        When there is no guest-served game with a winner other than 'draw'.
    """
    guest_served = df['Server'] == 'guest'
    decided = df['Game Winner'] != 'draw'
    guest_service_games = df[guest_served & decided]

    # A return game is "won" when the host takes a game the guest served.
    host_breaks = guest_service_games.loc[guest_service_games['Game Winner'] == 'host']

    return len(host_breaks) / len(guest_service_games)

In [20]:
# The four components of the return rating, each as a percentage rounded to
# one decimal place.
# NOTE: `.round(1)` is called on the result of the division, so it relies on
# get_total returning a NumPy scalar. get_total returns a plain 0 when a stat
# name is not found, in which case these expressions fail.

# 1st Serve Return Points Won Percentage
first_serve_returns_won = ((get_total(combined_data_stats, '1st Returns Won') / get_total(combined_data_stats, '1st Returns')) * 100).round(1)

# 2nd Serve Return Points Won Percentage
second_serve_returns_won = ((get_total(combined_data_stats, '2nd Returns Won') / get_total(combined_data_stats, '2nd Returns')) * 100).round(1)

# Return Games Won Percentage
return_games_won_percentage = round(calculate_return_games_won(combined_data_games) * 100, 1)

# % Break Points Converted Percentage
break_points_converted_percentage = ((get_total(combined_data_stats, 'Break Points Won') / get_total(combined_data_stats, 'Break Point Opportunities')) * 100).round(1)

In [21]:
# NOTE(review): this cell repeats the previous cell statement for statement
# and recomputes the same four values; it looks like an accidental duplicate.

# 1st Serve Return Points Won Percentage
first_serve_returns_won = ((get_total(combined_data_stats, '1st Returns Won') / get_total(combined_data_stats, '1st Returns')) * 100).round(1)

# 2nd Serve Return Points Won Percentage
second_serve_returns_won = ((get_total(combined_data_stats, '2nd Returns Won') / get_total(combined_data_stats, '2nd Returns')) * 100).round(1)

# Return Games Won Percentage
return_games_won_percentage = round(calculate_return_games_won(combined_data_games) * 100, 1)

# % Break Points Converted Percentage
break_points_converted_percentage = ((get_total(combined_data_stats, 'Break Points Won') / get_total(combined_data_stats, 'Break Point Opportunities')) * 100).round(1)

In [22]:
# Calculate Return Rating: the plain sum of the four percentages above
# (not an average), rounded to one decimal place.
return_rating = round(first_serve_returns_won + second_serve_returns_won + return_games_won_percentage + break_points_converted_percentage, 1)

# Print Calculations
print(f"First Serve Return Points Won %: {first_serve_returns_won}%")
print(f"Second Serve Return Points Won %: {second_serve_returns_won}%")
print(f"Return Games Won %: {return_games_won_percentage}%")
print(f"Break Points Converted %: {break_points_converted_percentage}%")
print(f"Return Rating: {return_rating}")


First Serve Return Points Won %: 38.7%
Second Serve Return Points Won %: 47.5%
Return Games Won %: 34.1%
Break Points Converted %: 57.5%
Return Rating: 177.8


### Total Returns Won

In [23]:
def returns_won(shots_data, points_data):
    """Share of return points won by the host, plus a JSON side file.

    Shots are joined to their point on Point/Game/Set/__source_file__, then
    restricted to shot number 2 of type 'first_return'/'second_return' in
    points served by the guest. The points are tallied by 'Point Winner'.

    Side effect: writes ``data/total_returns.json`` with the keys "type",
    "total" (host + guest points) and "won" (host points). The file is
    written before the percentage is computed.

    Returns
    -------
    str
        Host share as a whole-number percentage, e.g. "41%".

    Raises
    ------
    ZeroDivisionError
        When no qualifying point was won by either 'host' or 'guest'.
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_context = points_data[join_keys + ['Match Server', 'Point Winner']]
    merged = shots_data.merge(point_context, on=join_keys, how='left')

    keep = (
        (merged["Match Server"] == "guest")
        & (merged["Shot"] == 2)
        & merged["Type"].isin(['first_return', 'second_return'])
    )
    winner_counts = merged[keep].copy()["Point Winner"].value_counts()

    host = int(winner_counts.get("host", 0))
    guest = int(winner_counts.get("guest", 0))
    total = guest + host

    # Persist the raw counts for downstream use.
    with open("data/total_returns.json", "w") as f:
        json.dump(
            {"type": "Total Returns Won", "total": total, "won": host},
            f,
            indent=4,
        )

    host_ratio = host / (guest + host)

    return f"{round(host_ratio * 100)}%"

In [24]:
# Host share of return points as a percentage string; also writes data/total_returns.json.
total_returns_won_percentage = returns_won(combined_data_shots, combined_data_points)
total_returns_won_percentage

'41%'

### Stroke Distribution

In [25]:
def stroke_distribution(shots_data, points_data):
    """Forehand/backhand split of returns, plus a JSON side file.

    Shots are joined to their point on Point/Game/Set/__source_file__, then
    restricted to shot number 2 of type 'first_return'/'second_return' in
    points served by the guest. Shares are computed over all non-missing
    'Stroke' values, so the two numbers need not add up to 100 when other
    stroke names are present or because of rounding.

    Side effect: writes ``data/stroke_distribution.json`` with the keys
    "type", "forehand" and "backhand".

    Returns
    -------
    tuple of str
        (forehand percentage, backhand percentage), e.g. ('42%', '58%').
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_context = points_data[join_keys + ['Match Server', 'Point Winner']]
    merged = shots_data.merge(point_context, on=join_keys, how='left')

    guest_serving = merged["Match Server"] == "guest"
    second_shot = merged["Shot"] == 2
    is_return = merged["Type"].isin(['first_return', 'second_return'])
    returns_df = merged[guest_serving & second_shot & is_return].copy()

    # Relative frequency of each stroke among the selected returns.
    shares = returns_df["Stroke"].value_counts(normalize=True)
    forehand = round(shares.get("Forehand", 0) * 100)
    backhand = round(shares.get("Backhand", 0) * 100)

    payload = {
        "type": "Stroke Distribution",
        "forehand": forehand,
        "backhand": backhand
    }
    with open("data/stroke_distribution.json", "w") as f:
        json.dump(payload, f, indent=4)

    return f"{forehand}%", f"{backhand}%"

In [26]:
# Forehand/backhand share of returns as percentage strings; also writes data/stroke_distribution.json.
forehand_ratio, backhand_ratio = stroke_distribution(combined_data_shots, combined_data_points)
forehand_ratio, backhand_ratio

('42%', '58%')

### Return Zone Distribution Bars

In [27]:
def normalize_hit_zone(zone):
    """Reduce a raw hit-zone label to 'deuce' or 'ad' (substring match).

    Strings containing "deuce" become 'deuce'; other strings containing
    "ad" become 'ad'. Anything else, including non-string values, is
    returned as given. The match is case-sensitive.

    NOTE: this repeats a definition from an earlier cell, and another
    definition of the same name appears later in the notebook.
    """
    is_text = isinstance(zone, str)
    if is_text and "deuce" in zone:
        return 'deuce'
    if is_text and "ad" in zone:
        return 'ad'
    return zone

In [28]:
def return_zone_bar(shots_data, points_data):
    """Write return-direction counts per serve, side and stroke to JSON.

    Shots are joined to their point on Point/Game/Set/__source_file__, hit
    zones are normalised with `normalize_hit_zone`, and the data is
    restricted to shot number 2 of type 'first_return'/'second_return' in
    points served by the guest. Each return is classified with
    `classify_return_direction` and counted per
    (Serve State, Hit Zone, Stroke, return_zone).

    Side effect: writes ``data/return_zones.json`` shaped as
    ``{"returns": [{"type": "First Return", "sides": {side: {stroke:
    {direction: count}}}}, ...]}``. Only 'first'/'second' serve states,
    'deuce'/'ad' sides and 'Forehand'/'Backhand' strokes are emitted, and
    empty levels are omitted.

    Returns
    -------
    None
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_context = points_data[join_keys + ['Match Server', 'Serve State', 'Point Winner']]
    merged = shots_data.merge(point_context, on=join_keys, how='left')

    merged["Hit Zone"] = merged["Hit Zone"].apply(normalize_hit_zone)

    keep = (
        (merged["Match Server"] == "guest")
        & (merged["Shot"] == 2)
        & merged["Type"].isin(['first_return', 'second_return'])
    )
    returns_df = merged[keep].copy()
    returns_df["return_zone"] = returns_df.apply(classify_return_direction, axis=1)

    group_columns = ['Serve State', 'Hit Zone', 'Stroke', 'return_zone']
    counts = returns_df[group_columns].value_counts().sort_index()

    # serve state -> side -> stroke -> {direction: count}
    tally = {}
    for (serve_state, hit_zone, stroke, return_zone), count in counts.items():
        by_side = tally.setdefault(serve_state, {})
        by_stroke = by_side.setdefault(hit_zone, {})
        by_stroke.setdefault(stroke, {})[return_zone] = count

    # Emit a fixed ordering and skip anything without data.
    returns_json = []
    for serve_type in ('first', 'second'):
        sides = {}
        for side in ('deuce', 'ad'):
            available = tally.get(serve_type, {}).get(side, {})
            strokes = {
                stroke: available[stroke]
                for stroke in ('Forehand', 'Backhand')
                if stroke in available
            }
            if strokes:
                sides[side] = strokes
        if sides:
            returns_json.append({
                "type": f"{serve_type.capitalize()} Return",
                "sides": sides
            })

    # Write the final structure to disk as JSON.
    with open("data/return_zones.json", "w") as f:
        json.dump({"returns": returns_json}, f, indent=2)

In [29]:
# Writes data/return_zones.json; the function returns nothing to display.
return_zone_bar(combined_data_shots, combined_data_points)

### Return In Percentage (First + Second)

In [30]:
def _format_pct(numerator, denominator):
    """Format numerator/denominator as a whole-number percent string ("0%" if the denominator is 0)."""
    if denominator == 0:
        return "0%"
    return f"{int(round((numerator / denominator) * 100, 0))}%"


def return_in(shots_data, points_data, serve_state, side):
    """Percentage of returns that landed in, and of those, points won.

    Shots are joined to their point on Point/Game/Set/__source_file__, hit
    zones are normalised with `normalize_hit_zone`, and the data is
    restricted to 'first_return'/'second_return' shots in guest-served
    points hit from `side` with the given `serve_state`.

    NOTE(review): unlike the other return helpers in this notebook, no
    ``Shot == 2`` filter is applied here — confirm that is intended.

    Parameters
    ----------
    shots_data, points_data : pandas.DataFrame
        Shot and point tables sharing the four join columns.
    serve_state : str
        Value of 'Serve State' to keep (the notebook passes 'first'/'second').
    side : str
        Normalised hit zone to keep (the notebook passes 'deuce'/'ad').

    Returns
    -------
    tuple of str
        (returns in / returns with a recorded result,
         points won by the host on in-returns / in-returns),
        each as a whole-number percentage such as "83%". A ratio whose
        denominator is zero is reported as "0%" instead of raising, which
        matches how the zone/contact summaries report empty groups as 0.0.
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    df = shots_data.merge(
        points_data[join_keys + ['Serve State', 'Match Server', 'Point Winner']],
        how='left',
        on=join_keys,
    )

    df["Hit Zone"] = df["Hit Zone"].apply(normalize_hit_zone)

    serve_returns = df[
        (df['Match Server'] == 'guest')
        & (df['Type'].isin(['first_return', 'second_return']))
        & (df['Hit Zone'] == side)
        & (df['Serve State'] == serve_state)
    ]

    # Count each 'Result' once and reuse the tally (missing results are ignored).
    result_counts = serve_returns['Result'].value_counts()
    returns_total = int(result_counts.sum())
    returns_in = int(result_counts.get('In', 0))

    in_and_won = (serve_returns['Result'] == 'In') & (serve_returns['Point Winner'] == 'host')
    returns_won = int(in_and_won.sum())

    return_in_pct = _format_pct(returns_in, returns_total)
    return_won_pct = _format_pct(returns_won, returns_in)

    return return_in_pct, return_won_pct


In [31]:
# In-percentage and won-percentage for each serve state (first/second) and side (deuce/ad).
first_return_deuce_in_pct, first_return_deuce_won_pct = return_in(combined_data_shots, combined_data_points, 'first', 'deuce')
first_return_ad_in_pct, first_return_ad_won_pct = return_in(combined_data_shots, combined_data_points, 'first', 'ad')

second_return_deuce_in_pct, second_return_deuce_won_pct = return_in(combined_data_shots, combined_data_points, 'second', 'deuce')
second_return_ad_in_pct, second_return_ad_won_pct = return_in(combined_data_shots, combined_data_points, 'second', 'ad')

# Display all eight values.
first_return_deuce_in_pct, first_return_deuce_won_pct, first_return_ad_in_pct, first_return_ad_won_pct, second_return_deuce_in_pct, second_return_deuce_won_pct, second_return_ad_in_pct, second_return_ad_won_pct

('83%', '44%', '89%', '45%', '85%', '55%', '82%', '41%')

### Best Return Zone (First + Second)

In [32]:
def return_zone(shots_data, points_data, serve_state, side):
    """Return direction with the highest host win rate for one serve/side.

    Shots are joined to their point on Point/Game/Set/__source_file__, hit
    zones are normalised with `normalize_hit_zone`, and the data is
    restricted to 'first_return'/'second_return' shots in guest-served
    points hit from `side` with the given `serve_state`. Each return is
    classified with `classify_return_direction`.

    Parameters
    ----------
    shots_data, points_data : pandas.DataFrame
        Shot and point tables sharing the four join columns.
    serve_state : str
        Value of 'Serve State' to keep (the notebook passes 'first'/'second').
    side : str
        Normalised hit zone to keep (the notebook passes 'deuce'/'ad').

    Returns
    -------
    str or None
        'Cross', 'Middle' or 'Line' — the direction with the highest share of
        points won by the host. Directions without any win count as 0% rather
        than being skipped, and None is returned when no classifiable return
        matches; both mirror the behaviour of `return_stroke`.
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    df = shots_data.merge(
        points_data[join_keys + ['Serve State', 'Match Server', 'Point Winner']],
        how='left',
        on=join_keys,
    )

    # Normalize zones
    df["Hit Zone"] = df["Hit Zone"].apply(normalize_hit_zone)

    # Guest-served returns for the requested side and serve state. The copy
    # makes the column assignments below act on an independent frame instead
    # of a filtered slice.
    returns = df[
        (df['Match Server'] == 'guest')
        & (df['Type'].isin(['first_return', 'second_return']))
        & (df['Hit Zone'] == side)
        & (df['Serve State'] == serve_state)
    ].copy()

    # Nothing to classify: a row-wise apply on an empty frame does not
    # produce a column, so stop here.
    if returns.empty:
        return None

    # Classify direction, then shorten the labels ("Unknown" becomes NaN and
    # is excluded from the counts).
    zone_labels = {'Cross Court': 'Cross', 'Middle': 'Middle', 'Down the Line': 'Line'}
    returns["return_zone"] = returns.apply(classify_return_direction, axis=1)
    returns["return_zone"] = returns["return_zone"].map(zone_labels)

    # Attempts and host wins per direction
    total_counts = returns['return_zone'].value_counts()
    win_counts = returns.loc[returns['Point Winner'] == 'host', 'return_zone'].value_counts()

    # Directions that were attempted but never won have no entry in
    # win_counts; treat them as a 0% win rate.
    win_pct = (win_counts / total_counts).fillna(0)

    if win_pct.empty:
        return None

    return win_pct.idxmax()

In [33]:
# Best-performing return direction for each serve state (first/second) and side (deuce/ad).
best_first_return_deuce_zone = return_zone(combined_data_shots, combined_data_points, 'first', 'deuce')
best_first_return_ad_zone = return_zone(combined_data_shots, combined_data_points, 'first', 'ad')

best_second_return_deuce_zone = return_zone(combined_data_shots, combined_data_points, 'second', 'deuce')
best_second_return_ad_zone = return_zone(combined_data_shots, combined_data_points, 'second', 'ad')

# Display the four results.
best_first_return_deuce_zone, best_first_return_ad_zone, best_second_return_deuce_zone, best_second_return_ad_zone

('Cross', 'Line', 'Middle', 'Line')

### Best Return Stroke (Forehand + Backhand)

In [34]:
def return_stroke(shots_data, points_data, serve_state, side):
    """Return stroke ('FH'/'BH') with the highest host win rate for one serve/side.

    Shots are joined to their point on Point/Game/Set/__source_file__, hit
    zones are normalised with `normalize_hit_zone`, and the data is
    restricted to 'first_return'/'second_return' shots in guest-served
    points hit from `side` with the given `serve_state`.

    Returns
    -------
    str or None
        'FH' or 'BH' — the stroke with the highest share of points won by
        the host (a stroke with no wins counts as 0%). None when no return
        with a Forehand/Backhand stroke matches.
    """
    join_keys = ['Point', 'Game', 'Set', '__source_file__']
    point_columns = join_keys + ['Serve State', 'Match Server', 'Point Winner']
    merged = shots_data.merge(points_data[point_columns], how='left', on=join_keys)

    merged["Hit Zone"] = merged["Hit Zone"].apply(normalize_hit_zone)

    # One combined mask: guest serving, a return shot, right side, right serve.
    wanted = (
        (merged['Match Server'] == 'guest')
        & merged['Type'].isin(['first_return', 'second_return'])
        & (merged['Hit Zone'] == side)
        & (merged['Serve State'] == serve_state)
    )
    serve_returns = merged[wanted].copy()

    # Short labels; other stroke names become NaN and drop out of the counts.
    serve_returns['Stroke'] = serve_returns['Stroke'].map({'Forehand': 'FH', 'Backhand': 'BH'})

    attempts = serve_returns['Stroke'].value_counts()
    host_wins = serve_returns.loc[serve_returns['Point Winner'] == 'host', 'Stroke'].value_counts()

    # A stroke that was used but never won has no entry in host_wins -> 0.
    win_pct = (host_wins / attempts).fillna(0)

    return None if win_pct.empty else win_pct.idxmax()

In [35]:
# Best-performing return stroke for each serve state (first/second) and side (deuce/ad).
best_first_return_deuce_stroke = return_stroke(combined_data_shots, combined_data_points, 'first', 'deuce')
best_first_return_ad_stroke = return_stroke(combined_data_shots, combined_data_points, 'first', 'ad')

best_second_return_deuce_stroke = return_stroke(combined_data_shots, combined_data_points, 'second', 'deuce')
best_second_return_ad_stroke = return_stroke(combined_data_shots, combined_data_points, 'second', 'ad')

# Display the four results.
best_first_return_deuce_stroke, best_first_return_ad_stroke, best_second_return_deuce_stroke, best_second_return_ad_stroke

('FH', 'FH', 'BH', 'FH')

### Return Court Visual

##### Helper Functions

In [36]:
def normalize_hit_zone(zone):
    """Reduce a raw hit-zone label to 'deuce' or 'ad' by case-insensitive prefix.

    Strings starting with "deuce" (any casing) become 'deuce'; strings
    starting with "ad" become 'ad'. Every other value, including
    non-strings, is returned unchanged.

    NOTE: this redefines a helper declared in earlier cells, which matched
    by case-sensitive substring instead. Cells executed after this one use
    this prefix-based version.
    """
    if not isinstance(zone, str):
        return zone

    lowered = zone.lower()
    if lowered.startswith('deuce'):
        return 'deuce'
    if lowered.startswith('ad'):
        return 'ad'
    return zone

In [37]:
def normalize_coordinates(row):
    """Mirror one return so contact has y <= 0 and the landing point y >= 0.

    The row is modified in place and returned.

    * Contact point: when 'shotContactY' is positive, both contact
      coordinates are negated.
    * Landing point, 'Result' not 'Net': when 'shotLocationY' is negative,
      both landing coordinates are negated.
    * Landing point, 'Result' is 'Net': 'shotLocationY' is forced to 0 when
      it is non-zero; 'shotLocationX' is negated only if the recorded
      'shotLocationY' was positive.

    Parameters
    ----------
    row : mapping or pandas.Series
        Needs 'shotContactX', 'shotContactY', 'shotLocationX',
        'shotLocationY' and 'Result'.
    """
    # Contact point: flip through the origin when recorded at positive y.
    if row['shotContactY'] > 0:
        row['shotContactX'] = -row['shotContactX']
        row['shotContactY'] = -row['shotContactY']

    hit_net = row['Result'] == 'Net'
    landing_y = row['shotLocationY']

    if hit_net:
        # Netted returns are pinned to the net line (y = 0).
        if landing_y > 0:
            row['shotLocationY'] = 0
            row['shotLocationX'] = -row['shotLocationX']
        elif landing_y < 0:
            row['shotLocationY'] = 0
    elif landing_y < 0:
        # Any other result: flip the landing point to positive y.
        row['shotLocationY'] = -row['shotLocationY']
        row['shotLocationX'] = -row['shotLocationX']

    return row

In [38]:
def classify_return_direction_updated(row):
    """Label a return direction from its normalised landing x-coordinate.

    Unlike `classify_return_direction`, this reads 'shotLocationX' and does
    not look at the hit side; 'Hit Zone' must already be exactly 'ad' or
    'deuce'.

    Returns
    -------
    str
        "Middle" when -52.5 <= x <= 52.5; otherwise "Cross Court" or
        "Down the Line" depending on the zone and the sign of x; "Unknown"
        for any other zone or when x is NaN.
    """
    x = row["shotLocationX"]
    zone = row["Hit Zone"]

    if -52.5 <= x <= 52.5:
        return "Middle"

    # (label for x left of the middle band, label for x right of it)
    if zone == "ad":
        left_label, right_label = "Down the Line", "Cross Court"
    elif zone == "deuce":
        left_label, right_label = "Cross Court", "Down the Line"
    else:
        return "Unknown"

    if x < -52.5:
        return left_label
    if x > 52.5:
        return right_label
    return "Unknown"

In [39]:
# Aliases for the shot/point tables loaded above (same objects, not copies).
# The shot coordinates were already rescaled in place by the coordinate
# transformation cell.
df_shots = combined_data_shots
df_points = combined_data_points

# Attach point-level context (winner, serve state, server, detail) to every shot
combined = df_shots.merge(df_points[['Point', 'Game', 'Set', 'Point Winner', 'Serve State', 'Match Server', 'Detail', '__source_file__']], 
                    on=['Point', 'Game', 'Set', '__source_file__'], 
                    how='left')

# Keep every first/second return hit in points served by the guest.
# NOTE: 'Result' is not filtered here, so returns that did not land in are
# included. Open question from the original author: should this be limited
# to returns that were in?
returns = combined[(combined['Match Server'] == 'guest') & (
                    combined['Type'].isin(['first_return', 'second_return']))
                    ].copy()

# Narrow to the columns the court visual needs
returns = returns[['Serve State', 'Stroke', 'Result', 'Hit Side', 'Hit Zone', 'Hit (x)', 'Hit (y)', 
                'Bounce (x)', 'Bounce (y)', 'Point Winner']]

# Working copies of the coordinates; normalize_coordinates below rewrites
# these and leaves the original 'Hit'/'Bounce' columns untouched.
returns['shotContactX'] = returns['Hit (x)'] 
returns['shotContactY'] = returns['Hit (y)']

returns['shotLocationX'] = returns['Bounce (x)']
returns['shotLocationY'] = returns['Bounce (y)']

# Normalize Hit Zone (uses whichever normalize_hit_zone definition was executed last)
returns['Hit Zone'] = returns['Hit Zone'].apply(normalize_hit_zone)

# Apply function to replace "In" with the value from "Point Winner";
# every other Result value (e.g. 'Net') is kept as-is.
returns["Result"] = returns.apply(
    lambda row: row["Point Winner"] if row["Result"] == "In" else row["Result"],
    axis=1
)

# Use Helper Function: mirror contact/landing points row by row
returns = returns.apply(normalize_coordinates, axis=1)

# Direction based on the original bounce x and hit side
returns["return_zone_original"] = returns.apply(classify_return_direction, axis=1)

# Direction based on the mirrored landing x; this is the column the summaries below read
returns["return_zone"] = returns.apply(classify_return_direction_updated, axis=1)



# classify contact depth from the mirrored contact y:
#   y >= -455            -> 'inside'
#   -513.33 < y < -455   -> 'mid'
#   anything else        -> 'deep' (this includes missing y values)
returns['return_contact'] = returns['shotContactY'].apply(lambda y: 'inside' if y >= -455 
                                                else 'mid' if -455 > y > -513.33
                                                else 'deep'
                                                )

In [40]:
def generate_return_contact(returns, serve, side):
    """Write the returns for one serve state and side to a CSV file.

    Rows where 'Serve State' equals `serve` and 'Hit Zone' equals `side` are
    written, without the index, to ``data/{serve}_{side}_return_court.csv``.
    The input frame is not modified and nothing is returned.
    """
    matches_serve = returns['Serve State'] == serve
    matches_side = returns['Hit Zone'] == side
    selected = returns[matches_serve & matches_side]

    selected.to_csv(f"data/{serve}_{side}_return_court.csv", index=False)


In [41]:
# One CSV per serve state / side combination, written into the data folder.
generate_return_contact(returns, 'first', 'deuce')
generate_return_contact(returns, 'first', 'ad')
generate_return_contact(returns, 'second', 'deuce')
generate_return_contact(returns, 'second', 'ad')

### Return Zone Court Visual Counts + Won

In [42]:
def return_zone_summary(df, serve_state, hit_zone):
    """Count and host win rate per return direction for one serve/side.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'Serve State', 'Hit Zone', 'return_zone' and 'Point Winner'.
    serve_state, hit_zone : str
        Values of 'Serve State' and 'Hit Zone' to keep.

    Returns
    -------
    tuple
        (cross_count, cross_win_pct, middle_count, middle_win_pct,
        line_count, line_win_pct). Win rates are percentages rounded to one
        decimal place, and 0.0 for a direction with no returns.
    """
    in_scope = (df['Serve State'] == serve_state) & (df['Hit Zone'] == hit_zone)
    subset = df[in_scope]

    summary = []
    # Fixed order: cross, middle, line.
    for zone in ("Cross Court", "Middle", "Down the Line"):
        zone_rows = subset[subset['return_zone'] == zone]
        count = len(zone_rows)
        wins = (zone_rows['Point Winner'] == 'host').sum()
        if count > 0:
            win_pct = round((wins / count) * 100, 1)
        else:
            win_pct = 0.0
        summary.append(count)
        summary.append(win_pct)

    return tuple(summary)

In [43]:
# First Serve - Deuce Side
# Each call yields (cross count, cross win %, middle count, middle win %,
# line count, line win %) for one serve state / side combination.

# First Serve - Deuce Side
first_deuce_cross_count, first_deuce_cross_win, \
first_deuce_middle_count, first_deuce_middle_win, \
first_deuce_line_count, first_deuce_line_win = return_zone_summary(returns, 'first', 'deuce')

# First Serve - Ad Side
first_ad_cross_count, first_ad_cross_win, \
first_ad_middle_count, first_ad_middle_win, \
first_ad_line_count, first_ad_line_win = return_zone_summary(returns, 'first', 'ad')

# Second Serve - Deuce Side
second_deuce_cross_count, second_deuce_cross_win, \
second_deuce_middle_count, second_deuce_middle_win, \
second_deuce_line_count, second_deuce_line_win = return_zone_summary(returns, 'second', 'deuce')

# Second Serve - Ad Side
second_ad_cross_count, second_ad_cross_win, \
second_ad_middle_count, second_ad_middle_win, \
second_ad_line_count, second_ad_line_win = return_zone_summary(returns, 'second', 'ad')

### Return Contact Court Visual Counts + Won

In [44]:
def return_contact_summary(df, serve_state, hit_zone):
    """Count and host win rate per contact depth for one serve/side.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'Serve State', 'Hit Zone', 'return_contact' and 'Point Winner'.
    serve_state, hit_zone : str
        Values of 'Serve State' and 'Hit Zone' to keep.

    Returns
    -------
    tuple
        (inside_count, inside_win_pct, mid_count, mid_win_pct, deep_count,
        deep_win_pct). Win rates are percentages rounded to one decimal
        place, and 0.0 for a depth with no returns.
    """
    in_scope = (df['Serve State'] == serve_state) & (df['Hit Zone'] == hit_zone)
    subset = df[in_scope]

    def contact_stats(contact):
        """Return (count, host win %) for one contact-depth label."""
        rows = subset[subset['return_contact'] == contact]
        count = len(rows)
        if count > 0:
            wins = (rows['Point Winner'] == 'host').sum()
            return count, round((wins / count) * 100, 1)
        return count, 0.0

    inside_count, inside_win_pct = contact_stats("inside")
    mid_count, mid_win_pct = contact_stats("mid")
    deep_count, deep_win_pct = contact_stats("deep")

    return inside_count, inside_win_pct, mid_count, mid_win_pct, deep_count, deep_win_pct


In [45]:
# First Serve - Deuce Side
# Each call yields (inside count, inside win %, mid count, mid win %,
# deep count, deep win %) for one serve state / side combination.

# First Serve - Deuce Side
first_deuce_inside_count, first_deuce_inside_win, \
first_deuce_mid_count, first_deuce_mid_win, \
first_deuce_deep_count, first_deuce_deep_win = return_contact_summary(returns, 'first', 'deuce')

# First Serve - Ad Side
first_ad_inside_count, first_ad_inside_win, \
first_ad_mid_count, first_ad_mid_win, \
first_ad_deep_count, first_ad_deep_win = return_contact_summary(returns, 'first', 'ad')

# Second Serve - Deuce Side
second_deuce_inside_count, second_deuce_inside_win, \
second_deuce_mid_count, second_deuce_mid_win, \
second_deuce_deep_count, second_deuce_deep_win = return_contact_summary(returns, 'second', 'deuce')

# Second Serve - Ad Side
second_ad_inside_count, second_ad_inside_win, \
second_ad_mid_count, second_ad_mid_win, \
second_ad_deep_count, second_ad_deep_win = return_contact_summary(returns, 'second', 'ad')

### Output CSV

In [46]:
# Formatted Return Ratings: each percentage as a string with a leading '+'
# and trailing '%' (e.g. '+38.7%'), as written to the summary CSV below.
first_serve_returns_won_updated = '+' + str(first_serve_returns_won) + '%'
second_serve_returns_won_updated = '+' + str(second_serve_returns_won) + '%'
return_games_won_percentage_updated = '+' + str(return_games_won_percentage) + '%'
break_points_converted_percentage_updated = '+' + str(break_points_converted_percentage) + '%'

In [47]:
# One-row summary of the headline return statistics.
# "fastest_return" is wrapped in a list so pandas can build a single-row
# frame from otherwise scalar values.
# NOTE: `fastest_return` and `min` are the values assigned in earlier cells
# (they shadow the function and the built-in of the same names). If those
# cells were not run, the function objects would be written instead.
return_summary = {
    "fastest_return": [fastest_return],
    "favorite_zone": favorite_return,
    "favorite_stroke": favorite_stroke,
    "min": min,
    "sec": sec,
    "first_serve_returns_won": first_serve_returns_won_updated,
    "second_serve_returns_won": second_serve_returns_won_updated,
    "return_games_won": return_games_won_percentage_updated,
    "breakpoints_converted": break_points_converted_percentage_updated,
    "return_rating": return_rating,
    "total_returns_won": total_returns_won_percentage,
    "forehand_ratio": forehand_ratio,
    "backhand_ratio": backhand_ratio
}

df = pd.DataFrame(return_summary)


# Output to CSV (spaces are stripped from the player name in the file name)
output_path = f"data/{player_name.replace(' ', '')}_return0.csv"
df.to_csv(output_path, index=False)

In [48]:
# First Return - Ad
# One-row table of every first-serve / ad-side statistic computed above;
# "return_in" is wrapped in a list so pandas builds a single-row frame.
first_return_ad = {
    "serve_state": "first",
    "side": "ad",
    "return_in": [first_return_ad_in_pct],
    "return_won": first_return_ad_won_pct,
    "best_return_zone": best_first_return_ad_zone,
    "best_return_stroke": best_first_return_ad_stroke,
    "cross_placement_count": first_ad_cross_count,
    "middle_placement_count": first_ad_middle_count,
    "line_placement_count": first_ad_line_count,
    "cross_placement_won": f"{first_ad_cross_win}%",
    "middle_placement_won": f"{first_ad_middle_win}%",
    "line_placement_won": f"{first_ad_line_win}%",
    "inside_contact_count": first_ad_inside_count,
    "mid_contact_count": first_ad_mid_count,
    "deep_contact_count": first_ad_deep_count,
    "inside_contact_won": f"{first_ad_inside_win}%",
    "mid_contact_won": f"{first_ad_mid_win}%",
    "deep_contact_won": f"{first_ad_deep_win}%",
}

df = pd.DataFrame(first_return_ad)

# Output to CSV (written as ..._return2.csv; the first/deuce table uses ..._return1.csv)
output_path = f"data/{player_name.replace(' ', '')}_return2.csv"
df.to_csv(output_path, index=False)

In [49]:
# First Return - Deuce
# One-row table of every first-serve / deuce-side statistic computed above;
# "return_in" is wrapped in a list so pandas builds a single-row frame.
first_return_deuce = {
    "serve_state": "first",
    "side": "deuce",
    "return_in": [first_return_deuce_in_pct],
    "return_won": first_return_deuce_won_pct,
    "best_return_zone": best_first_return_deuce_zone,
    "best_return_stroke": best_first_return_deuce_stroke,
    "cross_placement_count": first_deuce_cross_count,
    "middle_placement_count": first_deuce_middle_count,
    "line_placement_count": first_deuce_line_count,
    "cross_placement_won": f"{first_deuce_cross_win}%",
    "middle_placement_won": f"{first_deuce_middle_win}%",
    "line_placement_won": f"{first_deuce_line_win}%",
    "inside_contact_count": first_deuce_inside_count,
    "mid_contact_count": first_deuce_mid_count,
    "deep_contact_count": first_deuce_deep_count,
    "inside_contact_won": f"{first_deuce_inside_win}%",
    "mid_contact_won": f"{first_deuce_mid_win}%",
    "deep_contact_won": f"{first_deuce_deep_win}%"
}

df = pd.DataFrame(first_return_deuce)

# Output to CSV (spaces are stripped from the player name in the file name)
output_path = f"data/{player_name.replace(' ', '')}_return1.csv"
df.to_csv(output_path, index=False)

In [50]:
# Second Return - Deuce
# One-row table of every second-serve / deuce-side statistic computed above;
# "return_in" is wrapped in a list so pandas builds a single-row frame.
second_return_deuce = {
    "serve_state": "second",
    "side": "deuce",
    "return_in": [second_return_deuce_in_pct],
    "return_won": second_return_deuce_won_pct,
    "best_return_zone": best_second_return_deuce_zone,
    "best_return_stroke": best_second_return_deuce_stroke,
    "cross_placement_count": second_deuce_cross_count,
    "middle_placement_count": second_deuce_middle_count,
    "line_placement_count": second_deuce_line_count,
    "cross_placement_won": f"{second_deuce_cross_win}%",
    "middle_placement_won": f"{second_deuce_middle_win}%",
    "line_placement_won": f"{second_deuce_line_win}%",
    "inside_contact_count": second_deuce_inside_count,
    "mid_contact_count": second_deuce_mid_count,
    "deep_contact_count": second_deuce_deep_count,
    "inside_contact_won": f"{second_deuce_inside_win}%",
    "mid_contact_won": f"{second_deuce_mid_win}%",
    "deep_contact_won": f"{second_deuce_deep_win}%"
}

df = pd.DataFrame(second_return_deuce)

# Output to CSV (spaces are stripped from the player name in the file name)
output_path = f"data/{player_name.replace(' ', '')}_return3.csv"
df.to_csv(output_path, index=False)

In [51]:
# Second Return - Ad
# One-row table of every second-serve / ad-side statistic computed above;
# "return_in" is wrapped in a list so pandas builds a single-row frame.
second_return_ad = {
    "serve_state": "second",
    "side": "ad",
    "return_in": [second_return_ad_in_pct],
    "return_won": second_return_ad_won_pct,
    "best_return_zone": best_second_return_ad_zone,
    "best_return_stroke": best_second_return_ad_stroke,
    "cross_placement_count": second_ad_cross_count,
    "middle_placement_count": second_ad_middle_count,
    "line_placement_count": second_ad_line_count,
    "cross_placement_won": f"{second_ad_cross_win}%",
    "middle_placement_won": f"{second_ad_middle_win}%",
    "line_placement_won": f"{second_ad_line_win}%",
    "inside_contact_count": second_ad_inside_count,
    "mid_contact_count": second_ad_mid_count,
    "deep_contact_count": second_ad_deep_count,
    "inside_contact_won": f"{second_ad_inside_win}%",
    "mid_contact_won": f"{second_ad_mid_win}%",
    "deep_contact_won": f"{second_ad_deep_win}%",
}

df = pd.DataFrame(second_return_ad)

# Output to CSV (spaces are stripped from the player name in the file name)
output_path = f"data/{player_name.replace(' ', '')}_return4.csv"
df.to_csv(output_path, index=False)