# Summary Statistics Notebook

### Load Packages

In [None]:
import pandas as pd
import re
import json
import ipywidgets as widgets
from IPython.display import display

### Read Data

In [None]:
# Player picker. The first option is a placeholder (not a real player) so the
# next cell can detect that no explicit choice has been made yet.
choose_player_dropdown = widgets.Dropdown(
    options=[
        '-- Select --',
        'Rudy Quan',
        'Emon Van Loben Sels',
        'Kaylan Bigun',
        'Alexander Hoogmartens',
        'Spencer Johnson',
        'Aadarsh Tripathi',
        'Giacomo Revelli',
        'Gianluca Ballotta',
    ],
    value='-- Select --',
    # The options are player names, so label the widget accordingly.
    description='Player:',
)

display(choose_player_dropdown)

In [None]:
# Refuse to continue while the dropdown still shows its placeholder entry.
if choose_player_dropdown.value == '-- Select --':
    raise ValueError("Please choose a valid player from the dropdown menu in the previous cell before proceeding.")

# A real player was picked; every later cell reads this variable.
player_name = choose_player_dropdown.value

In [None]:
# Read all six sheets of the player's combined workbook with a single call:
# passing a list to `sheet_name` returns a dict of DataFrames keyed by sheet
# name, so the file is opened and parsed once instead of six times.
combined_sheets = pd.read_excel(
    f'../../data/mens/{player_name}/combined.xlsx',
    sheet_name=['Shots', 'Points', 'Games', 'Sets', 'Stats', 'Settings'],
)

combined_data_shots = combined_sheets['Shots']
combined_data_points = combined_sheets['Points']
combined_data_games = combined_sheets['Games']
combined_data_sets = combined_sheets['Sets']
combined_data_stats = combined_sheets['Stats']
combined_data_settings = combined_sheets['Settings']

In [None]:
# Load the results file, keep only its first 253 rows (the 2024-2025 season
# matches) and parse the 'Date' column into datetimes.
# NOTE(review): 253 is a hard-coded row count; it has to be revisited whenever
# mens_results.csv changes.
mens_results = (
    pd.read_csv('../../data/mens/mens_results.csv')
    .head(253)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

def filter_player(data, player_name):
    """Return the school-event rows of *data* in which *player_name* played.

    A row is kept when the player appears as 'Player1' or 'Player2' and its
    'Event Name' starts with one of the school-event prefixes
    ('Dual Match', '2024 ITA', '2024-25 NCAA Division').

    Rows with a missing 'Event Name' are treated as non-school events and are
    dropped, instead of putting NaN into the boolean mask (which makes the
    indexing step raise).
    """
    # Filter for player_name
    player_rows = data[(data['Player1'] == player_name) | (data['Player2'] == player_name)]

    # Filter for only school events; na=False maps a missing name to False
    is_school_event = player_rows['Event Name'].str.startswith(
        ('Dual Match', '2024 ITA', '2024-25 NCAA Division'), na=False
    )
    return player_rows[is_school_event]


# The selected player's school matches, de-duplicated. reset_index() gives the
# frame a fresh 0..n-1 index and keeps the previous index as an 'index' column.
mens_results_player = filter_player(mens_results, player_name).drop_duplicates().reset_index()

In [None]:
# Preview the first rows of the filtered match list.
mens_results_player.head()

### Longest Rally Function

In [None]:
def longest_rally(data):
    """Return the largest value found in the 'Shot' column of *data*.

    Raises:
        ValueError: if *data* has no 'Shot' column.
    """
    if "Shot" in data.columns:
        # The maximum of the column is the value itself, not a row index.
        return data["Shot"].max()

    raise ValueError("The column 'Shot' was not found in the 'Shots' sheet.")

In [None]:
# Longest rally across the 'Shots' sheet; reused later in the summary row.
longest_rally_length = longest_rally(combined_data_shots)
print(f"Longest Rally Length: {longest_rally_length} shots")

### Average Time On Court

In [None]:
def average_court_time(data):
    """Return the average per-match court time as an "H:MM" string.

    'Duration' values are summed per '__source_file__' (one group per match),
    divided by 60, rounded to one decimal, and then averaged across matches.
    Non-numeric durations are ignored.
    NOTE(review): the division by 60 assumes 'Duration' is in seconds - confirm.

    The input frame is left untouched (the numeric coercion is done on a
    separate Series). When no row has a numeric duration the function returns
    "0:00" rather than failing on unset hour/minute values.

    Raises:
        ValueError: if 'Duration' or '__source_file__' is missing from *data*.
    """
    for required in ('Duration', '__source_file__'):
        if required not in data.columns:
            raise ValueError(f"The column '{required}' was not found in the 'Sets' sheet.")

    # errors='coerce' turns non-numeric entries into NaN so they can be dropped
    durations = pd.to_numeric(data['Duration'], errors='coerce')
    has_duration = durations.notna()

    # Total time per match, converted to minutes
    match_durations = (
        durations[has_duration]
        .groupby(data.loc[has_duration, '__source_file__'])
        .sum()
        .div(60)
        .round(1)
    )

    # Overall average in minutes; NaN when there was nothing to average
    avg_minutes = match_durations.mean()
    if pd.isna(avg_minutes):
        return "0:00"

    hours, mins = divmod(int(avg_minutes), 60)
    return f"{hours}:{mins:02d}"

In [None]:
# average_court_time returns an "H:MM" string; it is kept as-is for the summary
# row and split into integers only for the printed message.
average_time_on_court = average_court_time(combined_data_sets)

hours, minutes = map(int, average_time_on_court.split(":"))
print(f"Average Time on Court: {hours} hr {minutes} min")

### Tiebreak Wins

In [None]:
def analyze_tiebreak_data(data, player_name):
    """Count the tiebreaks *player_name* won and lost across *data*.

    A tiebreak is recognised purely by substring: "7-6(" / "1-0" count for
    Player1 and "6-7(" / "0-1" count for Player2. The counts are then mapped to
    win/loss depending on which side *player_name* played.

    Rows where the player is neither 'Player1' nor 'Player2' contribute
    nothing, and an input without any tiebreak yields (0, 0).

    Returns:
        (total_wins, total_losses), each the result of a pandas ``sum()``.
    """
    # Keep only matches whose score contains at least one tiebreak marker
    has_tiebreak = data['Score'].str.contains(r'6-7\(|7-6\(|1-0|0-1', na=False)
    tiebreak_data = data[has_tiebreak]

    wins = []
    losses = []
    for score, player1, player2 in zip(
        tiebreak_data['Score'], tiebreak_data['Player1'], tiebreak_data['Player2']
    ):
        player1_tiebreaks = score.count("7-6(") + score.count('1-0')
        player2_tiebreaks = score.count("6-7(") + score.count('0-1')

        if player1 == player_name:
            wins.append(player1_tiebreaks)
            losses.append(player2_tiebreaks)
        elif player2 == player_name:
            wins.append(player2_tiebreaks)
            losses.append(player1_tiebreaks)
        else:
            # Not this player's match: nothing to add
            wins.append(0)
            losses.append(0)

    # Summing through a Series keeps the return values numpy integers, which
    # downstream cells rely on when they call .round() on derived ratios.
    total_wins = pd.Series(wins, dtype='int64').sum()
    total_losses = pd.Series(losses, dtype='int64').sum()

    return total_wins, total_losses


In [None]:
# Output tiebreaker record. The function returns (wins, losses), so one call
# is enough; unpack both values instead of running the analysis twice.
tiebreaker_wins, tiebreaker_losses = analyze_tiebreak_data(mens_results_player, player_name)

print(f"Tiebreak Record: {tiebreaker_wins}-{tiebreaker_losses} (W–L)")

### Average Winners

In [None]:
def get_average_winners(data):
    """Return the average number of winners per match as an int.

    Counts the rows won by 'host' whose 'Detail' is 'Forehand Winner' or
    'Backhand Winner' and divides by the number of distinct '__source_file__'
    values (one per match). A winner type that never occurs counts as zero
    instead of raising KeyError, and an input without matches returns 0.

    The average is rounded to one decimal and then truncated by int(); that
    return shape is kept unchanged.
    """
    # Number of matches = number of distinct source files
    num_matches = data['__source_file__'].nunique()
    if num_matches == 0:
        return 0

    # 'Detail' of every point won by our player
    detail_counts = data.loc[data['Point Winner'] == 'host', 'Detail'].value_counts()

    # Total winners overall (Forehand + Backhand); .get avoids KeyError
    total_winners = detail_counts.get('Forehand Winner', 0) + detail_counts.get('Backhand Winner', 0)

    return int(round(total_winners / num_matches, 1))

In [None]:
# Average winners per match from the 'Points' sheet; reused in the summary row.
average_winners = get_average_winners(combined_data_points)
print(f"Average Winners per Match: {average_winners}")

### Sets Won

##### Helper Functions

In [None]:
def count_sets_won(sets, player_name, player1, player2):
    """Count how many sets in *sets* were won by *player_name*.

    *sets* is a list of per-set score strings such as "6-4", "7-6(3)" or the
    10-point tiebreak form "1-0(7)"; the number before the dash belongs to
    *player1*, the number after it to *player2*.

    A standard set counts only when it is complete (someone reached 6 with a
    two-game margin, or the set ended 7-6 / 6-7). Entries that cannot be
    parsed are skipped. Anything that is not a list (e.g. NaN produced by a
    missing score) yields 0, matching the guard used by the score-string helper
    later in this notebook.

    NOTE(review): a later cell defines another `count_sets_won` with a
    different signature; after that cell runs, this version is shadowed.
    """
    if not isinstance(sets, (list, tuple)):
        return 0

    sets_won = 0
    for set_score in sets:
        if not isinstance(set_score, str) or "-" not in set_score:
            continue

        # One parse for both formats; any "(n)" suffix is dropped
        games = set_score.split("-")
        try:
            player1_score = int(games[0].split("(")[0].strip())
            player2_score = int(games[1].split("(")[0].strip())
        except ValueError:
            continue

        # Look at the score from the analysed player's side
        if player1 == player_name:
            own_score, opponent_score = player1_score, player2_score
        elif player2 == player_name:
            own_score, opponent_score = player2_score, player1_score
        else:
            continue

        # 10-point tiebreaker recorded as "1-0(x)" or "0-1(x)"
        if "(" in set_score and {player1_score, player2_score} == {0, 1}:
            if own_score == 1:
                sets_won += 1
            continue

        # Standard set: count it only if it was completed
        set_completed = (
            (max(player1_score, player2_score) >= 6 and abs(player1_score - player2_score) >= 2)
            or {player1_score, player2_score} == {6, 7}
        )
        if set_completed and own_score > opponent_score:
            sets_won += 1

    return sets_won


##### Sets Won Function

In [None]:
def get_sets_won(df, player_name):
    """Return the total number of sets *player_name* won across *df*.

    Side effect: adds two columns to *df* - 'Sets' (the 'Score' string split
    on ", ") and 'sets_won' (the per-match count from count_sets_won).
    """
    # Break every score string into its individual set scores
    df.loc[:, 'Sets'] = df['Score'].str.split(", ")

    def _sets_for_match(match):
        return count_sets_won(match['Sets'], player_name, match['Player1'], match['Player2'])

    # Per-match set count, stored on the frame
    per_match = df.apply(_sets_for_match, axis=1)
    df.loc[:, 'sets_won'] = per_match.copy()

    return df['sets_won'].sum()

In [None]:
# Total sets won by the selected player (also adds the 'Sets' and 'sets_won'
# columns to mens_results_player as a side effect of get_sets_won).
sets_won = get_sets_won(mens_results_player, player_name)
print(f"Total Sets Won: {sets_won}")

### Three Set Matches Won

##### Helper Function

In [None]:
# Helper function to create column third_set_wins

def find_threeSet_matchWins(sets, player_name, player1):
    """Return 1 if *player_name* won a three-set match, otherwise 0.

    *sets* is the list of per-set score strings of one match. Only matches
    with exactly three entries are considered; the player must have taken at
    least two of them. The number before the dash belongs to *player1*; when
    *player1* is not *player_name* the player is taken to be the other side.

    A set counts when it is a 10-point tiebreak written "1-0(x)" / "0-1(x)",
    or a completed standard set (6+ games with a two-game margin, or 7-6 /
    6-7). Unfinished or unparsable sets are skipped. A non-list *sets* value
    (e.g. NaN from a missing score) returns 0 instead of raising.
    """
    if not isinstance(sets, (list, tuple)) or len(sets) != 3:
        return 0

    player_is_player1 = player1 == player_name
    sets_taken = 0

    for set_score in sets:
        # Skip if the set score is not in the expected format
        if not isinstance(set_score, str) or "-" not in set_score:
            continue

        # One parse for both formats; any "(n)" suffix is dropped
        games = set_score.split("-")
        try:
            player1_score = int(games[0].split("(")[0].strip())
            player2_score = int(games[1].split("(")[0].strip())
        except ValueError:
            # Skip invalid scores
            continue

        if player_is_player1:
            own_score, opponent_score = player1_score, player2_score
        else:
            own_score, opponent_score = player2_score, player1_score

        # 10-point tiebreaker set
        if "(" in set_score and {player1_score, player2_score} == {0, 1}:
            if own_score == 1:
                sets_taken += 1
            continue

        # Standard set: only completed sets count, so an unfinished third
        # set is ignored by the same test
        set_completed = (
            (max(player1_score, player2_score) >= 6 and abs(player1_score - player2_score) >= 2)
            or {player1_score, player2_score} == {6, 7}
        )
        if set_completed and own_score > opponent_score:
            sets_taken += 1

    # The match is a win when at least two sets were taken
    return 1 if sets_taken >= 2 else 0

##### Three Set Wins Function

In [None]:
def count_threeSet_wins(df, player_name):
    """Return how many three-set matches in *df* were won by *player_name*.

    Side effect: adds the columns 'sets' (the 'Score' split on ", ") and
    'third_set_wins' (the 0/1 flag from find_threeSet_matchWins) to *df*.
    """
    # Per-set score strings for every match
    df.loc[:, 'sets'] = df['Score'].str.split(", ")

    def _won_in_three(match):
        return find_threeSet_matchWins(match['sets'], player_name, match['Player1'])

    # One flag per match
    df.loc[:, 'third_set_wins'] = df.apply(_won_in_three, axis=1)

    return df['third_set_wins'].sum()

In [None]:
# Number of three-set matches the selected player won; reused later for the
# deciding-sets percentage and the summary row.
threeSet_wins = count_threeSet_wins(mens_results_player, player_name)
print(f"3-Set Match Wins: {threeSet_wins}")


### Three Set Matches Lost

##### Helper Function

In [None]:
def find_threeSet_matchLosses(sets, player_name, player1):
    """Return 1 if *player_name* lost a three-set match, otherwise 0.

    Mirror image of the three-set win helper: only matches with exactly three
    entries in *sets* are considered, and the opponent must have taken at
    least two sets. The number before the dash belongs to *player1*; when
    *player1* is not *player_name* the player is taken to be the other side.

    A set counts when it is a 10-point tiebreak written "1-0(x)" / "0-1(x)",
    or a completed standard set (6+ games with a two-game margin, or 7-6 /
    6-7). Unfinished or unparsable sets are skipped. A non-list *sets* value
    (e.g. NaN from a missing score) returns 0 instead of raising.
    """
    if not isinstance(sets, (list, tuple)) or len(sets) != 3:
        return 0

    player_is_player1 = player1 == player_name
    opponent_wins = 0

    for set_score in sets:
        if not isinstance(set_score, str) or "-" not in set_score:
            continue

        # One parse for both formats; any "(n)" suffix is dropped
        games = set_score.split("-")
        try:
            player1_score = int(games[0].split("(")[0].strip())
            player2_score = int(games[1].split("(")[0].strip())
        except ValueError:
            continue

        if player_is_player1:
            own_score, opponent_score = player1_score, player2_score
        else:
            own_score, opponent_score = player2_score, player1_score

        # 10-point tiebreaker set
        if "(" in set_score and {player1_score, player2_score} == {0, 1}:
            if opponent_score == 1:
                opponent_wins += 1
            continue

        # Standard set: only completed sets count, so an unfinished third
        # set is ignored by the same test
        set_completed = (
            (max(player1_score, player2_score) >= 6 and abs(player1_score - player2_score) >= 2)
            or {player1_score, player2_score} == {6, 7}
        )
        if set_completed and own_score < opponent_score:
            opponent_wins += 1

    return 1 if opponent_wins >= 2 else 0


##### Three Set Losses Function

In [None]:
def count_threeSet_losses(df, player_name):
    """Return how many three-set matches in *df* were lost by *player_name*.

    Side effect: adds the columns 'sets' (the 'Score' split on ", ") and
    'third_set_losses' (the 0/1 flag from find_threeSet_matchLosses) to *df*.
    """
    # Per-set score strings for every match
    df.loc[:, 'sets'] = df['Score'].str.split(", ")

    def _lost_in_three(match):
        return find_threeSet_matchLosses(match['sets'], player_name, match['Player1'])

    # One flag per match
    df.loc[:, 'third_set_losses'] = df.apply(_lost_in_three, axis=1)

    return df['third_set_losses'].sum()


In [None]:
# Number of three-set matches the selected player lost; reused later for the
# deciding-sets percentage.
threeSet_losses = count_threeSet_losses(mens_results_player, player_name)
print(f"3-Set Match Losses: {threeSet_losses}")

### Overall Record and Singles Record

##### Helper Functions

In [None]:
# Helper Function: Count Sets Won
def count_sets_won(score_str):
    """Return (player1_sets, player2_sets) for a comma-separated score string.

    Each set looks like "6-4" or "7-6(3)"; the bracketed tiebreak detail is
    ignored. In a three-set score the third entry may be a 10-point tiebreak
    written "1-0(x)" / "0-1(x)", which counts as a set for the side with 1.
    Unfinished or unparsable sets are skipped, and a non-string input returns
    (0, 0).

    A third set that carries tiebreak detail but is a regular set (for example
    "7-6(5)") is validated like any other set; it is no longer skipped.

    NOTE(review): this definition replaces the earlier four-argument
    `count_sets_won`; re-running the earlier "Sets Won" cells after this one
    will call this version.
    """
    if not isinstance(score_str, str):
        return 0, 0

    p1_sets = p2_sets = 0

    sets = [s.strip() for s in score_str.split(',') if s.strip()]  # Clean and split sets

    for i, s in enumerate(sets):
        parts = s.split('-')

        # Check for valid score format
        if len(parts) < 2:
            continue

        try:
            # Extract the player scores, ignoring tiebreaker details
            p1 = int(parts[0].split('(')[0].strip())
            p2 = int(parts[1].split('(')[0].strip())
        except ValueError:
            continue

        # 10-point tiebreaker played in place of a third set ("1-0(7)" / "0-1(3)")
        is_match_tiebreak = len(sets) == 3 and i == 2 and "(" in s and {p1, p2} == {0, 1}
        if is_match_tiebreak:
            if p1 == 1:
                p1_sets += 1
            else:
                p2_sets += 1
            continue

        # Standard set: someone reached 6 with a two-game margin, or 7-6 / 6-7
        valid_standard_set = ((p1 >= 6 or p2 >= 6) and abs(p1 - p2) >= 2) or \
                             ((p1 == 7 and p2 == 6) or (p1 == 6 and p2 == 7))

        # If the set is unfinished, skip counting
        if not valid_standard_set:
            continue

        # Count the set win for the respective player
        if p1 > p2:
            p1_sets += 1
        else:
            p2_sets += 1

    return p1_sets, p2_sets

In [None]:
# Helper Function: Determine Winner
def get_winner(row):
    """Return the name of the player who won two sets, or 'Unfinished'.

    *row* must provide 'Score', 'Player1' and 'Player2'. Sets are tallied by
    count_sets_won; Player1 is checked first.
    """
    first_player_sets, second_player_sets = count_sets_won(row['Score'])

    if first_player_sets == 2:
        return row['Player1']
    if second_player_sets == 2:
        return row['Player2']

    # Nobody reached two sets
    return 'Unfinished'

In [None]:
# Conference window: matches dated from conf_start to conf_end (both inclusive
# where they are used) are treated as conference matches.
conf_start = pd.Timestamp(year=2025, month=3, day=6)
conf_end = pd.Timestamp(year=2025, month=4, day=20)

##### Singles Record Function

In [None]:
def player_records(data, conf_start, conf_end, player=None):
    """Compute overall and conference win-loss records and write them to JSON.

    Args:
        data: match rows with 'Score', 'Date', 'Player1' and 'Player2'.
        conf_start, conf_end: inclusive date bounds of the conference season.
        player: name whose record is computed. When omitted, the notebook-level
            `player_name` variable is used, as before.

    Returns:
        (overall_record, conference_record, overall_unfinished,
        conference_unfinished, records) where the two records are "W-L"
        strings and `records` is the dict that was written to disk.

    Side effects:
        * adds 'player1_sets', 'player2_sets' and 'result' columns to *data*;
        * writes "singles_record.json" in the current working directory.
    """
    if player is None:
        # Backward-compatible fallback for callers that pass three arguments
        player = player_name

    # Sets won by each side, one (p1, p2) pair per match
    set_counts = [count_sets_won(score) for score in data['Score']]
    data['player1_sets'] = [p1_sets for p1_sets, _ in set_counts]
    data['player2_sets'] = [p2_sets for _, p2_sets in set_counts]

    # 'result' holds the winner's name or 'Unfinished'
    data['result'] = data.apply(get_winner, axis=1)

    def _tally(results):
        """Return (wins, losses, unfinished) for a 'result' column."""
        unfinished = results == 'Unfinished'
        won = results == player
        return won.sum(), (~won & ~unfinished).sum(), unfinished.sum()

    # Conference matches: dates within [conf_start, conf_end]
    in_conference = (data['Date'] >= conf_start) & (data['Date'] <= conf_end)

    overall_wins, overall_losses, overall_unfished = _tally(data['result'])
    conf_wins, conf_losses, conf_unfinished = _tally(data.loc[in_conference, 'result'])

    # Put in string
    overall_record = f"{overall_wins}-{overall_losses}"
    conference_record = f"{conf_wins}-{conf_losses}"

    # Prepare records for JSON (int() converts numpy integers, which json
    # cannot serialise)
    records = {
        "records": [
            {
                "type": "Overall Record",
                "total": int(overall_wins + overall_losses),
                "won": int(overall_wins),
                "lost": int(overall_losses)
            },
            {
                "type": "Conference Record",
                "total": int(conf_wins + conf_losses),
                "won": int(conf_wins),
                "lost": int(conf_losses)
            }
        ]
    }

    # Output to JSON file in correct structure
    filename = "singles_record.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)

    return overall_record, conference_record, overall_unfished, conf_unfinished, records

In [None]:
# Compute the records for the selected player. The call also writes
# singles_record.json and adds helper columns to mens_results_player.
overall_record, conference_record, overall_unfished, conf_unfinished, records_json = player_records(mens_results_player, conf_start, conf_end)
print(f"Overall Record: {overall_record} (W–L)")
print(f"Conference Record: {conference_record} (W–L)")

### Winstreak

In [None]:
def winstreak(data, player_name):
    """Return the longest run of consecutive wins by *player_name*.

    Matches are ordered by 'Date' and unfinished matches are ignored. Side
    effect: a 'Winner' column (from get_winner) is added to *data*.
    """
    data['Winner'] = data.apply(get_winner, axis=1)

    # Finished matches only, oldest first
    finished = data[data['Winner'] != 'Unfinished'].sort_values(by='Date', ascending=True)

    # True where the player won the match
    won = finished['Winner'] == player_name

    # A new run id starts every time the win/not-win flag flips
    run_id = (won != won.shift()).cumsum()

    streak_column = f'{player_name}_streak'

    # Position inside each run (1, 2, 3, ...), then blank out the non-win runs
    finished[streak_column] = won.groupby(run_id).cumcount() + 1
    finished.loc[~won, streak_column] = 0

    return finished[streak_column].max()

# Displays the streak as this cell's output; the next cell repeats the call
# and stores the value in `streak`.
winstreak(mens_results_player, player_name)


In [None]:
# Longest win streak for the selected player; reused in the summary row.
streak = winstreak(mens_results_player, player_name)

print(f"{player_name}'s longest win streak: {streak}")

### Unfinished Matches

##### Helper Function

In [None]:
# Helper Function: Determine Result (Borrowed from index.ipynb)
# Make Win and Loss --> "W" and "L" AND take into account UNFINISHED matches "UF"
# eg. 6-3 6-3 W  | 3-6 4-6 L  | 2-6 6-5 UF
def determine_result(row, player=None):
    """Classify one match as 'W', 'L' or 'UF' (unfinished) for a player.

    Args:
        row: mapping/Series with 'Score', 'Player1' and 'Player2'.
        player: name to evaluate the result for. When omitted, the
            notebook-level `player_name` variable is used, as before, so the
            function still works with `df.apply(determine_result, axis=1)`.

    Returns:
        'UF' when the score is missing, contains "UF", includes a set that is
        unrecognised or incomplete, nobody reached the required sets, or the
        player is on neither side; otherwise 'W' or 'L'.
    """
    if player is None:
        player = player_name

    score = row['Score']
    if pd.isna(score) or 'UF' in str(score).upper():
        return 'UF'

    sets = score.split(',')
    last_index = len(sets) - 1
    p1_sets_won = 0
    p2_sets_won = 0
    valid_sets = 0

    for i, set_score in enumerate(sets):
        set_score = set_score.strip()

        # Special case: 10-point tiebreaker written "1-0(x)" / "0-1(x)" as the last set
        if i == last_index:
            match_tb = re.match(r'(1-0|0-1)\((\d+)\)', set_score)
            if match_tb:
                if match_tb.group(1) == '1-0':
                    p1_sets_won += 1
                else:
                    p2_sets_won += 1
                valid_sets += 1
                continue

        # Regular set score; an unrecognised format marks the match unfinished
        match = re.match(r'(\d+)-(\d+)(\(\d+\))?', set_score)
        if not match:
            return 'UF'
        p1_games, p2_games = int(match.group(1)), int(match.group(2))

        # Complete when someone has 6+ games with a two-game margin ...
        won_by_margin = (p1_games >= 6 or p2_games >= 6) and abs(p1_games - p2_games) >= 2
        # ... or when the set went to 7 with both sides on at least 6 (e.g. 7-6)
        won_by_tiebreak = (p1_games == 7 or p2_games == 7) and (p1_games >= 6 and p2_games >= 6)
        if not (won_by_margin or won_by_tiebreak):
            # Unfinished set
            return 'UF'

        valid_sets += 1
        if p1_games > p2_games:
            p1_sets_won += 1
        else:
            p2_sets_won += 1

    # Check if the match is complete based on valid sets won
    if (valid_sets >= 2 and (p1_sets_won == 2 or p2_sets_won == 2)) or (valid_sets >= 3 and (p1_sets_won == 3 or p2_sets_won == 3)):
        if row['Player1'] == player:
            return 'W' if p1_sets_won > p2_sets_won else 'L'
        elif row['Player2'] == player:
            return 'W' if p2_sets_won > p1_sets_won else 'L'
    return 'UF'


##### Unfinished Matches Total Function

In [None]:
def index_csv(data, player_name):
    """Return the number of the player's matches classified as unfinished.

    Rows where *player_name* is 'Player1' or 'Player2' are copied, each is
    labelled by determine_result, and the 'UF' labels are counted (0 if none).

    NOTE(review): determine_result is applied with the row only, so by default
    it evaluates results against the notebook-level `player_name`, not this
    function's argument - keep the two in sync.
    """
    # Matches involving the player (copy, so the caller's frame is untouched)
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    player_matches = data[involves_player].copy()

    # Result column
    player_matches['ucla_result'] = player_matches.apply(determine_result, axis=1)

    result_counts = player_matches['ucla_result'].value_counts()
    return result_counts.get('UF', 0)

In [None]:
# Count of the selected player's matches labelled 'UF'; reused in the summary row.
unfinished_matches = index_csv(mens_results_player, player_name)
print(f"Unfinished Matches: {unfinished_matches}")

### Bagels Served

In [None]:
# Number of Bagels Given

def bagels_summary(df, player_name):
    """Return how many 6-0 sets ("bagels") *player_name* won across *df*.

    For each match the 'Score' is split on commas; a set of exactly "6-0"
    counts when the player is 'Player1', and "0-6" when the player is
    'Player2'. Rows are read positionally, so the frame may carry any index
    (it does not need to be 0..n-1).
    """
    bagel_count = 0

    for p1, p2, score in zip(df['Player1'], df['Player2'], df['Score']):
        # str() guards against non-string scores such as NaN
        sets = [s.strip() for s in str(score).split(',')]

        if p1 == player_name:
            bagel_count += sets.count('6-0')
        elif p2 == player_name:
            bagel_count += sets.count('0-6')

    return bagel_count


In [None]:
# Total 6-0 sets won by the selected player; reused in the summary row.
bagels_total = bagels_summary(mens_results_player, player_name)
print(f"Total Bagels Served: {bagels_total}")

## Performance Ratings

##### Total Matches

In [None]:
# One distinct '__source_file__' value per tracked match, so the number of
# distinct values in the 'Settings' sheet is the match count.
total_matches = combined_data_settings['__source_file__'].nunique()
total_matches

##### Helper Functions

In [None]:
# Helper Function: Grabs total of Specified Stat
def get_total(df, stat_name):
    """Return the sum of every 'Host Set*' column for rows named *stat_name*.

    Rows are selected where 'Stat Name' equals *stat_name*. Values that are
    not numeric are coerced to NaN and therefore ignored by the sum. When no
    row matches, the plain integer 0 is returned.
    """
    stat_rows = df[df['Stat Name'] == stat_name]
    if stat_rows.empty:
        # NOTE(review): a missing stat silently counts as 0; consider raising
        return 0

    # Add up the columns one by one, in column order
    return sum(
        pd.to_numeric(stat_rows[column], errors='coerce').sum()
        for column in stat_rows.columns
        if column.startswith('Host Set')
    )

In [None]:
# Helper Function: Calculate Service Games Won Percentage

def calculate_service_games_won(df):
    """Return the fraction (0-1) of host service games that the host won.

    Only rows with 'Server' == 'host' and a 'Game Winner' other than 'draw'
    are considered. Returns 0.0 when there is no such game, instead of
    dividing by zero.
    """
    # Host service games that have a decided winner
    host_service_games = df[(df['Server'] == 'host') & (df['Game Winner'] != 'draw')]
    if host_service_games.empty:
        return 0.0

    # Games among those where host won
    games_won = int((host_service_games['Game Winner'] == 'host').sum())

    return games_won / len(host_service_games)

In [None]:
# Helper Function: Calculate Double Faults

def calculate_double_faults(df):
    """Return the number of rows where the host served a double fault.

    A row counts when 'Match Server' is 'host' and 'Detail' is 'Double Fault'.
    """
    host_is_serving = df['Match Server'] == 'host'
    is_double_fault = df['Detail'] == 'Double Fault'

    # Number of rows satisfying both conditions, as a plain int
    return int((host_is_serving & is_double_fault).sum())

In [None]:
# Helper Function: Calculate Return Games Won

def calculate_return_games_won(df):
    """Return the fraction (0-1) of guest service games that the host won.

    Only rows with 'Server' == 'guest' and a 'Game Winner' other than 'draw'
    are considered. Returns 0.0 when there is no such game, instead of
    dividing by zero.
    """
    # Guest service games that have a decided winner
    guest_service_games = df[(df['Server'] == 'guest') & (df['Game Winner'] != 'draw')]
    if guest_service_games.empty:
        return 0.0

    # A return game is won when the host wins a game the guest served
    games_won = int((guest_service_games['Game Winner'] == 'host').sum())

    return games_won / len(guest_service_games)

### Serve Rating

In [None]:
# Serve statistics. All values are rounded with the built-in round(), which
# works for numpy scalars and plain Python numbers alike; the .round() method
# exists only on numpy values and fails when get_total returns its plain 0.

# 1st Serve In Percentage
first_serve_in_percentage = round(get_total(combined_data_stats, '1st Serves In') / get_total(combined_data_stats, '1st Serves') * 100, 2)

# 1st Serve Points Won Percentage
first_serve_won_percentage = round(get_total(combined_data_stats, '1st Serves Won') / get_total(combined_data_stats, '1st Serves In') * 100, 2)

# 2nd Serve Points Won Percentage
second_serve_won_percentage = round(get_total(combined_data_stats, '2nd Serves Won') / get_total(combined_data_stats, '2nd Serves In') * 100, 2)

# Service Games Won Percentage
service_games_won_percentage = round(calculate_service_games_won(combined_data_games) * 100, 2)

# Average Aces per Match
aces_average = round(get_total(combined_data_stats, 'Aces') / total_matches, 2)

# Average Double Faults per Match
doubleFaults_average = round(calculate_double_faults(combined_data_points) / total_matches, 2)

In [None]:
# Calculate Serve Rating: sum of the four serve percentages minus the average
# double faults per match, rounded to one decimal. aces_average is printed
# below but is not part of the formula.
serve_rating = round(first_serve_in_percentage + first_serve_won_percentage + second_serve_won_percentage + service_games_won_percentage - doubleFaults_average, 1)

# Print All Calculations
print(f"First Serve In %: {first_serve_in_percentage:.2f}%")
print(f"First Serve Points Won %: {first_serve_won_percentage:.2f}%")
print(f"Second Serve Points Won %: {second_serve_won_percentage:.2f}%")
print(f"Service Games Won %: {service_games_won_percentage:.2f}%")
print(f"Aces per Match: {aces_average:.2f}")
print(f"Double Faults per Match: {doubleFaults_average:.2f}")
print(f"Serve Rating: {serve_rating:.1f}")

### Return Rating

In [None]:
# Return statistics. All values are rounded with the built-in round(), which
# works for numpy scalars and plain Python numbers alike; the .round() method
# exists only on numpy values and fails when get_total returns its plain 0.

# 1st Serve Return Points Won Percentage
first_serve_returns_won = round(get_total(combined_data_stats, '1st Returns Won') / get_total(combined_data_stats, '1st Returns') * 100, 2)

# 2nd Serve Return Points Won Percentage
second_serve_returns_won = round(get_total(combined_data_stats, '2nd Returns Won') / get_total(combined_data_stats, '2nd Returns') * 100, 2)

# Return Games Won Percentage
return_games_won_percentage = round(calculate_return_games_won(combined_data_games) * 100, 2)

# Break Points Converted Percentage
break_points_converted_percentage = round(get_total(combined_data_stats, 'Break Points Won') / get_total(combined_data_stats, 'Break Point Opportunities') * 100, 2)

In [None]:
# Calculate Return Rating: plain sum of the four return percentages, rounded
# to one decimal.
return_rating = round(first_serve_returns_won + second_serve_returns_won + return_games_won_percentage + break_points_converted_percentage, 1)

# Print Calculations
print(f"First Serve Return Points Won %: {first_serve_returns_won:.2f}%")
print(f"Second Serve Return Points Won %: {second_serve_returns_won:.2f}%")
print(f"Return Games Won %: {return_games_won_percentage:.2f}%")
print(f"Break Points Converted %: {break_points_converted_percentage:.2f}%")
print(f"Return Rating: {return_rating:.1f}")


### Under Pressure Rating

In [None]:
# Under-pressure statistics. Rounding uses the built-in round() so the cell
# works for numpy scalars and plain Python numbers alike.

# Break Points Converted Percentage
break_points_converted_percentage = round(get_total(combined_data_stats, 'Break Points Won') / get_total(combined_data_stats, 'Break Point Opportunities') * 100, 2)

# Break Points Saved Percentage
break_points_saved_percentage = round(get_total(combined_data_stats, 'Break Points Saved') / get_total(combined_data_stats, 'Break Points') * 100, 2)

# Tie Breaks Won Percentage (0.0 when the player has not played a tiebreak,
# rather than dividing by zero)
tiebreaks_played = tiebreaker_wins + tiebreaker_losses
tiebreaks_won_percentage = round(tiebreaker_wins / tiebreaks_played * 100, 2) if tiebreaks_played else 0.0

# Deciding Sets Won Percentage (0.0 when no three-set match was decided)
deciding_sets_played = threeSet_wins + threeSet_losses
deciding_sets_won_percentage = round(threeSet_wins / deciding_sets_played * 100, 2) if deciding_sets_played else 0.0

In [None]:
# Calculate Under Pressure Rating: plain sum of the four percentages computed
# in the previous cell, rounded to one decimal.
under_pressure_rating = round(break_points_converted_percentage + break_points_saved_percentage + tiebreaks_won_percentage + deciding_sets_won_percentage, 1)

# Print Calculations
print(f"Break Points Converted %: {break_points_converted_percentage:.2f}%")
print(f"Break Points Saved %: {break_points_saved_percentage:.2f}%")
print(f"Tiebreaks Won %: {tiebreaks_won_percentage:.2f}%")
print(f"Deciding Sets Won %: {deciding_sets_won_percentage:.2f}%")
print(f"Under Pressure Rating: {under_pressure_rating:.1f}")

### Output CSV

In [None]:
# Split on the first space only: everything after it is the last name, so
# multi-word surnames (e.g. 'Emon Van Loben Sels') no longer break the
# two-value unpacking.
first_name, last_name = player_name.split(' ', 1)

In [None]:
# One summary record for the selected player, built from the values computed
# in the cells above. 'matches_clinched' is not computed in this notebook and
# is filled with the placeholder 'N/A'.
row = dict(
    first_name=first_name,
    last_name=last_name,
    matches_clinched='N/A',
    average_court_time=average_time_on_court,
    serve_rating=serve_rating,
    return_rating=return_rating,
    under_pressure_rating=under_pressure_rating,
    singles_record=overall_record,
    conference_record=conference_record,
    sets_won=sets_won,
    tiebreakers_won=tiebreaker_wins,
    three_set_matches_won=threeSet_wins,
    longest_rally_length=longest_rally_length,
    average_winners=average_winners,
    unfinished_matches=unfinished_matches,
    longest_win_streak=streak,
    bagels_served=bagels_total,
)

In [None]:
# Target file: "<player name>_summary.csv" in the current working directory
filename = f"{player_name}_summary.csv"

# Single-row frame holding the summary record, written without the index column
df_summary = pd.DataFrame([row])
df_summary.to_csv(filename, index=False)

### Rahul Section: Winners and Errors

In [None]:
def get_average_winners(data, stroke_type=None):
    """Return the average winners per match, rounded to one decimal.

    Only points won by 'host' are considered. *stroke_type* "Forehand" or
    "Backhand" restricts the count to that stroke's winners; any other value
    counts both. Matches are the distinct '__source_file__' values; with no
    matches the result is 0.
    """
    # Number of matches
    num_matches = len(data['__source_file__'].value_counts())

    # How often each 'Detail' occurs among the points the host won
    detail_counts = data[data['Point Winner'] == 'host']['Detail'].value_counts()
    forehand_winners = detail_counts.get('Forehand Winner', 0)
    backhand_winners = detail_counts.get('Backhand Winner', 0)

    if stroke_type == "Forehand":
        total_winners = forehand_winners
    elif stroke_type == "Backhand":
        total_winners = backhand_winners
    else:
        # Default: both strokes together
        total_winners = forehand_winners + backhand_winners

    if num_matches > 0:
        return round(total_winners / num_matches, 1)
    return 0


In [None]:
# # Output Average Winners
# average_winners = get_average_winners(combined_data_points)
# print(f"Average Winners per Match: {average_winners}")

# average_forehand_winners = get_average_winners(combined_data_points, stroke_type="Forehand")
# print(f"Average Forehand Winners per Match: {average_forehand_winners}")

# average_backhand_winners = get_average_winners(combined_data_points, stroke_type="Backhand")
# print(f"Average Backhand Winners per Match: {average_backhand_winners}")


In [None]:
def get_average_unforced_errors(data, stroke_type=None):
    """Return the average number of unforced errors per match on points the guest won.

    Only rows whose 'Point Winner' is 'guest' are considered (presumably
    these are the host's errors; the code only checks who won the point).

    Args:
        data: Points DataFrame with '__source_file__', 'Point Winner' and
            'Detail' columns.
        stroke_type: "Forehand" or "Backhand" to count only that stroke's
            unforced errors; any other value (including None) counts both.

    Returns:
        The unforced-error count divided by the number of distinct
        '__source_file__' values, rounded to one decimal place, or 0 when
        there are no matches.
    """
    # Every distinct source file counts as one match
    per_file_counts = data['__source_file__'].value_counts()
    match_count = len(per_file_counts)

    # Tally the 'Detail' labels of the points won by the guest
    guest_point_details = data.loc[data['Point Winner'] == 'guest', 'Detail']
    label_tally = guest_point_details.value_counts()

    # Pick which unforced-error labels contribute to the total
    if stroke_type == "Forehand":
        wanted_labels = ['Forehand Unforced Error']
    elif stroke_type == "Backhand":
        wanted_labels = ['Backhand Unforced Error']
    else:
        wanted_labels = ['Forehand Unforced Error', 'Backhand Unforced Error']

    error_total = sum(label_tally.get(label, 0) for label in wanted_labels)

    if match_count > 0:
        return round(error_total / match_count, 1)
    return 0


In [None]:
# Average unforced errors per match: overall first, then per stroke
average_unforced_errors = get_average_unforced_errors(combined_data_points)
average_forehand_unforced_errors = get_average_unforced_errors(
    combined_data_points, stroke_type="Forehand"
)
average_backhand_unforced_errors = get_average_unforced_errors(
    combined_data_points, stroke_type="Backhand"
)

# Report the three averages
print(f"Average Unforced Errors  per Match: {average_unforced_errors}")
print(f"Average Forehand Unforced Errors per Match: {average_forehand_unforced_errors}")
print(f"Average Backhand Unforced Errors  per Match: {average_backhand_unforced_errors}")


### Forehand and Backhand Rating (WIP)

In [None]:
def stroke_accuracy(df, player_name, stroke_type):
    """Return the fraction of a player's strokes of one type whose result is 'In'.

    Args:
        df: Shots DataFrame with 'Player', 'Stroke' and 'Result' columns.
        player_name: Value matched against the 'Player' column.
        stroke_type: Value matched against the 'Stroke' column.

    Returns:
        Number of matching rows with Result == 'In' divided by the number of
        matching rows, or 0 when the player has no such strokes.
    """
    by_player = df['Player'] == player_name
    of_stroke = df['Stroke'] == stroke_type
    attempts = df.loc[by_player & of_stroke]

    attempt_count = len(attempts)
    if not attempt_count:
        # Nothing attempted: report 0 rather than dividing by zero
        return 0

    landed_count = len(attempts.loc[attempts['Result'] == 'In'])
    return landed_count / attempt_count

In [None]:
# Fraction of the selected player's forehands / backhands recorded as 'In'
forehand_accuracy = stroke_accuracy(combined_data_shots, player_name, 'Forehand')
backhand_accuracy = stroke_accuracy(combined_data_shots, player_name, 'Backhand')

# Show both fractions as percentages with two decimals
print(f"Forehand accuracy: {forehand_accuracy:.2%}")
print(f"Backhand accuracy: {backhand_accuracy:.2%}")

In [None]:
# Per-stroke winner averages are computed here because the only other visible
# assignment of these two names sits in a commented-out cell; without this, a
# fresh "Run All" fails with NameError on the subtraction below.
# NOTE(review): if they are also assigned earlier in the notebook, this
# recomputation is redundant but harmless — confirm and drop one of the two.
average_forehand_winners = get_average_winners(combined_data_points, stroke_type="Forehand")
average_backhand_winners = get_average_winners(combined_data_points, stroke_type="Backhand")

# Net metric = average winners per match minus average unforced errors per match
forehand_net_winner_error_metric = average_forehand_winners - average_forehand_unforced_errors
backhand_net_winner_error_metric = average_backhand_winners - average_backhand_unforced_errors

print(f"Forehand Net Winner-Error Rate: {forehand_net_winner_error_metric}")
print(f"Backhand Net Winner-Error Rate: {backhand_net_winner_error_metric}")

In [None]:
def net_before_winner_error_metric(df, player_name, stroke_type):
    """Return a per-match net count based on what follows the player's 'In' strokes.

    For every row where `player_name` hit `stroke_type` with Result == 'In':
      * "leading to error":  the next row's Result is 'Out' or 'Net';
      * "leading to winner": the next row's Result is 'In' and the row after
        that has Result 'Out' or 'Net'.

    Args:
        df: Shots DataFrame with '__source_file__', 'Player', 'Stroke' and
            'Result' columns, in playing order.
        player_name: Value matched against the 'Player' column.
        stroke_type: Value matched against the 'Stroke' column.

    Returns:
        (leading-to-error count / matches) - (leading-to-winner count / matches),
        where matches is the number of distinct '__source_file__' values.
        Returns 0 when there are no matches, mirroring the other per-match
        averages in this notebook instead of raising ZeroDivisionError.
    """
    num_matches = df['__source_file__'].nunique()
    if num_matches == 0:
        return 0

    # Look ahead one and two rows. The shift is purely positional over the
    # whole frame, so it is not reset at point or match boundaries.
    # NOTE(review): the rows following the last shot of a point belong to the
    # next point — confirm whether that spill-over is acceptable.
    next_result = df['Result'].shift(-1)
    next2_result = df['Result'].shift(-2)
    ending_results = ['Out', 'Net']

    player_stroke_in = (
        (df['Player'] == player_name)
        & (df['Stroke'] == stroke_type)
        & (df['Result'] == 'In')
    )
    leading_to_error = player_stroke_in & next_result.isin(ending_results)
    leading_to_winner = (
        player_stroke_in
        & (next_result == 'In')
        & next2_result.isin(ending_results)
    )

    avg_leading_to_error = int(leading_to_error.sum()) / num_matches
    avg_leading_to_winner = int(leading_to_winner.sum()) / num_matches

    return avg_leading_to_error - avg_leading_to_winner


In [None]:
# Per-match net "error minus winner" follow-up counts for each stroke
forehand_before_net_winner_error_rate = net_before_winner_error_metric(
    combined_data_shots, player_name, 'Forehand'
)
backhand_before_net_winner_error_rate = net_before_winner_error_metric(
    combined_data_shots, player_name, 'Backhand'
)

# Report both values
print(f"Forehand Before Net Winner-Error Rate: {forehand_before_net_winner_error_rate}")
print(f"Backhand Before Net Winner-Error Rate: {backhand_before_net_winner_error_rate}")

In [None]:
def serve_plus_one_point_won(df, player_name, stroke_type):
    """Return the share of a player's serve+1 strokes that are followed by an opponent error.

    For each row where `player_name` hit `stroke_type` with
    Type == 'serve_plus_one', the first row at or after it whose Result is
    'Out' or 'Net' is located; the attempt counts as won when that row's
    Player is not `player_name`. Attempts with no such row afterwards count
    as not won but still count as attempts.

    Args:
        df: Shots DataFrame with 'Player', 'Type', 'Stroke' and 'Result'
            columns, in playing order.
        player_name: Value matched against the 'Player' column.
        stroke_type: Value matched against the 'Stroke' column.

    Returns:
        points won / serve+1 attempts as a float, or 0 when the player has no
        serve+1 strokes of that type.
    """
    is_attempt = (
        (df['Player'] == player_name)
        & (df['Type'] == 'serve_plus_one')
        & (df['Stroke'] == stroke_type)
    )
    total_attempted = int(is_attempt.sum())
    if total_attempted == 0:
        return 0

    # One backward fill gives every row the position of the next 'Out'/'Net'
    # row at or after it, replacing a rescan of the rest of the frame for
    # every attempt. The search is positional over the whole frame.
    # NOTE(review): it is therefore not limited to the attempt's own point —
    # confirm that is intended.
    is_error = df['Result'].isin(['Out', 'Net'])
    row_position = pd.Series(range(len(df)), index=df.index)
    next_error_position = row_position.where(is_error).bfill()

    # Attempts with no error after them are NaN here and are dropped (not won)
    resolved = next_error_position[is_attempt].dropna().astype(int)
    error_players = df['Player'].iloc[resolved.to_numpy()]

    # The attempt is won when somebody other than the player made that error
    points_won = int((error_players != player_name).sum())

    return points_won / total_attempted


In [None]:
# Convert the serve +1 win fractions to percentages rounded to two decimals
serve_plus_one_forehand_win_pct = round(
    serve_plus_one_point_won(combined_data_shots, player_name, 'Forehand') * 100,
    2,
)
serve_plus_one_backhand_win_pct = round(
    serve_plus_one_point_won(combined_data_shots, player_name, 'Backhand') * 100,
    2,
)

# Report both percentages
print(f"Serve +1 Forehand Win %: {serve_plus_one_forehand_win_pct}%")
print(f"Serve +1 Backhand Win %: {serve_plus_one_backhand_win_pct}%")

In [None]:
# Bare expression: echoes the serve +1 forehand win percentage as the cell output
serve_plus_one_forehand_win_pct

In [None]:
# Forehand rating (WIP): accuracy as a percentage plus the two net
# winner/error metrics plus the serve +1 win percentage, rounded to 1 decimal
forehand_rating = round(
    forehand_accuracy * 100
    + forehand_net_winner_error_metric
    + forehand_before_net_winner_error_rate
    + serve_plus_one_forehand_win_pct,
    1,
)

# Show each component followed by the resulting rating
print(f"Forehand Accuracy %: {forehand_accuracy:.2%}")
print(f"Forehand Net Winner Error Metric: {forehand_net_winner_error_metric}")
print(f"Forehand Before Net Winner Error Rate: {forehand_before_net_winner_error_rate}")
print(f"Serve +1 Forehand Win %: {serve_plus_one_forehand_win_pct}%")
print(f"Forehand Rating: {forehand_rating:.1f}")