In [None]:
import pandas as pd
import os
import re

In [None]:
# Relative path of the results CSV that the next cell loads with pandas.
file_path = '../../data/mens/mens_results.csv'

In [None]:
# Load the results table. The cells below read its 'Player1', 'Player2'
# and 'Score' columns.
matches = pd.read_csv(file_path)
matches

In [None]:
# Set Player Name!
# `player_name` is read as a module-level global by flip_score and
# determine_result, and is passed to index_csv in the cells below.
player_name = 'Rudy Quan'
# NOTE(review): `school` is not referenced in any visible cell — confirm it is still needed.
school = "University of California, Los Angeles"

#### Helper Function to Flip Scores

In [None]:
# Helper function to flip scores like "7-6(5), 5-7, 6-3"
# Also does not include commas in the final result

def flip_score(score_str, player1, target_player=None):
    """Return a score string written from the tracked player's point of view.

    The result never contains commas; sets are separated by single spaces.

    Parameters
    ----------
    score_str : str
        Score as recorded for Player1, e.g. "7-6(5), 5-7, 6-3". Sets may be
        separated by commas, whitespace, or both.
    player1 : str
        Name in the row's Player1 slot (the side the score is written for).
    target_player : str, optional
        Player whose perspective is wanted. Defaults to the notebook-level
        ``player_name`` so existing two-argument calls keep working.

    Returns
    -------
    str
        The normalised score, with every "a-b" pair swapped to "b-a" when
        ``player1`` is not the tracked player. Tiebreak suffixes such as
        "(5)" and free text such as "ret." are kept. Non-string input
        (e.g. NaN for a missing score) is returned unchanged.
    """
    # Missing scores have nothing to flip; hand them back untouched.
    if not isinstance(score_str, str):
        return score_str

    if target_player is None:
        target_player = player_name

    # Normalise separators up front so both paths emit the same format and
    # "6-3,6-4" does not collapse into "6-36-4".
    normalized = ' '.join(score_str.replace(',', ' ').split())

    # Exact comparison: a substring test would treat "Rudy" (or "") as the
    # tracked player and skip the flip.
    if player1 == target_player:
        return normalized

    # Swap every games pair in the string. Substituting over the whole string
    # (rather than matching only a prefix of each chunk) keeps later sets and
    # trailing annotations intact.
    return re.sub(r'(\d+)-(\d+)', r'\2-\1', normalized)

In [None]:
# Make Win and Loss --> "W" and "L" AND take into account UNFINISHED matches "UF"
# eg. 6-3 6-3 W  | 3-6 4-6 L  | 2-6 6-5 UF

def determine_result(row, target_player=None):
    """Classify one match row as 'W', 'L' or 'UF' for the tracked player.

    Parameters
    ----------
    row : mapping
        Must provide 'Score', 'Player1' and 'Player2'. The score is written
        from Player1's side, sets separated by commas, e.g.
        "7-6(5), 5-7, 1-0(8)".
    target_player : str, optional
        Player the result is reported for. Defaults to the notebook-level
        ``player_name`` so ``df.apply(determine_result, axis=1)`` keeps
        working unchanged.

    Returns
    -------
    str
        'W' or 'L' when the match is complete and the tracked player is
        Player1 or Player2. 'UF' when the score is missing, contains "UF",
        has an unparseable or incomplete set, has no side on exactly 2 or 3
        sets, is level on sets, or the tracked player is in neither slot.
    """
    score = row['Score']
    if pd.isna(score) or 'UF' in str(score).upper():
        return 'UF'

    # str() keeps this consistent with the check above for non-string scores.
    sets = [part.strip() for part in str(score).split(',')]
    p1_sets_won = 0
    p2_sets_won = 0

    for i, set_score in enumerate(sets):
        # Special case: a 10-point match tiebreak recorded as "1-0(x)" or
        # "0-1(x)" is only recognised in the last set.
        if i == len(sets) - 1:
            match_tb = re.match(r'(1-0|0-1)\((\d+)\)', set_score)
            if match_tb:
                if match_tb.group(1) == '1-0':
                    p1_sets_won += 1
                else:
                    p2_sets_won += 1
                continue

        # Regular set score; an optional "(n)" tiebreak suffix is tolerated.
        match = re.match(r'(\d+)-(\d+)(\(\d+\))?', set_score)
        if not match:
            # Unrecognised format: treat the match as unfinished.
            return 'UF'

        p1_games, p2_games = int(match.group(1)), int(match.group(2))
        high, low = max(p1_games, p2_games), min(p1_games, p2_games)

        # Complete set: at least 6 games with a margin of two...
        won_by_margin = high >= 6 and high - low >= 2
        # ...or a tiebreak set where one side is on 7 and both have >= 6.
        # A level score (e.g. 7-7) has no winner, so it is not complete.
        won_by_tiebreak = (
            (p1_games == 7 or p2_games == 7)
            and low >= 6
            and p1_games != p2_games
        )
        if not (won_by_margin or won_by_tiebreak):
            return 'UF'

        if p1_games > p2_games:
            p1_sets_won += 1
        else:
            p2_sets_won += 1

    # The match is decided when one side has exactly 2 or 3 sets and the
    # set count is not level (a level count means play stopped early).
    someone_reached_target = p1_sets_won in (2, 3) or p2_sets_won in (2, 3)
    if not someone_reached_target or p1_sets_won == p2_sets_won:
        return 'UF'

    if target_player is None:
        target_player = player_name

    if row['Player1'] == target_player:
        return 'W' if p1_sets_won > p2_sets_won else 'L'
    if row['Player2'] == target_player:
        return 'W' if p2_sets_won > p1_sets_won else 'L'
    # Tracked player is in neither slot.
    return 'UF'


In [None]:
# Lift pandas' row-display limit so DataFrames shown in later cells are not truncated.
pd.set_option('display.max_rows', None)

In [None]:
def index_csv(data, player_name):
    """Return the matches in ``data`` that involve ``player_name``, with results.

    Parameters
    ----------
    data : pandas.DataFrame
        Match table with 'Player1', 'Player2' and 'Score' columns.
    player_name : str
        Player to select; a row is kept when this name is in either slot.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows with an added 'ucla_result' column
        ('W', 'L' or 'UF'). ``data`` itself is not modified.
    """
    # Build both halves of the mask from `data`. Using the notebook-level
    # `matches` frame for one side would select the wrong rows (or fail to
    # align) whenever a different frame is passed in.
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    df = data[involves_player].copy()

    # Result column.
    # NOTE: determine_result is called with the row only, so it decides W/L
    # for the module-level `player_name`, not for this function's argument.
    if df.empty:
        # apply(axis=1) on an empty frame returns a DataFrame, which cannot be
        # assigned to a single column; add an empty column instead.
        df['ucla_result'] = pd.Series(dtype=object)
    else:
        df['ucla_result'] = df.apply(determine_result, axis=1)

    return df

In [None]:
# Display the configured player's matches together with the computed 'ucla_result' column.
index_csv(matches, player_name)

In [None]:
# Build the player's match table, then isolate the rows tagged "UF".
edited = index_csv(matches, player_name)

unfinished = edited.loc[edited["ucla_result"].eq("UF")]

# Cell output: how many of the player's matches are unfinished.
len(unfinished)