## Match Index Notebook
This notebook builds the per-player match index (CSV files) used by the Index Page of the Season Reports.

### Load Packages

In [1]:
import pandas as pd
import os
import re

### Read Data

In [2]:
# Load the men's results file; the [:253] slice keeps only the first 253 rows.
# NOTE(review): the reason for the 253-row cutoff is not documented here — confirm it is still correct.
matches = pd.read_csv('../../data/mens/mens_results.csv')[:253]

### Set Player to Query

In [3]:
# Set Player Name!
# `player_name` is compared against the Player1 / Player2 columns when filtering.
player_name = 'Rudy Quan'
# `school` is passed to index_csv(), which searches for it inside 'Event Name'
# to work out which side of "A vs B" is the opposing school.
school = 'University of California, Los Angeles'

### Filter dataset

In [4]:
def filter(data, player_name):
    """Keep only the given player's matches that belong to school events.

    NOTE: this name shadows Python's built-in ``filter`` for the rest of the
    notebook; it is kept so existing cells keep working.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Player1', 'Player2' and 'Event Name'.
    player_name : str
        Name compared (exact equality) against 'Player1' and 'Player2'.

    Returns
    -------
    pandas.DataFrame
        Rows where the player is on either side and 'Event Name' starts with
        one of the school-event prefixes. The original index is preserved.
    """
    # Filter for player
    is_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)

    # Filter for school events only.
    # na=False: a missing event name would otherwise put NaN into the mask and
    # make the boolean indexing below raise; such rows are simply not school events.
    school_prefixes = ('Dual Match', '2024 ITA', '2024-25 NCAA Division')
    is_school_event = data['Event Name'].str.startswith(school_prefixes, na=False)

    return data[is_player & is_school_event]

# Rebind `matches` to the filtered subset: every later cell sees only this
# player's school-event rows, not the full results table.
matches = filter(matches, player_name)
matches

Unnamed: 0,Event Name,Date,Player1,Player2,Player1 UTR,Player2 UTR,Score
5,"Dual Match: University of California, Los Ange...",2025-05-15,Timo Legout,Rudy Quan,14.23,13.59,"7-5, 6-2"
6,Dual Match: University of Southern California ...,2025-05-08,Rudy Quan,Makk Peter,13.59,13.79,"7-6(4), 3-6, 3-3"
14,"Dual Match: University of California, Los Ange...",2025-05-02,Carl Overbeck,Rudy Quan,13.48,13.59,"5-7, 7-5, 6-3"
18,"Dual Match: University of California, Santa Ba...",2025-05-01,Rudy Quan,Gianluca Brunkow,13.59,13.0,"5-7, 6-2, 0-1"
27,"Dual Match: University of California, Los Ange...",2025-04-26,Aidan Kim,Rudy Quan,13.74,13.59,"7-6(3), 6-2"
33,Dual Match: Michigan State University vs Unive...,2025-04-25,Rudy Quan,Aristotelis Thanos,13.59,13.67,"4-6, 3-1"
36,Dual Match: University of Michigan vs Universi...,2025-04-24,Rudy Quan,William Cooksey,13.59,12.0,"6-3, 5-6"
46,"Dual Match: University of California, Los Ange...",2025-04-19,Rudy Quan,Calvin MUELLER,13.59,13.0,"3-6, 6-3, 6-1"
52,"Dual Match: University of California, Los Ange...",2025-04-17,Michael Minasyan,Rudy Quan,12.0,13.59,"2-6, 6-3"
55,Dual Match: Michigan State University vs Unive...,2025-04-12,Rudy Quan,Ozan Baris,13.59,13.66,"6-1, 6-2"


### index_csv() Function

#### Helper Function to Flip Scores

In [5]:
# Helper function to flip scores like "7-6(5), 5-7, 6-3"
# Also does not include commas in the final result

def flip_score(score_str, player1, player=None):
    """Return the score from the focal player's perspective, space-separated.

    Parameters
    ----------
    score_str : str
        Comma-separated set scores written from Player1's perspective,
        e.g. "7-6(5), 5-7, 6-3".
    player1 : str
        Name in the row's 'Player1' column.
    player : str, optional
        The focal player. Defaults to the notebook-level ``player_name``.

    Returns
    -------
    str
        The sets joined by single spaces (no commas). If ``player1`` is the
        focal player the games are left as they are; otherwise the games of
        every set are swapped ("7-5" -> "5-7"), keeping any tiebreak suffix.
        Non-string input (e.g. a missing score) is returned unchanged.
    """
    if player is None:
        player = player_name  # notebook-level default

    # Missing scores come through as NaN/None; there is nothing to flip.
    if not isinstance(score_str, str):
        return score_str

    # Split on the comma alone and strip, so "6-3,6-2" and "6-3, 6-2" behave the same.
    set_scores = [part.strip() for part in score_str.split(',') if part.strip()]

    # Exact comparison: a substring test would treat "Rudy" (or "") as the focal player.
    if player1 == player:
        return ' '.join(set_scores)

    # Swap only the leading "games-games" pair; whatever follows (tiebreak points
    # such as "(5)") is kept, and sets that don't start with that pattern are
    # passed through untouched.
    return ' '.join(re.sub(r'^(\d+)-(\d+)', r'\2-\1', s) for s in set_scores)

In [6]:
# Make Win and Loss --> "W" and "L" AND take into account UNFINISHED matches "UF"
# eg. 6-3 6-3 W  | 3-6 4-6 L  | 2-6 6-5 UF

def determine_result(row, player=None):
    """Classify a match as 'W', 'L' or 'UF' (unfinished) for the focal player.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'Score' (comma-separated sets from Player1's
        perspective), 'Player1' and 'Player2'.
    player : str, optional
        The focal player. Defaults to the notebook-level ``player_name``, so
        ``df.apply(determine_result, axis=1)`` keeps working unchanged.

    Returns
    -------
    str
        'W' or 'L' when every set is complete and one side has won 2 (or 3)
        sets; 'UF' when the score is missing, contains "UF", has an
        incomplete or unrecognized set, has no decided winner, or the focal
        player is on neither side of the row.
    """
    if player is None:
        player = player_name  # notebook-level default

    score = row['Score']
    if pd.isna(score) or 'UF' in str(score).upper():
        return 'UF'

    sets = [part.strip() for part in score.split(',')]
    last_index = len(sets) - 1
    p1_sets_won = 0
    p2_sets_won = 0

    for i, set_score in enumerate(sets):
        # Special case: a 10-point match tiebreak written as "1-0(n)" / "0-1(n)".
        # Only honoured in the last position; without the "(n)" part the score
        # falls through to the regular check below and ends up as 'UF'.
        if i == last_index:
            match_tb = re.match(r'(1-0|0-1)\((\d+)\)', set_score)
            if match_tb:
                if match_tb.group(1) == '1-0':
                    p1_sets_won += 1
                else:
                    p2_sets_won += 1
                continue

        # Regular set score, optionally followed by tiebreak points, e.g. "7-6(4)"
        match = re.match(r'(\d+)-(\d+)(\(\d+\))?', set_score)
        if not match:
            # Unrecognized format: treat the match as unfinished
            return 'UF'

        p1_games, p2_games = int(match.group(1)), int(match.group(2))

        # Complete set: someone reached 6 with a margin of at least 2 ...
        won_by_two = (p1_games >= 6 or p2_games >= 6) and abs(p1_games - p2_games) >= 2
        # ... or a tiebreak set where one side has 7 and both have at least 6 (7-6)
        won_in_tiebreak = (p1_games == 7 or p2_games == 7) and (p1_games >= 6 and p2_games >= 6)
        if not (won_by_two or won_in_tiebreak):
            return 'UF'

        if p1_games > p2_games:
            p1_sets_won += 1
        else:
            p2_sets_won += 1

    # Every counted set adds exactly one set win, so "enough valid sets" is
    # implied by the set-win totals: the match is decided once either side
    # holds 2 (best of three) or 3 (best of five) sets.
    decided = p1_sets_won in (2, 3) or p2_sets_won in (2, 3)
    if decided:
        if row['Player1'] == player:
            return 'W' if p1_sets_won > p2_sets_won else 'L'
        elif row['Player2'] == player:
            return 'W' if p2_sets_won > p1_sets_won else 'L'
    return 'UF'


In [7]:
def index_csv(data, player_name, school):
    """Build the index-page table for one player.

    Parameters
    ----------
    data : pandas.DataFrame
        Match rows with the columns 'Event Name', 'Date', 'Player1',
        'Player2' and 'Score'.
    player_name : str
        Player whose matches are listed (exact match on Player1/Player2).
    school : str
        The player's school; used to decide which side of
        "Dual Match: A vs B" is the opposing school.

    Returns
    -------
    pandas.DataFrame
        Columns 'opponent_school', 'opponent', 'score', 'result', 'date'
        (date formatted mm/dd/yy), indexed like the matching rows of ``data``.

    Notes
    -----
    ``flip_score`` and ``determine_result`` are called with their default
    focal player, i.e. the notebook-level ``player_name``. Keep that global in
    sync with the ``player_name`` argument passed here.
    """
    # Filter for Player. Both halves of the mask come from `data`; mixing in the
    # notebook-level `matches` frame would silently depend on hidden state.
    is_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    df = data[is_player].copy()

    # Extract opponent school from 'Event Name'
    def get_opponent(event):
        if not isinstance(event, str):
            return None
        teams = re.findall(r'vs\s(.+)', event)
        if school in event and teams:
            first_team = event.split('vs')[0]
            if school in first_team:
                # Our school is listed first, so the opponent is whatever follows "vs"
                return teams[0].strip()
            # Otherwise the opponent is the first team, minus the event prefix
            return first_team.replace('Dual Match:', '').strip()
        # Events without "vs" or without our school have no identifiable opponent school
        return None

    # Create opponent_school Column
    df['opponent_school'] = df['Event Name'].apply(get_opponent)

    # Create opponent Column: every remaining row has the player on one side,
    # so take Player2 where the player is Player1 and Player1 otherwise.
    # Exact equality is used; a substring test would also match partial names.
    df['opponent'] = df['Player2'].where(df['Player1'] == player_name, df['Player1'])

    # Capitalize only the first letter of each name part
    # eg. Aristotelis THANOS --> Aristotelis Thanos
    df['opponent'] = df['opponent'].str.title()

    # Score from the player's perspective (flipped when the player is Player2).
    # Building a list keeps the column assignment well-defined even when `df` has no rows.
    df['Score (UCLA Perspective)'] = [
        flip_score(score, player1)
        for score, player1 in zip(df['Score'], df['Player1'])
    ]

    # Result column: 'W', 'L' or 'UF'
    df['ucla_result'] = [determine_result(row) for _, row in df.iterrows()]

    # Parse 'Date' and reformat it as mm/dd/yy
    # eg. 2025-04-26 --> 04/26/25
    df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%m/%d/%y')

    # Final Columns + rename columns
    final_columns = {
        'opponent_school': 'opponent_school',
        'opponent': 'opponent',
        'Score (UCLA Perspective)': 'score',
        'ucla_result': 'result',
        'Date': 'date',
    }
    final_df = df[list(final_columns)].rename(columns=final_columns)

    return final_df

### Output Function Result

In [8]:
# Build the index table for the configured player and display it.
matches_cleaned = index_csv(matches, player_name, school)
matches_cleaned

Unnamed: 0,opponent_school,opponent,score,result,date
5,University of Texas at Austin,Timo Legout,5-7 2-6,L,05/15/25
6,University of Southern California,Makk Peter,7-6(4) 3-6 3-3,UF,05/08/25
14,"University of California, Berkeley",Carl Overbeck,7-5 5-7 3-6,L,05/02/25
18,"University of California, Santa Barbara",Gianluca Brunkow,5-7 6-2 0-1,UF,05/01/25
27,The Ohio State University,Aidan Kim,6-7(3) 2-6,L,04/26/25
33,Michigan State University,Aristotelis Thanos,4-6 3-1,UF,04/25/25
36,University of Michigan,William Cooksey,6-3 5-6,UF,04/24/25
46,University of Nebraska-Lincoln,Calvin Mueller,3-6 6-3 6-1,W,04/19/25
52,University of Wisconsin-Madison,Michael Minasyan,6-2 3-6,UF,04/17/25
55,Michigan State University,Ozan Baris,6-1 6-2,W,04/12/25


In [9]:
# Write the cleaned matches to CSV.
# Paths are relative, so the files land in the current working directory
# (presumably the index_page folder — confirm where the notebook is run from).

# File names use the player's name with the spaces removed
player_name_nospace = player_name.replace(' ', '')

# One CSV containing every match
matches_cleaned.to_csv(f'{player_name_nospace}_matches.csv', index=False)

# Numbered CSVs of at most `chunk_size` rows each
chunk_size = 15
chunks = []
for start in range(0, len(matches_cleaned), chunk_size):
    chunks.append(matches_cleaned.iloc[start:start + chunk_size])

# File numbering starts at 1: <Player>_matches1.csv, <Player>_matches2.csv, ...
for idx, chunk in enumerate(chunks):
    chunk_file = f'{player_name_nospace}_matches{idx+1}.csv'
    chunk.to_csv(chunk_file, index=False)
    print(chunk_file + ' Saved!')

RudyQuan_matches1.csv Saved!
RudyQuan_matches2.csv Saved!
RudyQuan_matches3.csv Saved!
