## Match Index Notebook

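This notebook develops the functions that build a player's match index (opponent school, opponent, score, result, and date) from the match results CSV.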

### Load Packages

In [49]:
import pandas as pd
import os
import re

### Set Working Directory

In [50]:
# Absolute path of the directory two levels above the current working
# directory (each ".." steps up one directory).
parent_dir = os.path.abspath('../..') # ".." means go back 1 directory!
# Location of the men's results CSV underneath that directory.
file_path = os.path.join(parent_dir, 'data/mens/mens_results.csv')

### Read Data

In [51]:
# Load the full match results table and preview the first rows.
matches = pd.read_csv(file_path)
matches.head()

# TODO: only read in rows from this season (start day in September 2024 still to be decided)

Unnamed: 0,Event Name,Date,Player1,Player1 UTR,Player2,Player2 UTR,Score
0,"Dual Match: University of California, Los Ange...",2025-04-26,Emon Van Loben Sels,13.59,Alexander Bernard,13.33,"7-6(5), 5-7, 6-3"
1,"Dual Match: University of California, Los Ange...",2025-04-26,Alexander Hoogmartens,13.0,Jack Anthrop,13.66,"7-5, 0-6, 6-4"
2,"Dual Match: University of California, Los Ange...",2025-04-26,Aidan Kim,13.75,Rudy Quan,13.67,"7-6(3), 6-2"
3,"Dual Match: University of California, Los Ange...",2025-04-26,Preston Stearns,13.08,Aadarsh Tripathi,12.94,"6-3, 2-6, 6-2"
4,"Dual Match: University of California, Los Ange...",2025-04-26,Kaylan Bigun,13.17,William Jansen,13.24,"6-2, 7-5"


### Set Player to Query

In [52]:
# Player to query and the school they play for. `school` must be written the
# way it appears inside the 'Event Name' column, because it is matched as a
# substring of the event name.
player_name = 'Rudy Quan'
school = "University of California, Los Angeles"
# TODO: add a date setting here (value not decided yet)

### index_csv() Function

#### Helper Function to Flip Scores

In [53]:
# Helper function to flip scores like "7-6(5), 5-7, 6-3"
def flip_score(score_str):
    """Rewrite a match score from the other player's point of view.

    Each comma-separated set has its two game counts swapped, e.g.
    "7-6(5), 5-7, 6-3" -> "6-7(5), 7-5, 3-6". Anything that follows the game
    counts within a set (tiebreak points in parentheses, a trailing note) is
    kept unchanged, and sets that do not start with "<digits>-<digits>" are
    passed through as they are.

    Parameters
    ----------
    score_str : str or any
        Score string. Non-string values (such as NaN for a missing score)
        are returned unchanged.

    Returns
    -------
    str or any
        The flipped score with sets joined by ", ".
    """
    if not isinstance(score_str, str):
        return score_str
    # Split on the bare comma and strip each piece so that "6-3,6-4" and
    # "6-3, 6-4" are both handled. The anchored substitution swaps only the
    # leading game counts, so text after them is never dropped.
    return ', '.join(
        re.sub(r'^(\d+)-(\d+)', r'\2-\1', set_score.strip())
        for set_score in score_str.split(',')
    )


# UPDATE: do not include the comma between sets in the final score (e.g. "6-3 6-3" instead of "6-3, 6-3")

In [54]:
def index_csv(data, player_name, school, season_start='2024-09-01'):
    """Build the match index table for one player.

    Parameters
    ----------
    data : pandas.DataFrame
        Match results containing at least the columns 'Event Name', 'Date',
        'Player1', 'Player2' and 'Score'.
    player_name : str
        Name of the player to index, spelled exactly as in the
        'Player1' / 'Player2' columns.
    school : str
        Name of the player's school as it is written inside 'Event Name'.
    season_start : str or datetime-like, default '2024-09-01'
        Matches dated before this day are dropped.

    Returns
    -------
    pandas.DataFrame
        One row per match (original index kept) with the columns
        'opponent_school', 'opponent', 'score' (from the queried player's
        perspective), 'ucla_result' ('W', 'L' or 'UF' for unfinished) and
        'date' (formatted 'mm/dd/YYYY').
    """

    # Filter for Player. Use the frame that was passed in (not a notebook
    # global) and exact name equality.
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    df = data[involves_player].copy()

    # Keep only matches on or after the season start. The parsed dates are
    # held separately so the filtered frame is an explicit copy before any
    # column is written to it.
    dates = pd.to_datetime(df['Date'])
    in_season = dates >= pd.Timestamp(season_start)
    df = df.loc[in_season].copy()
    # NOTE(review): the notebook's guidelines ask for mm/dd/yy; the four-digit
    # year is kept because it is what this function has been producing.
    # Switch to '%m/%d/%y' once that is confirmed.
    df['Date'] = dates.loc[in_season].dt.strftime('%m/%d/%Y')

    # Extract opponent name from 'Event Name'
    def get_opponent(event):
        if not isinstance(event, str):
            return None
        teams = re.findall(r'vs\s(.+)', event)
        if school in event and teams:
            first_team = event.split('vs')[0]
            if school in first_team:
                # Our school is listed first, so the opponent follows "vs".
                return teams[0].strip()
            return first_team.replace('Dual Match:', '').strip()
        return None

    # Decide 'W' / 'L' / 'UF' by counting the sets each side won.
    def determine_result(score, player_is_p1):
        if pd.isna(score) or 'UF' in str(score).upper():
            return 'UF'

        p1_sets_won = 0
        p2_sets_won = 0
        valid_sets = 0

        for set_score in str(score).split(','):
            # Drop a tiebreak suffix such as "(5)" before reading the games.
            games = set_score.split('(')[0].strip()
            try:
                p1_games, p2_games = map(int, games.split('-'))
            except ValueError:
                # Not a "<games>-<games>" set; ignore it.
                continue
            valid_sets += 1
            if p1_games > p2_games:
                p1_sets_won += 1
            elif p2_games > p1_games:
                p2_sets_won += 1

        # Fewer than two readable sets, or sets level: the match was not finished.
        if valid_sets < 2 or p1_sets_won == p2_sets_won:
            return 'UF'

        p1_won = p1_sets_won > p2_sets_won
        return 'W' if p1_won == player_is_p1 else 'L'

    player_is_p1 = df['Player1'] == player_name

    # Create opponent_school Column
    df['opponent_school'] = df['Event Name'].apply(get_opponent)

    # Create opponent Column: whoever is on the other side of the net, with
    # only the first letter of each name part capitalized
    # (e.g. "Aristotelis THANOS" -> "Aristotelis Thanos").
    df['opponent'] = df['Player2'].where(player_is_p1, df['Player1']).str.title()

    # Result column. Built with plain iteration rather than
    # DataFrame.apply(axis=1) so an empty selection still yields a column.
    df['ucla_result'] = [
        determine_result(score, is_p1)
        for score, is_p1 in zip(df['Score'], player_is_p1)
    ]

    # Create column Score (UCLA Perspective): flip the score when the queried
    # player is listed second.
    df['Score (UCLA Perspective)'] = [
        flip_score(score) if second_player == player_name else score
        for score, second_player in zip(df['Score'], df['Player2'])
    ]

    final_df = df[['opponent_school',
                   'opponent',
                   'Score (UCLA Perspective)',
                   'ucla_result',
                   'Date']].rename(columns={'Score (UCLA Perspective)': 'score',
                                            'Date': 'date'
                                            })

    return final_df

### Output Function Result

In [55]:
# Build and display the index table for the configured player and school.
index_csv(matches, player_name, school)

# TODO: write this result to a CSV in the index_page folder

Unnamed: 0,opponent_school,opponent,score,ucla_result,date
2,The Ohio State University,Aidan Kim,"6-7(3), 2-6",L,04/26/2025
10,Michigan State University,Aristotelis Thanos,"4-6, 3-1",UF,04/25/2025
17,University of Michigan,William Cooksey,"6-3, 5-6",UF,04/24/2025
23,University of Nebraska-Lincoln,Calvin Mueller,"3-6, 6-3, 6-1",W,04/19/2025
25,University of Wisconsin-Madison,Michael Minasyan,"6-2, 3-6",UF,04/17/2025
32,Michigan State University,Ozan Baris,"6-1, 6-2",W,04/12/2025
36,University of Michigan,William Cooksey,"6-3, 6-3",W,04/10/2025
44,Pennsylvania State University,Charl Morgan,"6-3, 6-3",W,04/05/2025
48,Ohio State University,Alexander Bernard,"6-7(5), 6-0",UF,04/03/2025
52,The Ohio State University,Alexander Bernard,"6-7(5), 6-0, 1-3",L,04/03/2025


### Old Niranjan + Emma Work

In [56]:
# Our school's name, written the way it appears inside 'Event Name'
my_school = "University of California, Los Angeles"

# Pull the opposing school out of an 'Event Name' value
def get_opponent(event):
    """Return the school facing `my_school` in an event name, else None.

    None is returned for non-string input, for events that do not mention
    `my_school`, and for events without a "vs <something>" part.
    """
    if not isinstance(event, str):
        return None
    after_vs = re.findall(r'vs\s(.+)', event)
    if not after_vs or my_school not in event:
        return None
    before_vs = event.split('vs')[0]
    if my_school in before_vs:
        # We are listed first, so the opponent is whatever follows "vs".
        return after_vs[0].strip()
    # We are listed second: the opponent is the first team, minus the prefix.
    return before_vs.replace('Dual Match:', '').strip()

# Opposing school for every match, parsed from the event name.
matches['opponent_school'] = matches['Event Name'].apply(get_opponent)

# Set of UCLA players. The membership checks below are case-sensitive, so each
# name has to be spelled the way the results CSV spells it; the data lists
# "Emon Van Loben Sels" with a capital "V".
ucla_players = {
    "Andrei Crabel", "Alexander Hoogmartens", "Spencer Johnson", "Rudy Quan",
    "Giacomo Revelli", "Aadarsh Tripathi", "Gianluca Ballotta", "Kaylan Bigun",
    "Cassius Chinlund", "Emon Van Loben Sels", "Leo von Bismarck"
}

# Opponent = the player on the other side from the UCLA player
# (None when neither player is on the roster).
matches['opponent'] = matches.apply(
    lambda row: row['Player2'] if row['Player1'] in ucla_players else (
        row['Player1'] if row['Player2'] in ucla_players else None
    ),
    axis=1
)

# NOTE(review): this labels the match purely by which column the UCLA player
# is in, i.e. it assumes Player1 is always the winner - confirm. Matches with
# an unfinished score are labelled 'Win'/'Loss' as well.
matches['ucla_result'] = matches.apply(
    lambda row: 'Win' if row['Player1'] in ucla_players else (
        'Loss' if row['Player2'] in ucla_players else None
    ),
    axis=1
)

# Helper function to flip scores like "7-6(5), 5-7, 6-3"
# (this cell re-defines the helper of the same name from earlier in the notebook)
def flip_score(score_str):
    """Rewrite a match score from the other player's point of view.

    Each comma-separated set has its two game counts swapped, e.g.
    "7-6(5), 5-7, 6-3" -> "6-7(5), 7-5, 3-6". Anything that follows the game
    counts within a set (tiebreak points in parentheses, a trailing note) is
    kept unchanged, and sets that do not start with "<digits>-<digits>" are
    passed through as they are.

    Parameters
    ----------
    score_str : str or any
        Score string. Non-string values (such as NaN for a missing score)
        are returned unchanged.

    Returns
    -------
    str or any
        The flipped score with sets joined by ", ".
    """
    if not isinstance(score_str, str):
        return score_str
    # Split on the bare comma and strip each piece so that "6-3,6-4" and
    # "6-3, 6-4" are both handled. The anchored substitution swaps only the
    # leading game counts, so text after them is never dropped.
    return ', '.join(
        re.sub(r'^(\d+)-(\d+)', r'\2-\1', set_score.strip())
        for set_score in score_str.split(',')
    )

# Flip the score only if Player 2 is a UCLA player
matches['Score (UCLA Perspective)'] = matches.apply(
    lambda row: flip_score(row['Score']) if row['Player2'] in ucla_players else row['Score'],
    axis=1
)

matches

# filter through each player on ucla team
def _player_matches(name):
    """Return the rows of `matches` in which `name` is Player1 or Player2."""
    return matches[(matches['Player1'] == name) | (matches['Player2'] == name)]

## Gianluca Ballotta
gianlucaBallotta_df = _player_matches('Gianluca Ballotta')
gianlucaBallotta_df
## Kaylan Bigun
kaylanBigun_df = _player_matches('Kaylan Bigun')
kaylanBigun_df
## Cassius Chinlund
# Filter on Cassius's own name; this frame used to be built from Kaylan Bigun's rows.
cassius_df = _player_matches('Cassius Chinlund')
cassius_df
## Andrei Crabel
## Alexander Hoogmartens
## Spencer Johnson
## Rudy Quan
rudy_df = _player_matches('Rudy Quan')
rudy_df
## Giacomo Revelli
## Aadarsh Tripathi
## Emon van Loben Sels
## Leo von Bismarck

Unnamed: 0,Event Name,Date,Player1,Player1 UTR,Player2,Player2 UTR,Score,opponent_school,opponent,ucla_result,Score (UCLA Perspective)
2,"Dual Match: University of California, Los Ange...",2025-04-26,Aidan Kim,13.75,Rudy Quan,13.67,"7-6(3), 6-2",The Ohio State University,Aidan Kim,Loss,"6-7(3), 2-6"
10,Dual Match: Michigan State University vs Unive...,2025-04-25,Rudy Quan,13.67,Aristotelis THANOS,13.61,"4-6, 3-1",Michigan State University,Aristotelis THANOS,Win,"4-6, 3-1"
17,Dual Match: University of Michigan vs Universi...,2025-04-24,Rudy Quan,13.67,William Cooksey,12.00,"6-3, 5-6",University of Michigan,William Cooksey,Win,"6-3, 5-6"
23,"Dual Match: University of California, Los Ange...",2025-04-19,Rudy Quan,13.67,Calvin MUELLER,13.00,"3-6, 6-3, 6-1",University of Nebraska-Lincoln,Calvin MUELLER,Win,"3-6, 6-3, 6-1"
25,"Dual Match: University of California, Los Ange...",2025-04-17,Michael Minasyan,12.00,Rudy Quan,13.67,"2-6, 6-3",University of Wisconsin-Madison,Michael Minasyan,Loss,"6-2, 3-6"
...,...,...,...,...,...,...,...,...,...,...,...
4790,,2014-05-10,Rudy Quan,13.67,Matthew Trinidad,2.00,"6-0, 6-0",,Matthew Trinidad,Win,"6-0, 6-0"
4804,,2014-04-12,Kyle Kang,13.00,Rudy Quan,13.67,"6-0, 6-1",,Kyle Kang,Loss,"0-6, 1-6"
4805,,2014-04-12,Rudy Quan,13.67,Prayag Ahire,9.00,"6-2, 6-0",,Prayag Ahire,Win,"6-2, 6-0"
4810,,2014-04-11,Rudy Quan,13.67,Heaton Manor,0.00,"6-0, 6-0",,Heaton Manor,Win,"6-0, 6-0"


In [57]:
# Keep only the index columns for Rudy Quan's matches and give the score and
# date columns their output names.
final_df = (
    rudy_df
    .loc[:, ['opponent_school', 'opponent', 'Score (UCLA Perspective)', 'Date']]
    .rename(columns={'Score (UCLA Perspective)': 'score', 'Date': 'date'})
)

final_df

Unnamed: 0,opponent_school,opponent,score,date
2,The Ohio State University,Aidan Kim,"6-7(3), 2-6",2025-04-26
10,Michigan State University,Aristotelis THANOS,"4-6, 3-1",2025-04-25
17,University of Michigan,William Cooksey,"6-3, 5-6",2025-04-24
23,University of Nebraska-Lincoln,Calvin MUELLER,"3-6, 6-3, 6-1",2025-04-19
25,University of Wisconsin-Madison,Michael Minasyan,"6-2, 3-6",2025-04-17
...,...,...,...,...
4790,,Matthew Trinidad,"6-0, 6-0",2014-05-10
4804,,Kyle Kang,"0-6, 1-6",2014-04-12
4805,,Prayag Ahire,"6-2, 6-0",2014-04-12
4810,,Heaton Manor,"6-0, 6-0",2014-04-11


In [58]:
# Target format: the index function should return data shaped like this sample.
# A dedicated name is used for the sample's path so that `file_path` keeps
# pointing at the match results CSV if the read-data cell is re-run.
sample_path = os.path.join(parent_dir, 'data/practice/index_sample.csv')
sample_output = pd.read_csv(sample_path)
sample_output

Unnamed: 0,school_name,opponent,score,date
0,UCLA,Govind Nanda,7-6(5),04/25/25
1,Oregon,Vlad whatever,,03/24/28
2,Univeristy of Washington,Cesar Bouchelaghem,6-4 7-6(3),03/24/28


In [59]:
# Column Format Guidelines:

# school_name: Full school name
# opponent: Full name
# score: From the perspective of the player we are querying (flip the score if the UCLA player lost)
# date: mm/dd/yy