## Match Index Notebook
This notebook builds the per-player match table (opponent school, opponent, score, result, date) for the Index Page of the Season Reports.

### Load Packages

In [None]:
import pandas as pd
import re
import os

import ipywidgets as widgets
from IPython.display import display

### Read Data

In [None]:
# Set School
school = 'University of California, Los Angeles'

# Player picker: the first entry is a placeholder, so the default value is
# deliberately not a real player and must be changed before continuing
player_options = [
    '-- Select --',
    'Rudy Quan',
    'Emon Van Loben Sels',
    'Kaylan Bigun',
    'Alexander Hoogmartens',
    'Spencer Johnson',
    'Aadarsh Tripathi',
    'Giacomo Revelli',
    'Gianluca Ballotta',
]
choose_player_dropdown = widgets.Dropdown(
    options=player_options,
    value='-- Select --',
    description='Category:',
)

display(choose_player_dropdown)

In [None]:
# Only accept the dropdown value once the placeholder has been replaced;
# otherwise stop here so later cells never run with an invalid player
if choose_player_dropdown.value != '-- Select --':
    player_name = choose_player_dropdown.value
else:
    raise ValueError("Please choose a valid category from the dropdown menu in the previous cell before proceeding.")

In [None]:
# Load the results file and keep only its first 253 rows
# (the rows used as the 2024-2025 season subset)
matches = pd.read_csv('../../data/mens/mens_results.csv').iloc[:253]

# Parse the Date column into datetimes
matches['Date'] = pd.to_datetime(matches['Date'])

# Function to Filter by Player and School Matches Only
def filter_player(data, player_name,
                  event_prefixes=('Dual Match', '2024 ITA', '2024-25 NCAA Division')):
    """Return the rows of `data` where `player_name` played in a school event.

    Parameters
    ----------
    data : pandas.DataFrame
        Match rows with 'Player1', 'Player2' and 'Event Name' columns.
    player_name : str
        Name to look for (exact match) in 'Player1' or 'Player2'.
    event_prefixes : iterable of str, optional
        An event counts as a school event when its 'Event Name' starts with
        one of these prefixes. Defaults to the prefixes previously hard-coded
        here.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with the original index preserved.
    """
    # Filter for player_name
    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    data = data[involves_player]

    # Filter for only school events.
    # na=False: a missing Event Name counts as "not a school event"; without
    # it the mask contains NaN and boolean indexing raises.
    is_school_event = data['Event Name'].str.startswith(tuple(event_prefixes), na=False)
    return data[is_school_event]


# Replace the season-wide frame with only the selected player's school-event matches
matches = filter_player(matches, player_name)

In [None]:
# Preview the filtered matches
matches

### index_csv() Function

#### Helper Function to Flip Scores

In [None]:
# Helper function to flip scores like "7-6(5), 5-7, 6-3"
# Also does not include commas in the final result

def flip_score(score_str, player1, target_player=None):
    """Return a match score from the selected player's point of view.

    Parameters
    ----------
    score_str : str
        Comma-separated set scores written from Player1's side,
        e.g. "7-6(5), 5-7, 6-3". Non-string values (such as NaN for a
        missing score) are returned unchanged.
    player1 : str
        The name stored in the row's 'Player1' column.
    target_player : str, optional
        The player whose perspective is wanted. When omitted, the
        notebook-level `player_name` is used.

    Returns
    -------
    str
        The sets separated by single spaces, without commas. When `player1`
        is not the target player, every "a-b" pair is swapped to "b-a"; a
        "(n)" tiebreak suffix and any other text are kept as they are.
    """
    # A missing score cannot be flipped; hand it back untouched
    if not isinstance(score_str, str):
        return score_str

    if target_player is None:
        target_player = player_name  # selection made earlier in the notebook

    if player1 != target_player:
        def swap_games(found):
            first, second, tiebreak = found.groups()
            return f"{second}-{first}{tiebreak or ''}"

        # Swap every set in the string, so sets are not lost when the
        # separator is "," without a space or just a space
        score_str = re.sub(r'(\d+)-(\d+)(\(\d+\))?', swap_games, score_str)

    # Drop the commas and normalise the spacing between sets
    pieces = (piece.strip() for piece in score_str.split(','))
    return ' '.join(piece for piece in pieces if piece)

In [None]:
# Make Win and Loss --> "W" and "L" AND take into account UNFINISHED matches "UF"
# eg. 6-3 6-3 W  | 3-6 4-6 L  | 2-6 6-5 UF

def determine_result(row):
    """Classify one match row as 'W', 'L' or 'UF' for the selected player.

    `row` must provide 'Score', 'Player1' and 'Player2'. The score is read
    from Player1's side; the notebook-level `player_name` decides whose
    result is reported. 'UF' is returned when the score is missing, contains
    "UF", has an unparsable or incomplete set, has no side on exactly 2 or 3
    sets, or when neither player in the row is `player_name`.
    """
    raw_score = row['Score']
    if pd.isna(raw_score):
        return 'UF'
    if 'UF' in str(raw_score).upper():
        return 'UF'

    pieces = [piece.strip() for piece in raw_score.split(',')]
    final_position = len(pieces) - 1
    sets_won = [0, 0]  # [Player1, Player2]

    for position, piece in enumerate(pieces):
        # A deciding 10-point tiebreak is written "1-0(n)" / "0-1(n)" and is
        # only recognised in the last position
        if position == final_position:
            decider = re.match(r'(1-0|0-1)\((\d+)\)', piece)
            if decider:
                sets_won[0 if decider.group(1) == '1-0' else 1] += 1
                continue

        parsed = re.match(r'(\d+)-(\d+)(\(\d+\))?', piece)
        if parsed is None:
            # Unrecognised format counts as unfinished
            return 'UF'

        games_p1 = int(parsed.group(1))
        games_p2 = int(parsed.group(2))

        # Complete set: someone reached 6+ games with a 2-game margin ...
        won_by_margin = max(games_p1, games_p2) >= 6 and abs(games_p1 - games_p2) >= 2
        # ... or a tiebreak set where one side has 7 and both have at least 6
        won_by_tiebreak = 7 in (games_p1, games_p2) and min(games_p1, games_p2) >= 6
        if not (won_by_margin or won_by_tiebreak):
            return 'UF'

        sets_won[0 if games_p1 > games_p2 else 1] += 1

    p1_sets, p2_sets = sets_won

    # Every counted set adds one win to exactly one side, so the match is
    # treated as decided exactly when either side holds 2 or 3 sets
    if p1_sets in (2, 3) or p2_sets in (2, 3):
        if row['Player1'] == player_name:
            return 'W' if p1_sets > p2_sets else 'L'
        if row['Player2'] == player_name:
            return 'W' if p2_sets > p1_sets else 'L'
    return 'UF'


In [None]:
def index_csv(data, player_name, school):
    """Build the index-page table of `player_name`'s matches.

    Parameters
    ----------
    data : pandas.DataFrame
        Match rows with 'Player1', 'Player2', 'Event Name', 'Score' and
        'Date' columns.
    player_name : str
        Player whose matches are listed (exact match on Player1/Player2).
    school : str
        The player's school as it appears in "<team> vs <team>" event names.

    Returns
    -------
    pandas.DataFrame
        Columns 'opponent_school', 'opponent', 'score', 'result', 'date',
        where 'date' is an mm/dd/yy string. An input with no rows for the
        player yields an empty frame with these columns.

    Notes
    -----
    `flip_score` and `determine_result` are called without a player argument
    and identify the player through the notebook-level `player_name`
    variable, so call this function with that same player.
    """
    # Filter for Player. Both masks are built from `data`; reading Player2
    # from the notebook-level `matches` frame tied this function to one
    # specific global DataFrame.
    is_player1 = data['Player1'] == player_name
    is_player2 = data['Player2'] == player_name
    df = data[is_player1 | is_player2].copy()

    # Extract opponent school from 'Event Name'
    def get_opponent(event):
        if not isinstance(event, str) or school not in event:
            return None
        after_vs = re.search(r'vs\s(.+)', event)
        if after_vs is None:
            return None
        before_vs = event.split('vs')[0]
        if school in before_vs:
            # Our school is listed first, so the opponent follows "vs"
            return after_vs.group(1).strip()
        return before_vs.replace('Dual Match:', '').strip()

    # Create opponent_school Column
    df['opponent_school'] = df['Event Name'].apply(get_opponent)

    # Create opponent Column: every remaining row has the player on one side,
    # so the opponent is Player2 when the player is Player1, else Player1.
    # Exact equality is used (not substring `in`) to match the filter above.
    df['opponent'] = df['Player2'].where(df['Player1'] == player_name, df['Player1'])

    # Capitalize only the first letter of each name
    # eg. Aristotelis THANOS --> Aristotelis Thanos
    df['opponent'] = df['opponent'].str.title()

    # Flip the score only if the player is Player2. Built as a list rather
    # than df.apply(axis=1), which returns a DataFrame for an empty frame and
    # makes the column assignment fail. Missing scores are passed through.
    df['Score (UCLA Perspective)'] = [
        flip_score(score, first_player) if isinstance(score, str) else score
        for score, first_player in zip(df['Score'], df['Player1'])
    ]

    # Result column ('W' / 'L' / 'UF')
    df['ucla_result'] = [determine_result(row) for _, row in df.iterrows()]

    # Convert the 'Date' column to datetime format
    df['Date'] = pd.to_datetime(df['Date'])

    # Make date column to mm/dd/yy format
    # eg. 2025-04-26 --> 04/26/25
    df['Date'] = df['Date'].dt.strftime('%m/%d/%y')

    # Final Columns + rename columns
    final_df = df[['opponent_school',
                   'opponent',
                   'Score (UCLA Perspective)',
                   'ucla_result',
                   'Date']].rename(columns={'Score (UCLA Perspective)': 'score',
                                            'ucla_result': 'result',
                                            'Date': 'date'
                                            })

    return final_df

### Output Function Result

In [None]:
# Build the index-page table for the selected player, then preview it
matches_cleaned = index_csv(matches, player_name, school)
matches_cleaned

In [None]:
# Output to csv (file names are relative, so they are written to the
# current working directory -- originally noted as the index_page folder)

# File names use the player's name with the spaces removed
player_name_nospace = player_name.replace(' ', '')

# Full table in a single CSV file
matches_cleaned.to_csv(f'{player_name_nospace}_matches.csv', index=False)

# Additionally write the table in consecutive pieces of at most 15 rows
chunk_size = 15
chunk_starts = range(0, len(matches_cleaned), chunk_size)
chunks = [matches_cleaned.iloc[start:start + chunk_size] for start in chunk_starts]

# One numbered CSV file per piece (numbering starts at 1)
for idx, chunk in enumerate(chunks):
    chunk_filename = f'{player_name_nospace}_matches{idx+1}.csv'
    chunk.to_csv(chunk_filename, index=False)
    print(chunk_filename + ' Saved!')