In [1]:
import pandas as pd

# Define the file path
file_path = '/Users/eunchanhyung/Desktop/tennis consulting/consulting-spring2025/data/mens/mens_results.csv'

# Load the CSV once. `data` keeps the rows exactly as read; `matches` is an
# independent, de-duplicated copy, so adding or converting columns on it never
# touches `data` (the explicit copy also avoids a SettingWithCopyWarning).
data = pd.read_csv(file_path)
matches = data.drop_duplicates().copy()
# Parse 'Date' to datetimes so it can be compared against pd.Timestamp bounds.
matches['Date'] = pd.to_datetime(matches['Date'])

# Function to count sets won
def count_sets_won(score_str):
    """Count the sets each player won from a comma-separated score string.

    Each comma-separated chunk is read as ``<p1 games>-<p2 games>``. Anything
    from a ``(`` onward in either number (for example the ``(5)`` in
    ``7-6(5)``) is ignored. A chunk is credited to whichever side has the
    larger number; no check is made that the set was actually completed.

    Chunks without a ``-``, chunks whose numbers cannot be parsed as integers,
    and chunks where both numbers are equal (e.g. ``0-0``) are credited to
    neither player.

    Args:
        score_str: Score text such as ``"6-4, 3-6, 7-6(5)"``. Any non-string
            value (``None``, NaN, ...) is treated as "no score".

    Returns:
        A ``(p1_sets, p2_sets)`` tuple of ints.
    """
    if not isinstance(score_str, str):
        return 0, 0

    p1_sets = p2_sets = 0
    for chunk in score_str.split(','):
        parts = chunk.strip().split('-')
        if len(parts) < 2:
            continue
        try:
            # Drop a trailing "(n)" tiebreak annotation before parsing.
            p1 = int(parts[0].split('(')[0])
            p2 = int(parts[1].split('(')[0])
        except ValueError:
            # Non-numeric text (e.g. "ret", "W/O"): not a countable set.
            continue
        if p1 > p2:
            p1_sets += 1
        elif p2 > p1:
            p2_sets += 1
        # p1 == p2: a level score has no winner, so nobody gets the set.
    return p1_sets, p2_sets

# Pick the match winner from a row's set counts
def get_winner(row):
    """Return the name of the player who won more sets in ``row``.

    The set counts come from ``count_sets_won(row['Score'])``. The result is
    ``row['Player1']`` when player 1 has more sets, ``row['Player2']`` when
    player 2 has more, and ``None`` when the counts are level.
    """
    sets_p1, sets_p2 = count_sets_won(row['Score'])
    if sets_p1 == sets_p2:
        return None
    winning_column = 'Player1' if sets_p1 > sets_p2 else 'Player2'
    return row[winning_column]
# Derive the 'Winner' column row by row, unless the frame already has one
if 'Winner' not in matches:
    matches['Winner'] = matches.apply(get_winner, axis='columns')

# UNIVERSAL FUNCTION (header says it returns a DataFrame; see review note below)
def get_player_record_table(player_name):
    """Select ``player_name``'s season and conference-window matches.

    Works on the module-level ``matches`` frame, using the hard-coded season
    start (2024-09-19) and conference window (2025-03-07 to 2025-04-20, both
    ends inclusive).

    NOTE(review): as written, the body stops after filtering and has no
    ``return`` statement, so the call evaluates to ``None`` and the filtered
    frames are discarded. The table-building part appears to be missing.
    """
    season_start = pd.Timestamp('2024-09-19')
    conf_start = pd.Timestamp('2025-03-07')
    conf_end = pd.Timestamp('2025-04-20')

    # Rows where the player appears on either side, from season start onward.
    involves_player = (matches['Player1'] == player_name) | (matches['Player2'] == player_name)
    in_season = matches['Date'] >= season_start
    player_matches = matches.loc[involves_player & in_season]

    # Subset of those rows that falls inside the conference window.
    in_conf_window = player_matches['Date'].between(conf_start, conf_end)
    player_conf_matches = player_matches.loc[in_conf_window]

    # Keep only rows that have a winner recorded.
    finished_player_matches = player_matches.loc[player_matches['Winner'].notna()]
    finished_player_conf_matches = player_conf_matches.loc[player_conf_matches['Winner'].notna()]
def get_max_win_streak(player_name, season_start='2024-09-19', match_data=None):
    """Return the longest run of consecutive wins for ``player_name``.

    Matches are taken in ascending ``Date`` order. A row whose ``Winner``
    equals ``player_name`` extends the current streak, a row with any other
    non-missing ``Winner`` resets it, and a row with a missing ``Winner``
    (unfinished match) is skipped without affecting the streak.

    Args:
        player_name: Name to look for in the ``Player1``/``Player2`` columns.
        season_start: Earliest match date to include (inclusive). Anything
            accepted by ``pd.Timestamp``; defaults to the 2024-09-19 start
            used previously.
        match_data: DataFrame with ``Player1``, ``Player2``, ``Date`` and
            ``Winner`` columns. Defaults to the module-level ``matches``.

    Returns:
        int: Length of the longest win streak, or 0 if the player has no wins
        in range.
    """
    source = matches if match_data is None else match_data
    season_start = pd.Timestamp(season_start)

    involves_player = (source['Player1'] == player_name) | (source['Player2'] == player_name)
    # A stable sort keeps same-date matches in their original row order, so
    # the result is deterministic when a player has several matches on one day.
    player_matches = source[
        involves_player & (source['Date'] >= season_start)
    ].sort_values(by='Date', kind='mergesort')

    max_streak = 0
    current_streak = 0
    for winner in player_matches['Winner']:
        if pd.isna(winner):
            # Unfinished match: neither extends nor breaks the streak.
            continue
        if winner == player_name:
            current_streak += 1
            max_streak = max(max_streak, current_streak)
        else:
            current_streak = 0

    return max_streak


# Example usage: compute and print the longest win streak for one player.
streak = get_max_win_streak('Rudy Quan')
print(f"Rudy Quan's longest win streak: {streak}")

Rudy Quan's longest win streak: 4
