In [4]:
import pandas as pd

# Define the file path
file_path = '../../data/mens/mens_results.csv'

# Load the raw results once; `data` keeps every row exactly as read from disk
data = pd.read_csv(file_path)

# Working frame: exact duplicate rows removed and 'Date' parsed to datetime.
# Built from `data` (no second read of the CSV) and via assign() so that the
# frame returned by drop_duplicates() is never mutated in place.
matches = data.drop_duplicates().assign(Date=lambda df: pd.to_datetime(df['Date']))

# Function to count sets won
def count_sets_won(score_str):
    """Count the sets each player won from a comma-separated score string.

    Every comma-separated chunk is read as ``"<p1 games>-<p2 games>"``; anything
    from an opening parenthesis onward (the ``(4)`` in ``7-6(4)``) is ignored.

    Args:
        score_str: Score such as ``"7-6(4), 3-6, 6-2"``. Any non-string value
            (for example NaN from an empty CSV cell) is treated as "no score".

    Returns:
        tuple[int, int]: ``(p1_sets, p2_sets)``. Chunks that cannot be parsed,
        and chunks in which both players have the same number of games,
        credit neither player.
    """
    if not isinstance(score_str, str):
        return 0, 0
    p1_sets = p2_sets = 0
    for set_score in score_str.split(','):
        parts = set_score.strip().split('-')
        if len(parts) < 2:
            continue
        try:
            p1 = int(parts[0].split('(')[0])
            p2 = int(parts[1].split('(')[0])
        except ValueError:
            # Non-numeric chunk: skip it, but let unrelated exceptions surface
            continue
        if p1 > p2:
            p1_sets += 1
        elif p2 > p1:
            p2_sets += 1
        # Equal games (e.g. "3-3") has no set winner, so nobody is credited
    return p1_sets, p2_sets

# Function to determine winner
def get_winner(row):
    """Return the name of whichever player took more sets in this match.

    ``row`` is indexed by 'Score', 'Player1' and 'Player2'. When both players
    are credited with the same number of sets the result is None.
    """
    sets_p1, sets_p2 = count_sets_won(row['Score'])
    if sets_p1 == sets_p2:
        return None
    return row['Player1'] if sets_p1 > sets_p2 else row['Player2']
# Add winner column once globally.
# The guard makes the cell a no-op when `matches` already has a 'Winner'
# column, so re-running it does not recompute the row-wise apply.
if 'Winner' not in matches.columns:
    matches['Winner'] = matches.apply(get_winner, axis=1)


def get_max_win_streak(player_name, data=None, season_start='2024-09-19'):
    """Return the player's longest run of consecutive wins since ``season_start``.

    Matches are walked in date order. A win extends the current run, a loss
    resets it, and a match without a winner is skipped so it neither extends
    nor breaks the run.

    Args:
        player_name: Name to look for in the 'Player1' / 'Player2' columns.
        data: DataFrame with 'Player1', 'Player2', 'Date' (datetime) and
            'Winner' columns. Defaults to the notebook-level ``matches`` frame.
        season_start: Earliest match date to include (inclusive); anything
            accepted by ``pd.Timestamp``.

    Returns:
        int: Length of the longest win streak, 0 if the player has no wins.
    """
    if data is None:
        data = matches
    season_start = pd.Timestamp(season_start)

    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    in_season = data['Date'] >= season_start
    # Stable sort: matches played on the same date keep their original row order,
    # so the result is deterministic from run to run
    player_matches = data[involves_player & in_season].sort_values(by='Date', kind='mergesort')

    max_streak = 0
    current_streak = 0
    for winner in player_matches['Winner']:
        # No winner recorded: missing value, or the 'Unfinished' label that the
        # later get_winner variant in this notebook produces
        if pd.isna(winner) or winner == 'Unfinished':
            continue
        if winner == player_name:
            current_streak += 1
            max_streak = max(max_streak, current_streak)
        else:
            current_streak = 0

    return max_streak


# Report the longest win streak for one player using the function above
streak = get_max_win_streak('Rudy Quan')
print(f"Rudy Quan's longest win streak: {streak}")

Rudy Quan's longest win streak: 4


### Cj Work

In [5]:
import pandas as pd

# Input file and the player this section analyses
file_path = '../../data/mens/mens_results.csv'
player_name = 'Rudy Quan'

# Keep only the first 253 rows of the CSV.
# NOTE(review): the reason for the 253-row cut-off is not visible here — confirm.
# This rebinds the notebook-level name `matches`.
matches = pd.read_csv(file_path).head(253)
matches['Date'] = pd.to_datetime(matches['Date'])


# Function to Filter by Player and School Matches Only
def filter_player(data, player_name,
                  event_prefixes=('Dual Match', '2024 ITA', '2024-25 NCAA Division')):
    """Return the rows where ``player_name`` played in a school event.

    Args:
        data: DataFrame with 'Player1', 'Player2' and 'Event Name' columns.
        player_name: Name to match exactly against 'Player1' or 'Player2'.
        event_prefixes: A prefix, or iterable of prefixes; a row is kept when
            its 'Event Name' starts with any of them. Defaults to the school
            event prefixes this notebook uses.

    Returns:
        DataFrame: The matching rows of ``data`` with their original index.
        Rows whose 'Event Name' is missing are excluded.
    """
    if isinstance(event_prefixes, str):
        # A lone string would otherwise be split into single characters by tuple()
        event_prefixes = (event_prefixes,)

    involves_player = (data['Player1'] == player_name) | (data['Player2'] == player_name)
    # na=False: a missing event name yields False instead of NaN, which would
    # make the boolean mask invalid and raise when used for indexing
    is_school_event = data['Event Name'].str.startswith(tuple(event_prefixes), na=False)
    return data[involves_player & is_school_event]


# The player's school matches with duplicate rows removed; reset_index() keeps
# the original row labels in a new 'index' column
mens_results_player = (
    filter_player(matches, player_name)
    .drop_duplicates()
    .reset_index()
)

In [6]:
# Helper Function: Count Sets Won
def count_sets_won(score_str):
    """Count the completed sets each player won from a score string.

    The score is a comma-separated list of sets written ``"<p1>-<p2>"``, with an
    optional parenthesised tiebreak detail such as ``7-6(4)``.

    A set is counted when it is either:
      * a completed standard set: one side has at least 6 games with a margin
        of two or more, or the score is exactly 7-6 / 6-7; or
      * a deciding tiebreak: the third of exactly three sets, written as
        ``1-0(x)`` or ``0-1(x)``.
    Anything else (unparseable chunks, sets still in progress) is ignored.

    Args:
        score_str: Score such as ``"7-6(4), 3-6, 1-0(8)"``. Non-string values
            (for example NaN from an empty CSV cell) are treated as "no score".

    Returns:
        tuple[int, int]: ``(p1_sets, p2_sets)``.
    """
    if not isinstance(score_str, str):
        return 0, 0

    p1_sets = p2_sets = 0

    sets = [s.strip() for s in score_str.split(',') if s.strip()]  # Clean and split sets

    for i, s in enumerate(sets):
        parts = s.split('-')

        # Check for valid score format
        if len(parts) < 2:
            continue

        try:
            # Extract the player scores, ignoring tiebreaker details
            p1 = int(parts[0].split('(')[0].strip())
            p2 = int(parts[1].split('(')[0].strip())
        except ValueError:
            continue

        # Deciding tiebreak recorded as "1-0(7)" / "0-1(3)" in the third set.
        # Only an actual 1-0 / 0-1 score takes this branch, so a regular third
        # set that merely carries a tiebreak detail (e.g. "7-6(4)") still goes
        # through the standard-set check below instead of being dropped.
        is_deciding_set = len(sets) == 3 and i == 2
        if is_deciding_set and "(" in s and {p1, p2} == {0, 1}:
            if p1 == 1:
                p1_sets += 1
            else:
                p2_sets += 1
            continue

        # Check for standard set completion
        valid_standard_set = ((p1 >= 6 or p2 >= 6) and abs(p1 - p2) >= 2) or \
                             ((p1 == 7 and p2 == 6) or (p1 == 6 and p2 == 7))

        # If the set is unfinished, skip counting
        if not valid_standard_set:
            continue

        # Count the set win for the respective player
        if p1 > p2:
            p1_sets += 1
        else:
            p2_sets += 1

    return p1_sets, p2_sets

# Helper Function: Determine Winner
def get_winner(row):
    """Name the match winner, or 'Unfinished' if nobody has exactly two sets.

    ``row`` is indexed by 'Score', 'Player1' and 'Player2'.
    """
    sets_by_p1, sets_by_p2 = count_sets_won(row['Score'])

    # Two counted sets decide the match; Player1 is checked first
    for player_col, sets_won in (('Player1', sets_by_p1), ('Player2', sets_by_p2)):
        if sets_won == 2:
            return row[player_col]

    # Neither player reached two sets: report the match as 'Unfinished'
    return 'Unfinished'

In [7]:
# Add the winner column to the player's filtered matches (row-wise get_winner),
# then end the cell on the bare frame so the notebook displays it
mens_results_player['Winner'] = mens_results_player.apply(get_winner, axis=1)
mens_results_player

Unnamed: 0,index,Event Name,Date,Player1,Player2,Player1 UTR,Player2 UTR,Score,Winner
0,5,"Dual Match: University of California, Los Ange...",2025-05-15,Timo Legout,Rudy Quan,14.23,13.59,"7-5, 6-2",Timo Legout
1,6,Dual Match: University of Southern California ...,2025-05-08,Rudy Quan,Makk Peter,13.59,13.79,"7-6(4), 3-6, 3-3",Unfinished
2,14,"Dual Match: University of California, Los Ange...",2025-05-02,Carl Overbeck,Rudy Quan,13.48,13.59,"5-7, 7-5, 6-3",Carl Overbeck
3,18,"Dual Match: University of California, Santa Ba...",2025-05-01,Rudy Quan,Gianluca Brunkow,13.59,13.0,"5-7, 6-2, 0-1",Unfinished
4,27,"Dual Match: University of California, Los Ange...",2025-04-26,Aidan Kim,Rudy Quan,13.74,13.59,"7-6(3), 6-2",Aidan Kim
5,33,Dual Match: Michigan State University vs Unive...,2025-04-25,Rudy Quan,Aristotelis Thanos,13.59,13.67,"4-6, 3-1",Unfinished
6,36,Dual Match: University of Michigan vs Universi...,2025-04-24,Rudy Quan,William Cooksey,13.59,12.0,"6-3, 5-6",Unfinished
7,46,"Dual Match: University of California, Los Ange...",2025-04-19,Rudy Quan,Calvin MUELLER,13.59,13.0,"3-6, 6-3, 6-1",Rudy Quan
8,52,"Dual Match: University of California, Los Ange...",2025-04-17,Michael Minasyan,Rudy Quan,12.0,13.59,"2-6, 6-3",Unfinished
9,55,Dual Match: Michigan State University vs Unive...,2025-04-12,Rudy Quan,Ozan Baris,13.59,13.66,"6-1, 6-2",Rudy Quan


In [9]:
def winstreak(data, player_name):
    """Return the longest run of consecutive wins by ``player_name``.

    Winners are recomputed for every row with ``get_winner``; matches it labels
    'Unfinished' are dropped, the remainder is ordered by 'Date', and the
    longest uninterrupted run of rows won by ``player_name`` is measured.

    Args:
        data: DataFrame with the columns ``get_winner`` reads plus 'Date'.
            The frame passed in is not modified.
        player_name: Player whose streak is measured.

    Returns:
        int: Length of the longest streak; 0 when there are no finished
        matches or the player won none of them.
    """
    if data.empty:
        return 0

    # assign() builds a new frame, so neither the caller's DataFrame nor a
    # filtered slice of it is written to
    finished = data.assign(Winner=data.apply(get_winner, axis=1))
    finished = finished[finished['Winner'] != 'Unfinished']
    if finished.empty:
        return 0
    # Stable sort keeps same-day matches in their original order
    finished = finished.sort_values(by='Date', ascending=True, kind='mergesort')

    # True on the rows the player won
    is_player_win = finished['Winner'] == player_name

    # A new run id starts every time the win flag flips, so each block of
    # consecutive wins (or consecutive non-wins) shares one id
    run_id = (is_player_win != is_player_win.shift()).cumsum()

    # Position within the run (1, 2, 3, ...), zeroed on rows the player did not win
    run_length = is_player_win.groupby(run_id).cumcount() + 1
    streak = run_length.where(is_player_win, 0)

    return int(streak.max())

# Bare call as the cell's last expression so the notebook displays the streak
winstreak(mens_results_player, player_name)


4

In [10]:
# Longest win streak over the player's finished school matches
streak = winstreak(mens_results_player, player_name)

# Print the result as a sentence, using the configured player_name
print(f"{player_name}'s longest win streak: {streak}")

Rudy Quan's longest win streak: 4
