In [59]:
import pandas as pd
import os
import re
import glob
import logging
import numpy as np
import ast

In [60]:
# Read the 2016-2017 results CSV into a DataFrame
RESULTS_CSV_PATH = r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons\2016-2017\results_df\6_results_df.csv"
results_df = pd.read_csv(RESULTS_CSV_PATH)

In [61]:
# Turn string representations of the 'Rubbers' lists back into Python objects;
# values that are not strings are passed through untouched
def _parse_rubbers(value):
    """Literal-evaluate `value` when it is a string, otherwise return it as is."""
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


results_df['Rubbers'] = results_df['Rubbers'].apply(_parse_rubbers)

In [62]:
def determine_winner(rubber_score, home_team, away_team):
    """
    Function to determine the winner of a rubber

    Parameters:
        rubber_score: a score string of the form '<home>-<away>' (e.g. '3-1'),
            a missing value, or one of the markers 'CR' / 'WO'
            (presumably conceded / walkover results -- confirm).
        home_team: value returned when the home score is the higher one.
        away_team: value returned when the away score is the higher one.

    Returns:
        home_team or away_team, or pd.NA when the score is missing, is
        'CR' / 'WO', or when both sides have the same score.

    Raises:
        ValueError: if a score string is not two integers separated by a
            single '-'.
    """
    if pd.isna(rubber_score) or rubber_score in ('CR', 'WO'):
        return pd.NA
    home_score, away_score = map(int, rubber_score.split('-'))
    if home_score == away_score:
        # A level score has no winner; do not silently credit the away team.
        return pd.NA
    return home_team if home_score > away_score else away_team

In [63]:
def count_valid_matches(df, rubber_index):
    """
    Count, per team, the matches whose rubber at position `rubber_index`
    has a usable score.

    A match is skipped when its 'Rubbers' list is too short to contain that
    position, or when the entry there is missing, 'CR' or 'WO'. Otherwise
    both the 'Home Team' and the 'Away Team' of the row are credited with
    one match.

    Returns a dict mapping team -> number of counted matches.
    """
    excluded_scores = ('CR', 'WO')
    tally = {}
    for _, match in df.iterrows():
        rubbers = match['Rubbers']
        if rubber_index >= len(rubbers):
            continue
        score = rubbers[rubber_index]
        if pd.isna(score) or score in excluded_scores:
            continue
        for side in ('Home Team', 'Away Team'):
            team = match[side]
            tally[team] = tally.get(team, 0) + 1
    return tally

In [64]:
# Length of the longest 'Rubbers' list across all matches
rubbers_per_match = results_df['Rubbers'].apply(len)
max_rubbers = rubbers_per_match.max()

In [65]:
# Add one 'Winner Rubber N' column per rubber position
def _winner_of_rubber(row, position):
    """Winner of the rubber at `position` in this row (pd.NA score if the list is shorter)."""
    rubbers = row['Rubbers']
    score = rubbers[position] if position < len(rubbers) else pd.NA
    return determine_winner(score, row['Home Team'], row['Away Team'])


for rubber_no in range(1, max_rubbers + 1):
    results_df[f'Winner Rubber {rubber_no}'] = results_df.apply(
        _winner_of_rubber, axis=1, args=(rubber_no - 1,))


In [66]:
# Aggregate the number of wins for each team in each rubber.
# Each 'Winner Rubber N' column is counted once (value_counts skips missing
# winners) and all counts are joined in a single concat instead of growing a
# DataFrame inside the loop.
win_counts = [
    results_df[f'Winner Rubber {i}'].value_counts().rename(f'Wins in Rubber {i}')
    for i in range(1, max_rubbers + 1)
]
aggregate_wins = pd.concat(win_counts, axis=1) if win_counts else pd.DataFrame()

# Teams with no win in a given rubber come out of the outer join as NaN:
# count them as 0 and store the wins as integers. The index name is cleared
# so that a later reset_index() produces a column called 'index'.
aggregate_wins = aggregate_wins.rename_axis(None).fillna(0).astype(int)

In [67]:
# For every rubber position, count per team the matches with a usable score
# (count_valid_matches skips missing, 'CR' and 'WO' entries)
total_matches_per_rubber = {}
for position in range(max_rubbers):
    total_matches_per_rubber[f'Rubber {position + 1}'] = count_valid_matches(results_df, position)

# Dict of dicts -> DataFrame: one column per rubber, teams as the index
total_matches_df = pd.DataFrame(total_matches_per_rubber)

In [68]:
# Outer-join wins and match counts on the team name held in both indexes
combined = pd.merge(
    aggregate_wins,
    total_matches_df,
    how='outer',
    left_index=True,
    right_index=True,
)

In [70]:
# Missing win / match counts left by the outer merge count as 0
combined = combined.fillna(0)

rubber_labels = [f'Rubber {n}' for n in range(1, max_rubbers + 1)]
win_pct_columns = [f'{label} Win %' for label in rubber_labels]

# Win percentage per rubber = wins / counted matches * 100
for label, pct_column in zip(rubber_labels, win_pct_columns):
    combined[pct_column] = combined[f'Wins in {label}'].div(combined[label]).mul(100)

# Any NaN percentage (e.g. from 0 wins / 0 matches) is reported as 0
combined = combined.fillna(0)

# Unweighted mean of the per-rubber win percentages
combined["avg_win_perc"] = combined[win_pct_columns].mean(axis=1)

# Counted matches summed over all rubber positions, aligned on the team index
combined["Total Rubbers"] = total_matches_df.sum(axis=1)

# Order teams by average win percentage (highest first), then move the team
# names out of the index into a 'Team' column
combined_sorted = (
    combined
    .sort_values("avg_win_perc", ascending=False)
    .reset_index()
    .rename(columns={'index': 'Team'})
)

# Final table: team, per-rubber win %, average win % and total rubbers
keep_columns = ["Team", *win_pct_columns, 'avg_win_perc', "Total Rubbers"]
win_percentage_df = combined_sorted[keep_columns]

In [71]:
# Show the per-team table of win percentages by rubber position
win_percentage_df

Unnamed: 0,Team,Rubber 1 Win %,Rubber 2 Win %,Rubber 3 Win %,Rubber 4 Win %,Rubber 5 Win %,avg_win_perc,Total Rubbers
0,GS 1,65.0,90.0,70.0,90.0,75.0,78.0,100
1,Razor 2,75.0,75.0,50.0,80.0,57.894737,67.578947,99
2,Hong Kong Football Club 6A,30.0,85.0,80.0,55.0,66.666667,63.333333,98
3,Hong Kong Football Club 6B,60.0,45.0,75.0,45.0,63.157895,57.631579,99
4,Head Shatin 2,55.0,55.0,55.0,60.0,50.0,55.0,100
5,Island Squash Racquet Club,47.368421,68.421053,57.894737,72.222222,23.529412,53.887169,92
6,i-MASK Advance Squash Club 3,50.0,44.444444,55.555556,44.444444,55.555556,50.0,90
7,Hong Kong Cricket Club 2,70.0,25.0,40.0,31.578947,50.0,43.315789,97
8,Perrier KCC 5,35.0,20.0,45.0,30.0,23.529412,30.705882,97
9,Runners,15.789474,21.052632,21.052632,31.578947,31.25,24.144737,92
