In [1]:
import pandas as pd
import numpy as np

In [2]:
all_divisions = {
    "Premier Main": 424,
    "2": 425,
    "3": 426,
    "4": 427,
    "5": 428,
    "6": 429,
    "7A": 430,
    "7B": 431,
    "8A": 432,
    "8B": 433,
    "9": 434,
    "10": 435,
    "11": 436,
    "12": 437,
    "13A": 438,
    "13B": 439,
    "14": 440,
    "15A": 441,
    "15B": 442,
    "Premier Masters": 443,
    "M2": 444,
    "M3": 445,
    "M4": 446,
    "Premier Ladies": 447,
    "L2": 448,
    "L3": 449,
    "L4": 450,
    }

In [3]:
summary_directory = r"C:\Users\bpali\PycharmProjects\SquashApp\2024-2025\summary_df"
schedules_directory = r"C:\Users\bpali\PycharmProjects\SquashApp\2024-2025\schedules_df"

In [4]:
awaiting_results_directory = r"C:\Users\bpali\PycharmProjects\SquashApp\2024-2025\awaiting_results"

In [5]:
# In the the schedules directory are week_x folders (eg week_4). First check the most recent folder (ie the highest number), then check the next highest, etc, 
# using the all_divisions dictionary to make sure we load every division's most recent summary file and avoid loading an older file for a division that has a more recent file.

schedules_dfs = []
for division in all_divisions.keys():
    for week in range(10, 0, -1):
        try:
            # Load csv file
            schedules_df = pd.read_csv(f"{schedules_directory}\\week_{week}\\{division}_schedules_df.csv")
            # Create division column
            schedules_df["Division"] = division
            # Append to list
            schedules_dfs.append(schedules_df)
            break
        except FileNotFoundError:
            continue

# Concatenate all the dataframes
schedules_df = pd.concat(schedules_dfs, ignore_index=True)

In [6]:
# In the the summary directory are week_x folders (eg week_4). First check the most recent folder (ie the highest number), then check the next highest, etc, 
# using the all_divisions dictionary to make sure we load every division's most recent summary file and avoid loading an older file for a division that has a more recent file.

summary_dfs = []
for division in all_divisions.keys():
    for week in range(10, 0, -1):
        try:
            summary_df = pd.read_csv(f"{summary_directory}\\week_{week}\\{division}_summary_df.csv")
            summary_df["Division"] = division
            # Create Rank column based on Points column
            summary_df["Rank"] = summary_df["Points"].rank(ascending=False, method="min")
            # Make sure Rank column is an integer
            summary_df["Rank"] = summary_df["Rank"].astype(int)
            # Create column for number of teams in the division
            summary_df["Teams"] = len(summary_df)
            summary_dfs.append(summary_df)
            break
        except FileNotFoundError:
            continue

# Concatenate all the summary dataframes into one
summary_df = pd.concat(summary_dfs, ignore_index=True)

In [7]:
# In the the awaiting_results directory are week_x folders (eg week_4). First check the most recent folder (ie the highest number), then check the next highest, etc, 
# using the all_divisions dictionary to make sure we load every division's most recent summary file and avoid loading an older file for a division that has a more recent file.

awaiting_results_dfs = []
for division in all_divisions.keys():
    for week in range(10, 0, -1):
        try:
            awaiting_results_df = pd.read_csv(f"{awaiting_results_directory}\\week_{week}\\{division}_awaiting_results.csv")
            awaiting_results_df["Division"] = division
            awaiting_results_dfs.append(awaiting_results_df)
            break
        except FileNotFoundError:
            continue

# Concatenate all the awaiting_results dataframes into one
awaiting_results_df = pd.concat(awaiting_results_dfs)

### Create dataframe just for HKCC teams

In [8]:
hkcc = "Hong Kong Cricket Club"

# Filter rows where Team column contains "Hong Kong Cricket Club"
hkcc_summary_df = summary_df[(summary_df["Team"].str.contains(hkcc)) | 
                             (summary_df["Team"].str.contains("hkcc", case=False))].reset_index()

hkcc_summary_df[["Division", "Team", "Played", "Won", "Lost", "Points", "Rank", "Teams"]]

Unnamed: 0,Division,Team,Played,Won,Lost,Points,Rank,Teams
0,Premier Main,Hong Kong Cricket Club 1,5,4,1,13,2,6
1,2,Hong Kong Cricket Club 2,6,1,5,8,4,5
2,4,Hong Kong Cricket Club 3,8,3,5,20,6,8
3,6,Hong Kong Cricket Club 4,8,5,3,28,3,10
4,7A,Hong Kong Cricket Club 5,7,3,4,19,4,7
5,7B,Hong Kong Cricket Club 6,7,3,4,18,6,7
6,11,HKCC Tuesday Night Rockers,7,2,5,18,8,11
7,15A,Hong Kong Cricket Club 8,6,1,5,15,3,7
8,Premier Masters,Hong Kong Cricket Club M1A,8,3,5,12,3,5
9,Premier Masters,Hong Kong Cricket Club M1B,6,3,3,11,4,5


### Filter schedules_df for HKCC teams

In [9]:
# Filter schedules_df for rows where Home Team column or Away Team column contains "Hong Kong Cricket Club" or "HKCC"
hkcc_schedules_df = schedules_df[(schedules_df["Home Team"].str.contains(hkcc)) |
                                    (schedules_df["Home Team"].str.contains("hkcc", case=False)) |
                                    (schedules_df["Away Team"].str.contains(hkcc)) |
                                    (schedules_df["Away Team"].str.contains("hkcc", case=False))]

### Create results_df from hkcc_schedules_df

In [10]:
def parse_result(result):
    """
    Function to parse the 'result' string
    """
    overall, rubbers = result.split('(')
    rubbers = rubbers.strip(')').split(',')
    return overall, rubbers

In [11]:
def count_games_won(row):
    """
    Function to count the number of games won by each team in a match,
    handling walkovers (WO) and conceded rubbers (CR) by referring to the 'Overall Score'.
    """
    home_games_won = 0
    away_games_won = 0

    # Calculate the games won from the rubbers, excluding 'CR' and 'WO'
    for rubber in row['Rubbers']:
        if rubber == 'CR' or rubber == 'WO':
            continue
        home, away = map(int, rubber.split('-'))
        home_games_won += home
        away_games_won += away

    # Now handle the 'WO' and 'CR' rubbers by referring to the 'Overall Score'
    if 'WO' in row['Rubbers'] or 'CR' in row['Rubbers']:
        home_overall_score, away_overall_score = map(int, row['Overall Score'].split('-'))
        
        # If the home team has a higher overall score, award the missing games to them
        # Otherwise, award the missing games to the away team
        for rubber in row['Rubbers']:
            if rubber == 'WO' or rubber == 'CR':
                if home_overall_score > away_overall_score:
                    home_games_won += 3
                else:
                    away_games_won += 3

    return home_games_won, away_games_won

In [12]:
# Exclude rows where 'Away Team' is '[BYE]' (indicative of a bye week)
results_df = hkcc_schedules_df[hkcc_schedules_df['Away Team'] != '[BYE]'].copy()

# Replace NaN values in 'Result' with an empty string before applying str.contains
results_df['Result'] = results_df['Result'].fillna('')

# Keep rows where 'Result' contains brackets (indicative of a played match)
results_df = results_df[results_df['Result'].str.contains(r'\(')]

# Apply the function to the 'Result' column
results_df[['Overall Score', 'Rubbers']] = results_df['Result'].apply(lambda x: pd.Series(parse_result(x)))

# Drop the original 'Result' column
results_df.drop(columns=['Result'], inplace=True)

# Replace 'CR' with NaN
results_df.replace('CR', np.nan, inplace=True)
results_df.replace('WO', np.nan, inplace=True)

# Count the number of Rubbers For and Against for each team

# Splitting the 'Overall Score' into two separate columns
results_df[['Home Score', 'Away Score']] = results_df['Overall Score'].str.split('-', expand=True).astype(int)

# Initialize dictionaries to keep track of won and conceded rubbers
rubbers_won = {}
rubbers_conceded = {}

# Create Games Won columns
results_df[['Home Games Won', 'Away Games Won']] = results_df.apply(count_games_won, axis=1, result_type='expand')

In [13]:
# Create Home Win column, using Home Games Won vs Away Games Won as tiebreaker
for i, row in results_df.iterrows():
    if row['Home Score'] > row['Away Score']:
        results_df.loc[i, 'Home Win'] = 1
    elif row['Home Score'] < row['Away Score']:
        results_df.loc[i, 'Home Win'] = 0
    else:
        if row["Home Games Won"] > row["Away Games Won"]:
            results_df.loc[i, 'Home Win'] = 1
        elif row["Home Games Won"] < row["Away Games Won"]:
            results_df.loc[i, 'Home Win'] = 0
        else:
            results_df.loc[i, 'Home Win'] = "Don't know"


In [14]:
results_df["Home Win"] = results_df["Home Win"].astype(int) # if this leads to error, we have an unknown 'tied' result to investigate
results_df.head()

Unnamed: 0,Home Team,vs,Away Team,Venue,Time,Match Week,Date,Division,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Home Win
0,FWD,v,Hong Kong Cricket Club 1,HK Squash Centre,19:00,1,10/10/2024,Premier Main,1-2,"[0-3, 3-0, 0-3]",1,2,3,6,0
3,Hong Kong Football Club 1B,v,Hong Kong Cricket Club 1,Hong Kong Football Club,19:00,2,17/10/2024,Premier Main,1-2,"[3-0, 0-3, 0-3]",1,2,3,6,0
8,Hong Kong Cricket Club 1,v,Kowloon Cricket Club 1B,Hong Kong Cricket Club,19:00,3,24/10/2024,Premier Main,2-1,"[3-0, 3-0, 2-3]",2,1,8,3,1
10,Hong Kong Cricket Club 1,v,Hong Kong Football Club 1A,Hong Kong Cricket Club,19:00,4,21/11/2024,Premier Main,2-1,"[3-0, 3-0, 1-3]",2,1,7,3,1
13,Hong Kong Cricket Club 1,v,Kowloon Cricket Club 1A,Hong Kong Cricket Club,19:00,5,28/11/2024,Premier Main,1-2,"[3-0, 0-3, 0-3]",1,2,3,6,0


In [15]:
# Function to create Home Points and Away Points columns
def create_points_columns(df):

    df['Home Points'] = 0
    df['Away Points'] = 0

    for i, row in df.iterrows():
        if row['Home Score'] > row['Away Score']:
            df.at[i, 'Home Points'] = row["Home Score"] + 1
            df.at[i, 'Away Points'] = row["Away Score"]
        elif row['Home Score'] < row['Away Score']:
            df.at[i, 'Home Points'] = row["Home Score"]
            df.at[i, 'Away Points'] = row["Away Score"] + 1
        else:
            if row["Home Games Won"] > row["Away Games Won"]:
                df.at[i, 'Home Points'] = row["Home Score"] + 1
                df.at[i, 'Away Points'] = row["Away Score"]
            elif row["Home Games Won"] < row["Away Games Won"]:
                df.at[i, 'Home Points'] = row["Home Score"]
                df.at[i, 'Away Points'] = row["Away Score"] + 1
            else:
                print(f"Error: No winner found for {row['Home Team']} vs {row['Away Team']}")

    return df

# Apply the function to the dataframe
results_df = create_points_columns(results_df)

In [16]:
results_df.head()

Unnamed: 0,Home Team,vs,Away Team,Venue,Time,Match Week,Date,Division,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Home Win,Home Points,Away Points
0,FWD,v,Hong Kong Cricket Club 1,HK Squash Centre,19:00,1,10/10/2024,Premier Main,1-2,"[0-3, 3-0, 0-3]",1,2,3,6,0,1,3
3,Hong Kong Football Club 1B,v,Hong Kong Cricket Club 1,Hong Kong Football Club,19:00,2,17/10/2024,Premier Main,1-2,"[3-0, 0-3, 0-3]",1,2,3,6,0,1,3
8,Hong Kong Cricket Club 1,v,Kowloon Cricket Club 1B,Hong Kong Cricket Club,19:00,3,24/10/2024,Premier Main,2-1,"[3-0, 3-0, 2-3]",2,1,8,3,1,3,1
10,Hong Kong Cricket Club 1,v,Hong Kong Football Club 1A,Hong Kong Cricket Club,19:00,4,21/11/2024,Premier Main,2-1,"[3-0, 3-0, 1-3]",2,1,7,3,1,3,1
13,Hong Kong Cricket Club 1,v,Kowloon Cricket Club 1A,Hong Kong Cricket Club,19:00,5,28/11/2024,Premier Main,1-2,"[3-0, 0-3, 0-3]",1,2,3,6,0,1,3


In [17]:
# Convert Date column to datetime format and then filter for rows that are in November
results_df['Date'] = pd.to_datetime(results_df['Date'], dayfirst=True)
november_results_df = results_df[results_df['Date'].dt.month == 11]
november_results_df.head()

Unnamed: 0,Home Team,vs,Away Team,Venue,Time,Match Week,Date,Division,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Home Win,Home Points,Away Points
10,Hong Kong Cricket Club 1,v,Hong Kong Football Club 1A,Hong Kong Cricket Club,19:00,4,2024-11-21,Premier Main,2-1,"[3-0, 3-0, 1-3]",2,1,7,3,1,3,1
13,Hong Kong Cricket Club 1,v,Kowloon Cricket Club 1A,Hong Kong Cricket Club,19:00,5,2024-11-28,Premier Main,1-2,"[3-0, 0-3, 0-3]",1,2,3,6,0,1,3
43,Hong Kong Football Club 2B,v,Hong Kong Cricket Club 2,Hong Kong Football Club,19:00,5,2024-11-04,2,3-1,"[0-3, 3-1, 3-2, 3-0]",3,1,9,6,1,4,1
49,Hong Kong Football Club 2A,v,Hong Kong Cricket Club 2,Hong Kong Football Club,19:00,7,2024-11-18,2,3-1,"[3-2, 3-0, 3-1, 0-3]",3,1,9,6,1,4,1
51,Hong Kong Cricket Club 2,v,Kowloon Cricket Club 2,Hong Kong Cricket Club,19:00,8,2024-11-25,2,0-4,"[1-3, 2-3, 0-3, 2-3]",0,4,5,12,0,0,5


In [18]:
# Create a dataframe similar to hkcc_summary_df but based on the results in november_results_df
# To do this, we aggregate the Home Team Home Points won and Away Team Away Points won and combine them
# We also aggregate the number of games played by each team

# Create a dictionary to store the points won by each team
points_won = {}
games_played = {}
games_won = {}

# Iterate over the rows in november_results_df
for i, row in november_results_df.iterrows():
    home_team = row['Home Team']
    away_team = row['Away Team']
    home_points = row['Home Points']
    away_points = row['Away Points']

    # If the team is not in the dictionary, add it
    if home_team not in points_won:
        points_won[home_team] = 0
    if away_team not in points_won:
        points_won[away_team] = 0

    # Add the points won to the dictionary
    points_won[home_team] += home_points
    points_won[away_team] += away_points

    # Add games played to the dictionary
    if home_team not in games_played:
        games_played[home_team] = 0
    if away_team not in games_played:
        games_played[away_team] = 0

    games_played[home_team] += 1
    games_played[away_team] += 1

    # Add games won to the dictionary
    if home_team not in games_won:
        games_won[home_team] = 0
    if away_team not in games_won:
        games_won[away_team] = 0
    
    if row["Home Win"] == 1:
        games_won[home_team] += 1
    else:
        games_won[away_team] += 1

# Create a dataframe from the dictionary
november_summary_df = pd.DataFrame(list(points_won.items()), columns=['Team', 'Points'])
november_summary_df['Played'] = november_summary_df['Team'].map(games_played)
november_summary_df['Won'] = november_summary_df['Team'].map(games_won)
november_summary_df['Lost'] = november_summary_df['Played'] - november_summary_df['Won']

# Drop Team rows that don't contain "Hong Kong Cricket Club" or "HKCC"
november_summary_df = november_summary_df[(november_summary_df["Team"].str.contains(hkcc)) |
                                          (november_summary_df["Team"].str.contains("hkcc", case=False))]

# Create Division column by using Team column of november_summary_df and hkcc_summary_df
november_summary_df["Division"] = november_summary_df["Team"].map(hkcc_summary_df.set_index("Team")["Division"])

november_summary_df[["Division", "Team", "Played", "Won", "Lost", "Points"]]

Unnamed: 0,Division,Team,Played,Won,Lost,Points
0,Premier Main,Hong Kong Cricket Club 1,2,1,1,4
4,2,Hong Kong Cricket Club 2,3,0,3,2
7,4,Hong Kong Cricket Club 3,4,2,2,14
13,6,Hong Kong Cricket Club 4,4,2,2,11
17,7A,Hong Kong Cricket Club 5,3,1,2,7
22,7B,Hong Kong Cricket Club 6,3,0,3,3
25,11,HKCC Tuesday Night Rockers,4,2,2,14
30,15A,Hong Kong Cricket Club 8,2,0,2,4
33,Premier Masters,Hong Kong Cricket Club M1B,2,1,1,3
34,Premier Masters,Hong Kong Cricket Club M1A,4,2,2,7


In [19]:
# If 'Home Team' contains 'Hong Kong Cricket Club' or 'HKCC', then summarize the Home Points won for each Home Team. Do the same for the Away Team.
# Then combine the two dataframes into one.

hkcc_home_points = results_df[results_df['Home Team'].str.contains(hkcc) 
                              | results_df['Home Team'].str.contains('hkcc', case=False)].groupby('Home Team')['Home Points'].sum()

hkcc_away_points = results_df[results_df['Away Team'].str.contains(hkcc) 
                                | results_df['Away Team'].str.contains('hkcc', case=False)].groupby('Away Team')['Away Points'].sum()

hkcc_points_df = pd.concat([hkcc_home_points, hkcc_away_points], axis=1).fillna(0)

In [20]:
hkcc_points_df

Unnamed: 0,Home Points,Away Points
HKCC Tuesday Night Rockers,13,5
Hong Kong Cricket Club 1,7,6
Hong Kong Cricket Club 2,6,2
Hong Kong Cricket Club 3,15,5
Hong Kong Cricket Club 4,15,13
Hong Kong Cricket Club 5,11,8
Hong Kong Cricket Club 6,8,10
Hong Kong Cricket Club 8,10,5
Hong Kong Cricket Club L1,0,2
Hong Kong Cricket Club L2,4,7


In [21]:
results_df[results_df["Home Score"] == 
            results_df["Away Score"]]

Unnamed: 0,Home Team,vs,Away Team,Venue,Time,Match Week,Date,Division,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Home Win,Home Points,Away Points
1796,Hong Kong Cricket Club M2A,v,Hong Kong Cricket Club M2B,Hong Kong Cricket Club,19:00,1,2024-10-02,M2,2-2,"[3-0, 1-3, 3-0, 0-3]",2,2,7,6,1,3,2
1801,Ladies Recreation Club M2A,v,Hong Kong Cricket Club M2A,Ladies Recreation Club,19:00,3,2024-10-16,M2,2-2,"[3-1, 3-1, 0-3, 0-3]",2,2,6,8,0,2,3
1804,Hong Kong Cricket Club M2B,v,Ladies Recreation Club M2A,Hong Kong Cricket Club,19:00,4,2024-10-23,M2,2-2,"[0-3, 3-2, 3-0, 1-3]",2,2,7,8,0,2,3
1816,Hong Kong Cricket Club M2A,v,Ladies Recreation Club M2A,Hong Kong Cricket Club,19:00,8,2024-11-20,M2,2-2,"[3-0, 2-3, 2-3, 3-2]",2,2,10,8,1,3,2
1819,Ladies Recreation Club M2A,v,Hong Kong Cricket Club M2B,Ladies Recreation Club,19:00,9,2024-11-27,M2,2-2,"[3-2, 1-3, 1-3, CR]",2,2,5,11,0,2,3
2062,Hong Kong Football Club L2A,v,Hong Kong Cricket Club L2,Hong Kong Football Club,19:00,4,2024-10-29,L2,2-2,"[3-1, 0-3, 3-1, 0-3]",2,2,6,8,0,2,3


In [22]:
round(hkcc_summary_df["Won"].sum() / hkcc_summary_df["Played"].sum(), 3)

0.426

### Show HKCC results that haven't been uploaded yet

In [23]:
# Filter rows where Home Team column or Away Team column contains "Hong Kong Cricket Club" or "hkcc" (case-insensitive)
hkcc_awaiting_results_df = awaiting_results_df[(awaiting_results_df["Home Team"].str.contains(hkcc, case=False)) | 
                                               (awaiting_results_df["Away Team"].str.contains(hkcc, case=False)) |
                                               (awaiting_results_df["Home Team"].str.contains("hkcc", case=False)) |
                                                (awaiting_results_df["Away Team"].str.contains("hkcc", case=False))]


In [24]:
hkcc_awaiting_results_df.sort_values("Date")

Unnamed: 0,Home Team,Away Team,Venue,Match Week,Date,Division
0,Hong Kong Cricket Club M1B,Ladies Recreation Club M1,Hong Kong Cricket Club,9,2024-11-28,Premier Masters
