In [39]:
import numpy as np
import pandas as pd
import os
import glob

In [40]:
# Root folder that is listed once per season by the loaders below; each entry is
# treated as a season folder containing "schedules_df" and "teams_df" sub-folders.
# NOTE(review): absolute, machine-specific path — the notebook only runs where it exists.
base_directory = r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons"


### Load Schedules Data

#### Load Previous Seasons

In [41]:
# Load the schedules of every previous season.
# Each "<Division>_schedules_df.csv" file found under
# <base_directory>/<season>/schedules_df is read and tagged with its division
# (file-name prefix before the first "_") and its season (folder name).
schedules_df_list = []
# sorted() makes the row order reproducible; os.listdir() returns entries in arbitrary order
for season_folder in sorted(os.listdir(base_directory)):
    season_folder_path = os.path.join(base_directory, season_folder)
    # Skip stray files sitting next to the season folders (os.listdir would fail on them)
    if not os.path.isdir(season_folder_path):
        continue
    schedules_df_folder = os.path.join(season_folder_path, "schedules_df")
    for file in sorted(os.listdir(schedules_df_folder)):
        if not file.endswith("schedules_df.csv"):
            continue
        file_path = os.path.join(schedules_df_folder, file)
        df = pd.read_csv(file_path)
        # Add Division column to the dataframe
        df["Division"] = file.split("_")[0]
        # Add Season column to the dataframe
        df["Season"] = season_folder

        schedules_df_list.append(df)

# Fail with an actionable message instead of pandas' generic "No objects to concatenate"
if not schedules_df_list:
    raise ValueError(f"No '*schedules_df.csv' files found under {base_directory}")

# Concatenate all the dataframes in schedules_df_list into one big dataframe
all_previous_schedules_df = pd.concat(schedules_df_list, ignore_index=True)

#### Load Current Season

In [42]:
# Divisions of the current season, keyed by division name.
# The loaders in this notebook only iterate over the keys (they become the
# "<division>_..._df.csv" file-name prefix and the "Division" column value).
# NOTE(review): the integer values are presumably ids used by the data source — TODO confirm.
all_divisions = {
    "Premier Main": 424,
    "2": 425,
    "3": 426,
    "4": 427,
    "5": 428,
    "6": 429,
    "7A": 430,
    "7B": 431,
    "8A": 432,
    "8B": 433,
    "9": 434,
    "10": 435,
    "11": 436,
    "12": 437,
    "13A": 438,
    "13B": 439,
    "14": 440,
    "15A": 441,
    "15B": 442,
    "Premier Masters": 443,
    "M2": 444,
    "M3": 445,
    "M4": 446,
    "Premier Ladies": 447,
    "L2": 448,
    "L3": 449,
    "L4": 450,
    }

In [43]:
# 2024-2025 schedules_df directory; the loader below reads
# "week_<N>\<division>_schedules_df.csv" files from it.
# NOTE(review): absolute, machine-specific path.
schedules_df_dir = r"C:\Users\bpali\PycharmProjects\SquashApp\2024-2025\schedules_df"

In [44]:
# Collect, for every division, the schedules file of the most recent saved week.
schedules_dfs = []
for division in all_divisions:
    # Walk backwards from week 30 so the first file that exists is the latest one
    for week in range(30, 0, -1):
        # os.path.join instead of hard-coded "\\" separators, like the previous-seasons loader
        schedules_csv_path = os.path.join(
            schedules_df_dir, f"week_{week}", f"{division}_schedules_df.csv"
        )
        try:
            schedules_df = pd.read_csv(schedules_csv_path)
        except FileNotFoundError:
            # No file for this week/division: try the previous week
            continue
        schedules_df["Season"] = "2024-2025"
        schedules_df["Division"] = division
        schedules_dfs.append(schedules_df)
        # Only the latest week is kept for each division
        break

In [45]:
# Concatenate the per-division 2024-2025 schedules dataframes into one
# (no ignore_index here, so each file's own row index is kept)
schedules_df_2024_2025 = pd.concat(schedules_dfs)

In [46]:
# Concatenate the schedules dataframes from all seasons (previous seasons first,
# then 2024-2025) and renumber the rows with a fresh 0..n-1 index
all_schedules_df = pd.concat([all_previous_schedules_df, schedules_df_2024_2025], ignore_index=True)

### Create Results Dataframe

In [47]:
def parse_result(result):
    """
    Split a match 'result' string into its overall score and its rubber scores.

    The expected shape is "<overall>(<rubber>,<rubber>,...)", for example
    "1-4(0-3,1-3,0-3,3-2,2-3)" or "3-0(3-0,3-0,CR)".

    Parameters
    ----------
    result : str
        Raw result text.

    Returns
    -------
    tuple[str, list[str]]
        The overall score (text before the first '(') and the list of rubber
        entries (comma-separated text after it, surrounding ')' removed).
        Whitespace around the overall score and around each rubber is stripped.
        The list is empty when there is no rubber text at all.
    """
    # partition() splits on the first '(' only, so an extra '(' later in the
    # text can no longer break the two-value unpacking
    overall, _, rubbers_text = result.partition('(')

    # Remove trailing whitespace first so a closing ')' followed by spaces is still stripped
    rubbers_text = rubbers_text.strip().strip(')')

    rubbers = [rubber.strip() for rubber in rubbers_text.split(',')] if rubbers_text else []
    return overall.strip(), rubbers

In [48]:
# Drop unnecessary columns. The drop mutates all_schedules_df in place, so on a
# second run of this cell the columns are already gone; errors='ignore' keeps
# the cell re-runnable instead of raising KeyError.
all_schedules_df.drop(columns=['vs', 'Time'], errors='ignore', inplace=True)

# Treat a missing 'Result' as an empty string so that str.contains can be applied
result_text = all_schedules_df['Result'].fillna('')

# A played match is not a bye ('Away Team' is not '[BYE]') and its 'Result'
# contains an opening bracket (the per-rubber scores)
is_played = (all_schedules_df['Away Team'] != '[BYE]') & result_text.str.contains('(', regex=False)

# Filter once, then copy: the column assignments below write to an independent
# dataframe rather than to a slice (avoids SettingWithCopyWarning)
results_df = all_schedules_df.loc[is_played].copy()

# Parse every 'Result' into (overall score, list of rubbers)
parsed_results = results_df['Result'].map(parse_result)
results_df['Overall Score'] = parsed_results.str[0]
results_df['Rubbers'] = parsed_results.str[1]

# Splitting the 'Overall Score' into two separate integer columns
results_df[['Home Score', 'Away Score']] = results_df['Overall Score'].str.split('-', expand=True).astype(int)

### Load Teams dataframes

In [49]:
# Load the teams of every previous season.
# Each "<Division>_teams_df.csv" file found under
# <base_directory>/<season>/teams_df is read and tagged with its division
# (file-name prefix before the first "_") and its season (folder name).
teams_df_list = []
# sorted() makes the row order reproducible; os.listdir() returns entries in arbitrary order
for season_folder in sorted(os.listdir(base_directory)):
    season_folder_path = os.path.join(base_directory, season_folder)
    # Skip stray files sitting next to the season folders (os.listdir would fail on them)
    if not os.path.isdir(season_folder_path):
        continue
    teams_df_folder = os.path.join(season_folder_path, "teams_df")
    for file in sorted(os.listdir(teams_df_folder)):
        if not file.endswith("teams_df.csv"):
            continue
        file_path = os.path.join(teams_df_folder, file)
        df = pd.read_csv(file_path)
        # Add Division column to the dataframe
        df["Division"] = file.split("_")[0]
        # Add Season column to the dataframe
        df["Season"] = season_folder

        teams_df_list.append(df)

# Fail with an actionable message instead of pandas' generic "No objects to concatenate"
if not teams_df_list:
    raise ValueError(f"No '*teams_df.csv' files found under {base_directory}")

# Concatenate all the dataframes in teams_df_list into one big dataframe
all_previous_teams_df = pd.concat(teams_df_list, ignore_index=True)

In [50]:
# 2024-2025 teams_df directory
teams_df_dir = r"C:\Users\bpali\PycharmProjects\SquashApp\2024-2025\teams_df"

# Collect, for every division, the teams file of the most recent saved week.
teams_dfs = []
for division in all_divisions:
    # Walk backwards from week 30 so the first file that exists is the latest one
    for week in range(30, 0, -1):
        # os.path.join instead of hard-coded "\\" separators, like the previous-seasons loader
        teams_csv_path = os.path.join(
            teams_df_dir, f"week_{week}", f"{division}_teams_df.csv"
        )
        try:
            df = pd.read_csv(teams_csv_path)
        except FileNotFoundError:
            # No file for this week/division: try the previous week
            continue
        df["Season"] = "2024-2025"
        df["Division"] = division
        teams_dfs.append(df)
        # Only the latest week is kept for each division
        break

# Concatenate all the teams dataframes into one
teams_df_2024_2025 = pd.concat(teams_dfs)

# Concatenate the teams dataframes from all seasons
all_teams_df = pd.concat([all_previous_teams_df, teams_df_2024_2025], ignore_index=True)

In [51]:
# Display the combined teams dataframe (all seasons).
# NOTE(review): the rendered output includes the "Convenor" and "Email" columns —
# clear or redact this output before sharing the notebook.
all_teams_df

Unnamed: 0,Team Name,Home,Convenor,Email,Division,Season
0,Banbil,HK Squash Centre (HKSC),Christopher Barrett,thekristophebarrett@gmail.com,10,2016-2017
1,i-MASK Advance Squash Club 7,Cornwall Street (CSPSC),Tat Chi Chow,chowsir30@gmail.com,10,2016-2017
2,Hong Kong Football Club 10A,Hong Kong Football Club (HKFC),Vinod Harjani,vinod@sashaint.com,10,2016-2017
3,Hong Kong Football Club 10B,Hong Kong Football Club (HKFC),Arthur Maloy,arthur@ascentca.com,10,2016-2017
4,Perrier KCC 7,Kowloon Cricket Club (KCC),Dhiraj Kundamal,dhk@kundamalgroup.com,10,2016-2017
...,...,...,...,...,...,...
1796,Hong Kong Football Club L4,Hong Kong Football Club,Jigyasa Dua,jigyasadua88@gmail.com,L4,2024-2025
1797,Hong Kong Football Club JSA,Hong Kong Football Club,Jason Che Man Lam,j.lambo@gmail.com,L4,2024-2025
1798,i-Mask Advance Squash Club L4,Cornwall Street Squash Centre,Tat Chi Chow,chowsir30@gmail.com,L4,2024-2025
1799,JESSICA L4,HK Squash Centre,Chin Pik Ho,pikpik@gmail.com,L4,2024-2025


In [52]:
# Preview the first rows of the parsed results (overall score, rubbers, home/away score)
results_df.head()

Unnamed: 0,Home Team,Away Team,Venue,Result,Match Week,Date,Division,Season,Overall Score,Rubbers,Home Score,Away Score
0,The Hong Kong Jockey Club,i-MASK Advance Squash Club 7,HKJC Sha Tin Club House (HKJC),"1-4(0-3,1-3,0-3,3-2,2-3)",1,03/10/2016,10,2016-2017,1-4,"[0-3, 1-3, 0-3, 3-2, 2-3]",1,4
1,Hong Kong Football Club 10A,Hong Kong Football Club 10B,Hong Kong Football Club (HKFC),"3-2(1-3,3-0,3-1,1-3,3-0)",1,03/10/2016,10,2016-2017,3-2,"[1-3, 3-0, 3-1, 1-3, 3-0]",3,2
2,Royal Hong Kong Yacht Club 10,Banbil,Royal Hong Kong Yacht Club (RHKYC),"4-1(0-3,3-0,3-0,3-0,3-0)",1,03/10/2016,10,2016-2017,4-1,"[0-3, 3-0, 3-0, 3-0, 3-0]",4,1
3,Xavier,Perrier KCC 7,Cornwall Street (CSPSC),"3-2(3-2,3-0,3-2,0-3,2-3)",1,03/10/2016,10,2016-2017,3-2,"[3-2, 3-0, 3-2, 0-3, 2-3]",3,2
4,i-MASK Advance Squash Club 7,Royal Hong Kong Yacht Club 10,Cornwall Street (CSPSC),"3-2(3-0,3-0,3-1,1-3,2-3)",2,17/10/2016,10,2016-2017,3-2,"[3-0, 3-0, 3-1, 1-3, 2-3]",3,2


In [53]:
# Left-join results_df to all_teams_df so that every result row gains the teams
# columns of its away team: ("Away Team", "Division", "Season") in results_df is
# matched against ("Team Name", "Division", "Season") in all_teams_df.
merged_df = results_df.merge(
    all_teams_df,
    how='left',
    left_on=['Away Team', 'Division', 'Season'],
    right_on=['Team Name', 'Division', 'Season'],
)

In [54]:
# Drop unnecessary columns
columns_to_drop = ["Email", "Convenor", "Team Name"]

# Rebind merged_df instead of mutating it in place, and ignore columns that are
# already gone, so that re-running this cell is harmless (the in-place drop
# raised KeyError on a second run).
merged_df = (
    merged_df
    .drop(columns=columns_to_drop, errors="ignore")
    # Rename "Home" column to "Away Team Home" (no-op if already renamed)
    .rename(columns={"Home": "Away Team Home"})
)

In [55]:
# Number of rows after the left merge.
# NOTE(review): this should equal len(results_df) if every (Team Name, Division, Season)
# key is unique in all_teams_df — TODO confirm.
len(merged_df)

13528

In [56]:
# Matches whose venue is the same as the away team's home
venue_is_away_home = merged_df["Venue"].eq(merged_df["Away Team Home"])
merged_df[venue_is_away_home]

Unnamed: 0,Home Team,Away Team,Venue,Result,Match Week,Date,Division,Season,Overall Score,Rubbers,Home Score,Away Score,Away Team Home
1,Hong Kong Football Club 10A,Hong Kong Football Club 10B,Hong Kong Football Club (HKFC),"3-2(1-3,3-0,3-1,1-3,3-0)",1,03/10/2016,10,2016-2017,3-2,"[1-3, 3-0, 3-1, 1-3, 3-0]",3,2,Hong Kong Football Club (HKFC)
19,i-MASK Advance Squash Club 7,Xavier,Cornwall Street (CSPSC),"3-2(2-3,3-2,3-0,0-3,3-0)",5,07/11/2016,10,2016-2017,3-2,"[2-3, 3-2, 3-0, 0-3, 3-0]",3,2,Cornwall Street (CSPSC)
30,Hong Kong Football Club 10B,Hong Kong Football Club 10A,Hong Kong Football Club (HKFC),"2-3(1-3,3-0,0-3,1-3,3-1)",8,28/11/2016,10,2016-2017,2-3,"[1-3, 3-0, 0-3, 1-3, 3-1]",2,3,Hong Kong Football Club (HKFC)
47,Xavier,i-MASK Advance Squash Club 7,Cornwall Street (CSPSC),"1-4(3-0,1-3,0-3,1-3,0-3)",12,16/01/2017,10,2016-2017,1-4,"[3-0, 1-3, 0-3, 1-3, 0-3]",1,4,Cornwall Street (CSPSC)
58,Hong Kong Football Club 10A,Hong Kong Football Club 10B,Hong Kong Football Club (HKFC),"4-1(3-1,2-3,3-1,3-0,3-1)",15,20/02/2017,10,2016-2017,4-1,"[3-1, 2-3, 3-1, 3-0, 3-1]",4,1,Hong Kong Football Club (HKFC)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13507,Hong Kong Football Club L3A,Hong Kong Football Club L3C,Hong Kong Football Club,"3-0(3-0,3-0,3-0)",7,22/11/2024,L3,2024-2025,3-0,"[3-0, 3-0, 3-0]",3,0,Hong Kong Football Club
13509,JESSICA L4,JESSICA Smashin' Mochi,HK Squash Centre,"3-0(3-0,3-1,3-0)",1,04/10/2024,L4,2024-2025,3-0,"[3-0, 3-1, 3-0]",3,0,HK Squash Centre
13510,Hong Kong Football Club JSA,Hong Kong Football Club L4,Hong Kong Football Club,"2-1(2-3,3-0,3-0)",1,04/10/2024,L4,2024-2025,2-1,"[2-3, 3-0, 3-0]",2,1,Hong Kong Football Club
13514,JESSICA Smashin' Mochi,Electrify Squash Ladies,HK Squash Centre,"3-0(3-0,3-0,CR)",2,18/10/2024,L4,2024-2025,3-0,"[3-0, 3-0, CR]",3,0,HK Squash Centre
