In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
SEASON = "2025-2026"
SEASON_LABEL = "2025/26"
REPO_ROOT = Path.cwd().resolve()
while REPO_ROOT != REPO_ROOT.parent and not (REPO_ROOT / "config").exists():
    REPO_ROOT = REPO_ROOT.parent
SEASON_DIR = REPO_ROOT / SEASON
DIVISIONS_CONFIG_PATH = REPO_ROOT / "config" / "divisions" / f"{SEASON}.json"

with DIVISIONS_CONFIG_PATH.open(encoding="utf-8") as fp:
    divisions_config = json.load(fp)

def _extract_divisions(data):
    """Return {division_name: division_id} from any supported config shape."""
    if isinstance(data, dict):
        if "divisions" in data:
            return _extract_divisions(data["divisions"])
        # already {name: {...}}
        mapping = {}
        for name, meta in data.items():
            if name in {"season", "generated_at"}:
                continue
            if isinstance(meta, dict):
                div_id = meta.get("id") or meta.get("division_id") or meta.get("hks_id")
                if div_id is not None:
                    mapping[str(name)] = int(div_id)
                    continue
            try:
                mapping[str(name)] = int(meta)
            except (TypeError, ValueError):
                continue
        return mapping
    if isinstance(data, list):
        mapping = {}
        for entry in data:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name") or entry.get("division") or entry.get("label")
            div_id = entry.get("id") or entry.get("division_id") or entry.get("hks_id")
            if name is None or div_id is None:
                continue
            try:
                mapping[str(name)] = int(div_id)
            except (TypeError, ValueError):
                continue
        return mapping
    return {}

all_divisions = _extract_divisions(divisions_config)


In [3]:
summary_directory = SEASON_DIR / "summary_df"
schedules_directory = SEASON_DIR / "schedules_df"

In [4]:
awaiting_results_directory = SEASON_DIR / "awaiting_results"

In [5]:
schedules_dfs = []
for division in all_divisions.keys():
    for week in range(25, 0, -1):
        file_path = schedules_directory / f"week_{week}" / f"{division}_schedules_df.csv"
        if file_path.exists():
            schedules_df = pd.read_csv(file_path)
            schedules_df["Division"] = division
            schedules_dfs.append(schedules_df)
            break

schedules_df = pd.concat(schedules_dfs, ignore_index=True)
schedules_df = schedules_df.sort_values(by=["Division", "Match Week", "Date"])

schedules_output_path = SEASON_DIR / f"updated_schedule_{SEASON}.csv"
schedules_df.to_csv(schedules_output_path, index=False)

schedules_df["Date"] = pd.to_datetime(schedules_df["Date"], dayfirst=True)

In [6]:
summary_dfs = []
for division in all_divisions.keys():
    for week in range(25, 0, -1):
        file_path = summary_directory / f"week_{week}" / f"{division}_summary_df.csv"
        if file_path.exists():
            summary_df = pd.read_csv(file_path)
            summary_df["Division"] = division
            summary_df["Rank"] = summary_df["Points"].rank(ascending=False, method="min").astype(int)
            summary_df["Teams"] = len(summary_df)
            summary_dfs.append(summary_df)
            break

summary_df = pd.concat(summary_dfs, ignore_index=True)

In [7]:
awaiting_results_dfs = []
for division in all_divisions.keys():
    for week in range(25, 0, -1):
        file_path = awaiting_results_directory / f"week_{week}" / f"{division}_awaiting_results.csv"
        if file_path.exists():
            awaiting_results_df = pd.read_csv(file_path)
            awaiting_results_df["Division"] = division
            awaiting_results_dfs.append(awaiting_results_df)
            break

awaiting_results_df = pd.concat(awaiting_results_dfs, ignore_index=True)

### Create dataframe just for HKCC teams

In [8]:
hkcc = "Hong Kong Cricket Club"

# Filter rows where Team column contains "Hong Kong Cricket Club"
hkcc_summary_df = summary_df[(summary_df["Team"].str.contains(hkcc)) |
                             (summary_df["Team"].str.contains("hkcc", case=False))].reset_index()

hkcc_df = hkcc_summary_df[["Division", "Team", "Played", "Won", "Lost", "Points", "Rank", "Teams"]].copy()

division_order = [
    "Premier Main",
    "2",
    "4",
    "6",
    "7",
    "10",
    "15B",
    "Premier Masters",
    "M2",
    "M3",
    "M4",
    "Premier Ladies",
    "L2",
    "L3",
    "L4",
]

hkcc_df["Division"] = pd.Categorical(hkcc_df["Division"], categories=division_order, ordered=True)
hkcc_df = hkcc_df.sort_values(by=["Division", "Team", "Rank"], ignore_index=True)

hkcc_df["Team"] = hkcc_df["Team"].str.replace("Hong Kong Cricket Club", "HKCC", regex=False)

hkcc_df

Unnamed: 0,Division,Team,Played,Won,Lost,Points,Rank,Teams
0,Premier Main,HKCC D1,3,2,1,7,2,6
1,2,HKCC D2,5,0,5,6,4,5
2,4,HKCC D4A,7,1,6,9,10,10
3,4,HKCC D4B,7,1,6,10,9,10
4,6,HKCC D6,7,1,6,12,10,12
5,7,HKCC D7A,6,4,2,22,5,11
6,7,HKCC D7B,7,3,4,18,6,11
7,10,HKCC D10,6,3,3,20,5,11
8,15B,HKCC D15,6,6,0,28,1,8
9,Premier Masters,HKCC M1A,7,3,4,16,3,5


In [9]:
def highlight_top_of_league(row):
    if row["Rank"] == 1:
        return ["background-color: #FFFFC5"] * len(row)
    return [""] * len(row)

previous_month = pd.Timestamp.now() - pd.DateOffset(months=1)
report_month = previous_month.strftime("%Y_%m")
report_month_label = previous_month.strftime("%B %Y")
outputs_dir = REPO_ROOT / "outputs"
outputs_dir.mkdir(parents=True, exist_ok=True)
output_filename = f"hkcc_overall_standings_{report_month}.html"
output_path = outputs_dir / output_filename

styled_html = (
    hkcc_df.style
    .apply(highlight_top_of_league, axis=1)
    .set_table_styles([
        {"selector": "th", "props": [("background-color", "#003366"), ("color", "white"), ("padding", "10px"), ("font-size", "14px")]},
        {"selector": "td", "props": [("padding", "8px"), ("font-size", "13px")]},
        {"selector": "tbody tr:nth-child(even)", "props": [("background-color", "#e6f2ff")]}
    ])
    .set_properties(**{
        "text-align": "center",
        "border": "1px solid #ccc"
    })
    .hide(axis="index")
    .to_html()
)

html_output = f"""
<html>
<head>
    <meta charset=\"UTF-8\">
    <title>HK Squash League {SEASON_LABEL} Standings</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            padding: 20px;
        }}
        .wrapper {{
            max-width: 800px;
            margin: 0 auto;
        }}
        .header {{
            display: flex;
            align-items: center;
            gap: 20px;
            margin-bottom: 20px;
        }}
        .header img {{
            height: 70px;
        }}
        .title-block {{
            text-align: left;
        }}
    </style>
</head>
<body>

    <div class=\"wrapper\">
        <div class=\"header\">
            <img src=\"hkcc_logo.png\" alt=\"HKCC Logo\">
            <div class=\"title-block\">
                <h2 style=\"color: #003366; margin: 0;\">HK Squash League - {report_month_label}</h2>
            </div>
        </div>

        {styled_html}
    </div>

</body>
</html>
"""

with output_path.open("w", encoding="utf-8") as f:
    f.write(html_output)


In [10]:
# Count the number of rows where Rank is 4 or less
len(hkcc_summary_df[hkcc_summary_df["Rank"] <= 4])

9

In [11]:
kcc = "Kowloon Cricket Club"

# Filter rows where Team column contains "Hong Kong Cricket Club"
kcc_summary_df = summary_df[summary_df["Team"].str.contains(kcc)].reset_index()

kcc_summary_df["Win %"] = kcc_summary_df["Won"] / kcc_summary_df["Played"] * 100

kcc_summary_df[["Division", "Team", "Played", "Won", "Lost", "Points", "Rank", "Teams", "Win %"]]

Unnamed: 0,Division,Team,Played,Won,Lost,Points,Rank,Teams,Win %
0,2,Kowloon Cricket Club 2,6,5,1,29,1,5,83.333333
1,6,Kowloon Cricket Club 6B,7,7,0,38,1,12,100.0
2,6,Kowloon Cricket Club 6A,7,5,2,25,3,12,71.428571
3,3,Kowloon Cricket Club 3B,7,5,2,26,2,8,71.428571
4,3,Kowloon Cricket Club 3A,7,2,5,18,5,8,28.571429
5,4,Kowloon Cricket Club 4,7,7,0,36,1,10,100.0
6,L2,Kowloon Cricket Club L2B,7,7,0,29,1,6,100.0
7,L2,Kowloon Cricket Club L2A,7,2,5,12,5,6,28.571429
8,9,Kowloon Cricket Club 9,7,3,4,18,6,10,42.857143
9,Premier Main,Kowloon Cricket Club 1A,3,1,2,5,4,6,33.333333


### Filter schedules_df for HKCC teams

In [12]:
# Filter schedules_df for rows where Home Team column or Away Team column contains "Hong Kong Cricket Club" or "HKCC"
hkcc_schedules_df = schedules_df[(schedules_df["Home Team"].str.contains(hkcc)) |
                                    (schedules_df["Home Team"].str.contains("hkcc", case=False)) |
                                    (schedules_df["Away Team"].str.contains(hkcc)) |
                                    (schedules_df["Away Team"].str.contains("hkcc", case=False))]

### Create results_df from hkcc_schedules_df

In [13]:
def parse_result(result):
    """
    Function to parse the 'result' string
    """
    overall, rubbers = result.split('(')
    rubbers = rubbers.strip(')').split(',')
    return overall, rubbers

In [14]:
def count_games_won(row):
    """
    Function to count the number of games won by each team in a match,
    handling walkovers (WO) and conceded rubbers (CR) by referring to the 'Overall Score'.
    """
    home_games_won = 0
    away_games_won = 0

    # Calculate the games won from the rubbers, excluding 'CR' and 'WO'
    for rubber in row['Rubbers']:
        if rubber == 'CR' or rubber == 'WO':
            continue
        home, away = map(int, rubber.split('-'))
        home_games_won += home
        away_games_won += away

    # Now handle the 'WO' and 'CR' rubbers by referring to the 'Overall Score'
    if 'WO' in row['Rubbers'] or 'CR' in row['Rubbers']:
        home_overall_score, away_overall_score = map(int, row['Overall Score'].split('-'))
        
        # If the home team has a higher overall score, award the missing games to them
        # Otherwise, award the missing games to the away team
        for rubber in row['Rubbers']:
            if rubber == 'WO' or rubber == 'CR':
                if home_overall_score > away_overall_score:
                    home_games_won += 3
                else:
                    away_games_won += 3

    return home_games_won, away_games_won

In [15]:
# Exclude rows where 'Away Team' is '[BYE]' (indicative of a bye week)
results_df = hkcc_schedules_df[hkcc_schedules_df['Away Team'] != '[BYE]'].copy()

# Replace NaN values in 'Result' with an empty string before applying str.contains
results_df['Result'] = results_df['Result'].fillna('')

# Keep rows where 'Result' contains brackets (indicative of a played match)
results_df = results_df[results_df['Result'].str.contains(r'\(')]

# Apply the function to the 'Result' column
results_df[['Overall Score', 'Rubbers']] = results_df['Result'].apply(lambda x: pd.Series(parse_result(x)))

# Drop the original 'Result' column
results_df.drop(columns=['Result'], inplace=True)

# Replace 'CR' with NaN
results_df.replace('CR', np.nan, inplace=True)
results_df.replace('WO', np.nan, inplace=True)

# Count the number of Rubbers For and Against for each team

# Splitting the 'Overall Score' into two separate columns
results_df[['Home Score', 'Away Score']] = results_df['Overall Score'].str.split('-', expand=True).astype(int)

# Initialize dictionaries to keep track of won and conceded rubbers
rubbers_won = {}
rubbers_conceded = {}

# Create Games Won columns
results_df[['Home Games Won', 'Away Games Won']] = results_df.apply(count_games_won, axis=1, result_type='expand')

In [16]:
# Create Home Win column, using Home Games Won vs Away Games Won as tiebreaker
for i, row in results_df.iterrows():
    if row['Home Score'] > row['Away Score']:
        results_df.loc[i, 'Home Win'] = 1
    elif row['Home Score'] < row['Away Score']:
        results_df.loc[i, 'Home Win'] = 0
    else:
        if row["Home Games Won"] > row["Away Games Won"]:
            results_df.loc[i, 'Home Win'] = 1
        elif row["Home Games Won"] < row["Away Games Won"]:
            results_df.loc[i, 'Home Win'] = 0
        else:
            results_df.loc[i, 'Home Win'] = "Don't know"


  results_df.loc[i, 'Home Win'] = "Don't know"


In [17]:
# Find the row where Home Win is "Don't know"
results_df[results_df["Home Win"] == "Don't know"]

Unnamed: 0,Home Team,vs,Away Team,Venue,Time,Match Week,Date,Division,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Home Win
1999,Hong Kong Cricket Club L4,v,The Squash Club L4,Hong Kong Cricket Club,19:00:00,8,2025-11-21,L4,2-2,"[3-0, 3-0, 0-3, 0-3]",2,2,6,6,Don't know


In [18]:
# For the row where Home Win is "Don't know", change Home Win to 0, Home Points to 2, and Away Points to 3
results_df.loc[results_df["Home Win"] == "Don't know", "Home Win"] = 0
results_df.loc[results_df["Home Win"] == "Don't know", "Home Points"] = 2
results_df.loc[results_df["Home Win"] == "Don't know", "Away Points"] = 3

In [19]:
results_df["Home Win"] = results_df["Home Win"].astype(int) # if this leads to error, we have an unknown 'tied' result to investigate

In [20]:
# Function to create Home Points and Away Points columns
def create_points_columns(df):

    df['Home Points'] = 0
    df['Away Points'] = 0

    for i, row in df.iterrows():
        if row['Home Score'] > row['Away Score']:
            df.at[i, 'Home Points'] = row["Home Score"] + 1
            df.at[i, 'Away Points'] = row["Away Score"]
        elif row['Home Score'] < row['Away Score']:
            df.at[i, 'Home Points'] = row["Home Score"]
            df.at[i, 'Away Points'] = row["Away Score"] + 1
        else:
            if row["Home Games Won"] > row["Away Games Won"]:
                df.at[i, 'Home Points'] = row["Home Score"] + 1
                df.at[i, 'Away Points'] = row["Away Score"]
            elif row["Home Games Won"] < row["Away Games Won"]:
                df.at[i, 'Home Points'] = row["Home Score"]
                df.at[i, 'Away Points'] = row["Away Score"] + 1
            else:
                print(f"Error: No winner found for {row['Home Team']} vs {row['Away Team']}")

    return df

# Apply the function to the dataframe
results_df = create_points_columns(results_df)

Error: No winner found for Hong Kong Cricket Club L4 vs The Squash Club L4


In [21]:
monthly_results = results_df[results_df['Date'].dt.month == previous_month.month]

In [22]:
# Create a dataframe similar to hkcc_summary_df but based on the results in november_results_df
# To do this, we aggregate the Home Team Home Points won and Away Team Away Points won and combine them
# We also aggregate the number of games played by each team

# Create a dictionary to store the points won by each team
points_won = {}
games_played = {}
games_won = {}

# Iterate over the rows in november_results_df
for i, row in monthly_results.iterrows():
    home_team = row['Home Team']
    away_team = row['Away Team']
    home_points = row['Home Points']
    away_points = row['Away Points']

    # If the team is not in the dictionary, add it
    if home_team not in points_won:
        points_won[home_team] = 0
    if away_team not in points_won:
        points_won[away_team] = 0

    # Add the points won to the dictionary
    points_won[home_team] += home_points
    points_won[away_team] += away_points

    # Add games played to the dictionary
    if home_team not in games_played:
        games_played[home_team] = 0
    if away_team not in games_played:
        games_played[away_team] = 0

    games_played[home_team] += 1
    games_played[away_team] += 1

    # Add games won to the dictionary
    if home_team not in games_won:
        games_won[home_team] = 0
    if away_team not in games_won:
        games_won[away_team] = 0
    
    if row["Home Win"] == 1:
        games_won[home_team] += 1
    else:
        games_won[away_team] += 1

# Create a dataframe from the dictionary
monthly_summary_df = pd.DataFrame(list(points_won.items()), columns=['Team', 'Points'])
monthly_summary_df['Played'] = monthly_summary_df['Team'].map(games_played)
monthly_summary_df['Won'] = monthly_summary_df['Team'].map(games_won)
monthly_summary_df['Lost'] = monthly_summary_df['Played'] - monthly_summary_df['Won']

# Drop Team rows that don't contain "Hong Kong Cricket Club" or "HKCC"
monthly_summary_df = monthly_summary_df[(monthly_summary_df["Team"].str.contains(hkcc)) |
                                         (monthly_summary_df["Team"].str.contains("hkcc", case=False))]

# Create Division column by using Team column of november_summary_df and hkcc_summary_df
monthly_summary_df["Division"] = monthly_summary_df["Team"].map(hkcc_summary_df.set_index("Team")["Division"])

monthly_summary_df = monthly_summary_df[["Division", "Team", "Played", "Won", "Lost", "Points"]]

# Sort by Division based on the configured division order
division_order = list(all_divisions.keys())
monthly_summary_df["Division"] = pd.Categorical(
    monthly_summary_df["Division"],
    categories=division_order,
    ordered=True,
)

monthly_summary_df = monthly_summary_df.sort_values(by=["Division", "Team"], ascending=[True, True])

monthly_summary_df

Unnamed: 0,Division,Team,Played,Won,Lost,Points
9,2,Hong Kong Cricket Club D2,3,0,3,4
20,6,Hong Kong Cricket Club D6,4,0,4,5
1,10,Hong Kong Cricket Club D10,4,2,2,12
14,4,Hong Kong Cricket Club D4A,4,1,3,5
13,4,Hong Kong Cricket Club D4B,4,0,4,2
34,L2,Hong Kong Cricket Club L2,4,2,2,11
28,7,Hong Kong Cricket Club D7A,3,2,1,12
25,7,Hong Kong Cricket Club D7B,4,2,2,11
53,M2,Hong Kong Cricket Club M2,3,1,2,9
70,Premier Main,Hong Kong Cricket Club D1,2,1,1,4


In [23]:
# If 'Home Team' contains 'Hong Kong Cricket Club' or 'HKCC', then summarize the Home Points won for each Home Team. Do the same for the Away Team.
# Then combine the two dataframes into one.

hkcc_home_points = results_df[results_df['Home Team'].str.contains(hkcc) 
                              | results_df['Home Team'].str.contains('hkcc', case=False)].groupby('Home Team')['Home Points'].sum()

hkcc_away_points = results_df[results_df['Away Team'].str.contains(hkcc) 
                                | results_df['Away Team'].str.contains('hkcc', case=False)].groupby('Away Team')['Away Points'].sum()

hkcc_points_df = pd.concat([hkcc_home_points, hkcc_away_points], axis=1).fillna(0)

In [24]:
hkcc_points_df

Unnamed: 0,Home Points,Away Points
Hong Kong Cricket Club D1,1,6
Hong Kong Cricket Club D10,17,3
Hong Kong Cricket Club D15,20,8
Hong Kong Cricket Club D2,2,4
Hong Kong Cricket Club D4A,8,1
Hong Kong Cricket Club D4B,4,6
Hong Kong Cricket Club D6,2,10
Hong Kong Cricket Club D7A,12,10
Hong Kong Cricket Club D7B,10,8
Hong Kong Cricket Club L1,0,0


In [25]:
results_df[(results_df["Home Score"] == results_df["Away Score"]) & 
           (results_df["Home Games Won"] == results_df["Away Games Won"])]

Unnamed: 0,Home Team,vs,Away Team,Venue,Time,Match Week,Date,Division,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Home Win,Home Points,Away Points
1999,Hong Kong Cricket Club L4,v,The Squash Club L4,Hong Kong Cricket Club,19:00:00,8,2025-11-21,L4,2-2,"[3-0, 3-0, 0-3, 0-3]",2,2,6,6,0,0,0


In [26]:
round(hkcc_summary_df["Won"].sum() / hkcc_summary_df["Played"].sum(), 3)

0.447

### Show HKCC results that haven't been uploaded yet

In [27]:
# Filter rows where Home Team column or Away Team column contains "Hong Kong Cricket Club" or "hkcc" (case-insensitive)
hkcc_awaiting_results_df = awaiting_results_df[(awaiting_results_df["Home Team"].str.contains(hkcc, case=False)) | 
                                               (awaiting_results_df["Away Team"].str.contains(hkcc, case=False)) |
                                               (awaiting_results_df["Home Team"].str.contains("hkcc", case=False)) |
                                                (awaiting_results_df["Away Team"].str.contains("hkcc", case=False))]


In [28]:
hkcc_awaiting_results_df.sort_values("Date")

Unnamed: 0,Home Team,Away Team,Venue,Match Week,Date,Division
