In [205]:
import pandas as pd
import os
import re
import glob
import logging
import numpy as np

In [206]:
# Load schedules_df
schedules_df = pd.read_csv(r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons\2016-2017\schedules_df\M1_schedules_df.csv")

# Load teams_df
teams_df = pd.read_csv(r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons\2016-2017\teams_df\M1_teams_df.csv")

In [207]:
### Functions

In [208]:
# Define functions

def parse_result(result):
    """
    Function to parse the 'result' string
    """
    overall, rubbers = result.split('(')
    rubbers = rubbers.strip(')').split(',')
    return overall, rubbers


def split_overall_score(score):
    """
    Function to split the overall score and return home and away scores
    """
    home_score, away_score = map(int, score.split('-'))
    return home_score, away_score


def update_rubbers(row):
    """
    Function to count rubbers for and against for each team
    """
    logging.debug(f"Updating rubbers for match between {row['Home Team']} and {row['Away Team']}")

    # Update for home team
    rubbers_won[row['Home Team']] = rubbers_won.get(row['Home Team'], 0) + row['Home Score']
    rubbers_conceded[row['Home Team']] = rubbers_conceded.get(row['Home Team'], 0) + row['Away Score']

    # Update for away team
    rubbers_won[row['Away Team']] = rubbers_won.get(row['Away Team'], 0) + row['Away Score']
    rubbers_conceded[row['Away Team']] = rubbers_conceded.get(row['Away Team'], 0) + row['Home Score']


def update_counts(row):
    """
    Function to count CRs and WOs For and Against
    """
    home_score, away_score = map(int, row['Overall Score'].split('-'))
    home_wins = away_wins = 0

    for rubber in row['Rubbers']:
        if rubber == 'CR':
            # Count CRs
            if home_wins < home_score:
                cr_given_count[row['Away Team']] = cr_given_count.get(row['Away Team'], 0) + 1
                cr_received_count[row['Home Team']] = cr_received_count.get(row['Home Team'], 0) + 1
            else:
                cr_given_count[row['Home Team']] = cr_given_count.get(row['Home Team'], 0) + 1
                cr_received_count[row['Away Team']] = cr_received_count.get(row['Away Team'], 0) + 1
        elif rubber == 'WO':
            # Count WOs
            if home_wins < home_score:
                wo_given_count[row['Away Team']] = wo_given_count.get(row['Away Team'], 0) + 1
                wo_received_count[row['Home Team']] = wo_received_count.get(row['Home Team'], 0) + 1
            else:
                wo_given_count[row['Home Team']] = wo_given_count.get(row['Home Team'], 0) + 1
                wo_received_count[row['Away Team']] = wo_received_count.get(row['Away Team'], 0) + 1
        else:
            # Count the rubbers won by each team
            rubber_home, rubber_away = map(int, rubber.split('-'))
            if rubber_home > rubber_away:
                home_wins += 1
            elif rubber_away > rubber_home:
                away_wins += 1
                

def count_games_won(row):
    """
    Function to count the number of games won by each team in a match,
    handling walkovers (WO) and conceded rubbers (CR) by referring to the 'Overall Score'.
    """
    home_games_won = 0
    away_games_won = 0

    # Calculate the games won from the rubbers, excluding 'CR' and 'WO'
    for rubber in row['Rubbers']:
        if rubber == 'CR' or rubber == 'WO':
            continue
        home, away = map(int, rubber.split('-'))
        home_games_won += home
        away_games_won += away

    # Now handle the 'WO' and 'CR' rubbers by referring to the 'Overall Score'
    if 'WO' in row['Rubbers'] or 'CR' in row['Rubbers']:
        home_overall_score, away_overall_score = map(int, row['Overall Score'].split('-'))
        
        # If the home team has a higher overall score, award the missing games to them
        # Otherwise, award the missing games to the away team
        for rubber in row['Rubbers']:
            if rubber == 'WO' or rubber == 'CR':
                if home_overall_score > away_overall_score:
                    home_games_won += 3
                else:
                    away_games_won += 3

    return home_games_won, away_games_won



In [209]:
# Create results_df from schedules_df

### Process Data
# Drop unnecessary columns
schedules_df.drop(columns=['vs', 'Time'], inplace=True)

# Exclude rows where 'Away Team' is '[BYE]'
results_df = schedules_df[schedules_df['Away Team'] != '[BYE]'].copy()

# Replace NaN values in 'Result' with an empty string
results_df['Result'] = results_df['Result'].fillna('')

# Keep rows where 'Result' contains brackets (indicative of a played match)
results_df = results_df[results_df['Result'].str.contains(r'\(')]

# Apply the function to the 'Result' column
results_df[['Overall Score', 'Rubbers']] = results_df['Result'].apply(lambda x: pd.Series(parse_result(x)))

# Drop the original 'Result' column
results_df.drop(columns=['Result'], inplace=True)

# Replace 'CR' and 'WO' with NaN
results_df.replace('CR', np.nan, inplace=True)
results_df.replace('WO', np.nan, inplace=True)

# Splitting the 'Overall Score' into two separate columns
results_df[['Home Score', 'Away Score']] = results_df['Overall Score'].str.split('-', expand=True).astype(int)

# Initialize dictionaries for rubbers won and conceded
rubbers_won = {}
rubbers_conceded = {}

# Create Games Won columns
results_df[['Home Games Won', 'Away Games Won']] = results_df.apply(count_games_won, axis=1, result_type='expand')

In [210]:
# Create results dataframe that ignores games where away team plays at home venue
# Create dictionary of team home venues
team_home_venues = teams_df.set_index("Team Name")["Home"].to_dict()
valid_matches_df = results_df[
    ~results_df.apply(lambda row: team_home_venues.get(row['Away Team']) == row['Venue'], axis=1)].copy()

Split overall score unnecessary as we already ahve Home Score and Away Score?

In [211]:
# Calculate the average score for home and away teams, including neutral venues
average_home_overall_score_inc_neutral = results_df['Home Score'].mean()
average_away_overall_score_inc_neutral = results_df['Away Score'].mean()

In [212]:
# Calculate the average score for home and away teams, excluding neutral venues
average_home_overall_score_exc_neutral = valid_matches_df['Home Score'].mean()
average_away_overall_score_exc_neutral = valid_matches_df['Away Score'].mean()

In [213]:
# filter results_df for rows where the Rubbers column contains "CR" or "WO"
cr_wo_df = results_df[results_df['Rubbers'].apply(lambda x: 'CR' in x or 'WO' in x)].copy()
cr_wo_df

Unnamed: 0,Home Team,Away Team,Venue,Match Week,Date,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won
14,Hong Kong Football Club M1A,Discovery Bay Masters,Hong Kong Football Club (HKFC),5,09/11/2016,2-2,"[0-3, 3-1, 3-0, WO]",2,2,6,7
35,Sovereign Group LRC Master 1,Hong Kong Football Club M1B,Ladies Recreation Club (LRC),10,14/12/2016,2-2,"[3-0, 3-0, WO, WO]",2,2,6,6
36,Discovery Bay Masters,Hong Kong Cricket Club M1,Discovery Bay Residents Club (DBRC),10,14/12/2016,0-4,"[WO, WO, WO, WO]",0,4,0,12
63,Discovery Bay Masters,Hong Kong Cricket Club M1,Discovery Bay Residents Club (DBRC),17,01/03/2017,0-4,"[WO, WO, WO, WO]",0,4,0,12
69,Discovery Bay Masters,Hong Kong Football Club M1B,Discovery Bay Residents Club (DBRC),18,08/03/2017,4-0,"[WO, WO, WO, WO]",4,0,12,0
72,Hong Kong Football Club M1A,Discovery Bay Masters,Hong Kong Football Club (HKFC),19,15/03/2017,3-1,"[3-0, 3-0, 3-0, WO]",3,1,12,0


In [214]:
# Calculate Overall Division Win Percentage, using Games Won as the tiebreaker (If Overall score and Games Won are equal, then ignore the match)
# Create new function to create columns to show whether Home or Away team won the match
def home_team_won(row):
    """Function to determine whether the home team or away team
    won the match, using games won as a tiebreaker. If overall score
    and games won are equal, the match is ignored.
    """
    if row['Home Score'] > row['Away Score']:
        return 'Home'
    elif row['Home Score'] < row['Away Score']:
        return 'Away'
    else:
        # If overall scores are equal, use games won as tiebreaker
        if row['Home Games Won'] > row['Away Games Won']:
            return 'Home'
        elif row['Home Games Won'] < row['Away Games Won']:
            return 'Away'
        else:
            return 'Ignore'


In [215]:
# Apply the function to the results_df
valid_matches_df['Winner'] = valid_matches_df.apply(home_team_won, axis=1)

In [216]:
# Calculate home team win percentage, filtering out matches where the winner is 'Ignore'
home_win_percentage = valid_matches_df[valid_matches_df["Winner"] != "Ignore"]["Winner"].value_counts(normalize=True)["Home"]

In [217]:
# Calculate average home score for each home team
average_home_score = valid_matches_df.groupby("Home Team")["Home Score"].mean().rename("Average Home Score")

In [218]:
# Calculate average away score for each away team
average_away_score = valid_matches_df.groupby("Away Team")["Away Score"].mean().rename("Average Away Score")

In [219]:
# Combine the two Series into one DataFrame
team_average_scores = pd.concat([average_home_score, average_away_score], axis=1)

In [220]:
# Calculate the difference in home and away scores for each team
team_average_scores["Difference"] = team_average_scores["Average Home Score"] - team_average_scores["Average Away Score"]

In [221]:
# Merge with teams_df to get home venue info
team_average_scores = team_average_scores.merge(teams_df, left_index=True, right_on="Team Name")

In [222]:
# Reorganise columns and show teams in order of difference in scores
team_average_scores = team_average_scores[["Team Name", "Home", "Average Home Score", "Average Away Score", "Difference"]].sort_values("Difference", ascending=False)

In [223]:
team_average_scores

Unnamed: 0,Team Name,Average Home Score,Average Away Score,Difference,Home
1,CWB M1,1.818182,1.3,0.518182,Clear Water Bay Club (CWBC)
5,Hong Kong Football Club M1A,1.888889,1.777778,0.111111,Hong Kong Football Club (HKFC)
7,Royal Hong Kong Yacht Club M1,2.909091,2.9,0.009091,Royal Hong Kong Yacht Club (RHKYC)
4,Hong Kong Cricket Club M1,2.6,2.636364,-0.036364,Hong Kong Cricket Club (HKCC)
0,Sovereign Group LRC Master 1,2.090909,2.2,-0.109091,Ladies Recreation Club (LRC)
2,Discovery Bay Masters,1.0,1.4,-0.4,Discovery Bay Residents Club (DBRC)
6,Hong Kong Football Club M1B,0.75,1.2,-0.45,Hong Kong Football Club (HKFC)
3,HK Club Gold,2.2,2.909091,-0.709091,HK CLUB


In [224]:
valid_matches_df

Unnamed: 0,Home Team,Away Team,Venue,Match Week,Date,Overall Score,Rubbers,Home Score,Away Score,Home Games Won,Away Games Won,Winner
0,HK Club Gold,Sovereign Group LRC Master 1,HK CLUB,1,05/10/2016,1-3,"[0-3, 3-1, 0-3, 0-3]",1,3,3,10,Away
1,Royal Hong Kong Yacht Club M1,Discovery Bay Masters,Royal Hong Kong Yacht Club (RHKYC),1,05/10/2016,4-0,"[3-2, 3-0, 3-0, 3-0]",4,0,12,2,Home
3,Hong Kong Cricket Club M1,CWB M1,Hong Kong Cricket Club (HKCC),1,05/10/2016,2-2,"[3-0, 1-3, 3-0, 1-3]",2,2,8,6,Home
4,Hong Kong Football Club M1A,Royal Hong Kong Yacht Club M1,Hong Kong Football Club (HKFC),2,19/10/2016,2-2,"[3-0, 1-3, 0-3, 3-0]",2,2,7,6,Home
6,Hong Kong Football Club M1B,Sovereign Group LRC Master 1,Hong Kong Football Club (HKFC),3,26/10/2016,0-4,"[1-3, 2-3, 0-3, 2-3]",0,4,5,12,Away
...,...,...,...,...,...,...,...,...,...,...,...,...
80,Hong Kong Football Club M1B,Royal Hong Kong Yacht Club M1,Hong Kong Football Club (HKFC),21,29/03/2017,0-4,"[1-3, 0-3, 0-3, 0-3]",0,4,1,12,Away
81,Hong Kong Cricket Club M1,Hong Kong Football Club M1A,Hong Kong Cricket Club (HKCC),21,29/03/2017,4-0,"[3-1, 3-0, 3-0, 3-0]",4,0,12,1,Home
82,Hong Kong Football Club M1B,CWB M1,Hong Kong Football Club (HKFC),22,05/04/2017,3-1,"[3-0, 2-3, 3-2, 3-1]",3,1,11,6,Home
83,Discovery Bay Masters,HK Club Gold,Discovery Bay Residents Club (DBRC),22,05/04/2017,2-2,"[2-3, 1-3, 3-2, 3-0]",2,2,9,8,Home


In [225]:
# Save the results_df to a CSV file
results_df.to_csv(r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons\2016-2017\results_df\M1_results_df.csv", index=False)

In [226]:
# Save the team_average_scores to a CSV file
team_average_scores.to_csv(r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons\2016-2017\home_away_data\M1_team_average_scores.csv", index=False)

In [227]:
output_folder = r"C:\Users\bpali\PycharmProjects\SquashApp\previous_seasons\2016-2017\home_away_data"

In [228]:
# Save average_home_overall_score_exc_neutral, average_away_overall_score_exc_neutral, home_win_percentage to a CSV file
average_scores_file = os.path.join(output_folder, f"M1_overall_scores.csv")
with open(average_scores_file, 'w') as f:
    f.write(f"{average_home_overall_score_exc_neutral},{average_away_overall_score_exc_neutral},{home_win_percentage}")
