In [9]:
import pandas as pd
import numpy as np
import os
import re
import logging

In [10]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("create_player_results_database_all_divisions.log"),
        logging.StreamHandler()
    ]
)

In [11]:
# Base directory
base_directory = "C:/Users/bpali/PycharmProjects/SquashApp/2024-2025"
current_week = 1
previous_week = 0  # Assuming week 0 has no data

In [12]:
division = "7A"

In [13]:
all_divisions = {
    "Premier Main": 424,
    "2": 425,
    "3": 426,
    "4": 427,
    "5": 428,
    "6": 429,
    "7A": 430,
    "7B": 431,
    "8A": 432,
    "8B": 433,
    "9": 434,
    "10": 435,
    "11": 436,
    "12": 437,
    "13A": 438,
    "13B": 439,
    "14": 440,
    "15A": 441,
    "15B": 442,
    "Premier Masters": 443,
    "M2": 444,
    "M3": 445,
    "M4": 446,
    "Premier Ladies": 447,
    "L2": 448,
    "L3": 449,
    "L4": 450,
    }

In [14]:
def build_player_mapping(all_divisions, base_directory, current_week):
    """
    Build a mapping of player names to their original teams.
    This mapping is essential to identify 'Playing Up' players.

    Args:
        all_divisions (dict): Dictionary of division names and their IDs.
        base_directory (str): Base directory path where data files are stored.
        current_week (int): Current week number.

    Returns:
        dict: Mapping of player names to their original teams.
              {
                  'Player Name': 'Original Team',
                  # ... other players
              }
    """
    player_mapping = {}
    
    for division in all_divisions:
        players_df_path = os.path.join(base_directory, "players_df", f"week_{current_week}", f"{division}_players_df.csv")
        if not os.path.exists(players_df_path):
            logging.warning(f"Players file not found for Division '{division}' Week '{current_week}'. Skipping.")
            continue
        try:
            players_df = pd.read_csv(players_df_path)
            for _, row in players_df.iterrows():
                player_name = row['Player']
                team = row['Team']
                # Store the original team
                player_mapping[player_name] = team
        except Exception as e:
            logging.exception(f"Error processing players_df for Division '{division}' Week '{current_week}': {e}")
            continue
    
    logging.info(f"Total players mapped: {len(player_mapping)}")
    return player_mapping


In [15]:
# Build the global player mapping
player_mapping = build_player_mapping(all_divisions, base_directory, current_week)
logging.info(f"Total players mapped across all divisions: {len(player_mapping)}")

2024-10-22 16:39:08,174 - INFO - Total players mapped: 1679
2024-10-22 16:39:08,174 - INFO - Total players mapped across all divisions: 1679


In [16]:
def parse_result(result):
    """
    Parse the 'Result' string into overall score and list of rubbers.
    Example result string: '3-2(3-0,2-3,3-1,1-3,3-2)'
    """
    try:
        if '(' in result and ')' in result:
            overall, rubbers_str = result.split('(')
            overall = overall.strip()
            rubbers_str = rubbers_str.strip(')')
            rubbers = rubbers_str.split(',')
            return overall, rubbers
        else:
            # Handle cases where result is not in expected format
            logging.warning(f"Result string '{result}' is not in expected format.")
            return result.strip(), []
    except Exception as e:
        logging.exception(f"Error parsing result string '{result}': {e}")
        return None, []

In [17]:
def determine_winner(rubber_score, home_player, away_player):
    """
    Determine the winner of a rubber based on the score.

    Args:
        rubber_score (str): Score string, e.g., '3-1', 'CR', 'WO'.
        home_player (str): Home player's name.
        away_player (str): Away player's name.

    Returns:
        str or None: Winner's name or None if undetermined.
    """
    if rubber_score in ['CR', 'WO', 'NA']:
        # 'CR' and 'WO' are handled separately in process_division
        return None  # No winner can be determined here
    try:
        home_score, away_score = map(int, rubber_score.split('-'))
        if home_score > away_score:
            return home_player
        elif away_score > home_score:
            return away_player
        else:
            return None  # Draw, unlikely in squash
    except ValueError as e:
        logging.warning(f"Invalid score format '{rubber_score}' for players '{home_player}' vs '{away_player}': {e}")
        return None  # Invalid score format


In [18]:
def process_division(division, current_week, previous_week, player_mapping, all_divisions, base_directory):
    """
    Process a division for a given week, handling 'CR', 'WO', 'Playing Up' players, and missing data.

    Args:
        division (str): Division name.
        current_week (int): Current week number.
        previous_week (int): Previous week number.
        player_mapping (dict): Global player mapping.
        all_divisions (dict): Dictionary of division names and their IDs.
        base_directory (str): Base directory path where data files are stored.
    """
    # Construct file paths
    players_df_path = os.path.join(base_directory, "players_df", f"week_{current_week}", f"{division}_players_df.csv")
    schedules_df_path = os.path.join(base_directory, "schedules_df", f"week_{current_week}", f"{division}_schedules_df.csv")
    ranking_df_current_path = os.path.join(base_directory, "ranking_df", f"week_{current_week}", f"{division}_ranking_df.csv")
    ranking_df_previous_path = os.path.join(base_directory, "ranking_df", f"week_{previous_week}", f"{division}_ranking_df.csv")
    
    # Check if current week files exist
    current_files_exist = all([
        os.path.exists(players_df_path),
        os.path.exists(schedules_df_path),
        os.path.exists(ranking_df_current_path)
    ])

    if not current_files_exist:
        logging.warning(f"Data files for Division '{division}' not found for week {current_week}. Skipping.")
        return
    
    # Load the current week's DataFrames
    try:
        players_df = pd.read_csv(players_df_path)
        schedules_df = pd.read_csv(schedules_df_path)
        ranking_df_current = pd.read_csv(ranking_df_current_path)
    except Exception as e:
        logging.exception(f"Error loading data for Division '{division}', Week {current_week}: {e}")
        return

    # Determine active players based on 'Games Played'
    if current_week == 1:
        # For week 1, players with 'Games Played' == 1 are active
        players_played_this_week = ranking_df_current[ranking_df_current['Games Played'] == 1]['Name of Player']
        active_players = set(players_played_this_week)
    else:
        # For weeks after week 1, compare with previous week's 'Games Played'
        if not os.path.exists(ranking_df_previous_path):
            logging.warning(f"Ranking data for Division '{division}' not found for previous week {previous_week}. Skipping.")
            return
        try:
            ranking_df_previous = pd.read_csv(ranking_df_previous_path)
        except Exception as e:
            logging.exception(f"Error loading previous ranking data for Division '{division}', Week {previous_week}: {e}")
            return

        # Merge current and previous rankings to find players who played this week
        ranking_comparison = ranking_df_current.merge(
            ranking_df_previous[['Name of Player', 'Games Played']],
            on='Name of Player',
            how='left',
            suffixes=('_current', '_previous')
        )
        ranking_comparison['Games Played_previous'] = ranking_comparison['Games Played_previous'].fillna(0)
        ranking_comparison['Games Played_diff'] = ranking_comparison['Games Played_current'] - ranking_comparison['Games Played_previous']
        players_played_this_week = ranking_comparison[ranking_comparison['Games Played_diff'] == 1]['Name of Player']
        active_players = set(players_played_this_week)

    logging.info(f"Number of players who played in Division '{division}' during week {current_week}: {len(active_players)}")

    if not active_players:
        logging.warning(f"No players played in Division '{division}' during week {current_week}. Skipping.")
        return  

    # Create Results DataFrame
    try:
        # Drop unnecessary columns
        schedules_df.drop(columns=['vs', 'Time'], inplace=True, errors='ignore')
    except Exception as e:
        logging.exception(f"Error dropping columns from schedules_df in Division '{division}', Week {current_week}: {e}")
        return

    # Exclude rows where 'Away Team' is '[BYE]' (indicative of a bye week)
    results_df = schedules_df[schedules_df['Away Team'] != '[BYE]'].copy()

    # Replace NaN values in 'Result' with an empty string before applying str.contains
    results_df['Result'] = results_df['Result'].fillna('')

    # Keep rows where 'Result' contains brackets (indicative of a played match)
    results_df = results_df[results_df['Result'].str.contains(r'\(')]

    # Check if results_df is empty
    if results_df.empty:
        logging.info(f"No match results found for Division '{division}' during week {current_week}.")
        return

    # Replace string 'nan' with actual NaN
    results_df['Result'] = results_df['Result'].replace(to_replace=['nan', 'NaN', 'NAN'], value=np.nan)

    # Now, fill NaN with 'Unknown'
    results_df['Result'] = results_df['Result'].fillna('Unknown')

    # Ensure 'Result' is string type
    results_df['Result'] = results_df['Result'].astype(str)

    # Apply the parse_result function to split 'Result' into 'Overall Score' and 'Rubbers'
    try:
        results_df[['Overall Score', 'Rubbers']] = results_df['Result'].apply(lambda x: pd.Series(parse_result(x)))
    except Exception as e:
        logging.exception(f"Error parsing results for Division '{division}', Week {current_week}: {e}")
        return

    # Determine maximum number of rubbers in any match
    max_rubbers = results_df['Rubbers'].apply(lambda x: len(x) if isinstance(x, list) else 0).max()

    # Assign players to each rubber
    # First, create a mapping of team to their regular active players ordered by 'Order'
    team_regular_players = {}
    for team in players_df['Team'].unique():
        # Filter players who are active and belong to the team
        team_data = players_df[(players_df['Team'] == team) & (players_df['Player'].isin(active_players))].sort_values('Order')
        team_regular_players[team] = list(team_data['Player'])

    # Identify 'playing up' players (active_players not in current division's players_df)
    playing_up_players = active_players - set(players_df['Player'])
    logging.info(f"Number of 'Playing Up' players in Division '{division}' during week {current_week}: {len(playing_up_players)}")

    # For 'playing up' players, find their original division and team
    playing_up_info = {}
    for player in playing_up_players:
        info = player_mapping.get(player)
        if info:
            playing_up_info[player] = info
        else:
            logging.warning(f"Playing Up player '{player}' not found in player_mapping. Marking as 'Unknown'.")
            playing_up_info[player] = {
                'Division': 'Unknown',
                'Team': 'Unknown',
                'Order': None
            }

    # Assign 'playing up' players to their current match's team
    # Since we process per match below, we don't append them globally to teams
    # Instead, we'll handle them within each match

    # Function to assign players to rubbers based on team and rubber number
    def assign_players(match_team, team_regular_players, playing_up_players_in_team, rubber_number):
        """
        Assign players to rubbers based on team players and rubber number.

        Args:
            match_team (str): The team name.
            team_regular_players (dict): Mapping of teams to their regular players.
            playing_up_players_in_team (list): List of 'Playing Up' players assigned to this team.
            rubber_number (int): The rubber number.

        Returns:
            str: Player name or 'Unknown'
        """
        try:
            regular_players = team_regular_players.get(match_team, [])
            num_regular = len(regular_players)
            if rubber_number <= num_regular:
                # Assign regular player
                if rubber_number - 1 < len(regular_players):
                    return regular_players[rubber_number - 1]
                else:
                    return 'Unknown'
            else:
                # Assign 'Playing Up' player
                playing_up_index = rubber_number - num_regular - 1
                if playing_up_index < len(playing_up_players_in_team):
                    return playing_up_players_in_team[playing_up_index]
                else:
                    return 'Unknown'
        except Exception as e:
            logging.exception(f"Error assigning player for team '{match_team}', rubber {rubber_number}: {e}")
            return 'Unknown'

    # Generate player match results
    player_match_results = []
    for idx, row in results_df.iterrows():
        try:
            match_date = row['Date']
            venue = row['Venue']
            home_team = row['Home Team']
            away_team = row['Away Team']
            
            # Identify 'Playing Up' players for Home Team in this match
            home_playing_up_players = []
            for player, info in playing_up_info.items():
                # Assuming 'Playing Up' players are assigned to this team in the current match
                # You might need additional logic to determine which 'Playing Up' players are playing for which team
                # For this example, let's assume that if a 'Playing Up' player is in 'playing_up_info', and they're playing for this team
                # This requires that 'playing_up_info' contains information about which team they're playing for in this match
                # Since such information isn't available, we'll need to adjust the mapping accordingly
                # For simplicity, let's assume 'Playing Up' players are only assigned to one team per week
                # This part may need to be customized based on your actual data structure
                if info['Team'] == home_team:
                    home_playing_up_players.append(player)
            
            # Similarly for Away Team
            away_playing_up_players = []
            for player, info in playing_up_info.items():
                if info['Team'] == away_team:
                    away_playing_up_players.append(player)
            
            # Assign Home Players
            for rubber_num in range(1, max_rubbers + 1):
                home_player = assign_players(home_team, team_regular_players, home_playing_up_players, rubber_num)
                row[f'Home Player {rubber_num}'] = home_player
            
            # Assign Away Players
            for rubber_num in range(1, max_rubbers + 1):
                away_player = assign_players(away_team, team_regular_players, away_playing_up_players, rubber_num)
                row[f'Away Player {rubber_num}'] = away_player
            
            # Process each rubber
            for rubber_num in range(1, max_rubbers + 1):
                rubber_score = row[f'Rubber {rubber_num} Score']
                home_player = row.get(f'Home Player {rubber_num}', 'Unknown')
                away_player = row.get(f'Away Player {rubber_num}', 'Unknown')
                
                if pd.isna(rubber_score) or home_player == 'Unknown' or away_player == 'Unknown':
                    # Handle missing data by assigning 'Unknown'
                    player_match_results.append({
                        'Player Name': home_player,
                        'Team': home_team,
                        'Opponent Name': away_player,
                        'Opponent Team': away_team,
                        'Match Date': match_date,
                        'Venue': venue,
                        'Rubber Number': rubber_num,
                        'Score': rubber_score,
                        'Result': 'Unknown',
                        'Home/Away': 'Home'
                    })
                    player_match_results.append({
                        'Player Name': away_player,
                        'Team': away_team,
                        'Opponent Name': home_player,
                        'Opponent Team': home_team,
                        'Match Date': match_date,
                        'Venue': venue,
                        'Rubber Number': rubber_num,
                        'Score': rubber_score,
                        'Result': 'Unknown',
                        'Home/Away': 'Away'
                    })
                    continue

                # Determine if the rubber was a CR or WO
                if rubber_score.upper() in ['CR', 'WO']:
                    # Assuming 'CR' or 'WO' indicates the away team conceded
                    if rubber_score.upper() == 'CR':
                        winner_player = home_player
                        loser_player = 'Conceded Rubber'
                        loser_team = away_team
                        result_home = 'Win'
                        result_away = 'Loss'
                        score_home = 'CR'
                        score_away = 'CR'
                    elif rubber_score.upper() == 'WO':
                        winner_player = home_player
                        loser_player = 'Walkover'
                        loser_team = away_team
                        result_home = 'Win'
                        result_away = 'Loss'
                        score_home = 'WO'
                        score_away = 'WO'
                    else:
                        # Extend this if there are more scenarios
                        winner_player = 'Unknown'
                        loser_player = 'Unknown'
                        loser_team = 'Unknown'
                        result_home = 'Unknown'
                        result_away = 'Unknown'
                        score_home = rubber_score
                        score_away = rubber_score
                else:
                    # Regular rubber
                    winner_player = determine_winner(rubber_score, home_player, away_player)
                    if winner_player == home_player:
                        result_home = 'Win'
                        result_away = 'Loss'
                    elif winner_player == away_player:
                        result_home = 'Loss'
                        result_away = 'Win'
                    else:
                        result_home = 'Unknown'
                        result_away = 'Unknown'
                    
                    # Adjust the score so that the player's own score is first
                    try:
                        home_score, away_score = map(int, rubber_score.split('-'))
                        # For home player
                        score_home = f"{home_score}-{away_score}"
                        # For away player
                        score_away = f"{away_score}-{home_score}"
                    except ValueError:
                        # If unable to parse the score, use the rubber_score as is
                        score_home = rubber_score
                        score_away = rubber_score

                # Assign Opponent Names based on CR/WO
                if rubber_score.upper() in ['CR', 'WO']:
                    opponent_name_home = loser_player
                    opponent_team_home = loser_team
                else:
                    opponent_name_home = away_player
                    opponent_team_home = away_team

                if rubber_score.upper() in ['CR', 'WO']:
                    opponent_name_away = winner_player
                    opponent_team_away = home_team
                else:
                    opponent_name_away = home_player
                    opponent_team_away = home_team

                # Append Home Player Result
                player_match_results.append({
                    'Player Name': home_player,
                    'Team': home_team,
                    'Opponent Name': opponent_name_home,
                    'Opponent Team': opponent_team_home,
                    'Match Date': match_date,
                    'Venue': venue,
                    'Rubber Number': rubber_num,
                    'Score': score_home,
                    'Result': result_home,
                    'Home/Away': 'Home'
                })
                
                # Append Away Player Result
                player_match_results.append({
                    'Player Name': away_player,
                    'Team': away_team,
                    'Opponent Name': opponent_name_away,
                    'Opponent Team': opponent_team_away,
                    'Match Date': match_date,
                    'Venue': venue,
                    'Rubber Number': rubber_num,
                    'Score': score_away,
                    'Result': result_away,
                    'Home/Away': 'Away'
                })
        except Exception as e:
            logging.exception(f"Error processing match at index {idx} in Division '{division}', Week {current_week}: {e}")
            continue  # Skip to the next match

    # Convert the results to a DataFrame
    player_results_df = pd.DataFrame(player_match_results)

    # Clean and format the DataFrame
    try:
        player_results_df['Match Date'] = pd.to_datetime(player_results_df['Match Date'], 
                                                         dayfirst=True, 
                                                         errors='coerce')
    except Exception as e:
        logging.exception(f"Error converting 'Match Date' to datetime in Division '{division}', Week {current_week}: {e}")
        return

    # Reorder columns and handle missing dates
    player_results_df = player_results_df[[
        'Player Name', 'Team', 'Opponent Name', 'Opponent Team', 'Match Date',
        'Venue', 'Rubber Number', 'Score', 'Result', 'Home/Away'
    ]]
    player_results_df['Match Date'] = player_results_df['Match Date'].fillna(pd.NaT)

    # Save the player_results_df
    output_path = os.path.join(base_directory, "player_results", f"week_{current_week}", f"{division}_player_results.csv")
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        player_results_df.to_csv(output_path, index=False)
        logging.info(f"Player match results saved to {output_path}")
    except Exception as e:
        logging.exception(f"Error saving player results for Division '{division}', Week {current_week}: {e}")
        return


In [19]:
# Run the script
week_numbers = [1]  # Adjust as needed

for current_week in week_numbers:
    previous_week = current_week - 1
    logging.info(f"Processing Division {division} for Week {current_week}")
    try:
            process_division(
                division=division,
                current_week=current_week,
                previous_week=previous_week,
                player_mapping=player_mapping,
                all_divisions=all_divisions,
                base_directory=base_directory
            )
    except Exception as e:
        logging.exception(f"Unexpected error processing Division {division}, Week {current_week}: {e}")
        continue  # Proceed to the next division

2024-10-22 16:39:08,261 - INFO - Processing Division 7A for Week 1
2024-10-22 16:39:08,271 - INFO - Number of players who played in Division '7A' during week 1: 28
2024-10-22 16:39:08,292 - INFO - Number of 'Playing Up' players in Division '7A' during week 1: 2
2024-10-22 16:39:08,306 - ERROR - Error processing match at index 0 in Division '7A', Week 1: string indices must be integers, not 'str'
Traceback (most recent call last):
  File "C:\Users\bpali\AppData\Local\Temp\ipykernel_33580\920610472.py", line 197, in process_division
    if info['Team'] == home_team:
       ~~~~^^^^^^^^
TypeError: string indices must be integers, not 'str'
2024-10-22 16:39:08,312 - ERROR - Error processing match at index 1 in Division '7A', Week 1: string indices must be integers, not 'str'
Traceback (most recent call last):
  File "C:\Users\bpali\AppData\Local\Temp\ipykernel_33580\920610472.py", line 197, in process_division
    if info['Team'] == home_team:
       ~~~~^^^^^^^^
TypeError: string indices 

In [21]:
# Process each division
try:
    process_division(
        division=division,
        current_week=current_week,
        previous_week=previous_week,
        player_mapping=player_mapping,
        all_divisions=all_divisions,
        base_directory=base_directory
    )
except Exception as e:
    logging.exception(f"Unexpected error processing Division '{division}', Week {current_week}: {e}")

# After processing, set previous_week to current_week for the next run
previous_week = current_week


2024-10-22 16:40:45,865 - INFO - Number of players who played in Division '7A' during week 1: 28
2024-10-22 16:40:45,878 - INFO - Number of 'Playing Up' players in Division '7A' during week 1: 2
2024-10-22 16:40:45,879 - ERROR - Error processing match at index 0 in Division '7A', Week 1: string indices must be integers, not 'str'
Traceback (most recent call last):
  File "C:\Users\bpali\AppData\Local\Temp\ipykernel_33580\920610472.py", line 197, in process_division
    if info['Team'] == home_team:
       ~~~~^^^^^^^^
TypeError: string indices must be integers, not 'str'
2024-10-22 16:40:45,881 - ERROR - Error processing match at index 1 in Division '7A', Week 1: string indices must be integers, not 'str'
Traceback (most recent call last):
  File "C:\Users\bpali\AppData\Local\Temp\ipykernel_33580\920610472.py", line 197, in process_division
    if info['Team'] == home_team:
       ~~~~^^^^^^^^
TypeError: string indices must be integers, not 'str'
2024-10-22 16:40:45,882 - ERROR - Error