In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from rapidfuzz import process, fuzz

In [None]:
file_path = r'Data\new data.xlsx'

excel_data = pd.ExcelFile(file_path)

dfs = {}

for sheet_name in excel_data.sheet_names:
    dfs[sheet_name] = pd.read_excel(excel_data, sheet_name=sheet_name, header=0)

for df_name, df in dfs.items():
    if 'Rk' in df.columns:
        df.drop(columns=['Rk'], inplace=True)

#for sheet, df in dfs.items():
 #   print(f"Sheet name: {sheet}")
  #  print(df.head())

In [3]:
unique_names = set()

for df in dfs.values():
    if 'name' in df.columns:
        unique_names.update(df['name'].dropna().unique())

unique_names_list = list(unique_names)
master_df = pd.DataFrame(unique_names_list, columns=['name'])

name_mapping = {}
standard_names = []

for name in master_df['name']:
    if standard_names:
        match, score, _ = process.extractOne(name, standard_names, scorer=fuzz.WRatio)
        if score >= 85:
            name_mapping[name] = match
        else:
            standard_names.append(name)
            name_mapping[name] = name
    else:
        standard_names.append(name)
        name_mapping[name] = name

master_df['name'] = master_df['name'].replace(name_mapping)

for key, df in dfs.items():
    if 'name' in df.columns:
        df['name'] = df['name'].replace(name_mapping)

In [4]:
result_counts = defaultdict(lambda: defaultdict(int))

for key, df in dfs.items():
    df.columns = df.columns.str.strip()
    
    name_column = next((col for col in df.columns if 'name' in col.lower()), None)
    if not name_column:
        continue

    rank_columns = [col for col in df.columns if col.startswith('Rk')]
    
    for _, row in df.iterrows():
        name = row[name_column]
        for col in rank_columns:
            if row[col] in ['1st', '2nd', '3rd', '4th']:
                result_counts[name][row[col]] += 1

master_names = master_df['name'].tolist()
filtered_results = {name: result_counts[name] for name in master_names}

for rank in ['1st', '2nd', '3rd', '4th']:
    master_df[rank] = master_df['name'].map(lambda name: filtered_results.get(name, {}).get(rank, 0))

In [5]:
score_columns = ["R1", "R2", "R3", "R4", "R5"]

master_df['Total Accumulated Score'] = 0
master_df['Overall Average Score'] = 0.0
master_df['Count Appearances in DataFrames'] = 0

for col in score_columns:
    master_df[f'Total {col} Score'] = 0.0
    master_df[f'Count {col} Appearances'] = 0
    master_df[f'Average {col}'] = 0.0

for key, df in dfs.items():
    df.columns = df.columns.str.strip()
    
    name_column = next((col for col in df.columns if 'name' in col.lower()), None)
    if not name_column:
        continue

    available_columns = [col for col in score_columns if col in df.columns]
    for col in available_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    for _, row in df.iterrows():
        name = row[name_column]
        if name in master_df['name'].values:
            master_df.loc[master_df['name'] == name, 'Count Appearances in DataFrames'] += 1

            total_score = row[available_columns].sum()
            average_score = row[available_columns].mean()

            master_df.loc[master_df['name'] == name, 'Total Accumulated Score'] += total_score

            for col in available_columns:
                master_df.loc[master_df['name'] == name, f'Total {col} Score'] += row[col]
                master_df.loc[master_df['name'] == name, f'Count {col} Appearances'] += 1

for col in score_columns:
    master_df[f'Average {col}'] = (
        master_df[f'Total {col} Score'] / master_df[f'Count {col} Appearances']
    ).fillna(0).round(2)

master_df['Overall Average Score'] = (
    master_df['Total Accumulated Score'] / master_df['Count Appearances in DataFrames']
).fillna(0).round(2)

master_df.drop(
    [f'Total {col} Score' for col in score_columns]
    + [f'Count {col} Appearances' for col in score_columns]
    + ['Count Appearances in DataFrames'],
    axis=1,
    inplace=True
)

In [None]:
positions = ["Opening Government", "Opening Opposition", "Closing Government", "Closing Opposition"]
for position in positions:
    master_df[position] = 0

for sheet_name, df in dfs.items():
    if "name" in df.columns and any(col.startswith("side") for col in df.columns):
        side_columns = [col for col in df.columns if col.startswith("side")]
        
        for _, row in df.iterrows():
            participant_name = row["name"]
            
            for side_col in side_columns:
                if side_col in row and row[side_col] in positions:
                    if participant_name in master_df["name"].values:
                        master_df.loc[master_df["name"] == participant_name, row[side_col]] += 1

In [11]:
import pandas as pd
import numpy as np

# Assuming master_df and dfs are already defined

# Points mapping for each rank
rank_to_points = {"1st": 3, "2nd": 2, "3rd": 1, "4th": 0}

# Add new columns to master_df for the scores
score_columns = ["Opening Government Score", "Opening Opposition Score", "Closing Government Score", "Closing Opposition Score"]
for score_col in score_columns:
    master_df[score_col] = 0

# Debugging: Ensure master_df has expected columns
print("Initial master_df columns:", master_df.columns)

# Process each DataFrame in the dfs dictionary
for sheet_name, df in dfs.items():
    print(f"Processing sheet: {sheet_name}")
    
    # Ensure the necessary columns exist
    if "name" not in df.columns:
        print(f"Skipping sheet {sheet_name}: No 'name' column")
        continue
    
    # Extract rank and side columns
    rank_columns = [col for col in df.columns if col.startswith("Rk")]
    side_columns = [col for col in df.columns if col.startswith("side")]
    
    if not rank_columns or not side_columns:
        print(f"Skipping sheet {sheet_name}: No rank/side columns found")
        continue

    # Ensure side and rank columns are matched
    for side_col, rank_col in zip(side_columns, rank_columns):
        print(f"Matching {rank_col} with {side_col}")
        
        # Iterate over rows in the current DataFrame
        for _, row in df.iterrows():
            participant_name = row["name"]  # Extract participant name
            
            # Check if the name exists in master_df
            if participant_name not in master_df["name"].values:
                print(f"Name '{participant_name}' not found in master_df")
                continue
            
            # Extract side and rank information
            side = row.get(side_col, None)
            rank = row.get(rank_col, None)

            # Debugging: Check extracted side and rank
            print(f"Processing {participant_name}: side={side}, rank={rank}")
            
            # Validate side and rank
            if pd.notna(side) and side in score_columns and pd.notna(rank):
                # Convert rank to points
                points = rank_to_points.get(rank, 0)  # Default to 0 for invalid ranks
                
                # Debugging: Check calculated points
                print(f"Updating {participant_name}: side={side}, points={points}")
                
                # Update the score in master_df
                master_df.loc[master_df["name"] == participant_name, side] += points

# Display or return the updated master_df
print(master_df)

Initial master_df columns: Index(['name', '1st', '2nd', '3rd', '4th', 'Total Accumulated Score',
       'Overall Average Score', 'Average R1', 'Average R2', 'Average R3',
       'Average R4', 'Average R5', 'Opening Government', 'Opening Opposition',
       'Closing Government', 'Closing Opposition', 'Opening Government Score',
       'Opening Opposition Score', 'Closing Government Score',
       'Closing Opposition Score'],
      dtype='object')
Processing sheet: Linfield
Matching Rk1 with side1
Processing Catherine Dudley: side=Opening Opposition, rank=1st
Processing June LePage: side=Closing Government, rank=1st
Processing Spencer McDonald: side=Closing Government, rank=1st
Processing Charlie Said: side=Opening Opposition, rank=1st
Processing Brad Tomasovic: side=Opening Opposition, rank=1st
Processing Dylan Berman: side=Closing Government, rank=1st
Processing Kate Wilkins: side=Opening Opposition, rank=3rd
Processing Davis Thompson: side=Opening Opposition, rank=1st
Processing Malti