In [1]:
import os
os.chdir('../streamlit')

In [None]:
import pandas as pd

import pandas as pd

REQUIRED_COLS = [
    'TEGNum','TEG','Round','Hole','Pl','Player',
    'Sc','PAR','GrossVP','Stableford',
    'Rank_Stableford_TEG','Rank_GrossVP_TEG'
]

def build_events(df: pd.DataFrame) -> pd.DataFrame:
    """
    Emits a tidy hole-level events table.
    Position events include the competition in the EventType, e.g.:
      - 'took_lead_stableford', 'lost_lead_green', 'hit_bottom_spoon', etc.
    Hole outcome events stay competition-agnostic.

    Returns columns:
      ['TEGNum','TEG','Round','Hole','Pl','Player','Sc','PAR','Stableford','GrossVP',
       'EventType','Metric','RankAfter','RankBefore','NPlayers']
    """
    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    d = df.copy()

    # Players per round (for bottom detection)
    n_players = d.groupby(['TEGNum','Round'])['Pl'].nunique().rename('NPlayers')
    d = d.merge(n_players, on=['TEGNum','Round'], how='left')

    # Derive SPOON rank per hole by reversing the Stableford rank at that same hole
    d = d.sort_values(['TEGNum','Round','Hole'])
    d['MaxRank_ThisHole'] = d.groupby(['TEGNum','Round','Hole'])['Rank_Stableford_TEG'].transform('max')
    d['Rank_SPOON_TEG'] = d['MaxRank_ThisHole'] + 1 - d['Rank_Stableford_TEG']

    # Build per-comp RankAfter from your existing ranks
    comp_specs = {
        'stableford': 'Rank_Stableford_TEG',  # Trophy (higher Stableford better)
        'green'     : 'Rank_GrossVP_TEG',     # Green Jacket (lower GrossVP better)
        'spoon'     : 'Rank_SPOON_TEG',       # Spoon (lower Stableford worse)
    }
    for comp_name, rank_col in comp_specs.items():
        d[f'RankAfter_{comp_name}'] = d[rank_col]

    # 🔧 PATCH: compute RankBefore across the whole TEG (bridges R1H18 → R2H1, etc.)
    # Create a TEG-wide hole index (assumes 18 holes per round)
    d['TEG_Hole'] = (d['Round'] - 1) * 18 + d['Hole']

    # Shift per player across TEG-wide order (NOT within round)
    d = d.sort_values(['TEGNum','Pl','TEG_Hole'])
    for comp_name in comp_specs.keys():
        ra = f'RankAfter_{comp_name}'
        rb = f'RankBefore_{comp_name}'
        d[rb] = d.groupby(['TEGNum','Pl'])[ra].shift(1)

    # --- Position event flags (vectorised) ---
    pos_events_frames = []
    for comp_name in comp_specs.keys():
        ra = f'RankAfter_{comp_name}'
        rb = f'RankBefore_{comp_name}'

        took_lead   = (d[ra] == 1) & ((d[rb] > 1) | d[rb].isna())
        lost_lead   = (d[rb] == 1) & (d[ra] > 1)
        hit_bottom  = (d[ra] == d['NPlayers']) & ((d[rb] < d['NPlayers']) | d[rb].isna())
        left_bottom = (d[rb] == d['NPlayers']) & (d[ra] < d['NPlayers'])
        final_flip  = (d['Hole'] == 18) & (d[ra] != d[rb])

        wide = pd.DataFrame({
            f'took_lead_{comp_name}': took_lead,
            f'lost_lead_{comp_name}': lost_lead,
            f'hit_bottom_{comp_name}': hit_bottom,
            f'left_bottom_{comp_name}': left_bottom,
            f'final_hole_position_change_{comp_name}': final_flip,
        })

        base_cols = ['TEGNum','TEG','Round','Hole','Pl','Player','Sc','PAR','Stableford','GrossVP',
                     'NPlayers', ra, rb]

        out = (
            pd.concat([d[base_cols].reset_index(drop=True), wide.reset_index(drop=True)], axis=1)
             .melt(id_vars=base_cols, var_name='EventType', value_name='Flag')
             .query('Flag == True')
             .drop(columns='Flag')
             .rename(columns={ra: 'RankAfter', rb: 'RankBefore'})
        )
        pos_events_frames.append(out)

    pos_events = pd.concat(pos_events_frames, ignore_index=True)

    # --- Hole-outcome events (competition-agnostic) ---
    # (unchanged from your version)
    d_outcomes = d.sort_values(['TEGNum','Round','Pl','TEG_Hole'])  # keep same order as above

    hole_wide = pd.DataFrame({
        'hole_in_one'       : d_outcomes['Sc'].eq(1),
        'eagle'             : d_outcomes['GrossVP'].eq(-2),
        'birdie'            : d_outcomes['GrossVP'].eq(-1),
        'triple_bogey_plus' : d_outcomes['GrossVP'].ge(3),
        'five_over_plus'    : d_outcomes['GrossVP'].ge(5),
        'zero_points'       : d_outcomes['Stableford'].eq(0),
        'five_points_plus'  : d_outcomes['Stableford'].ge(5),
        'four_points_plus'  : d_outcomes['Stableford'].ge(4),
    })

    hole_events = (
        pd.concat([
            d_outcomes[['TEGNum','TEG','Round','Hole','Pl','Player','Sc','PAR','Stableford','GrossVP','NPlayers']].reset_index(drop=True),
            hole_wide.reset_index(drop=True)
        ], axis=1)
        .melt(
            id_vars=['TEGNum','TEG','Round','Hole','Pl','Player','Sc','PAR','Stableford','GrossVP','NPlayers'],
            var_name='EventType', value_name='Flag'
        )
        .query('Flag == True')
        .drop(columns='Flag')
    )
    hole_events['RankAfter'] = pd.NA
    hole_events['RankBefore'] = pd.NA

    # --- Metric helper (only for hole outcomes) ---
    metric_map = {
        'hole_in_one'       : 'Sc',
        'eagle'             : 'GrossVP',
        'birdie'            : 'GrossVP',
        'triple_bogey_plus' : 'GrossVP',
        'five_over_plus'    : 'GrossVP',
        'zero_points'       : 'Stableford',
        'four_points_plus'  : 'Stableford',
        'five_points_plus'  : 'Stableford',
    }

    events = pd.concat([pos_events, hole_events], ignore_index=True)

    events['Metric'] = events.apply(
        lambda r: r[metric_map[r['EventType']]] if r['EventType'] in metric_map else pd.NA,
        axis=1
    )

    # Final ordering
    events = events[['TEGNum','TEG','Round','Hole','Pl','Player','Sc','PAR','Stableford','GrossVP',
                     'EventType','Metric','RankAfter','RankBefore','NPlayers']]

    return events.sort_values(['TEGNum','Round','Hole','EventType','Pl']).reset_index(drop=True)


In [7]:
df = load_all_data()


In [11]:
build_events(df).to_clipboard(index=False)

  events = pd.concat([pos_events, hole_events], ignore_index=True)


In [9]:
make_events.to_clipboard(index=False)