In [2]:
import os
# Switch the working directory to the sibling 'streamlit' folder. The path is
# relative, so it is resolved against whatever the working directory is when
# this cell runs.
# NOTE(review): presumably done so the local `utils` module can be imported
# and its data files found - confirm.
os.chdir('../streamlit')

In [3]:
from utils import load_all_data

# Load the dataset once; both flags are handed to load_all_data unchanged.
all_data = load_all_data(
    exclude_teg_50=True,
    exclude_incomplete_tegs=False,
)


2025-09-19 16:10:33.175 
  command:

    streamlit run c:\Users\jonba\Documents\Projects - not on onedrive\teg_v2\.venv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [8]:

# Keep only the columns needed for streak analysis and derive one boolean flag
# per score band from the GrossVP thresholds below. The bands overlap: a row
# with GrossVP <= -2 is flagged as both 'eagle' and 'birdie', and a row with
# GrossVP > 2 is flagged as both 'double_bogey' and 'TBP'.
streaks_data = (
    all_data[['Pl', 'HoleID', 'Sc', 'Career Count', 'GrossVP', 'Hole Order Ever']]
    .assign(
        eagle=lambda d: d['GrossVP'] <= -2,
        birdie=lambda d: d['GrossVP'] <= -1,
        par_better=lambda d: d['GrossVP'] <= 0,
        double_bogey=lambda d: d['GrossVP'] > 1,
        TBP=lambda d: d['GrossVP'] > 2,
    )
)

print(streaks_data.columns)


Index(['Pl', 'HoleID', 'Sc', 'Career Count', 'GrossVP', 'Hole Order Ever',
       'eagle', 'birdie', 'par_better', 'double_bogey', 'TBP'],
      dtype='object')


In [9]:
import pandas as pd

BOOL_COLS = ["eagle", "birdie", "par_better", "double_bogey", "TBP"]


def add_streaks_multi(df, cols=BOOL_COLS, assume_sorted=False):
    """Add running true/false streak counters for each boolean column, per player.

    For every column in ``cols`` two integer columns are added:

    * ``<col>_true_streak``  - length so far of the current run of True values
      (0 on rows where the flag is False)
    * ``<col>_false_streak`` - length so far of the current run of False values
      (0 on rows where the flag is True)

    A run restarts whenever the ``Pl`` value changes or the flag flips.
    Missing flag values are treated as False.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Pl``, every column named in ``cols`` and, unless
        ``assume_sorted`` is True, ``Career Count``.
    cols : list of str
        Flag columns to process. Defaults to ``BOOL_COLS``.
    assume_sorted : bool
        If False (default) rows are first ordered by ``Pl`` then
        ``Career Count``. If True the existing row order is used as-is.

    Returns
    -------
    pandas.DataFrame
        A new frame with the streak columns appended. The input frame is
        never modified, whichever value ``assume_sorted`` has.
    """
    if assume_sorted:
        # Copy so the streak columns are not written onto the caller's frame
        df = df.copy()
    else:
        # Sort once for correctness (stable sort keeps any ties predictable)
        df = df.sort_values(["Pl", "Career Count"], kind="mergesort").copy()

    # Player boundaries do not depend on the flag column, so compute them once
    player_changed = df["Pl"].ne(df["Pl"].shift())

    for col in cols:
        flag = df[col].fillna(False).astype(bool)

        # New segment whenever the player changes OR the flag value flips
        new_segment = player_changed | flag.ne(flag.shift())
        segment_id = new_segment.cumsum()

        # 1-based position of each row within its segment
        pos = df.groupby(segment_id).cumcount() + 1

        # True streak (0 on False rows), False streak (0 on True rows)
        df[f"{col}_true_streak"] = pos.where(flag, 0)
        df[f"{col}_false_streak"] = pos.where(~flag, 0)

    return df

# Sorted copy of the hole data with a true/false streak counter pair per flag column
streaks_processed = add_streaks_multi(df=streaks_data)

In [10]:
# Names of every streak counter column (anything ending in "_streak")
streak_cols = [name for name in streaks_processed.columns if name.endswith("_streak")]

# Longest streak of each kind per player = per-player maximum of each counter
max_streaks = streaks_processed.groupby("Pl")[streak_cols].max().reset_index()


In [11]:
# Index label of the row holding each player's highest "Career Count"
# (presumably their most recent hole - confirm against the data loader)
latest_idx = streaks_processed.groupby("Pl")["Career Count"].idxmax()

# Streak counters as they stand on that row, one row per player
latest_streaks = streaks_processed.loc[latest_idx, ["Pl", *streak_cols]].reset_index(drop=True)


In [12]:
# Put best and latest streaks side by side (probably not needed); shared
# column names are disambiguated with the "_max" / "_latest" suffixes
summary = max_streaks.merge(
    latest_streaks,
    how="inner",
    on="Pl",
    suffixes=("_max", "_latest"),
)


In [15]:
# Streak-column suffix -> prefix placed in front of the display label.
# tidy_streak_summary reads this mapping when building "streak_type".
RENAME_MAP = {
    "_true_streak": "",
    "_false_streak": "no ",
}


def tidy_streak_summary(summary):
    """Reshape the wide streak summary into one row per player and streak type.

    Parameters
    ----------
    summary : pandas.DataFrame
        Has a ``Pl`` column plus value columns whose names end in
        ``_<when>``; the text after the last underscore becomes the output
        column (``max`` / ``latest`` get friendlier names, see Returns).
        The part before it is the streak name, e.g. ``eagle_true_streak``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pl``, ``streak_type`` and one column per distinct
        ``<when>`` value, with ``latest`` renamed to ``current streak`` and
        ``max`` renamed to ``longest streak``. ``streak_type`` is the streak
        name with its RENAME_MAP suffix removed, the mapped prefix added in
        front and an ``s`` appended (``eagle_false_streak`` -> ``no eagles``).
        Names that match no suffix in RENAME_MAP are left unchanged.
    """

    def rename_streak(name):
        # Single source of truth: suffixes and prefixes come from RENAME_MAP
        for suffix, prefix in RENAME_MAP.items():
            if name.endswith(suffix):
                # Strip only the trailing suffix, not every occurrence
                return f"{prefix}{name[:-len(suffix)]}s"
        return name

    long_form = summary.melt(id_vars="Pl", var_name="metric", value_name="value")

    # Split "<streak name>_<when>" on the LAST underscore only
    long_form[["streak_type", "when"]] = long_form["metric"].str.rsplit("_", n=1, expand=True)
    long_form["streak_type"] = long_form["streak_type"].apply(rename_streak)

    # Pivot so the max/latest values sit side by side for each player + streak
    tidy = (
        long_form.pivot(index=["Pl", "streak_type"], columns="when", values="value")
        .reset_index()
        .rename_axis(None, axis=1)
    )

    # Friendlier column names for display
    return tidy.rename(columns={"latest": "current streak", "max": "longest streak"})


# Build the tidy per-player streak table from the merged best/latest summary
summary_tidy = tidy_streak_summary(summary=summary)



In [16]:
# Bare last expression: the notebook renders the tidy streak table as this cell's output
summary_tidy

Unnamed: 0,Pl,streak_type,current streak,longest streak
0,AB,TBPs,0,3
1,AB,birdies,0,1
2,AB,double_bogeys,2,9
3,AB,eagles,0,0
4,AB,no TBPs,3,28
...,...,...,...,...
65,SN,no birdies,16,134
66,SN,no double_bogeys,2,20
67,SN,no eagles,147,698
68,SN,no par_betters,6,20
