<a href="https://colab.research.google.com/github/cha-duh/bde-data/blob/main/league_calculator_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### imports

In [1]:
import pandas as pd
import random
import glob
import os

from statistics import mean
import ipywidgets as widgets

pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Mount Google Drive so files on the shared drive are reachable under /content/drive.
# NOTE(review): `google.colab` is presumably only importable inside Google Colab — confirm
# before running this notebook in another environment.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### constants

In [3]:
# Weight used for improvement points: a player's rating gain over their rolling average
# is multiplied by SCALING_FACTOR * (rolling average / 1000).
SCALING_FACTOR = 0.1

In [4]:
# Root folder of the league's shared drive (available once Drive is mounted).
path = '/content/drive/Shareddrives/bde/'

# Collect every 2023 score sheet, ordered by file name.
scores_glob = os.path.join(path, "scores/2023/*.csv")
csv_files = glob.glob(scores_glob)
csv_files.sort()

In [5]:
# SSE per course/layout key: the stroke count that calculate_rating() maps to a 1000 rating.
# Keys are the codes found in each score sheet's `course` column.
# TODO: consider pulling more data from dgcr (course length per layout, course names, ...)
#       instead of maintaining this table by hand.
# NOTE(review): the `_s` / `_l` suffixes presumably mean short / long layout — confirm.
# NOTE(review): knol_s and knol_l share the same value (42.9) — confirm that is intended.
SSE = {
    'shmo_s': 47.7, 'shmo_l': 54.4,
    'romo_s': 45.0, 'romo_l': 50.4,  # romo_s is a total guess
    'spva_s': 56.6, 'spva_l': 64.0,
    'mbog_s': 44.8, 'mbog_l': 53.4,
    'mbno_s': 45.3, 'mbno_l': 53.9,
    'knol_s': 42.9, 'knol_l': 42.9,
    'r_arro': 27.5, 'r_arr2': 55.0,
    'vera_s': 48.1, 'vera_l': 54.9,
    'coldbk': 67.0,
    'begg_p': 50.0,
    'brew_s': 43.9, 'brew_l': 55.0,
    'johnsn': 43.1,
    'garfld': 46.1,
}

### helper functions

In [6]:
def calculate_rating(score, sse):
    """Convert a round score into a rating relative to the layout's SSE.

    A score equal to ``sse`` rates exactly 1000; each stroke above (below) ``sse``
    subtracts (adds) a number of rating points that is a linear function of ``sse``.
    Two different linear fits are used, switching at ``sse == 50.328725``.

    Args:
        score: total strokes for the round (scalar).
        sse: SSE value of the course/layout played (scalar).

    Returns:
        The round rating as a float.
    """
    # (slope, intercept) of the rating-points-per-stroke line for this SSE range.
    slope, intercept = (-0.225067, 21.3858) if sse > 50.328725 else (-0.487095, 34.5734)
    points_per_stroke = slope * sse + intercept
    strokes_over_sse = score - sse
    return 1000 - strokes_over_sse * points_per_stroke

In [7]:
def calculate_target_stroke_count(rating, sse):
    """Return the stroke count ``1000 / rating * sse`` rounded to 2 decimals.

    Works for plain Python numbers as well as numpy / pandas inputs. Python floats
    have no ``.round`` method, so the built-in ``round`` is used as a fallback.

    Args:
        rating: player rating (scalar, numpy array or pandas Series/DataFrame).
        sse: SSE of the course/layout (scalar or array-like aligned with ``rating``).

    Returns:
        The target stroke count, same kind of object as ``1000 / rating * sse``.

    Raises:
        ZeroDivisionError: if ``rating`` is a plain Python zero.
    """
    target = 1000 / rating * sse
    # numpy scalars/arrays and pandas objects expose .round; built-in numbers do not.
    vector_round = getattr(target, 'round', None)
    if vector_round is not None:
        return vector_round(2)
    return round(target, 2)

In [8]:
# calculate_rating(58, SSE['shmo_l'])

In [9]:
# {
#     'joe': calculate_rating(56, SSE['mbno_l']),
#     'joshua': calculate_rating(59, SSE['mbno_l']),
#     'kevin': calculate_rating(50, SSE['mbno_s']),
#     'isaac': calculate_rating(59, SSE['mbno_s']),
#     'kyle': calculate_rating(44, SSE['mbno_s']),
#     'kat': calculate_rating(50, SSE['mbno_s']),
#     'noah': calculate_rating(64, SSE['mbno_l']),
#     'chase': calculate_rating(66, SSE['mbno_s'])
# }

### concatenate all files into a dataframe

### add each round for 2023

In [10]:
# Read each weekly score sheet once, tag it with its round date, and combine everything
# with a single concat (concatenating inside the loop re-copies all earlier rows on every
# iteration and starts from an empty frame, which newer pandas versions warn about).
weekly_frames = []
for filename in csv_files:
    # The round date is the text between "zoo_" and ".csv" in the file name; parse the
    # base name only so a "zoo_" elsewhere in the directory path cannot confuse the split.
    date = os.path.basename(filename).split('zoo_')[1].split('.csv')[0]
    weekly_frames.append(pd.read_csv(filename).assign(date=date))

if not weekly_frames:
    # Fail with an actionable message instead of a KeyError on the missing 'date' column.
    raise FileNotFoundError('no score CSV files found; check the Drive mount and csv_files')

# Each file keeps its own row index (as before); rows are ordered by round date.
df = pd.concat(weekly_frames).sort_values('date')

# df

### attendance and CTP/LP

In [11]:
# Season totals per player.
attendance = df[['name', 'attendance']].groupby('name').sum()

# One row per round date, one column per player.
weekly_attendance = df[['name', 'date', 'attendance']].pivot_table(
    values='attendance',
    index='date',
    columns='name',
    aggfunc='first'
)

# CTP and LP season totals per player; `extra_points` is their sum.
ctplp = (
    df[['name', 'ctp', 'lp']]
    .groupby('name')
    .sum()
    .assign(extra_points=lambda totals: totals['ctp'] + totals['lp'])
)

# Weekly CTP + LP points. `.assign` builds a new frame, so nothing is written onto a
# slice of `df` (the original only ran quietly because the chained-assignment warning
# is disabled at the top of the notebook).
weekly_ctplp = (
    df[['name', 'date', 'ctp', 'lp']]
    .assign(extra_points=lambda rounds: rounds['ctp'] + rounds['lp'])
    .pivot_table(
        values='extra_points',
        index='date',
        columns='name',
        aggfunc='first'
    )
)

# (
#     attendance,
#     ctplp['extra_points'],
#     weekly_attendance,
#     weekly_ctplp
# )

### per-hole stats

In [12]:
# Per-hole score columns present in the score sheets: hole_1 ... hole_24.
hole_columns = [f'hole_{number}' for number in range(1, 25)]

# Coerce every hole column to numeric (raises if a value cannot be parsed). `.assign`
# returns a new frame, so no column is written onto a slice of `df`.
hole_stats = df[['course', 'name', *hole_columns]].assign(
    **{column: pd.to_numeric(df[column]) for column in hole_columns}
)

# Mean score per hole; the two frames differ only in the order of the index levels.
hole_stats_by_course = hole_stats.groupby(['course', 'name']).mean()
hole_stats_by_name = hole_stats.groupby(['name', 'course']).mean()

# hole_stats_by_name

### pivot round scores to total strokes

In [13]:
# Total strokes per round: one row per date, one column per player.
round_strokes = (
    df[['course', 'name', 'total_score', 'date']]
    .set_index(['date', 'name'])
    .pivot_table(
        values='total_score',
        index=['date'],
        columns='name',
        aggfunc='first'
    )
)

# Average of all players' strokes for each round (missing scores are skipped).
round_strokes['the_field'] = round_strokes.mean(axis=1)
# round_strokes

### ratings

generate round ratings per player based on SSE

In [14]:
# Rate every round: look up the SSE for the course played, then convert the total
# score into a rating with calculate_rating().
ratings = (
    df[['course', 'name', 'total_score', 'date']]
    .set_index(['date', 'name'])
    # Course codes missing from SSE map to NaN and therefore produce a NaN rating.
    .assign(sse=lambda rounds: rounds['course'].map(SSE))
    .assign(
        rating=lambda rounds: rounds.apply(
            lambda played: calculate_rating(played['total_score'], played['sse']),
            axis=1,
        )
    )
    # One row per date, one column per player.
    .pivot_table(
        values='rating',
        index='date',
        columns='name',
        aggfunc='first'
    )
)

# Mean rating of everyone who played that round.
ratings['the_field'] = ratings.mean(axis=1)
# ratings

name,Chase Ramsey,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-04-19,,747.141002,930.51962,930.51962,894.547593,951.242435,930.51962,866.524534,893.002061
2023-04-26,759.571625,803.168505,903.592737,863.423044,886.112875,913.63516,890.741577,863.423044,860.458571
2023-05-03,811.664195,744.799412,935.889686,935.889686,867.384847,923.1055,878.528978,867.384847,870.580894
2023-05-10,,860.530687,1012.799017,976.230396,871.869656,917.22553,948.803931,829.955913,916.77359
2023-05-17,729.667267,818.928075,975.645222,947.543556,933.691971,933.691971,1010.201235,900.707445,906.259593
2023-05-24,748.665568,811.499176,965.09244,923.203368,846.406736,881.314296,895.27732,853.388248,865.605894
2023-05-31,826.513782,837.85275,994.514707,930.51962,894.547593,871.869656,939.903467,928.564498,903.035759
2023-06-07,833.952456,833.952456,935.889686,967.682022,822.808325,989.970283,911.961369,878.528978,896.843197
2023-06-28,,752.956389,963.847276,923.677584,911.421125,883.507891,883.507891,873.465468,884.626232
2023-07-05,755.170355,863.238556,975.645222,994.379667,920.940427,872.605778,869.934251,806.176531,882.261348


### rating average

calculate a rolling average of players' rating as calculated above

In [42]:
# Rolling rating average per player (column-wise over the date-indexed rows).
ROLLING_WINDOW = 6            # number of most recent rounds in each window
MIN_ROUNDS = 4                # rows required before an average is produced (earlier rows are NaN)
COUNTED_ROUNDS = slice(1, 4)  # of the window sorted ascending: skip the lowest, average the next three

ratings_ave = (
    ratings
    # Missed rounds are filled with the player's mean over all rounds so windows have no gaps.
    .fillna(ratings.mean())
    # Rolling runs down the rows by default; the explicit `axis` keyword is deprecated in pandas.
    .rolling(ROLLING_WINDOW, min_periods=MIN_ROUNDS)
    .apply(lambda window: mean(sorted(window)[COUNTED_ROUNDS]))
)
# ratings_ave = ratings.fillna(ratings.mean()).rolling(6, min_periods=4).apply(lambda x: mean(sorted(x)))
# ratings_ave
# ratings_ave.mean()
# ratings.mean()
# (ratings_ave.mean(), ratings.mean())

(name
 Chase Ramsey      768.168005
 Isaac Richmond    813.413591
 Joe Bos           964.853085
 Joshua Owens      942.438077
 Kat Owens         876.034808
 Kevin Richmond    899.663801
 Kyle P-H          913.959515
 Noah Mashni       864.485450
 the_field         889.629472
 dtype: float64,
 name
 Chase Ramsey      772.944563
 Isaac Richmond    814.195686
 Joe Bos           962.895060
 Joshua Owens      942.337093
 Kat Owens         875.556799
 Kevin Richmond    913.543255
 Kyle P-H          924.351437
 Noah Mashni       865.643883
 the_field         891.001830
 dtype: float64)

#### ratings against field

unclear if this will be valuable, but easy to calc

In [16]:
# ratings_field = ratings_ave.apply(lambda x: x-ratings_ave['the_field'])
# ratings_field

### expected strokes

Based on the current rating as calculated above, this is the number of strokes anticipated for each player at each round. It factors into the handicap and therefore the ranking points.

In [17]:
## deprecated

# expected_strokes = ratings_ave.copy()
# expected_strokes['course'] = ratings['course']
# expected_strokes['sse'] = ratings['sse']
# for player in PLAYERS:
#     expected_strokes[player] = expected_strokes.apply(lambda x:(1000/x[player] * x['sse']), axis=1)
# expected_strokes

### ranking points

this is determined by adjusting scores by individual handicap and ranking those adjusted scores. the number of those bested is the number of points granted.

In [18]:
# How far each round's rating sits above (+) or below (-) the player's rolling average.
diff = ratings.sub(ratings_ave)

# Rank within each round: the smallest difference gets rank 1, the largest the highest rank.
# NOTE(review): the `the_field` column is ranked alongside the players — confirm intended.
ranked = diff.rank(axis='columns', ascending=True)
# ranked

### improvement points

In [19]:
# Multiplier applied to a rating gain: SCALING_FACTOR scaled by the rolling average.
# NOTE(review): the 2-decimal rounding applies to this multiplier only, not to the
# product below (e.g. 0.0913 becomes 0.09) — confirm that is the intended precision.
gain_multiplier = (SCALING_FACTOR * (ratings_ave/1000)).round(2)
improvement = (ratings - ratings_ave) * gain_multiplier

# Only gains earn points: non-positive and missing values become 0.
improvement = improvement.where(improvement > 0, 0).round(2)
# (improvement, improvement.sum())

### points per week

In [20]:
# (ranked,
#  improvement,
#  weekly_attendance,
#  ctplp['extra_points'])

# Weekly points per player: attendance + CTP/LP + ranking + improvement.
# A cell is NaN whenever any one of the four components is missing for that date/player.
weekly_attendance.add(weekly_ctplp).add(ranked).add(improvement)

name,Chase Ramsey,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-04-19,,,,,,,,,
2023-04-26,,,,,,,,,
2023-05-03,,,,,,,,,
2023-05-10,,12.55,13.31,10.58,5.0,2.0,10.29,2.0,
2023-05-17,2.0,9.33,8.55,4.86,14.46,4.81,19.82,11.14,
2023-05-24,4.0,12.94,9.91,7.0,5.0,3.0,6.0,7.0,
2023-05-31,14.29,8.13,11.56,3.06,6.75,4.0,10.81,15.04,
2023-06-07,13.5,8.0,2.0,12.67,3.0,18.45,4.0,6.09,
2023-06-28,,1.0,8.0,7.0,12.64,3.0,2.0,9.45,
2023-07-05,6.0,13.34,7.75,16.44,12.31,5.0,3.0,2.0,


In [21]:
# Season total per player: ranking + improvement + attendance + CTP/LP points.
# Labels missing from any component (e.g. `the_field` has no attendance) come out as NaN.
total_points = (
    ranked.sum()
    .add(improvement.sum())
    .add(attendance['attendance'])
    .add(ctplp['extra_points'])
)
total_points

name
Chase Ramsey       55.66
Isaac Richmond    116.44
Joe Bos           115.34
Joshua Owens      114.46
Kat Owens          99.40
Kevin Richmond    106.05
Kyle P-H          133.24
Noah Mashni       104.38
the_field            NaN
dtype: float64