### imports

In [1]:
import glob
import os
import random
import warnings
from statistics import mean

import ipywidgets as widgets
import pandas as pd

### constants

In [2]:
# Multiplier used by the improvement-points cell: it is applied to
# (rolling rating average / 1000) to turn a rating gain into points.
SCALING_FACTOR = 0.1

In [3]:
# Score sheets are looked up relative to the directory the notebook runs in.
path = os.getcwd()
score_glob = os.path.join(path, "scores/2023/*.csv")

# Sorted so the files are always processed in the same (lexicographic) order.
csv_files = sorted(glob.glob(score_glob))

In [4]:
# SSE value for each course/layout code.
# Keys must match the values found in the score sheets' `course` column: the
# ratings cell looks them up with `course.map(SSE)`, and a code that is missing
# here maps to NaN. The value is the stroke count that `calculate_rating`
# scores as exactly 1000.
# TODO: should we pull more data from dgcr? maybe course length per layout, etc?
# that would also give us course names and such... that's probably better...

SSE = {
    'shmo_s': 47.7,
    'shmo_l': 54.4,
    'romo_s': 45.0, # total guess
    'romo_l': 50.4,
    'spva_s': 56.6,
    'spva_l': 64.0,
    'mbog_s': 44.8,
    'mbog_l': 53.4,
    'mbno_s': 45.3,
    'mbno_l': 53.9,
    'knol_s': 42.9,
    'knol_l': 42.9,  # NOTE(review): identical to knol_s - confirm this is intended
    'r_arro': 27.5,
    'r_arr2': 55.0,  # exactly 2 x r_arro - presumably two loops of the same course; confirm
    'vira_s': 48.1,
    'vira_l': 54.9,
    'coldbk': 67.0,
    'begg_p': 50.0,
    'brew_s': 43.9,
    'brew_l': 55.0,
    'johnsn': 43.1,
    'garfld': 46.1
}

### helper functions

In [5]:
def calculate_rating(score, sse):
    """Convert a round score into a rating relative to the course's SSE.

    A score equal to ``sse`` rates exactly 1000. Every stroke above ``sse``
    subtracts a fixed number of rating points and every stroke below adds the
    same amount. That per-stroke value is a linear function of ``sse``, with
    one line for courses above the 50.328725 threshold and a steeper line for
    the rest (the two lines cross at roughly that threshold).

    Parameters:
        score: strokes thrown in the round (scalar).
        sse: SSE value of the course/layout played (scalar).

    Returns:
        The round rating as a float.
    """
    if sse > 50.328725:
        slope, intercept = -0.225067, 21.3858
    else:
        slope, intercept = -0.487095, 34.5734

    points_per_stroke = slope * sse + intercept
    strokes_over_sse = score - sse
    return 1000 - strokes_over_sse * points_per_stroke

In [6]:
def calculate_target_stroke_count(rating, sse):
    """Return the stroke count expected from a player with ``rating`` on a course.

    The target is ``1000 / rating * sse`` rounded to two decimals, so a
    1000-rated player is expected to shoot exactly ``sse``.

    Parameters:
        rating: player rating; a plain number, a numpy scalar/array or a
            pandas Series/DataFrame. Must be non-zero.
        sse: SSE value of the course/layout (scalar or array-like).

    Returns:
        The rounded target, with the same kind of container as the inputs.
    """
    target = 1000 / rating * sse
    # pandas and numpy objects expose .round(); plain Python ints/floats do
    # not, so fall back to the built-in instead of raising AttributeError.
    if hasattr(target, 'round'):
        return target.round(2)
    return round(target, 2)

In [7]:
# calculate_rating(58, SSE['shmo_l'])

In [8]:
# {
#     'joe': calculate_rating(56, SSE['mbno_l']),
#     'joshua': calculate_rating(59, SSE['mbno_l']),
#     'kevin': calculate_rating(50, SSE['mbno_s']),
#     'isaac': calculate_rating(59, SSE['mbno_s']),
#     'kyle': calculate_rating(44, SSE['mbno_s']),
#     'kat': calculate_rating(50, SSE['mbno_s']),
#     'noah': calculate_rating(64, SSE['mbno_l']),
#     'chase': calculate_rating(66, SSE['mbno_s'])
# }

### concatenate all files into a dataframe

### add each round for 2023

In [9]:
# Read every score sheet, tag its rows with the round date taken from the file
# name, and stack them into one long DataFrame ordered by date.
round_frames = []
for filename in csv_files:
    # The date is the text between "zoo_" and ".csv". Only the base name is
    # parsed so that a "zoo_" in a directory name cannot shift the split.
    date = os.path.basename(filename).split('zoo_')[1].split('.csv')[0]
    newdf = pd.read_csv(filename)
    newdf['date'] = date
    round_frames.append(newdf)

if not round_frames:
    # Fail with an actionable message instead of a KeyError on 'date' below.
    raise FileNotFoundError("no score sheets matched scores/2023/*.csv under " + path)

# Concatenate once: growing a DataFrame inside the loop copies all previous
# rows on every iteration and starts from an empty frame.
df = pd.concat(round_frames).sort_values('date')

df

### attendance and CTP/LP

In [11]:
# Season totals per player, summed over every round in df.
by_player = df.groupby('name')

# One column: the sum of each player's `attendance` values.
attendance = by_player[['attendance']].sum()

# Summed `ctp` and `lp` columns, plus their combined total as extra points.
ctplp = by_player[['ctp', 'lp']].sum()
ctplp['extra_points'] = ctplp['ctp'].add(ctplp['lp'])

(
    attendance,
    ctplp['extra_points']
)

(                attendance
 name                      
 Chase Ramsey             9
 Isaac Richmond          12
 Joe Bos                 13
 Joshua Owens            14
 Kat Owens               14
 Kevin Richmond          12
 Kyle P-H                13
 Noah Mashni             13,
 name
 Chase Ramsey       2
 Isaac Richmond     4
 Joe Bos            6
 Joshua Owens       9
 Kat Owens         10
 Kevin Richmond     9
 Kyle P-H          14
 Noah Mashni        3
 Name: extra_points, dtype: int64)

### per-hole stats

In [12]:
# Per-hole score columns: hole_1 .. hole_24.
hole_cols = [f'hole_{n}' for n in range(1, 25)]

# Take an explicit copy: assigning converted columns into a bare column slice
# of df triggers pandas' SettingWithCopyWarning.
hole_stats = df[['course', 'name', *hole_cols]].copy()
for col in hole_cols:
    # Raises if a hole column contains text that cannot be parsed as a number.
    hole_stats[col] = pd.to_numeric(hole_stats[col])

# Mean strokes per hole for each (course, player) pair, indexed both ways.
hole_stats_by_course = hole_stats.groupby(['course','name']).mean()
hole_stats_by_name = hole_stats.groupby(['name','course']).mean()

hole_stats_by_name

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


### pivot round scores to total strokes

In [14]:
# Wide table of total strokes: one row per round date, one column per player.
strokes_long = df[['course', 'name', 'total_score', 'date']].set_index(['date', 'name'])
round_strokes = strokes_long.pivot_table(
    index=['date'],
    columns='name',
    values='total_score',
    aggfunc='first',  # keep the first score if a player has several rows on one date
)

# Row-wise mean over the players who have a score on that date.
round_strokes['the_field'] = round_strokes.mean(axis=1)
round_strokes

name,Chase Ramsey,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-04-19,,70.0,62.0,62.0,57.0,52.0,62.0,69.0,62.0
2023-04-26,64.0,70.0,60.0,64.0,54.0,59.0,67.0,64.0,62.75
2023-05-03,65.0,71.0,62.0,62.0,60.0,55.0,59.0,60.0,61.75
2023-05-10,,60.0,53.0,57.0,59.0,55.0,60.0,73.0,59.571429
2023-05-17,66.0,59.0,56.0,59.0,50.0,50.0,44.0,64.0,56.0
2023-05-24,100.0,91.0,69.0,75.0,86.0,81.0,79.0,85.0,83.25
2023-05-31,63.0,62.0,55.0,62.0,57.0,59.0,53.0,54.0,58.125
2023-06-07,63.0,63.0,62.0,51.0,64.0,49.0,56.0,59.0,58.375
2023-06-28,,75.0,54.0,58.0,52.0,62.0,62.0,63.0,60.857143
2023-07-05,64.0,68.0,56.0,54.0,51.0,67.0,55.0,60.0,59.375


### ratings

generate round ratings per player based on SSE

In [15]:
# Long table of every round played, keyed by (date, player).
ratings = df[['course','name','total_score','date']].set_index(['date','name'])
ratings['sse'] = ratings['course'].map(SSE)

# Series.map gives NaN for course codes that are not in SSE. Those rounds get
# a NaN rating, and a date where every rating is NaN drops out of the pivot
# below. Surface the problem instead of losing the round silently.
missing_sse = ratings['sse'].isna() & ratings['course'].notna()
if missing_sse.any():
    unmapped_courses = sorted(ratings.loc[missing_sse, 'course'].astype(str).unique())
    warnings.warn(
        f"no SSE configured for course code(s) {unmapped_courses}; "
        "rounds played there get no rating"
    )

# calculate_rating works on scalars, so it is applied row by row.
ratings['rating'] = ratings.apply(
    lambda row: calculate_rating(row['total_score'], row['sse']),
    axis=1,
)

# Wide table: one row per round date, one column per player.
ratings = ratings.pivot_table(
    values = 'rating',
    index = 'date',
    columns = 'name',
    aggfunc = 'first'
)
# Row-wise mean over the players who have a rating on that date.
ratings['the_field'] = ratings.mean(axis=1)
ratings

name,Chase Ramsey,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-04-19,,747.141002,930.51962,930.51962,894.547593,951.242435,930.51962,866.524534,893.002061
2023-04-26,759.571625,803.168505,903.592737,863.423044,886.112875,913.63516,,863.423044,856.132427
2023-05-10,,860.530687,1012.799017,976.230396,871.869656,917.22553,948.803931,829.955913,916.77359
2023-05-17,729.667267,818.928075,975.645222,947.543556,933.691971,933.691971,1010.201235,900.707445,906.259593
2023-05-24,748.665568,811.499176,965.09244,923.203368,846.406736,881.314296,895.27732,853.388248,865.605894
2023-05-31,826.513782,837.85275,994.514707,930.51962,894.547593,871.869656,939.903467,928.564498,903.035759
2023-06-28,,752.956389,963.847276,923.677584,911.421125,883.507891,883.507891,873.465468,884.626232
2023-07-05,755.170355,863.238556,975.645222,994.379667,920.940427,872.605778,869.934251,806.176531,882.261348
2023-07-19,780.366002,832.248049,979.055464,972.073952,884.130095,927.365134,,895.27732,895.788002
2023-08-02,,781.157908,1012.799017,,,884.808844,973.920372,905.886561,911.714541


### rating average

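calculate a rolling average of each player's rating as calculated above: missing rounds are first filled with the player's overall mean rating, then over a window of up to 6 rounds (at least 4) the lowest rating is dropped and the next three lowest are averaged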

In [16]:
# Rolling rating per player.
# - Rounds a player missed are filled with that player's overall mean rating.
# - The window covers the latest 6 rounds and needs at least 4 of them, so the
#   first three rows are NaN.
# - Within a window the lowest rating is dropped and the next three lowest are
#   averaged (sorted ascending, positions 1..3).
# `rolling` already works down the rows by default; the explicit `axis`
# argument is deprecated in recent pandas, so it is omitted.
ratings_ave = (
    ratings
    .fillna(ratings.mean())
    .rolling(6, min_periods=4)
    .apply(lambda x: mean(sorted(x)[1:4]))
)
ratings_ave

name,Chase Ramsey,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-04-19,,,,,,,,,
2023-04-26,,,,,,,,,
2023-05-10,,,,,,,,,
2023-05-17,760.17629,827.542423,972.987953,951.431191,904.784146,934.053312,963.850532,876.885008,905.345081
2023-05-24,756.238605,811.198585,957.085761,933.755515,884.176708,921.517554,937.289994,861.111942,888.289183
2023-05-31,756.238605,811.198585,957.085761,928.08087,884.176708,904.058329,934.323173,861.111942,887.214571
2023-06-28,756.238605,811.198585,968.19498,925.800191,884.176708,892.819116,922.575739,863.425587,884.422628
2023-07-05,754.771515,822.760001,972.127628,933.913587,892.612791,879.142655,906.229559,852.269876,889.974447
2023-07-19,754.771515,820.891767,972.127628,933.913587,896.699604,879.142655,903.777214,874.043679,887.558528
2023-08-02,758.7092,808.301711,973.264375,931.57076,887.992616,879.142655,903.777214,874.043679,887.558528


#### ratings against field

unclear if this will be valuable, but easy to calc

In [17]:
# ratings_field = ratings_ave.apply(lambda x: x-ratings_ave['the_field'])
# ratings_field

### expected strokes

based on the current rating as calculated above, this is the number of strokes anticipated for each player at each round. it factors into the handicap and therefore the ranking points.

In [18]:
## deprecated

# expected_strokes = ratings_ave.copy()
# expected_strokes['course'] = ratings['course']
# expected_strokes['sse'] = ratings['sse']
# for player in PLAYERS:
#     expected_strokes[player] = expected_strokes.apply(lambda x:(1000/x[player] * x['sse']), axis=1)
# expected_strokes

### ranking points

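this is determined by adjusting scores by individual handicap (round rating minus the rolling rating average) and ranking those adjusted scores within each round. a player's rank is the number of points granted, i.e. the number of players bested plus one (ties share the average rank).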

In [24]:
# How far each round rating sits above (+) or below (-) the player's rolling average.
diff = ratings.sub(ratings_ave)

# Rank the players within each round, leaving out the synthetic field column:
# the smallest difference gets rank 1, the largest gets the highest rank.
ranked = diff.drop('the_field', axis='columns').rank(axis='columns')
ranked

name,Chase Ramsey,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-04-19,,,,,,,,
2023-04-26,,,,,,,,
2023-05-10,,,,,,,,
2023-05-17,1.0,2.0,5.0,3.0,7.0,4.0,8.0,6.0
2023-05-24,6.0,7.0,8.0,4.0,3.0,2.0,1.0,5.0
2023-05-31,8.0,5.0,6.0,2.0,4.0,1.0,3.0,7.0
2023-06-28,,1.0,4.0,5.0,7.0,3.0,2.0,6.0
2023-07-05,4.0,7.0,5.0,8.0,6.0,3.0,2.0,1.0
2023-07-19,5.0,3.0,2.0,6.0,1.0,7.0,,4.0
2023-08-02,,1.0,4.0,,,2.0,5.0,3.0


### improvement points

In [20]:
# Rating gained over the player's rolling average in each round.
gain_over_average = ratings - ratings_ave

# Points awarded per rating point gained.
# NOTE(review): the rounding to 2 decimals applies to this factor only, not to
# the product below - confirm that is intended and not a precedence slip.
points_per_rating_point = (SCALING_FACTOR * (ratings_ave / 1000)).round(2)

improvement = gain_over_average * points_per_rating_point
# Only gains score: losses and missing values count as 0.
improvement = improvement.where(improvement > 0, 0).round(2)
(improvement, improvement.sum())

(name        Chase Ramsey  Isaac Richmond  Joe Bos  Joshua Owens  Kat Owens  \
 date                                                                         
 2023-04-19          0.00            0.00     0.00          0.00       0.00   
 2023-04-26          0.00            0.00     0.00          0.00       0.00   
 2023-05-10          0.00            0.00     0.00          0.00       0.00   
 2023-05-17          0.00            0.00     0.27          0.00       2.60   
 2023-05-24          0.00            0.02     0.80          0.00       0.00   
 2023-05-31          5.62            2.13     3.74          0.22       0.93   
 2023-06-28          0.00            0.00     0.00          0.00       2.45   
 2023-07-05          0.03            3.24     0.35          5.44       2.55   
 2023-07-19          2.05            0.91     0.69          3.43       0.00   
 2023-08-02          0.00            0.00     3.95          0.00       0.00   
 2023-08-16          0.00            1.06     1.14  

### points per week

In [21]:
# The four per-player components of the season total, shown side by side:
# ranking points, improvement points, attendance and CTP/LP extra points.
point_components = (
    ranked.sum(axis=0),
    improvement.sum(axis=0),
    attendance.loc[:, 'attendance'],
    ctplp.loc[:, 'extra_points'],
)
point_components

(name
 Chase Ramsey      29.0
 Isaac Richmond    39.0
 Joe Bos           43.0
 Joshua Owens      42.0
 Kat Owens         36.0
 Kevin Richmond    29.0
 Kyle P-H          40.0
 Noah Mashni       44.0
 dtype: float64,
 name
 Chase Ramsey       7.70
 Isaac Richmond    13.76
 Joe Bos           10.94
 Joshua Owens      11.66
 Kat Owens         10.34
 Kevin Richmond     5.76
 Kyle P-H          21.15
 Noah Mashni       17.58
 the_field          4.83
 dtype: float64,
 name
 Chase Ramsey       9
 Isaac Richmond    12
 Joe Bos           13
 Joshua Owens      14
 Kat Owens         14
 Kevin Richmond    12
 Kyle P-H          13
 Noah Mashni       13
 Name: attendance, dtype: int64,
 name
 Chase Ramsey       2
 Isaac Richmond     4
 Joe Bos            6
 Joshua Owens       9
 Kat Owens         10
 Kevin Richmond     9
 Kyle P-H          14
 Noah Mashni        3
 Name: extra_points, dtype: int64)

In [22]:
# Season total per player: ranking + improvement + attendance + CTP/LP points.
# The Series are aligned on player name before being added.
total_points = (
    ranked.sum()
    .add(improvement.sum())
    .add(attendance['attendance'])
    .add(ctplp['extra_points'])
)
# improvement.sum() still carries the synthetic 'the_field' entry; leave it out
# of the displayed standings.
total_points.drop(labels='the_field')

name
Chase Ramsey      47.70
Isaac Richmond    68.76
Joe Bos           72.94
Joshua Owens      76.66
Kat Owens         70.34
Kevin Richmond    55.76
Kyle P-H          88.15
Noah Mashni       77.58
dtype: float64