### imports

In [1]:
import pandas as pd
import random
import glob
import os

from statistics import mean
import ipywidgets as widgets

### constants

In [2]:
SCALING_FACTOR = 0.1

In [3]:
path = os.getcwd()
csv_files = sorted(glob.glob(os.path.join(path, "scores/2023/*.csv")))

In [4]:
# set up sse for each course and layout
# should we pull more data from dgcr? maybe course length per layout, etc?
# would allow us to get course names and such... that's probably better...

SSE = {
    'shmo_s': 47.7,
    'shmo_l': 54.4,
    'romo_s': 45.0, # total guess
    'romo_l': 50.4,
    'spva_s': 56.6,
    'spva_l': 64.0,
    'mbog_s': 44.8,
    'mbog_l': 53.4,
    'mbno_s': 45.3,
    'mbno_l': 53.9,
    'knol_s': 42.9,
    'knol_l': 42.9,
    'r_arro': 27.5,
    'r_arr2': 55.0,
    'vira_s': 48.1,
    'vira_l': 54.9,
    'coldbk': 67.0,
    'begg_p': 50.0,
    'brew_s': 43.9,
    'brew_l': 55.0,
    'johnsn': 43.1,
    'garfld': 46.1
}

In [5]:
PLAYERS = [
    'Chase',
    'Isaac Richmond',
    'Joe Bos',
    'Joshua Owens',
    'Kat Owens',
    'Kevin Richmond',
    'Kyle P-H',
    'Noah Mashni',
    'the_field'
]

### helper functions

In [6]:
def calculate_rating(score, sse):
    if sse > 50.328725:
        scoring_interval = -0.225067 * sse + 21.3858
    else:
        scoring_interval = -0.487095 * sse + 34.5734
    return 1000 - (score-sse)*scoring_interval

In [7]:
def calculate_target_stroke_count(rating, sse):
    return (1000/rating * sse).round(2)

In [17]:
# calculate_rating(58, SSE['shmo_l'])

967.08824128

In [9]:
# {
#     'chase': calculate_rating(82, SSE['spva_s']),
#     'isaac': calculate_rating(76, SSE['spva_s']),
#     'joe': calculate_rating(67, SSE['spva_l']),
#     'joshua': calculate_rating(68, SSE['spva_l']),
#     'kat': calculate_rating(70, SSE['spva_s']),
#     'kevin': calculate_rating(65, SSE['spva_s']),
#     'noah': calculate_rating(79, SSE['spva_l'])
# }

{'chase': 780.36600188,
 'isaac': 832.24804868,
 'joe': 979.055464,
 'joshua': 972.073952,
 'kat': 884.13009548,
 'kevin': 927.36513448,
 'noah': 895.27732}

In [10]:
# {
#     'joe': calculate_rating(56, SSE['mbno_l']),
#     'joshua': calculate_rating(59, SSE['mbno_l']),
#     'kevin': calculate_rating(50, SSE['mbno_s']),
#     'isaac': calculate_rating(59, SSE['mbno_s']),
#     'kyle': calculate_rating(44, SSE['mbno_s']),
#     'kat': calculate_rating(50, SSE['mbno_s']),
#     'noah': calculate_rating(64, SSE['mbno_l']),
#     'chase': calculate_rating(66, SSE['mbno_s'])
# }

{'joe': 980.56515373,
 'joshua': 952.80108763,
 'kevin': 941.21241645,
 'isaac': 828.64044795,
 'kyle': 1016.2603954499999,
 'kat': 941.21241645,
 'noah': 906.52764413,
 'chase': 741.08447245}

### concatenate all files into a dataframe

### build base dataframe for 2022

In [15]:
# df = pd.read_csv('scores/2022/zoo_2022-04-21.csv')

### add each round for 2023

In [16]:
df = pd.DataFrame()
for filename in csv_files:
    date = filename.split('zoo_')[1].split('.csv')[0]
    newdf = pd.read_csv(filename)
    newdf['date'] = date
#     if 'hole_24' in newdf.columns:
#         newdf['course'] = 
#     else:
#         newdf['course'] = random.choice(['shmo_s','mbog_s','romo_s'])
    df = df.concat(newdf)
df = df.sort_values('date')
# df['course'] = df['date'].map(courses2022)

AttributeError: 'DataFrame' object has no attribute 'append'

### per-hole stats

In [None]:
hole_stats = df.groupby(['course','name']).mean()
hole_stats

### pivot round scores to simple total scores

filling NA with course-wise average per player

In [None]:
round_scores = df[['course','name','total_score','date']].set_index('date','course')
round_scores = round_scores.pivot_table(
    values='total_score',
    index=['date','course'],
    columns='name',
    aggfunc='first'
)
attendance = round_scores.count() # only used in final scoring, needed prior to 'fillna'.

round_scores['the_field'] = round_scores.mean(axis=1)
round_scores = round_scores.fillna(round_scores.groupby('course').transform('mean')).fillna(round_scores.mean()).round(2)
round_scores

### ratings

generate round ratings per player based on SSE

In [None]:
ratings = round_scores.reset_index().set_index('date')
ratings['sse'] = ratings['course'].map(SSE)
for player in PLAYERS:
    ratings[player] = ratings.apply(lambda x: calculate_rating(x[player],x['sse']),axis=1)

ratings

### rating average

calculate a rolling average of players' rating as calculated above

In [None]:
ratings_ave = ratings.drop(columns=['sse']).rolling(6, axis=0, min_periods=5).apply(lambda x: mean(sorted(x)[1:4]))
ratings_ave

#### ratings against field

unclear if this will be valuable, but easy to calc

In [None]:
# ratings_field = ratings_ave.apply(lambda x:x - ratings_ave['the_field'])
# ratings_field

### expected strokes

based on current rating as calculated above, this is the number of strokes anticipated for each player at each round. factors into handicap and therefor ranking points.

In [None]:
expected_strokes = ratings_ave.copy()
expected_strokes['course'] = ratings['course']
expected_strokes['sse'] = ratings['sse']
for player in PLAYERS:
    expected_strokes[player] = expected_strokes.apply(lambda x:(1000/x[player] * x['sse']), axis=1)
expected_strokes

### ranking points

this is determined by adjusting scores by individual handicap and ranking those adjusted scores. the number of those bested is the number of points granted.

In [None]:
handicap = expected_strokes.drop(columns=['course']).apply(lambda x: x-x['sse'], axis=1)
adjusted = round_scores - handicap.drop(columns=['sse'])*SCALING_FACTOR # this inflated scores heavily
ranked = adjusted.rank(axis=1,ascending=False)

# ranked.sum()
handicap

### improvement points

this is determined by the round score being less than the expected stroke count. in those cases, we find the difference, and where it's greater than zero, we sum the improvement

also testing with strict ratings comparison for improvement

In [None]:
# improvement = expected_strokes[PLAYERS] - round_scores[PLAYERS]
improvement = ((ratings_ave[PLAYERS] - ratings[PLAYERS])/2).round(2)
improvement.where(improvement > 0).fillna(0).sum()

In [None]:
ranked[PLAYERS].sum()

## total points minus LP/CTP

In [None]:
ranked[PLAYERS].sum() + improvement[PLAYERS].where(improvement > 0).fillna(0).sum() + attendance

### points per week

In [None]:
ranked + improvement.where(improvement > 0).fillna(0) + attendance # + lp/ctp