### imports

In [1]:
import pandas as pd
import random
import glob
import os

from statistics import mean

### constants

In [3]:
SCALING_FACTOR = 0.2

In [4]:
path = os.getcwd()
csv_files = sorted(glob.glob(os.path.join(path, "scores/2023/*.csv")))

In [5]:
# set up sse for each course and layout
# should we pull more data from dgcr? maybe course length per layout, etc?
# would allow us to get course names and such... that's probably better...

SSE = {
    'shmo_s': 47.7,
    'shmo_l': 54.4,
    'romo_s': 45.0, # total guess
    'romo_l': 50.4,
    'spva_s': 56.6,
    'spva_l': 64.0,
    'mbog_s': 44.8,
    'mbog_l': 53.4,
    'mbno_s': 45.3,
    'mbno_l': 53.9,
    'knol_s': 42.9,
    'knol_l': 42.9,
    'r_arro': 27.5,
    'r_arr2': 55.0,
    'vira_s': 48.1,
    'vira_l': 54.9,
    'coldbk': 67.0,
    'begg_p': 50.0,
    'brew_s': 43.9,
    'brew_l': 55.0,
    'johnsn': 43.1,
    'garfld': 46.1
}

In [6]:
PLAYERS = [
    'Chase',
    'Isaac Richmond',
    'Joe Bos',
    'Joshua Owens',
    'Kat Owens',
    'Kevin Richmond',
    'Kyle P-H',
    'Noah Mashni',
    'the_field'
]

### helper functions

In [7]:
def calculate_rating(score, sse):
    if sse > 50.328725:
        scoring_interval = -0.225067 * sse + 21.3858
    else:
        scoring_interval = -0.487095 * sse + 34.5734
    return 1000 - (score-sse)*scoring_interval

In [8]:
def calculate_target_stroke_count(rating, sse):
    return (1000/rating * sse).round(2)

In [11]:
calculate_rating(64, SSE['brew_l'])

918.935965

In [13]:
{
    'chase': calculate_rating(82, SSE['spva_s']),
    'isaac': calculate_rating(76, SSE['spva_s']),
    'joe': calculate_rating(67, SSE['spva_l']),
    'joshua': calculate_rating(68, SSE['spva_l']),
    'kat': calculate_rating(70, SSE['spva_s']),
    'kevin': calculate_rating(65, SSE['spva_s']),
    'noah': calculate_rating(79, SSE['spva_l'])
}

{'chase': 780.36600188,
 'isaac': 832.24804868,
 'joe': 979.055464,
 'joshua': 972.073952,
 'kat': 884.13009548,
 'kevin': 927.36513448,
 'noah': 895.27732}

In [19]:
{
    'joe': calculate_rating(56, SSE['mbno_l']),
    'joshua': calculate_rating(59, SSE['mbno_l']),
    'kevin': calculate_rating(50, SSE['mbno_s']),
    'isaac': calculate_rating(59, SSE['mbno_s']),
    'kyle': calculate_rating(44, SSE['mbno_s']),
    'kat': calculate_rating(50, SSE['mbno_s']),
    'noah': calculate_rating(64, SSE['mbno_l']),
    'chase': calculate_rating(66, SSE['mbno_s'])
}

{'joe': 980.56515373,
 'joshua': 952.80108763,
 'kevin': 941.21241645,
 'isaac': 828.64044795,
 'kyle': 1016.2603954499999,
 'kat': 941.21241645,
 'noah': 906.52764413,
 'chase': 741.08447245}

### concatenate all files into a dataframe

In [12]:
# we'll come back to this once we're able to add date and course manually

# dfs = []

# for filename in csv_files:
#     tdf = pd.read_csv(filename, index_col=None, header=0)
#     dfs.append(tdf)

# df = pd.concat(dfs, axis=0, ignore_index=True)

In [13]:
# players = df['name'].unique()

In [14]:
# courses2022 = {
#     '2022-04-21': 'shmo_s',
#     '2022-04-28': 'shmo_s',
#     '2022-05-05': 'shmo_s',
#     '2022-05-12': 'shmo_s',
#     '2022-05-19': 'spva_s',
#     '2022-05-26': 'shmo_s',
#     '2022-06-02': 'shmo_s',
#     '2022-06-09': 'shmo_s',
#     '2022-06-16': 'vera_s',
#     '2022-06-23': 'shmo_s',
#     '2022-06-30': 'mbog_l',
#     '2022-07-07': 'spva_s',
#     '2022-07-14': 'shmo_s',
#     '2022-07-21': 'shmo_s',
#     '2022-07-28': 'shmo_s',
#     '2022-08-04': 'shmo_s',
#     '2022-08-10': 'romo_s',
#     '2022-08-17': 'shmo_s',
#     '2022-08-24': 'spva_s',
#     '2022-08-31': 'mbog_s',
#     '2022-09-07': 'shmo_s',
#     '2022-09-14': 'spva_s',
#     '2022-09-21': 'shmo_s',
#     '2022-09-28': 'romo_l'
# }

In [14]:
# courses2023 = {
#     '2022-04-21': 'shmo_s',
#     '2022-04-28': 'shmo_s',
#     '2022-05-05': 'shmo_s',
#     '2022-05-12': 'shmo_s',
#     '2022-05-19': 'spva_s',
#     '2022-05-26': 'shmo_s',
#     '2022-06-02': 'shmo_s',
#     '2022-06-09': 'shmo_s',
#     '2022-06-16': 'vera_s',
#     '2022-06-23': 'shmo_s',
#     '2022-06-30': 'mbog_l',
#     '2022-07-07': 'spva_s',
#     '2022-07-14': 'shmo_s',
#     '2022-07-21': 'shmo_s',
#     '2022-07-28': 'shmo_s',
#     '2022-08-04': 'shmo_s',
#     '2022-08-10': 'romo_s',
#     '2022-08-17': 'shmo_s',
#     '2022-08-24': 'spva_s',
#     '2022-08-31': 'mbog_s',
#     '2022-09-07': 'shmo_s',
#     '2022-09-14': 'spva_s',
#     '2022-09-21': 'shmo_s',
#     '2022-09-28': 'romo_l'
# }

### build base dataframe for 2022

In [15]:
# df = pd.read_csv('scores/2022/zoo_2022-04-21.csv')

### add each round for 2022

In [16]:
df = pd.DataFrame()
for filename in csv_files:
    date = filename.split('zoo_')[1].split('.csv')[0]
    newdf = pd.read_csv(filename)
    newdf['date'] = date
#     if 'hole_24' in newdf.columns:
#         newdf['course'] = 
#     else:
#         newdf['course'] = random.choice(['shmo_s','mbog_s','romo_s'])
    df = df.append(newdf)
df = df.sort_values('date')
# df['course'] = df['date'].map(courses2022)

### per-hole stats

In [17]:
hole_stats = df.groupby(['course','name']).mean()
hole_stats

KeyError: 'course'

### pivot round scores to simple total scores

filling NA with course-wise average per player

In [16]:
round_scores = df[['course','name','total_score','date']].set_index('date','course')
round_scores = round_scores.pivot_table(
    values='total_score',
    index=['date','course'],
    columns='name',
    aggfunc='first'
)
attendance = round_scores.count() # only used in final scoring, needed prior to 'fillna'.

round_scores['the_field'] = round_scores.mean(axis=1)
round_scores = round_scores.fillna(round_scores.groupby('course').transform('mean')).fillna(round_scores.mean()).round(2)
round_scores

Unnamed: 0_level_0,name,Chase,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,course,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-04-21,shmo_s,69.11,62.0,55.0,50.0,54.0,53.0,52.85,55.58,53.0
2022-04-28,shmo_s,72.0,62.0,48.0,48.55,57.0,52.0,52.0,56.0,56.17
2022-05-05,shmo_s,73.0,62.0,51.0,48.55,58.0,61.0,56.0,61.0,60.0
2022-05-12,shmo_s,75.0,62.0,49.0,53.0,56.0,60.0,54.0,55.0,57.43
2022-05-19,spva_s,83.0,82.0,61.0,64.0,67.0,51.0,65.0,70.0,65.86
2022-05-26,shmo_s,69.11,62.0,52.0,45.0,53.0,60.0,50.0,59.0,53.17
2022-06-02,shmo_s,69.11,62.0,49.58,46.0,55.0,41.0,55.0,56.0,50.6
2022-06-09,shmo_s,69.11,62.0,50.0,48.0,54.77,58.0,54.0,53.0,52.6
2022-06-16,vera_s,75.0,70.38,51.0,54.0,60.0,57.0,55.16,59.0,59.8
2022-06-23,shmo_s,69.0,62.0,47.0,49.0,52.0,54.75,47.0,55.0,53.17


### ratings

generate round ratings per player based on SSE

In [17]:
ratings = round_scores.reset_index().set_index('date')
ratings['sse'] = ratings['course'].map(SSE)
for player in PLAYERS:
    ratings[player] = ratings.apply(lambda x: calculate_rating(x[player],x['sse']),axis=1)

ratings

name,course,Chase,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field,sse
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-21,shmo_s,757.232684,837.85275,917.22553,973.920372,928.564498,939.903467,941.604312,910.648928,939.903467,47.7
2022-04-28,shmo_s,724.463065,837.85275,996.598309,990.361877,894.547593,951.242435,951.242435,905.886561,903.958937,47.7
2022-05-05,shmo_s,713.124097,837.85275,962.581404,990.361877,883.208624,849.191719,905.886561,849.191719,860.530687,47.7
2022-05-12,shmo_s,690.44616,837.85275,985.259341,939.903467,905.886561,860.530687,928.564498,917.22553,889.671836,47.7
2022-05-19,spva_s,771.718994,780.366002,961.953166,936.012142,910.071119,1048.423244,927.365134,884.130095,919.928708,56.6
2022-05-26,shmo_s,757.232684,837.85275,951.242435,1030.615215,939.903467,860.530687,973.920372,871.869656,937.975842,47.7
2022-06-02,shmo_s,757.232684,837.85275,978.682739,1019.276246,917.22553,1075.971089,917.22553,905.886561,967.116991,47.7
2022-06-09,shmo_s,757.232684,837.85275,973.920372,996.598309,919.833493,883.208624,928.564498,939.903467,944.439054,47.7
2022-06-16,vera_s,,,,,,,,,,
2022-06-23,shmo_s,758.479971,837.85275,1007.937278,985.259341,951.242435,920.060272,1007.937278,917.22553,937.975842,47.7


### rating average

calculate a rolling average of players' rating as calculated above

In [18]:
ratings_ave = ratings.drop(columns=['sse']).rolling(6, axis=0, min_periods=5).apply(lambda x: mean(sorted(x)[1:4]))
ratings_ave

name,Chase,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-04-21,,,,,,,,,
2022-04-28,,,,,,,,,
2022-05-05,,,,,,,,,
2022-05-12,,,,,,,,,
2022-05-19,731.606616,837.85275,969.931304,968.061905,903.501758,917.22553,932.511315,900.221862,904.519827
2022-05-26,731.606616,837.85275,958.592335,968.061905,903.501758,886.988281,932.511315,887.295438,904.519827
2022-06-02,731.606616,837.85275,967.739103,973.542407,903.501758,890.767937,924.385054,887.295438,904.519827
2022-06-09,742.529822,837.85275,966.151647,975.621218,911.06107,868.09,924.385054,887.295438,915.858796
2022-06-16,757.232684,837.85275,971.518759,985.259341,915.710047,930.720852,928.16471,902.414062,934.114535
2022-06-23,762.061454,837.85275,971.518759,1000.377966,925.654163,950.564047,943.283335,909.973375,940.130246


#### ratings against field

unclear if this will be valuable, but easy to calc

In [19]:
# ratings_field = ratings_ave.apply(lambda x:x - ratings_ave['the_field'])
# ratings_field

### expected strokes

based on current rating as calculated above, this is the number of strokes anticipated for each player at each round. factors into handicap and therefor ranking points.

In [20]:
expected_strokes = ratings_ave.copy()
expected_strokes['course'] = ratings['course']
expected_strokes['sse'] = ratings['sse']
for player in PLAYERS:
    expected_strokes[player] = expected_strokes.apply(lambda x:(1000/x[player] * x['sse']), axis=1)
expected_strokes

name,Chase,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field,course,sse
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-21,,,,,,,,,,shmo_s,47.7
2022-04-28,,,,,,,,,,shmo_s,47.7
2022-05-05,,,,,,,,,,shmo_s,47.7
2022-05-12,,,,,,,,,,shmo_s,47.7
2022-05-19,77.363981,67.553636,58.354648,58.467335,62.645147,61.707833,60.696314,62.87339,62.574637,spva_s,56.6
2022-05-26,65.198973,56.931245,49.760465,49.273708,52.794585,53.777486,51.152194,53.75887,52.735162,shmo_s,47.7
2022-06-02,65.198973,56.931245,49.290144,48.996325,52.794585,53.549301,51.601873,53.75887,52.735162,shmo_s,47.7
2022-06-09,64.239844,56.931245,49.371131,48.891926,52.356534,54.94822,51.601873,53.75887,52.082264,shmo_s,47.7
2022-06-16,,,,,,,,,,vera_s,
2022-06-23,62.593377,56.931245,49.098383,47.681978,51.531125,50.180732,50.568051,52.419116,50.737651,shmo_s,47.7


### ranking points

this is determined by adjusting scores by individual handicap and ranking those adjusted scores. the number of those bested is the number of points granted.

In [21]:
handicap = expected_strokes.drop(columns=['course']).apply(lambda x: x-x['sse'], axis=1)
adjusted = round_scores - handicap.drop(columns=['sse'])*SCALING_FACTOR # this inflated scores heavily
ranked = adjusted.rank(axis=1,ascending=False)

# ranked.sum()
handicap

name,Chase,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field,sse
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-04-21,,,,,,,,,,0.0
2022-04-28,,,,,,,,,,0.0
2022-05-05,,,,,,,,,,0.0
2022-05-12,,,,,,,,,,0.0
2022-05-19,20.763981,10.953636,1.754648,1.867335,6.045147,5.107833,4.096314,6.27339,5.974637,0.0
2022-05-26,17.498973,9.231245,2.060465,1.573708,5.094585,6.077486,3.452194,6.05887,5.035162,0.0
2022-06-02,17.498973,9.231245,1.590144,1.296325,5.094585,5.849301,3.901873,6.05887,5.035162,0.0
2022-06-09,16.539844,9.231245,1.671131,1.191926,4.656534,7.24822,3.901873,6.05887,4.382264,0.0
2022-06-16,,,,,,,,,,
2022-06-23,14.893377,9.231245,1.398383,-0.018022,3.831125,2.480732,2.868051,4.719116,3.037651,0.0


### improvement points

this is determined by the round score being less than the expected stroke count. in those cases, we find the difference, and where it's greater than zero, we sum the improvement

also testing with strict ratings comparison for improvement

In [22]:
# improvement = expected_strokes[PLAYERS] - round_scores[PLAYERS]
improvement = ((ratings_ave[PLAYERS] - ratings[PLAYERS])/2).round(2)
improvement.where(improvement > 0).fillna(0).sum()

name
Chase             131.72
Isaac Richmond    121.61
Joe Bos            88.91
Joshua Owens       84.91
Kat Owens          83.27
Kevin Richmond    100.96
Kyle P-H           74.15
Noah Mashni        49.16
the_field          74.42
dtype: float64

In [23]:
ranked[PLAYERS].sum()

name
Chase              44.0
Isaac Richmond     36.0
Joe Bos           113.0
Joshua Owens      132.0
Kat Owens          82.0
Kevin Richmond     90.0
Kyle P-H           89.0
Noah Mashni        72.0
the_field          83.0
dtype: float64

## total points minus LP/CTP

In [24]:
ranked[PLAYERS].sum() + improvement[PLAYERS].where(improvement > 0).fillna(0).sum() + attendance

name
Chase             189.72
Isaac Richmond    165.61
Joe Bos           222.91
Joshua Owens      235.91
Kat Owens         186.27
Kevin Richmond    209.96
Kyle P-H          182.15
Noah Mashni       141.16
the_field            NaN
dtype: float64

### points per week

In [25]:
ranked + improvement.where(improvement > 0).fillna(0) + attendance # + lp/ctp

Unnamed: 0_level_0,name,Chase,Isaac Richmond,Joe Bos,Joshua Owens,Kat Owens,Kevin Richmond,Kyle P-H,Noah Mashni,the_field
date,course,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-04-21,shmo_s,,,,,,,,,
2022-04-28,shmo_s,,,,,,,,,
2022-05-05,shmo_s,,,,,,,,,
2022-05-12,shmo_s,,,,,,,,,
2022-05-19,spva_s,17.0,37.74,32.99,39.02,26.0,28.0,27.57,30.05,
2022-05-26,shmo_s,18.0,10.0,29.67,28.0,28.0,33.23,27.0,30.71,
2022-06-02,shmo_s,16.0,9.0,27.0,27.0,26.0,28.0,25.58,24.0,
2022-06-09,shmo_s,15.0,10.0,28.0,28.0,25.0,22.0,24.0,28.0,
2022-06-16,vera_s,,,,,,,,,
2022-06-23,shmo_s,16.79,10.0,29.0,32.56,28.0,37.25,28.0,24.0,
