# Individual player stats

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

# Use a 12 pt serif face so figure text looks like LaTeX output
font = dict(family='serif', size=12)
mpl.rc('font', family=font['family'], size=font['size'])

In [2]:
def players_in_year(year):
    """Return the unique player names found on a year's round 1-4 sheets.

    Each round is looked up, relative to the current working directory, as
    '<year> Friendly Playoff Pool Round <rnd>.csv'.  Rounds whose file does
    not exist are skipped.  Names come from the 'Name:' column and are kept
    in first-seen order; the 'Results' row (the answer key) is left out.

    Parameters
    ----------
    year : int or str
        Year used as the file-name prefix.

    Returns
    -------
    list
        De-duplicated names; empty when no sheet exists for the year.
    """
    # NOTE(review): only rounds 1-4 are scanned, so a player who entered
    # nothing but the 2020 qualification round ('Q') is not listed here --
    # confirm whether that is intended.
    names = []
    for rnd in range(1, 5):
        fname = str(year)+' Friendly Playoff Pool Round '+str(rnd)+'.csv'
        if os.path.isfile(fname):
            fdata = pd.read_csv(fname, sep=',')
            names.extend(fdata['Name:'])

    # dict.fromkeys de-duplicates while preserving order.  Filtering (rather
    # than list.remove) avoids a ValueError when 'Results' is absent, e.g.
    # when none of the files exist.
    return [name for name in dict.fromkeys(names) if name != 'Results']

In [3]:
def rounds_list(year):
    """Return the round identifiers for ``year`` as a list.

    Every year has rounds 1-4.  2020 additionally starts with the
    qualification round, identified by the string 'Q'.

    Parameters
    ----------
    year : int
        Playoff year.

    Returns
    -------
    list
        ``['Q', 1, 2, 3, 4]`` for 2020, ``[1, 2, 3, 4]`` otherwise.  A list
        is returned in both cases so callers always get the same type.
    """
    rounds = list(range(1, 5))
    if year == 2020:
        # only 2020 had a qualification round ahead of round 1
        rounds.insert(0, 'Q')
    return rounds

In [4]:
def round_headers(year, rnd):
    """Return the column headers for one round's sheet.

    Only the 2020 layouts are defined.  A sheet starts with 'Time' and
    'Name', followed by one 'T<k>', 'G<k>' pair per series (team columns and
    games columns).  The round 1 sheet carries three extra trailing columns,
    'WCC', 'ECC' and 'SCC'.

    Parameters
    ----------
    year : int
        Playoff year; must be 2020.
    rnd : {'Q', 1, 2, 3, 4}
        Round identifier.

    Returns
    -------
    list of str
        Column names in sheet order.

    Raises
    ------
    ValueError
        If no layout is defined for the given year/round.  (Previously this
        surfaced as an UnboundLocalError from the return statement.)
    """
    # number of series, i.e. T/G column pairs, on each 2020 sheet
    series_per_round = {'Q': 8, 1: 8, 2: 4, 3: 2, 4: 1}
    if year != 2020 or rnd not in series_per_round:
        raise ValueError(
            'no column headers defined for year {!r}, round {!r}'.format(year, rnd))

    col_head = ['Time', 'Name']
    for k in range(1, series_per_round[rnd] + 1):
        col_head += ['T' + str(k), 'G' + str(k)]
    if rnd == 1:
        col_head += ['WCC', 'ECC', 'SCC']

    return col_head

In [5]:
def team_headers(rnd):
    """Return the team-pick column names ('T1', 'T2', ...) for a round.

    Parameters
    ----------
    rnd : {'Q', 1, 2, 3, 4}
        Round identifier.

    Returns
    -------
    list of str
        Eight names for 'Q' and round 1, then four, two and one for rounds
        2, 3 and 4.

    Raises
    ------
    ValueError
        If ``rnd`` is not one of the rounds above.  (Previously this
        surfaced as an UnboundLocalError from the return statement.)
    """
    # number of series, and therefore team columns, in each round
    series_per_round = {'Q': 8, 1: 8, 2: 4, 3: 2, 4: 1}
    if rnd not in series_per_round:
        raise ValueError('no team headers defined for round {!r}'.format(rnd))

    return ['T' + str(k) for k in range(1, series_per_round[rnd] + 1)]

In [6]:
def read_round(year, rnd):
    """Load one round's pick sheet for ``year``.

    The sheet is read from '<year> Friendly Playoff Pool Round <rnd>.csv' in
    the current working directory.  The 'Name:' column is renamed to 'Name'
    and used as the row index, the columns are relabelled with
    round_headers(year, rnd), and every games column ('G1', 'G2', ...) is
    converted from text such as '6 Games' to an integer ('- Games' -> 0).

    Parameters
    ----------
    year : int
        Playoff year.
    rnd : {'Q', 1, 2, 3, 4, 'SC'}
        Round identifier.  'SC' (Stanley Cup picks) reads the round 1 sheet
        and returns only its 'Name', 'WCC', 'ECC' and 'SCC' columns.

    Returns
    -------
    pandas.DataFrame or str
        The cleaned table, or the string '-' when the file does not exist.
    """
    # Stanley Cup picks live on the round 1 sheet
    want_cup_picks = (rnd == 'SC')
    if want_cup_picks:
        rnd = 1

    fname = str(year)+' Friendly Playoff Pool Round '+str(rnd)+'.csv'
    if not os.path.isfile(fname):
        # Return the placeholder straight away; for 'SC' the original fell
        # through and called .loc on this string.
        return '-'

    fdata = pd.read_csv(fname, sep=',')

    # index rows by player name, then overwrite the column headers
    fdata = fdata.rename(columns={'Name:': 'Name'})
    fdata.index = fdata['Name']
    fdata.columns = round_headers(year, rnd)

    # number of series in the round ('Q' has as many as round 1)
    n_series = 2**(4-1) if rnd == 'Q' else 2**(4-rnd)
    for col in ['G'+str(x) for x in range(1, n_series+1)]:
        # '- Games' marks a missing game count; make it 0 on every row so the
        # integer cast works.  Assigning the whole column back avoids writing
        # through `.values`, which is read-only under pandas Copy-on-Write.
        games = fdata[col].replace('- Games', '0 Games')
        # rstrip removes trailing characters from the set ' Games', leaving
        # just the leading number
        fdata[col] = games.str.rstrip(' Games').astype(int)

    if want_cup_picks:
        return fdata.loc[:, ['Name', 'WCC', 'ECC', 'SCC']]
    return fdata

In [7]:
def player_year_total_team(year, player):
    """Count a player's correct and attempted team picks over a whole year.

    For every round in rounds_list(year) the player's team columns are
    compared with the 'Results' row of the same sheet.  Rounds the player
    did not enter, and rounds whose sheet is missing, are skipped.

    Parameters
    ----------
    year : int
        Playoff year.
    player : str
        Player name as it appears in the sheets' name column.

    Returns
    -------
    tuple
        ``(Ncorrect_teams_total, Nattempts_total)``: the number of team picks
        matching the results and the number of non-empty team picks.
    """
    Ncorrect_teams = []
    Nattempts = []

    for rnd in rounds_list(year):
        df = read_round(year, rnd)
        if not isinstance(df, pd.DataFrame):
            # read_round returns the string '-' when the sheet is missing;
            # indexing it by 'Name' would raise a TypeError
            continue
        if player not in df.index:
            continue    # player did not enter this round

        team_cols = team_headers(rnd)
        results_teams = df.loc['Results'].loc[team_cols]   # correct outcomes
        player_teams = df.loc[player].loc[team_cols]       # this player's picks

        Ncorrect_teams.append(np.sum(player_teams == results_teams))
        # only picks that were actually filled in count as attempts
        Nattempts.append(np.sum(player_teams.notna()))

    # sum over all rounds
    Ncorrect_teams_total = sum(Ncorrect_teams)
    Nattempts_total = sum(Nattempts)

    return Ncorrect_teams_total, Nattempts_total

In [8]:
def team_percent_table(year):
    """Rank every player of ``year`` by their share of correct team picks.

    Parameters
    ----------
    year : int
        Playoff year.

    Returns
    -------
    pandas.Series
        Indexed by player name, sorted from best to worst.  Values are
        fractions between 0 and 1 (correct picks / attempted picks), not
        percentages.  A player with no attempted picks gets NaN.
    """
    names = players_in_year(year)

    fractions = []
    for name in names:
        Ncor, Ntry = player_year_total_team(year, name)
        # zero attempts: report NaN instead of dividing by zero
        fractions.append(Ncor/Ntry if Ntry else float('nan'))

    # dtype is pinned so an empty player list still gives a float Series
    table = pd.Series(fractions, index=names, dtype=float)
    return table.sort_values(ascending=False)

In [9]:
# season to analyse; its sheets sit in a sibling directory named after the year
year = 2020
os.chdir('../{}/'.format(year))

In [10]:
# Show the working directory that the relative CSV file names resolve against.
# os.getcwd() is plain Python, so it does not rely on IPython automagic.
os.getcwd()

'/Users/daviddeepwell/Documents/Hockey/Hockey Pool/Playoffs/2020'

In [11]:
# everyone who appears on a round 1-4 sheet for `year` ('Results' excluded)
pp = players_in_year(year)

In [12]:
# fraction of correct team picks per player, sorted from highest to lowest
t2020 = team_percent_table(year)
t2020

Anthony C    0.652174
Michael D    0.625000
Alisha K     0.608696
Jon M        0.565217
Brian M      0.545455
Jack L       0.521739
Nathan S     0.521739
Tom K        0.500000
Ron K        0.500000
Mark D       0.478261
Andre D      0.478261
Andrew N     0.434783
David D      0.434783
Kollin H     0.428571
Josh H       0.409091
Alita D      0.350000
Kyle L       0.347826
Brandon P    0.250000
dtype: float64