# Evaluate Contest Results

In [1]:
# import os
# import pandas as pd
# import numpy as np
# import time
# import datetime
# import warnings
# import unidecode
# import re
# import statsmodels.api as sm
# import matplotlib.pyplot as plt
# from joblib import Parallel, delayed

# from Utilities import *

# warnings.simplefilter(action="ignore")
# baseball_path = r"C:\Users\james\Documents\MLB\Data2"

### Results

In [2]:
# Read in DK results and extracts player scores and lineup performances
def read_results(contestKey):
    """Load a contest-standings export and split it into player and lineup tables.

    Reads ``contest-standings-<contestKey>.csv`` from the
    ``1. Results/A. Contest Results`` folder under ``baseball_path``.

    Parameters
    ----------
    contestKey : int or str
        Contest identifier; it is converted with ``str()`` to build the file name.

    Returns
    -------
    players : pandas.DataFrame
        The ``Player`` and ``FPTS`` columns, with rows that have a missing
        value in either column removed.
    lineups : pandas.DataFrame
        The ``Rank``, ``EntryId``, ``EntryName``, ``TimeRemaining``, ``Points``
        and ``Lineup`` columns, with ``Rank`` cast to ``int``.
    """
    results = pd.read_csv(os.path.join(baseball_path, "1. Results", "A. Contest Results", "contest-standings-" + str(contestKey) + ".csv"))

    # Build independent frames instead of modifying slices of `results` in
    # place: in-place edits on a column selection are the chained-assignment
    # pattern pandas warns about (SettingWithCopyWarning).
    players = results[['Player', 'FPTS']].dropna()

    # Keep relevant variables from contest lineups
    lineups = results[['Rank', 'EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup']].copy()

    # NOTE(review): this cast presumably fails if the export contains rows
    # with no Rank (e.g. more player rows than entries) - confirm against data.
    lineups['Rank'] = lineups['Rank'].astype('int')

    return players, lineups

In [3]:
# players, lineups = read_results(146482590)

In [4]:
# players

### Payouts

In [5]:
# Read in contest payout structure
def read_payouts(contestKey):
    """Load the payout structure of a contest.

    Reads ``Payouts <contestKey>.csv`` from the ``2. Contests/B. Payouts``
    folder under ``baseball_path``.

    Parameters
    ----------
    contestKey : int or str
        Contest identifier; it is converted with ``str()`` to build the file name.

    Returns
    -------
    pandas.DataFrame
        Columns ``Lower`` (from ``minPosition``), ``Upper`` (from
        ``maxPosition``) and ``Payout`` (from ``payoutDescription``, with
        ``,`` and ``$`` stripped and converted to float).
    """
    payouts = pd.read_csv(os.path.join(baseball_path, "2. Contests", "B. Payouts", "Payouts " + str(contestKey) + ".csv"))

    # regex=False is required: as a regular expression '$' is the
    # end-of-string anchor, so on pandas versions where str.replace defaults
    # to regex=True the dollar sign would survive and the float cast would fail.
    payouts['payoutDescription'] = (
        payouts['payoutDescription']
        .str.replace(',', '', regex=False)
        .str.replace('$', '', regex=False)
        .astype(float)
    )

    # Rename to the names used by the rest of the notebook
    payouts = payouts.rename(columns={'minPosition': 'Lower', 'maxPosition': 'Upper', 'payoutDescription': 'Payout'})

    # Keep relevant variables
    return payouts[['Lower', 'Upper', 'Payout']]

In [6]:
# payouts = read_payouts(146482590)
# payouts

In [7]:
# Assign payouts to existing contest lineups
def add_payouts(contestKey):
    """Attach the payout earned by every contest entry.

    Loads the contest results and the payout table for ``contestKey`` and adds
    a ``Payout`` column to the lineups: the payout of the tier whose inclusive
    ``[Lower, Upper]`` range contains the entry's ``Rank``, or 0 when no tier
    contains it.

    Parameters
    ----------
    contestKey : int or str
        Contest identifier forwarded to ``read_results`` and ``read_payouts``.

    Returns
    -------
    players : pandas.DataFrame
        Player table exactly as returned by ``read_results``.
    lineups : pandas.DataFrame
        Lineup table from ``read_results`` with an added float ``Payout`` column.
    """
    # Read in results
    players, lineups = read_results(contestKey)
    # Read in payouts
    payouts = read_payouts(contestKey)

    # Default every entry to 0 on a new frame. The column therefore exists
    # even when there are no entries or no payout tiers.
    lineups = lineups.assign(Payout=0.0)

    # One vectorized pass per payout tier instead of one mask per entry.
    # Tiers are applied last-to-first so that, if ranges ever overlap, the
    # first listed tier is written last and wins.
    for tier in payouts.iloc[::-1].itertuples(index=False):
        in_tier = lineups['Rank'].between(tier.Lower, tier.Upper)
        lineups.loc[in_tier, 'Payout'] = tier.Payout

    return players, lineups

In [8]:
# players, lineups = add_payouts(146482590)
# players

### Test Lineups

In [9]:
# Read in lineup sims and assign points
def read_sims(contestKey):
    """Score the ranked simulated lineups of a contest against its real results.

    Reads ``Lineups Ranked <contestKey>.csv`` from the
    ``12. Lineups/B. Ranked`` folder under ``baseball_path``, looks up each
    rostered player's ``FPTS`` by name, sums them into ``Points`` and assigns
    every simulated lineup the ``Rank`` and ``Payout`` of the first contest
    entry that scored strictly fewer points.

    Parameters
    ----------
    contestKey : int or str
        Contest identifier used for the file name and forwarded to ``add_payouts``.

    Returns
    -------
    pandas.DataFrame
        The simulated lineups with one ``FPTS_<position>`` column per roster
        slot plus ``Points``, ``Rank`` and ``Payout`` (both float).

    Notes
    -----
    A simulated lineup that beats no contest entry is placed behind everyone
    (highest contest rank + 1) with a payout of 0.
    """
    positions = ['P', 'P.1', 'C', '1B', '2B', '3B', 'SS', 'OF', 'OF.1', 'OF.2']

    # Read in lineup sims
    sims = pd.read_csv(os.path.join(baseball_path, "12. Lineups", "B. Ranked", "Lineups Ranked " + str(contestKey) + ".csv"), encoding='iso-8859-1')

    # Read in players and opponent lineups
    players, lineups = add_payouts(contestKey)

    # For each position, merge the player's point total
    points_list = []
    for pos in positions:
        # Merges on name only: strip digits and parentheses, then trailing spaces
        sims[pos] = sims[pos].str.replace(r'[(0-9)]', "", regex=True)
        sims[pos] = sims[pos].str.rstrip()

        # Merge with points
        sims = sims.merge(players, left_on=pos, right_on='Player', how='left')
        points_name = 'FPTS' + "_" + pos
        points_list.append(points_name)
        sims.drop(columns=['Player'], inplace=True)
        sims.rename(columns={'FPTS': points_name}, inplace=True)
        # Diagnostic: number of lineups whose player in this slot found no score
        print(pos, sims[points_name].isna().sum())

    # May duplicate if multiple players have the same name, usually want the first
    sims.drop_duplicates(subset=positions, keep='first', inplace=True)
    sims.reset_index(inplace=True, drop=True)

    # Calculate lineup points (sum skips NaN, so an unmatched player counts as 0)
    sims['Points'] = sims[points_list].sum(axis=1)

    # Pull the contest columns out once instead of filtering the frame per sim
    contest_points = lineups['Points'].to_numpy()
    contest_ranks = lineups['Rank'].to_numpy()
    contest_payouts = lineups['Payout'].to_numpy()
    behind_everyone_rank = contest_ranks.max() + 1 if len(contest_ranks) else 1

    sim_ranks = []
    sim_payouts = []
    for points in sims['Points']:
        # Positions of contest entries this lineup strictly outscored.
        # NOTE(review): taking the first one assumes the standings are
        # ordered best-to-worst - confirm against the export.
        beaten = np.flatnonzero(contest_points < points)
        if beaten.size:
            first = beaten[0]
            sim_ranks.append(contest_ranks[first])
            sim_payouts.append(contest_payouts[first])
        else:
            # Beat nobody: previously an IndexError that made the caller
            # discard the whole contest.
            sim_ranks.append(behind_everyone_rank)
            sim_payouts.append(0.0)

    sims['Rank'] = np.asarray(sim_ranks, dtype=float)
    sims['Payout'] = np.asarray(sim_payouts, dtype=float)

    return sims

In [10]:
# sims = read_sims("147198950")
# sims.describe()

### Run Evaluations

In [11]:
# Read sims, can't break
def read_sims2(contestKey):
    """Best-effort wrapper around ``read_sims`` that never raises on bad data.

    Parameters
    ----------
    contestKey : int or str
        Contest identifier forwarded to ``read_sims``.

    Returns
    -------
    pandas.DataFrame or None
        The frame from ``read_sims`` with an added ``contestKey`` column, or
        ``None`` if reading or scoring the contest failed.
    """
    try:
        sims = read_sims(contestKey)
        sims['contestKey'] = contestKey
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit still
        # stop the run, and report the skipped contest instead of hiding it.
        print(f"Skipping contest {contestKey}: {type(exc).__name__}: {exc}")
        sims = None

    return sims

In [12]:
# # Read in history file        
# history = pd.read_csv(os.path.join(baseball_path, "Utilities", "Contests.csv"))

# # Sort by date, then draft group, then fee
# history.sort_values(['date', 'draftGroupId', 'entryFee'], ascending=False)
# # Keep only one observation per draft group 
# history.drop_duplicates('draftGroupId', keep='first', inplace=True)

# history = history.query('result == 1').query('payout == 1').query('salary == 1')

# history = history[history['entryFee'] == 4]

# # Exclude rows with specific substrings
# excluded_substrings = ['vs', 'Turbo']
# history = history[~history['name'].str.contains('|'.join(excluded_substrings))]

# contestKeys = list(history['contestKey']) 
# print(len(contestKeys))


# # Run all in parallel
# all_sims = Parallel(n_jobs=-2, verbose=5)(delayed(read_sims2)(contestKey) for contestKey in contestKeys)

# # Create dataframe with every payout for each lineup
# payout_df = pd.concat(all_sims, axis=0)

# payout_df['Payout'].mean()

### Evaluate Metrics
Metrics:

- `AvgPointsPerGame`: Average projection
- `Sim STD`: Standard deviation of all lineup projections
- `Plus#` (`Plus1`–`Plus5`): `AvgPointsPerGame + # * Sim STD`
- `Tail`: Sum of projections from the 95th to the 100th percentile

In [13]:
def add_metrics(payout_df):
    """Add per-contest rank columns for every lineup-selection metric.

    The frame is modified in place (new columns are added and rows are
    re-sorted) and the same object is returned. Each metric name is printed
    as it is processed.

    Parameters
    ----------
    payout_df : pandas.DataFrame
        Needs ``contestKey``, ``ownership``, ``batter ownership``,
        ``pitcher ownership``, ``AvgPointsPerGame``, ``Sim STD``,
        ``Plus1`` to ``Plus5`` and ``Tail``.

    Returns
    -------
    pandas.DataFrame
        ``payout_df`` with ``Rarity``, ``Batter``, ``Pitcher``, one
        ``<metric> Rank`` column per metric, and the combined ``Choose`` /
        ``Choose2`` scores with their ranks.
    """
    def _rank_in_contest(column, ascending):
        # Order rows inside each contest by `column`, then number them from 1.
        print(column)
        payout_df.sort_values(['contestKey', column], ascending=ascending, inplace=True)
        payout_df[f'{column} Rank'] = payout_df.groupby('contestKey').cumcount() + 1

    # Ownership is negated so that, ranked high-to-low like the other
    # metrics, the least-owned lineup receives rank 1.
    negated_ownership = {
        'Rarity': 'ownership',
        'Batter': 'batter ownership',
        'Pitcher': 'pitcher ownership',
    }
    for new_name, source_name in negated_ownership.items():
        payout_df[new_name] = payout_df[source_name] * -1

    # Larger value -> better (smaller) rank
    higher_is_better = [
        'AvgPointsPerGame', 'Sim STD',
        'Plus1', 'Plus2', 'Plus3', 'Plus4', 'Plus5',
        'Tail', 'Rarity', 'Batter', 'Pitcher',
    ]
    for name in higher_is_better:
        _rank_in_contest(name, ascending=False)

    # Combined scores are sums of ranks, so smaller is better
    tail_rank = payout_df['Tail Rank']
    payout_df['Choose'] = payout_df['Rarity Rank'] + tail_rank
    payout_df['Choose2'] = payout_df['Batter Rank'] + tail_rank

    for name in ('Choose', 'Choose2'):
        _rank_in_contest(name, ascending=True)

    return payout_df

In [19]:
def evaluate_metric(metric, lineups=5, entry_fee=4, data=None):
    """Average profit per lineup when entering the best lineups of every contest.

    Parameters
    ----------
    metric : str
        Column to order by, smallest value first; the ``lineups`` rows with
        the smallest values are selected within each contest.
    lineups : int, default 5
        Number of lineups selected per contest.
    entry_fee : float, default 4
        Cost of one entry, subtracted from the mean payout.
    data : pandas.DataFrame, optional
        Frame with ``contestKey``, ``Payout`` and the ``metric`` column.
        Defaults to the notebook-level ``payout_df``.

    Returns
    -------
    float
        Mean payout of the selected lineups minus ``entry_fee``.
    """
    frame = payout_df if data is None else data

    # Sort a copy: re-ordering the shared frame in place would make the result
    # of later cells depend on which metric happened to be evaluated last.
    ordered = frame.sort_values(metric, ascending=True)

    # Select the top "lineups" lineups of each contest
    selected_payouts = ordered.groupby('contestKey')['Payout'].head(lineups)

    # Calculate average profit
    return selected_payouts.mean() - entry_fee

### Main Result
If I enter my typical 5 lineups per night, which approach is the most profitable?

In [15]:
# lineups = 5
# for metric in metrics:
#     profit = evaluate_metric(metric, lineups)
#     print("With the {} metric, the top {} lineups would average a profit of ${:.2f}.".format(metric, lineups, profit))

### Robustness Check
Is the top performing metric still the best?

In [16]:
# lineups = 20
# for metric in metrics:
#     profit = evaluate_metric(metric, lineups)
#     print("With the {} metric, the top {} lineups would average a profit of ${:.2f}.".format(metric, lineups, profit))

### Regression
Are rankings meaningful? <br> 
Should I enter more than 5 lineups?

In [17]:
# # Step 1: Choose variables
# X = payout_df[['Tail Rank', 'Rarity Rank']]
# y = payout_df['Payout']

# # Step 2: Add a constant term to the input features
# X = sm.add_constant(X)

# # Step 3: Create and fit the OLS model
# model = sm.OLS(y, X)
# results = model.fit()

# # Step 4: Print the summary
# results.summary()

Conclusion: worse-ranked lineups do perform worse on average, but the effect is not statistically significant. <br>
The expected payout for any additional lineup is below the $4 entry fee, so adding lineups isn't necessarily wise at this moment.

### Plot

In [18]:
def plot_payouts(metric):
    """Scatter the mean payout at each rank of ``metric`` with a linear trend line.

    Reads the notebook-level ``payout_df``, averages ``Payout`` for every
    value of ``'<metric> Rank'``, draws the points and a degree-1 least-squares
    fit on the current pyplot axes, annotates the fitted equation and shows
    the figure.

    Parameters
    ----------
    metric : str
        Metric name; the column ``f'{metric} Rank'`` must exist in ``payout_df``.
    """
    rank_col = f'{metric} Rank'

    # Mean payout for each rank position
    mean_by_rank = payout_df.groupby([rank_col])['Payout'].mean().reset_index()
    ranks = mean_by_rank[rank_col]
    mean_payouts = mean_by_rank['Payout']

    # Points
    plt.scatter(ranks, mean_payouts)

    # Straight-line fit through the averaged points
    coefficients = np.polyfit(ranks, mean_payouts, 1)
    slope, intercept = coefficients[0], coefficients[1]
    trend = np.poly1d(coefficients)
    plt.plot(ranks, trend(ranks), color='red', label='Trend line')

    # Axis labels and title
    plt.xlabel(rank_col)
    plt.ylabel('Payout')
    plt.title('Scatter Plot of Rank vs Payout')

    # Fitted equation, placed in axes coordinates near the top centre
    equation = f'Payout = {slope:.2f} * Rank + {intercept:.2f}'
    current_axes = plt.gca()
    plt.text(0.5, 0.9, equation, ha='center', va='center', transform=current_axes.transAxes)

    # Legend, then render
    plt.legend()
    plt.show()

    # plt.savefig('plot.png')
