In [3]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import math

import plotly.express as px

# Show (effectively) every row and column when a DataFrame is rendered in the notebook
pd.options.display.max_rows = 999999
pd.options.display.max_columns = 99999

# `IPython.core.display.display` is a deprecated import location (newer IPython
# releases no longer expose it); `IPython.display` is the supported public module.
from IPython.display import display, HTML
# Widen the notebook page container so wide tables fit on screen
display(HTML("<style>.container { width:85% !important; }</style>"))

#import sys
#sys.path.insert(0, 'C:/Users/Sean/Documents/python/ufc/scripts/functions/')

from functions import clean_entry_name, melt_crosstab, cleanup_mma_lineup_data

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# INPUTS

In [4]:
# Where the contest standings export lives, and which file to analyse
data_dir = 'C:/Users/Sean/Documents/Sports/DFS/MMA/20210612_rewind_analysis/'
file_name = '150lineup_50k_gpp_standings.csv'
# data_dir already ends with a slash, so the two parts are simply joined together
file_path = f'{data_dir}{file_name}'

# DraftKings usernames whose exposures get compared
dk_users = [
    'Packersfan203',
    'gehrenberg',
    'ahotebrown',
    'MMAdamMartin',
]

# READ DATA

In [5]:
# Load the contest standings export and report how many rows came in
df = pd.read_csv(file_path)
print(df.shape[0])

59453


In [6]:
# Preview the first rows of the raw export: per-entry columns (Rank ... Lineup)
# sit side-by-side with per-player columns (Player ... FPTS) in the same frame.
df.head()

Unnamed: 0,Rank,EntryId,EntryName,TimeRemaining,Points,Lineup,Unnamed: 6,Player,Roster Position,%Drafted,FPTS
0,1,2718713293,Packersfan203 (149/150),0,649.32996,F Brandon Moreno F Paul Craig F Brad Riddell F...,,Leon Edwards,F,48.22%,90.97
1,2,2718297611,badact (21/32),0,643.69,F Israel Adesanya F Paul Craig F Brad Riddell ...,,Deiveson Figueiredo,F,42.29%,19.1
2,3,2715801944,vigiljp05 (132/150),0,641.06,F Israel Adesanya F Brandon Moreno F Paul Crai...,,Jamahal Hill,F,39.33%,2.39
3,3,2718025416,Onedrop33 (40/55),0,641.06,F Israel Adesanya F Brandon Moreno F Paul Crai...,,Israel Adesanya,F,36.53%,84.68
4,3,2718637271,DAN20122 (41/150),0,641.06,F Israel Adesanya F Brandon Moreno F Paul Crai...,,Brandon Moreno,F,34.48%,97.27


# PREP DATA

## We split the master data into 2 different datasets that, in tandem, make up the final output (we have to do this because the CSV is basically 2 dataframes stored side-by-side in a single file)

## Dataset #1 - Player Points Scored and Contest Ownership
## Dataset #2 - Every Contest Entry Name and Lineup 

In [7]:
# Create the 1st dataset: one row per player with roster position, contest
# ownership and fantasy points scored.
#
# The per-player columns are populated only on some rows of the export, so keep
# the rows that actually have a Player and carry the other columns along with
# them. Filtering once (instead of dropna()-ing each column separately and
# assigning '%Drafted' as a positional list) guarantees the values stay aligned
# row-by-row even if the columns do not share the same NaN pattern.
points_own_df = (
    df.loc[df['Player'].notna(), ['Player', 'Roster Position', '%Drafted', 'FPTS']]
    .rename(columns={
        'Player': 'player',
        'Roster Position': 'position',
        '%Drafted': 'ownership',
        'FPTS': 'points',
    })
    # Ownership arrives as a string such as '48.22%': strip the trailing percent
    # sign (only if present) and cast to float.
    .assign(ownership=lambda frame: frame['ownership'].str.rstrip('%').astype(float))
)

# NOTE: player names previously went through an extra cleaning step here; that
# should no longer be necessary.

print(len(points_own_df))
points_own_df.head()

29


Unnamed: 0,player,position,ownership,points
0,Leon Edwards,F,48.22,90.97
1,Deiveson Figueiredo,F,42.29,19.1
2,Jamahal Hill,F,39.33,2.39
3,Israel Adesanya,F,36.53,84.68
4,Brandon Moreno,F,34.48,97.27


In [8]:
# Keep only the per-entry columns, then discard incomplete rows. The target is
# entries submitted with an empty Lineup, which should not be part of this analysis;
# note that dropna() removes a row if ANY of the selected columns is NaN.
lineup_columns = ['Rank', 'EntryId', 'EntryName', 'Points', 'Lineup']
raw_lineup_data = df[lineup_columns].dropna()

In [11]:
# Clean up the raw dataframe, parse out the lineups, clean the username, etc.
# cleanup_mma_lineup_data comes from the local `functions` module; judging by the
# preview below it returns one row per entry with the bare username in EntryName
# and the lineup split into columns F1..F6 (TODO confirm against the helper).
agg_lineups = cleanup_mma_lineup_data(raw_lineup_data)

# Preview the cleaned, parsed lineups
agg_lineups.head()

Unnamed: 0,Rank,EntryId,EntryName,Points,F1,F2,F3,F4,F5,F6
0,1,2718713293,Packersfan203,649.32996,Brandon Moreno,Paul Craig,Brad Riddell,Lauren Murphy,Movsar Evloev,Terrance McKinney
1,2,2718297611,badact,643.69,Israel Adesanya,Paul Craig,Brad Riddell,Leon Edwards,Movsar Evloev,Terrance McKinney
2,3,2715801944,vigiljp05,641.06,Israel Adesanya,Brandon Moreno,Paul Craig,Leon Edwards,Movsar Evloev,Terrance McKinney
3,3,2718025416,Onedrop33,641.06,Israel Adesanya,Brandon Moreno,Paul Craig,Leon Edwards,Movsar Evloev,Terrance McKinney
4,3,2718637271,DAN20122,641.06,Israel Adesanya,Brandon Moreno,Paul Craig,Leon Edwards,Movsar Evloev,Terrance McKinney


# PROCESS DATA

# Here we need to transform the data by melting and creating a crosstab table to show exposures for every user

In [12]:
# Build a {username: exposure table} mapping, one melt_crosstab call per user,
# keeping only the columns needed downstream.
exposure_columns = ['player', 'count', 'exposure']

user_data_dict = {
    user: melt_crosstab(agg_lineups, user)[exposure_columns]
    for user in dk_users
}

In [15]:
# Aggregate the per-user exposure tables into a single frame:
# a 'player' column plus one rounded exposure column per user.

# De-duplicate usernames (order preserved). The first frame is chosen by position
# rather than by comparing against dk_users[0], so a repeated username can no
# longer silently reset the frame accumulated so far.
unique_users = list(dict.fromkeys(dk_users))

# Name each exposure column after its user *before* merging, so no in-place
# rename of the merged result is needed.
exposure_frames = [
    user_data_dict[user][['player', 'exposure']].round(2).rename(columns={'exposure': user})
    for user in unique_users
]

agg_exposures = pd.DataFrame()
for position, user_exposure in enumerate(exposure_frames):
    if position == 0:
        agg_exposures = user_exposure
    else:
        # Outer merge keeps players that only some of the users rostered
        agg_exposures = pd.merge(agg_exposures, user_exposure, how='outer', on='player')

# A player a user never rostered comes out of the outer merge as NaN -> 0.0 exposure
agg_exposures = agg_exposures.replace(np.nan, 0.0)

agg_exposures.head()

columns,player,Packersfan203,gehrenberg,ahotebrown,MMAdamMartin
0,Marvin Vettori,65.33,68.06,30.0,48.0
1,Jamahal Hill,50.0,52.78,45.33,44.0
2,Matt Frevola,49.33,31.94,45.33,1.33
3,Brandon Moreno,42.0,47.22,35.33,32.0
4,Brad Riddell,40.0,0.0,18.67,19.33


In [16]:
# Combine the user exposures with the points/ownership data (default inner merge
# on 'player', so only players present in both frames are kept).
non_user_cols = ['player','position','points', 'ownership']
merged_df = agg_exposures.merge(points_own_df, on='player')

# Descriptive columns first, then the per-user exposure columns;
# Index.difference returns the remaining column names in sorted order.
user_cols = list(merged_df.columns.difference(non_user_cols))
master_df = merged_df[non_user_cols + user_cols]

print(len(master_df))

28


# OUTPUT

In [17]:
# Display the final table, highest contest ownership first
master_df.sort_values('ownership', ascending=False)

Unnamed: 0,player,position,points,ownership,MMAdamMartin,Packersfan203,ahotebrown,gehrenberg
11,Leon Edwards,F,90.97,48.22,43.33,29.33,60.0,52.78
17,Deiveson Figueiredo,F,19.1,42.29,34.0,10.0,34.67,27.78
1,Jamahal Hill,F,2.39,39.33,44.0,50.0,45.33,52.78
24,Israel Adesanya,F,84.68,36.53,40.67,0.0,25.33,0.0
3,Brandon Moreno,F,97.27,34.48,32.0,42.0,35.33,47.22
0,Marvin Vettori,F,62.25,33.38,48.0,65.33,30.0,68.06
10,Luigi Vendramini,F,27.47,26.75,20.0,29.33,25.33,58.33
2,Matt Frevola,F,0.0,25.62,1.33,49.33,45.33,31.94
6,Chase Hooper,F,51.05,24.87,20.0,32.0,19.33,0.0
5,Joanne Calderwood,F,55.38,24.4,2.67,35.33,30.0,61.11


In [None]:
# Write the ownership-ranked table to CSV (row index omitted)
ranked_master_df = master_df.sort_values(by='ownership', ascending=False)
ranked_master_df.to_csv('sample_master_df.csv', index=False)