In [147]:
import numpy as np
import pandas as pd
import re

## Create game_info table

In [256]:
def get_weekly_game_info(week_num, workbook_path='data/Boyz Pickem 23-24.xlsx'):
    """Build the per-game results table for one weekly sheet of the workbook.

    The sheet is cut between two markers: the first fully blank row and the
    first row whose 'Timestamp' cell contains 'total'. The first row of that
    span supplies the column headers (lower-cased), the second row is
    discarded, and the remaining rows are the games.

    Parameters
    ----------
    week_num : int
        Week to load; the sheet read is named ``f"week{week_num}"``.
    workbook_path : str, optional
        Path of the Excel workbook. Defaults to the season workbook.

    Returns
    -------
    pandas.DataFrame
        Columns from ``game_id`` through ``winner`` whose header does not
        contain ``"com"`` (this removes the e-mail-address columns), plus a
        ``week_id`` column holding ``week_num``.

    Raises
    ------
    ValueError
        If the sheet has no fully blank row or no 'total' row.
    """
    sheet = pd.read_excel(workbook_path, sheet_name=f"week{week_num}")

    # First row of all nulls: everything after it is the results grid.
    blank_rows = sheet.index[sheet.isnull().all(axis=1)]
    if len(blank_rows) == 0:
        raise ValueError(f"week{week_num}: no fully blank separator row found")
    first_row = blank_rows[0]

    # Row after the last game (where totals are calculated). na=False turns
    # non-string / missing cells into False instead of NaN.
    total_rows = sheet.index[sheet['Timestamp'].str.contains('total', na=False)]
    if len(total_rows) == 0:
        raise ValueError(f"week{week_num}: no 'total' row found in 'Timestamp' column")
    last_row = total_rows[0]

    grid = sheet.iloc[first_row + 1:last_row].reset_index(drop=True)

    # Build the header explicitly instead of writing into `columns.values`,
    # which mutates an Index that pandas treats as immutable.
    grid.columns = ['game_id'] + [str(label).lower() for label in grid.iloc[0, 1:]]

    # Rows 0 and 1 are the header row and the row directly below it.
    games = grid.drop(index=[0, 1]).loc[:, 'game_id':'winner']
    games = games.reindex(sorted(games.columns), axis=1)
    games['week_id'] = week_num

    # Drop all columns with an email address in the header.
    return games.loc[:, ~games.columns.str.contains("com")]

In [216]:
# One game-info frame per weekly sheet, weeks 1 through 18.
info_sheets = [get_weekly_game_info(week) for week in range(1, 19)]

In [217]:
# Stack the per-week frames into a single season-long table.
game_info = pd.concat(info_sheets)
# Sanity check: 17 games per team * 32 teams / 2 teams per game = 272 rows expected.
print(game_info.shape)
game_info.head()

(272, 3)


Unnamed: 0,game_id,winner,week_id
2,Lions (+4) @ Chiefs [TNF],Lions,1
3,Panthers (+3.5) @ Falcons,Falcons,1
4,Texans (+9.5) @ Ravens,Ravens,1
5,Bengals (-2) @ Browns,Browns,1
6,Jaguars (-4.5) @ Colts,Jaguars,1


## Parse games for more info

In [257]:
def get_away_line(game_str):
    """Return the away team's point spread parsed from a game label.

    Labels look like ``'Lions (+4) @ Chiefs [TNF]'``; the spread is the first
    parenthesised token. ``(PK)`` (pick'em, any letter case) means no spread.

    Parameters
    ----------
    game_str : str
        Game label containing a parenthesised spread such as ``(+3.5)``,
        ``(-2)`` or ``(PK)``.

    Returns
    -------
    float
        The spread as written for the away team; ``0.0`` for pick'em.

    Raises
    ------
    ValueError
        If no parenthesised spread can be found in ``game_str``.
    """
    # Search for the parenthesised token rather than relying on it being the
    # second space-separated word, so doubled spaces do not break parsing.
    match = re.search(
        r'\(\s*(pk|[+-]?(?:\d+\.?\d*|\.\d+))\s*\)', game_str, flags=re.IGNORECASE
    )
    if match is None:
        raise ValueError(f"no point spread found in game label: {game_str!r}")
    token = match.group(1)
    if token.lower() == 'pk':
        # Always a float, so callers get one numeric type for every game.
        return 0.0
    return float(token)

In [258]:
def get_favorite(row):
    """Return ``(favorite, underdog)`` for a game row.

    A negative ``away_line`` makes the away team the favorite, a positive one
    makes the home team the favorite. Any other value (for example 0) yields
    the placeholder pair ``('None', 'None')``.
    """
    spread = row['away_line']
    if spread < 0:
        favorite_key, underdog_key = 'away_team', 'home_team'
    elif spread > 0:
        favorite_key, underdog_key = 'home_team', 'away_team'
    else:
        return 'None', 'None'
    return row[favorite_key], row[underdog_key]

In [220]:
# Spot check: every game whose label mentions the Cowboys.
game_info.loc[game_info['game_id'].str.contains('Cowboys')]

Unnamed: 0,game_id,winner,week_id
16,Cowboys (-3.5) @ Giants,Cowboys,1
13,Jets (+9) @ Cowboys,Cowboys,2
13,Cowboys (-12.5) @ Cardinals,Cardinals,3
14,Patriots (+6) @ Cowboys,Cowboys,4
14,Cowboys (+3.5) @ 49ers,49ers,5
16,Cowboys (-1.5) @ Chargers [MNF],Cowboys,6
4,Rams (+7) @ Cowboys,Cowboys,8
12,Cowboys (+3) @ Eagles,Eagles,9
12,Giants (+17) @ Cowboys,Cowboys,10
5,Cowboys (-10.5) @ Panthers,Cowboys,11


In [245]:
# Scratch: split the first game's label on single spaces to inspect its tokens.
spread_info = game_info['game_id'].iloc[0].split(' ')

In [246]:
# Scratch: recorded winner of that same first game.
winner = game_info['winner'].iloc[0]

In [248]:
# Tokens 0 and 3 of the split label are the two team names (away, home);
# tokens 1 and 2 are the parenthesised line and the '@'.
teams = (spread_info[0], spread_info[3])

In [249]:
# First team that is not the recorded winner. Raises StopIteration if neither
# differs; if `winner` is not a team name at all, this is simply the away team.
result = next(team for team in teams if team != winner)

In [250]:
# Show the non-winning team found in the previous cell.
result

'Chiefs'

In [234]:
# Check that the recorded winner appears as a whole token in the game label.
game_info['winner'].iloc[0] in game_info['game_id'].iloc[0].split(' ')

True

In [221]:

# Derive team and spread columns from the game label
# ('<away> (<line>) @ <home> ...'): tokens 0 and 3 are the teams.
game_info['away_team'] = [label.split(' ')[0] for label in game_info['game_id']]
game_info['home_team'] = [label.split(' ')[3] for label in game_info['game_id']]
game_info['away_line'] = game_info['game_id'].apply(get_away_line)
# The home line is the away line with the sign flipped.
game_info['home_line'] = [-1 * spread for spread in game_info['away_line']]
# get_favorite returns one (favorite, underdog) tuple per row; zip(*) transposes
# them into two column-length tuples.
game_info['favorite'], game_info['underdog'] = zip(*game_info.apply(get_favorite, axis=1))

In [222]:
# Persist the enriched game table; the row index is not written.
game_info.to_excel('data/game_info.xlsx', index=False)

In [259]:
def make_game_info(weeks=range(1, 19), output_path='data/game_info.xlsx'):
    """Build the season game table and write it to an Excel file.

    Loads one frame per week with ``get_weekly_game_info``, concatenates them,
    derives the team / spread / favorite columns from each game label, and
    writes the result without its row index.

    Parameters
    ----------
    weeks : iterable of int, optional
        Week numbers to load. Defaults to weeks 1 through 18.
    output_path : str, optional
        Destination Excel file. Defaults to ``'data/game_info.xlsx'``.

    Returns
    -------
    None
    """
    # ignore_index gives each game a unique row label; the written file is
    # unaffected because the index is not exported.
    game_info = pd.concat(
        [get_weekly_game_info(week) for week in weeks], ignore_index=True
    )
    labels = game_info['game_id']
    # Label format is '<away> (<line>) @ <home> ...': tokens 0 and 3 are the teams.
    game_info['away_team'] = [label.split(' ')[0] for label in labels]
    game_info['home_team'] = [label.split(' ')[3] for label in labels]
    game_info['away_line'] = labels.apply(get_away_line)
    # The home line is the away line with the sign flipped.
    game_info['home_line'] = -game_info['away_line']
    # One (favorite, underdog) tuple per row, transposed into two columns.
    game_info['favorite'], game_info['underdog'] = zip(*game_info.apply(get_favorite, axis=1))
    game_info.to_excel(output_path, index=False)

In [260]:
# Rebuild data/game_info.xlsx from the workbook.
make_game_info()

## Create personal picks table

Each (person, game) pair is a unique row, which also records that person's pick for the game.

In [251]:
# Week used by the exploratory cells below (sheet name is f"week{week_num}").
week_num = 18

In [252]:
# Exploratory version of the reshaping that get_weekly_picks() packages as a
# function. It reads the notebook-level `week_num` and leaves `df`, `first_row`,
# `num_games` and `df_explode` in the global namespace.
df = pd.read_excel('data/Boyz Pickem 23-24.xlsx', sheet_name=f"week{week_num}")
# Label of the first fully blank row; only the rows above it are kept.
first_row = df.index[df.isnull().all(1)].values[0]
df = df[:first_row]
# errors='ignore' because not every listed column has to exist in the sheet.
df.drop(['Timestamp', 'Name', 'Email'], axis=1, inplace=True, errors='ignore')
df.rename(columns={'Email Address': 'email', 'ATS Bonus':'bonus'}, inplace=True)
# Keep 'email' through 'bonus', then drop 'bonus': what remains after 'email'
# is one column per game.
df = df.loc[:, 'email':'bonus']
df.drop(['bonus'], axis=1, inplace=True)
num_games = len(df.columns[1:])
# Per row: the list of that row's picks, and the (identical) list of game labels.
df['pick'] = df.iloc[:, 1:num_games+1].values.tolist()
df['game_id'] = [df.iloc[:, 1:num_games+1].columns.values.tolist()] * len(df)
# Explode both list columns in parallel so each (email, game) becomes one row.
df_explode = df[['email', 'pick', 'game_id']].set_index(['email']).apply(pd.Series.explode).reset_index()
df_explode['week_id'] = [week_num] * len(df_explode)

In [254]:
# Re-read the sheet for `week_num` and keep only the rows above the first
# fully blank row. NOTE(review): this overwrites the reshaped `df` built in the
# earlier prototype cell.
df = pd.read_excel('data/Boyz Pickem 23-24.xlsx', sheet_name=f"week{week_num}")
first_row = df.index[df.isnull().all(1)].values[0]
df = df[:first_row]

In [255]:
# Display the raw rows kept above.
# NOTE(review): the rendered output includes participants' e-mail addresses.
df

Unnamed: 0,Timestamp,Email Address,Name,Steelers (-3) @ Ravens [SAT],Texans (-1) @ Colts [SAT],Falcons (+3) @ Saints,Browns (+7) @ Bengals,Jaguars (-4) @ Titans,Vikings (+3) @ Lions,Jets (+2) @ Patriots,...,Bears (+3) @ Packers,Cowboys (-13) @ Commanders,Broncos (+3) @ Raiders,Chiefs (+3.5) @ Chargers,Rams (+4) @ 49ers,Eagles (-5.5) @ Giants,Seahawks (-3) @ Cardinals,Bills (-2.5) @ Dolphins,ATS Bonus,new years resolutions?
0,2024-01-06 04:57:08.449000,jtocci19@gmail.com,Tocci,Ravens,Colts,Saints,Bengals,Titans,Lions,Patriots,...,Packers,Cowboys,Raiders,Chiefs,49ers,Eagles,Cardinals,Dolphins,Ravens,"Stop drinking coffee , it’s going terrible."
1,2024-01-06 06:55:50.848000,greg.forward029@gmail.com,Greg,Ravens,Texans,Saints,Bengals,Jaguars,Vikings,Patriots,...,Bears,Commanders,Broncos,Chiefs,Rams,Eagles,Seahawks,Bills,Ravens,Play more guitar!!!
2,2024-01-06 10:04:52.058000,dalpert89@gmail.com,Daniel,Steelers,Texans,Saints,Bengals,Jaguars,Vikings,Patriots,...,Packers,Cowboys,Broncos,Chargers,49ers,Eagles,Seahawks,Bills,Packers,Less phone in bed
3,2024-01-06 10:21:51.194000,warnercp10@gmail.com,Colin,Steelers,Colts,Saints,Browns,Titans,Vikings,Patriots,...,Bears,Commanders,Broncos,Chargers,Rams,Giants,Cardinals,Bills,Bears,Go on more runs than I did in 2023 (2)
4,2024-01-06 16:32:18.436000,jordan.angel7472@gmail.com,Jordy McWinner Winner Steak Dinner???,Steelers,Texans,Falcons,Bengals,Jaguars,Lions,Patriots,...,Packers,Cowboys,Raiders,Chargers,49ers,Eagles,Cardinals,Bills,Bills please,Cook a bit more (was a top 2% user on chipotl...
5,2024-01-07 09:14:24.119000,andrespintopro@gmail.com,Dres,Ravens,Texans,Falcons,Browns,Titans,Lions,Patriots,...,Bears,Commanders,Raiders,Chiefs,Rams,Eagles,Cardinals,Bills,Eagles,Drink more water. Gotta stay well hydrated thi...
6,2024-01-07 10:33:38.388000,adam.lassman@gmail.com,Lassman,Steelers,Colts,Falcons,Bengals,Jaguars,Lions,Jets,...,Bears,Commanders,Broncos,Chargers,Rams,Giants,Seahawks,Bills,Bills,Only hit snooze twice


In [269]:
# Maps each participant's e-mail address (as it appears in the workbook) to the
# short user id used for the `user_id` column and the comparison-table columns.
# NOTE(review): these are real addresses; consider loading them from a file
# that is not committed.
name_dict = {
    'adam.lassman@gmail.com': 'adam',
    'andrespintopro@gmail.com': 'andres',
    'dalpert89@gmail.com': 'daniel',
    'greg.forward029@gmail.com': 'greg',
    'jordan.angel7472@gmail.com': 'jordan',
    'jtocci19@gmail.com': 'james',
    'warnercp10@gmail.com': 'colin'
}

In [203]:
def get_weekly_picks(week_num, workbook_path='data/Boyz Pickem 23-24.xlsx'):
    """Return one row per (email, game) pick for a single weekly sheet.

    Only the rows above the sheet's first fully blank row are used. The
    columns between 'Email Address' and 'ATS Bonus' are treated as one column
    per game, with the column header as the game label.

    Parameters
    ----------
    week_num : int
        Week to load; the sheet read is named ``f"week{week_num}"``.
    workbook_path : str, optional
        Path of the Excel workbook. Defaults to the season workbook.

    Returns
    -------
    pandas.DataFrame
        Columns ``email``, ``pick``, ``game_id``, ``week_id``; rows are grouped
        by respondent in sheet order, games in column order, with a fresh
        0..n-1 index.
    """
    sheet = pd.read_excel(workbook_path, sheet_name=f"week{week_num}")
    first_blank = sheet.index[sheet.isnull().all(axis=1)].values[0]

    # Chain of non-mutating steps: nothing is modified in place on a slice.
    picks_wide = (
        sheet.iloc[:first_blank]
        # errors='ignore': not every listed column has to exist in the sheet.
        .drop(columns=['Timestamp', 'Name', 'Email'], errors='ignore')
        .rename(columns={'Email Address': 'email', 'ATS Bonus': 'bonus'})
        .loc[:, 'email':'bonus']
        .drop(columns=['bonus'])
    )
    game_ids = picks_wide.columns[1:].tolist()

    # One row per respondent holding two parallel lists (picks and game
    # labels), exploded together so each list position becomes its own row.
    per_person = pd.DataFrame({
        'email': picks_wide['email'].tolist(),
        'pick': picks_wide[game_ids].values.tolist(),
        'game_id': [game_ids] * len(picks_wide),
    })
    picks_long = per_person.explode(['pick', 'game_id'], ignore_index=True)
    picks_long['week_id'] = week_num
    return picks_long

In [223]:
# Long-format picks for every week (1 through 18), stacked into one frame.
weekly_picks_lst = [get_weekly_picks(week) for week in range(1, 19)]
all_picks = pd.concat(weekly_picks_lst)

In [224]:
# Persist the long-format picks; the row index is not written.
all_picks.to_excel('data/all_picks.xlsx', index=False)

In [225]:
# Sanity check: non-null counts per participant in each column.
all_picks.groupby('email').count()

Unnamed: 0_level_0,pick,game_id,week_id
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
adam.lassman@gmail.com,272,272,272
andrespintopro@gmail.com,272,272,272
dalpert89@gmail.com,272,272,272
greg.forward029@gmail.com,272,272,272
jordan.angel7472@gmail.com,272,272,272
jtocci19@gmail.com,272,272,272
warnercp10@gmail.com,272,272,272


In [284]:
def make_all_picks(weeks=range(1, 19), output_path='data/all_picks.xlsx'):
    """Build the season-long picks table and write it to an Excel file.

    Loads one long-format frame per week with ``get_weekly_picks``,
    concatenates them, adds a ``user_id`` column by looking each e-mail up in
    ``name_dict``, and writes the result without its row index.

    Parameters
    ----------
    weeks : iterable of int, optional
        Week numbers to load. Defaults to weeks 1 through 18.
    output_path : str, optional
        Destination Excel file. Defaults to ``'data/all_picks.xlsx'``.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any e-mail in the picks is missing from ``name_dict``.
    """
    # ignore_index gives each pick a unique row label; the written file is
    # unaffected because the index is not exported.
    all_picks = pd.concat(
        [get_weekly_picks(week) for week in weeks], ignore_index=True
    )
    # Fail up front with every unmapped address listed, rather than on the
    # first bad row in the middle of the lookup.
    unknown = [email for email in all_picks['email'].unique() if email not in name_dict]
    if unknown:
        raise KeyError(f"emails missing from name_dict: {unknown}")
    all_picks['user_id'] = all_picks['email'].map(name_dict)
    all_picks.to_excel(output_path, index=False)

In [279]:
# Rebuild all_picks from the weekly frames already held in weekly_picks_lst.
all_picks = pd.concat(weekly_picks_lst)

In [282]:
# Translate each e-mail address to its short user id (KeyError if unmapped).
all_picks['user_id'] = [name_dict[address] for address in all_picks['email']]

In [421]:
# Display the picks table with the new user_id column.
# NOTE(review): the rendered output includes participants' e-mail addresses.
all_picks

Unnamed: 0,email,pick,game_id,week_id,user_id
0,jordan.angel7472@gmail.com,Lions,Lions (+4) @ Chiefs [TNF],1,jordan
1,jordan.angel7472@gmail.com,Falcons,Panthers (+3.5) @ Falcons,1,jordan
2,jordan.angel7472@gmail.com,Ravens,Texans (+9.5) @ Ravens,1,jordan
3,jordan.angel7472@gmail.com,Bengals,Bengals (-2) @ Browns,1,jordan
4,jordan.angel7472@gmail.com,Colts,Jaguars (-4.5) @ Colts,1,jordan
...,...,...,...,...,...
75,jordan.angel7472@gmail.com,Chargers,Chiefs (+3.5) @ Chargers,18,jordan
76,jordan.angel7472@gmail.com,49ers,Rams (+4) @ 49ers,18,jordan
77,jordan.angel7472@gmail.com,Eagles,Eagles (-5.5) @ Giants,18,jordan
78,jordan.angel7472@gmail.com,Cardinals,Seahawks (-3) @ Cardinals,18,jordan


## Rerun data

In [391]:
# Regenerate both derived workbooks (data/game_info.xlsx and data/all_picks.xlsx).
make_game_info()
make_all_picks()

## Comparisons

In [320]:
def get_weekly_comparison_table(week_num):
    """Return the per-game picks-versus-winner grid for one weekly sheet.

    Works on the rows below the sheet's first fully blank row. The first of
    those rows carries e-mail addresses in positions 1-7, which are translated
    through ``name_dict`` into column names. Only the first nine columns are
    kept (game label, seven players, winner), the two leading rows are
    skipped, and rows without a winner are dropped.
    """
    sheet = pd.read_excel('data/Boyz Pickem 23-24.xlsx', sheet_name=f"week{week_num}")
    blank_row = sheet.index[sheet.isnull().all(1)].values[0]
    grid = sheet[blank_row + 1:]

    # Header cells 1..7 hold the players' e-mail addresses.
    player_columns = [name_dict[address] for address in grid.iloc[0, 1:8]]

    grid = grid.iloc[:, :9]
    grid.columns = ['game_id', *player_columns, 'winner']

    # Skip the header row and the row directly below it.
    games = grid[2:]
    has_winner = games['winner'].notnull()
    return games[has_winner].reset_index(drop=True)

In [323]:
# Comparison grid for every week (1 through 18), stacked into one frame.
weekly_comparison_lst = [get_weekly_comparison_table(week) for week in range(1, 19)]
comp_table = pd.concat(weekly_comparison_lst)

In [359]:
# First 50 games where daniel's pick differs from the recorded winner.
comp_table.loc[comp_table['daniel'] != comp_table['winner'], ['daniel', 'winner']].head(50)

Unnamed: 0,daniel,winner
0,Chiefs,Lions
1,Panthers,Falcons
2,Texans,Ravens
3,Bengals,Browns
9,Bears,Packers
12,Patriots,Eagles
13,Seahawks,Rams
14,Giants,Cowboys
15,Bills,Jets
0,Vikings,PUSH


In [356]:
# Share of games on which jordan and daniel made the same pick.
(comp_table['jordan'] == comp_table['daniel']).mean()

0.5147058823529411

In [336]:
# Short user ids, in name_dict insertion order.
users = [*name_dict.values()]

In [337]:
# Show the user ids.
users

['adam', 'andres', 'daniel', 'greg', 'jordan', 'james', 'colin']

In [338]:
from itertools import combinations

In [339]:
# Every unordered pair of distinct users.
all_combinations = [pair for pair in combinations(users, 2)]

In [340]:
# Show the user pairs.
all_combinations

[('adam', 'andres'),
 ('adam', 'daniel'),
 ('adam', 'greg'),
 ('adam', 'jordan'),
 ('adam', 'james'),
 ('adam', 'colin'),
 ('andres', 'daniel'),
 ('andres', 'greg'),
 ('andres', 'jordan'),
 ('andres', 'james'),
 ('andres', 'colin'),
 ('daniel', 'greg'),
 ('daniel', 'jordan'),
 ('daniel', 'james'),
 ('daniel', 'colin'),
 ('greg', 'jordan'),
 ('greg', 'james'),
 ('greg', 'colin'),
 ('jordan', 'james'),
 ('jordan', 'colin'),
 ('james', 'colin')]

In [384]:
def _win_rate(picks, winners):
    """Share of rows (rounded to 3 places) where `picks` equals `winners`.

    Returns None when there are no rows, instead of dividing by zero.
    """
    if len(picks) == 0:
        return None
    return round(sum(picks == winners) / len(picks), 3)


# Pairwise agreement statistics for every ordered pair of users, self-pairs
# included. Ordered pairs (rather than all_combinations) are used because
# p1_win_pct_when_disagree is measured from p1's side, so (a, b) and (b, a)
# are different rows.
rows = []
# Pushes are excluded from the win-rate columns but still count toward agree_rate.
decided = comp_table['winner'] != 'PUSH'
for p1 in users:
    for p2 in users:
        # A missing pick (NaN) never compares equal, so it counts as a
        # disagreement, even for a user paired with themselves.
        same_pick = comp_table[p1] == comp_table[p2]
        agree_rate = round(sum(same_pick) / len(comp_table), 3)
        agree_table = comp_table[same_pick & decided]
        disagree_table = comp_table[(~same_pick) & decided]
        win_pct_when_agree = _win_rate(agree_table[p1], agree_table['winner'])
        p1_win_pct_when_disagree = _win_rate(disagree_table[p1], disagree_table['winner'])
        rows.append([p1, p2, agree_rate, win_pct_when_agree, p1_win_pct_when_disagree])
columns = ['p1', 'p2', 'agree_rate', 'win_pct_when_agree', 'p1_win_pct_when_disagree']
comp_statistics = pd.DataFrame(rows, columns=columns)

In [389]:
# For each p1, list partners from lowest to highest agreement rate.
comp_statistics.sort_values(by=['p1','agree_rate'])

Unnamed: 0,p1,p2,agree_rate,win_pct_when_agree,p1_win_pct_when_disagree
1,adam,andres,0.467,0.496,0.507
5,adam,james,0.467,0.549,0.46
3,adam,greg,0.493,0.528,0.477
4,adam,jordan,0.5,0.543,0.462
2,adam,daniel,0.515,0.496,0.508
6,adam,colin,0.551,0.518,0.483
0,adam,adam,0.993,0.506,0.0
13,andres,colin,0.434,0.504,0.479
9,andres,daniel,0.445,0.478,0.5
7,andres,adam,0.467,0.496,0.486


In [397]:
def get_ats_bonus(week_num):
    """Return each respondent's 'ATS Bonus' answer for one weekly sheet.

    Only the rows above the sheet's first fully blank row are used. The result
    has two columns, ``email`` and ``bonus`` (renamed from 'Email Address' and
    'ATS Bonus').
    """
    sheet = pd.read_excel('data/Boyz Pickem 23-24.xlsx', sheet_name=f"week{week_num}")
    blank_rows = sheet.index[sheet.isnull().all(1)]
    responses = (
        sheet[:blank_rows.values[0]]
        # errors='ignore': not every listed column has to exist in the sheet.
        .drop(['Timestamp', 'Name', 'Email'], axis=1, errors='ignore')
        .rename(columns={'Email Address': 'email', 'ATS Bonus': 'bonus'})
    )
    # Slice 'email'..'bonus' first, as the original did, then keep the two ends.
    return responses.loc[:, 'email':'bonus'][['email', 'bonus']]

In [400]:
# ATS bonus answers for every week (1 through 18), stacked into one frame.
bonus_dfs = [get_ats_bonus(week) for week in range(1, 19)]
bonus_df = pd.concat(bonus_dfs)

In [402]:
from pandasql import sqldf 

In [420]:
# Count how often each participant gave each ATS bonus answer, most frequent
# first and then by answer text.
# NOTE(review): sqldf is called without an explicit environment, so it
# presumably resolves `bonus_df` from the notebook namespace; confirm against
# the pandasql docs.
q = '''
select 
    email,
    bonus, 
    count(*) num_ats
from bonus_df
group by 1,2
order by 3 desc, 2
'''
# NOTE(review): this reuses the name `df`, replacing the frame from earlier cells.
df = sqldf(q)
# Show only the rows whose e-mail contains 'jordan'.
df[df['email'].str.contains('jordan')]

Unnamed: 0,email,bonus,num_ats
4,jordan.angel7472@gmail.com,Vikings,3
7,jordan.angel7472@gmail.com,Bills,2
14,jordan.angel7472@gmail.com,Dolphins,2
15,jordan.angel7472@gmail.com,Eggles,2
25,jordan.angel7472@gmail.com,,1
26,jordan.angel7472@gmail.com,49ers,1
35,jordan.angel7472@gmail.com,Bills please,1
38,jordan.angel7472@gmail.com,Brownies,1
43,jordan.angel7472@gmail.com,Chiefs,1
53,jordan.angel7472@gmail.com,Cowboys,1
