## Import Libraries

In [None]:
!pip install pandas -q; pip install bs4 -q; pip install html5lib -q; pip install matplotlib -q; pip install requests -q; pip install lxml -q; pip install tabulate -q

In [1]:
import glob
import io
import re
import time
from pathlib import Path

import html5lib
import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup

%matplotlib inline
pd.options.display.max_columns = None

## Request HTML from given URL

In [2]:
url = "https://projects.fivethirtyeight.com/2022-nba-predictions/"

# A timeout keeps the cell from hanging indefinitely on an unresponsive server
html = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page in later cells
html.raise_for_status()

## Create a BeautifulSoup object by passing the HTML to the BeautifulSoup() constructor

In [3]:
# Build the parse tree from the response body using the html5lib parser
soup = BeautifulSoup(markup=html.text, features="html5lib")

## Grab prediction Table

In [4]:
# select() returns a list of matches; check it before indexing so a change in the
# page layout produces a clear message instead of a bare "list index out of range"
standings_matches = soup.select('table#standings-table')
if not standings_matches:
    raise IndexError("No <table id='standings-table'> found in the downloaded page")
prediction_table = str(standings_matches[0])

## Read into Pandas

In [5]:
# read_html expects a path, URL or file-like object; passing the markup as a plain
# string is deprecated in recent pandas, so wrap it in an in-memory text buffer
projections = pd.read_html(io.StringIO(prediction_table))[0]

## Get rid of junk columns (e.g. logos/anything not numbers)

In [6]:
# Positions of the junk (logo / non-numeric) columns in the scraped table
junk_positions = [2, 4, 8, 9, 10, 11, 12, 13]
projections = projections.drop(columns=projections.columns[junk_positions])

## Rename Column Headers

In [7]:
# One label per remaining column, in left-to-right order
projections.columns = [
    'Current rating',
    '1 Week Change',
    'Teams',
    'Full-strength rating',
    'Projected Record',
    'Proj. Point Diff/G',
]

## Create new column called Projected Wins by splitting on the hyphen in Projected Record

In [8]:
# `n` has to be passed by keyword: since pandas 2.0 every str.split argument after
# `pat` is keyword-only, so the positional form split('-', 1) raises TypeError.
# The value stays a string (e.g. '53'); later cells convert it with int().
projections['Projected Wins'] = projections['Projected Record'].str.split('-', n=1).str[0]

## Create 3 new columns called Current Wins, Current Loses, and Games Left

In [9]:
GAMES_PER_SEASON = 82  # season length used to derive the number of games left

# Raw string: `\s` must reach the regex engine untouched (inside a plain string it
# is an invalid escape sequence that newer Python versions warn about).
# group(1) = team name, optionally starting with the digits 7/6 (e.g. "76ers"),
# group(2) = current wins, group(3) = current losses; the record part is optional.
team_record_pattern = re.compile(r'([76]{0,2}[a-zA-Z\s]{3,14})([0-9]{1,2})?[-]?([0-9]{1,2})?')

team_names = []
current_wins = []
current_losses = []
games_left = []
for team in projections['Teams']:
    found = team_record_pattern.search(team)
    if found is None:
        # Keep the original cell text so every list stays as long as the frame;
        # leaving the name out would make the 'Teams' assignment below fail
        # with a length mismatch.
        name, wins, losses = team, 0, 0
    elif found.group(2) is not None and found.group(3) is not None:
        # Store the record as integers so the columns have a single type
        name, wins, losses = found.group(1), int(found.group(2)), int(found.group(3))
    else:
        # A name was found but no complete "wins-losses" record followed it
        name, wins, losses = found.group(1), 0, 0
    team_names.append(name)
    current_wins.append(wins)
    current_losses.append(losses)
    games_left.append(GAMES_PER_SEASON - (wins + losses))

# Lists are assigned positionally, one entry per row of the frame
projections['Teams'] = team_names
projections['Current Wins'] = current_wins
projections['Current Loses'] = current_losses
projections['Games Left'] = games_left

In [10]:
# Show the cleaned projections frame as this cell's output
projections

Unnamed: 0,Current rating,1 Week Change,Teams,Full-strength rating,Projected Record,Proj. Point Diff/G,Projected Wins,Current Wins,Current Loses,Games Left
0,1600,,Bucks,1673,53-29,4.8,53,4,5,73
1,1667,,Jazz,1672,58-24,6.9,58,7,1,74
2,1596,,Clippers,1657,47-35,2.5,47,4,4,74
3,1595,,Nuggets,1633,50-32,3.8,50,4,4,74
4,1613,,76ers,1656,53-29,4.7,53,7,2,73
5,1592,,Nets,1654,49-33,2.6,49,6,3,73
6,1626,,Suns,1619,50-32,3.3,50,4,3,75
7,1588,,Heat,1600,50-32,3.8,50,6,2,74
8,1576,,Celtics,1576,44-38,1.3,44,4,5,73
9,1592,,Trail Blazers,1592,46-36,2.5,46,4,5,73


## You need to have the following csv file in the same folder as the ipynb.

In [10]:
# Load the bets sheet; the path is relative, so the file has to sit in the
# notebook's working directory
over_unders = pd.read_csv(filepath_or_buffer='bets_lite_22.csv')

## Merge the CSV with the Prediction Table

In [11]:
# Join the scraped projections with the bets on the shared 'Teams' column
# (default inner join: only teams present in both frames are kept)
master_projections = projections.merge(over_unders, on='Teams')

## List of characters representing participants in the Bets Table

In [12]:
# (participant name, column prefix identifying that participant in the bets table)
_participants = (
    ('Allie', 'L'),
    ('Amanda', 'G'),
    ('Antonio', 'A'),
    ('Morgan', 'M'),
    ('Nick', 'N'),
    ('538', '538'),
)
migos = [{'name': name, 'character': character} for name, character in _participants]

## Create a function to calculate the # of correct Over Unders

In [13]:
def number_right(a_list):
    """
    Score every participant's pick against the projected win total.

    For each dict in ``a_list`` (its 'character' key names the participant's
    pick column), a new '<character> #' column is written to the global
    ``master_projections`` frame. A row gets 1 when the pick is 'O' and the
    projected wins are above the Over/Under line, or the pick is 'U' and the
    projected wins are below it; every other row gets 0.
    """
    for bettor in a_list:
        code = bettor['character']
        projected = master_projections['Projected Wins']
        lines = master_projections['Over/Under']
        picks = master_projections['%s' % code]
        scores = []
        for wins, line, pick in zip(projected, lines, picks):
            won = int(wins)
            hit_over = won > line and pick == 'O'
            hit_under = won < line and pick == 'U'
            scores.append(1 if (hit_over or hit_under) else 0)
        master_projections['%s #' % code] = pd.Series(scores).values

## Call the number_right function, and pass in the list of friends

In [14]:
# Adds one '<character> #' column per participant to master_projections
number_right(migos)

## Create a function to calculate the amount of $ won

In [15]:
def get_money(a_list):
    """
    Compute the money won by every participant.

    For each dict in ``a_list``, multiplies the participant's '<character> Bets'
    column by their '<character> #' column row by row and stores the products
    in a new '<character> $' column of the global ``master_projections`` frame.
    """
    for bettor in a_list:
        code = bettor['character']
        stakes = master_projections['%s Bets' % code]
        results = master_projections['%s #' % code]
        winnings = [stake * result for stake, result in zip(stakes, results)]
        master_projections['%s $' % code] = pd.Series(winnings).values

## Call the get_money function, and pass in the list of friends

In [16]:
# Adds one '<character> $' column per participant; needs the '<character> #' columns
get_money(migos)

## Create Pandas dataFrame with the Total Correct O/Us and Total Money Won

In [17]:
# Collect, per participant, the name plus the column sums of correct picks and money
peeps = []
temp_total = []
temp_money = []
for person in migos:
    peeps.append(person['name'])
    temp_total.append(master_projections['%s #' % person['character']].sum())
    temp_money.append(master_projections['%s $' % person['character']].sum())
peeps_sorted = sorted(peeps)

# Row 0 holds the correct-pick counts, row 1 the money; columns are the names
total = [dict(zip(peeps, temp_total)), dict(zip(peeps, temp_money))]
final_total = pd.DataFrame(total).rename(index={0: 'Correct O/Us', 1: 'Money Won'})

## Check to see if the proper directories exist, and make them if they don't

In [18]:
mp_path = Path("output/master_projections")
ft_path = Path("output/final_total")

# mkdir(parents=True, exist_ok=True) is already a no-op for an existing directory,
# so no separate exists() check is needed
for output_dir in (mp_path, ft_path):
    output_dir.mkdir(parents=True, exist_ok=True)

## Save master_projections & final_total as CSVs to their respective directories

In [19]:
# Take the timestamp once so both files written by this run share the same suffix
# (two separate strftime calls can straddle a second boundary)
run_stamp = time.strftime("%m_%d_%Y_%H_%M_%S")
master_projections.to_csv('output/master_projections/538_master_projections_' + run_stamp + '.csv', encoding='utf-8')
final_total.to_csv('output/final_total/538_final_total_' + run_stamp + '.csv', encoding='utf-8')

## Create a function to graph the results of the bet over time

In [20]:
def final_line_graph(wins_ylim=(8, 26), money_ylim=(140, 500)):
    """
    Plot every saved final_total snapshot as a time series.

    Reads all files matching ``output/final_total/538_*.csv``, takes the
    timestamp from each file name, and draws one line per person listed in
    ``migos`` on two stacked sub-plots that share the x axis: the number of
    correct over/unders on top and the money won below.

    Parameters
    ----------
    wins_ylim : tuple, optional
        (low, high) y-axis limits of the upper (wins) sub-plot.
    money_ylim : tuple, optional
        (low, high) y-axis limits of the lower (money) sub-plot.

    Raises
    ------
    ValueError
        If no snapshot file with a recognisable timestamp is found.
    """
    # Raw string so the escapes reach the regex engine instead of being read as
    # (invalid) string escapes; group(1) is the timestamp part of the file name
    stamp_pattern = re.compile(
        r'538_final_total_([0-9]{2}_[0-9]{2}_[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2})\.csv'
    )

    records = []
    for csv_path in glob.glob('output/final_total/538_*.csv'):
        stamp = stamp_pattern.search(csv_path)
        if stamp is None:
            # The glob is broader than the timestamp pattern; skip other files
            continue
        snapshot = pd.read_csv(csv_path)
        for person in migos:
            name = person['name']
            records.append({
                'name': name,
                # Row 0 is 'Correct O/Us' and row 1 is 'Money Won' in the saved file
                'wins': snapshot[name].iloc[0],
                'money': snapshot[name].iloc[1],
                'datetime': stamp.group(1),
            })
    if not records:
        raise ValueError("No timestamped final_total CSVs found in output/final_total")

    # Build the frame once from all records instead of concatenating one-row frames
    history = pd.DataFrame(records, columns=['name', 'wins', 'money', 'datetime'])
    history['datetime'] = pd.to_datetime(history['datetime'], format='%m_%d_%Y_%H_%M_%S')
    history = history.sort_values('datetime', ascending=True).reset_index(drop=True)

    # The style has to be selected before the figure is created to affect it
    plt.style.use('ggplot')
    fig, (ax1, ax2) = plt.subplots(2, 1, num="Standings Over Time", sharex=True)
    for name, person_history in history.groupby('name'):
        # Label each line with its own group key so the legend cannot get out of
        # step with the plotting order
        person_history.plot(x='datetime', y='wins', ax=ax1, ylim=wins_ylim, label=name)
        person_history.plot(x='datetime', y='money', ax=ax2, ylim=money_ylim, legend=False)
    ax1.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0.,
               ncol=history['name'].nunique(), fancybox=True, shadow=True)
    fig.autofmt_xdate()
    plt.show()

## Create a function to calculate the # of locked Over Unders

In [23]:
def calc_locked():
    """
    Flag the teams whose over/under can no longer change.

    Works on the global ``master_projections`` frame. A row is locked as 'O'
    when its current wins already exceed the Over/Under line, and as 'U' when
    current wins plus games left stay below the line. Writes two new columns:
    'Is Locked' (1 or 0) and 'Locked Outcome' ('O', 'U' or an empty string).
    """
    lock_flags = []
    outcomes = []
    rows = zip(
        master_projections['Current Wins'],
        master_projections['Over/Under'],
        master_projections['Games Left'],
    )
    for wins, line, remaining in rows:
        won = int(wins)
        ceiling = won + int(remaining)  # most wins still reachable
        if won > line:
            lock_flags.append(1)
            outcomes.append('O')
        elif ceiling < line:
            lock_flags.append(1)
            outcomes.append('U')
        else:
            lock_flags.append(0)
            outcomes.append('')

    master_projections['Is Locked'] = pd.Series(lock_flags).values
    master_projections['Locked Outcome'] = pd.Series(outcomes).values

## Call the calc_locked function

In [24]:
# Adds the 'Is Locked' and 'Locked Outcome' columns to master_projections
calc_locked()

## Create a function to calculate the # of locked Over Unders of individual bettors

In [27]:
def locked_right(a_list):
    """
    Score every participant's pick against outcomes that are already decided.

    For each dict in ``a_list`` (its 'character' key names the participant's
    pick column), a new '<character> Locked #' column is written to the global
    ``master_projections`` frame. A row gets 1 when the pick is 'O' and current
    wins already exceed the Over/Under line, or the pick is 'U' and current
    wins plus games left stay below the line; every other row gets 0.
    """
    for bettor in a_list:
        code = bettor['character']
        rows = zip(
            master_projections['Current Wins'],
            master_projections['Over/Under'],
            master_projections['Games Left'],
            master_projections['%s' % code],
        )
        locked_scores = []
        for wins, line, remaining, pick in rows:
            won = int(wins)
            ceiling = won + int(remaining)  # most wins still reachable
            over_hit = won > line and pick == 'O'
            under_hit = ceiling < line and pick == 'U'
            locked_scores.append(1 if (over_hit or under_hit) else 0)
        master_projections['%s Locked #' % code] = pd.Series(locked_scores).values

## Call the locked_right function, and pass in the list of friends

In [28]:
# Adds one '<character> Locked #' column per participant to master_projections
locked_right(migos)

## Create a function to calculate the amount of locked $ won

In [29]:
def locked_money(a_list):
    """
    Compute the money every participant has already secured.

    For each dict in ``a_list``, multiplies the participant's '<character> Bets'
    column by their '<character> Locked #' column row by row and stores the
    products in a new '<character> Locked $' column of the global
    ``master_projections`` frame.
    """
    for bettor in a_list:
        code = bettor['character']
        stakes = master_projections['%s Bets' % code]
        locked_results = master_projections['%s Locked #' % code]
        secured = [stake * result for stake, result in zip(stakes, locked_results)]
        master_projections['%s Locked $' % code] = pd.Series(secured).values

## Call the locked_money function, and pass in the list of friends

In [30]:
# Adds one '<character> Locked $' column per participant; needs the '<character> Locked #' columns
locked_money(migos)

## Create Pandas dataFrame with the Total Locked O/Us and Total Locked Money Won

In [31]:
# Per participant: column sums of the locked correct picks and the locked money
temp_locked_total = []
temp_locked_money = []
for person in migos:
    temp_locked_total.append(master_projections['%s Locked #' % person['character']].sum())
    temp_locked_money.append(master_projections['%s Locked $' % person['character']].sum())

# Row 0 holds the locked correct-pick counts, row 1 the locked money; columns are the names
new_total = [dict(zip(peeps, temp_locked_total)), dict(zip(peeps, temp_locked_money))]
locked_total = pd.DataFrame(new_total).rename(index={0: 'Correct O/Us', 1: 'Money Won'})

## Create function to retrieve locked teams and their outcome

In [42]:
def get_locked_teams():
    """Return the 'Teams' and 'Locked Outcome' columns of the rows whose 'Is Locked' flag equals 1."""
    is_locked = master_projections['Is Locked'] == 1
    wanted_columns = ['Teams', 'Locked Outcome']
    return master_projections.loc[is_locked, wanted_columns]