In [5]:
# IMPORTS
# data-related libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

# api-related libraries
import requests

In [129]:
# STEP 0 - INPUT CORRECT VARIABLES

# players you are querying for
all_players = [
    {"first_name": "Anthony", "last_name":"Edwards", "stat_value": 3.5},
    {"first_name": "Ja", "last_name":"Morant", "stat_value": 1.5},
    {"first_name": "Desmond", "last_name":"Bane", "stat_value": 2.5},
    {"first_name": "Jaren", "last_name":"Jackson Jr.", "stat_value": 1.5},
    {"first_name": "Dillon", "last_name":"Brooks", "stat_value": 1.5},
    {"first_name": "Mike", "last_name":"Conley", "stat_value": 1.5},
#     {"first_name": "Scottie", "last_name":"Barnes", "stat_value": 16.5},
#     {"first_name": "Collin", "last_name":"Sexton", "stat_value": 18.5},
#     {"first_name": "Kelly", "last_name":"Olynyk", "stat_value": 10.5},

]
_stat = "fg3m"
current_season = 2022



In [119]:
# STEP 1 - FIND STATS OF ALL PLAYERS

needed_players = []

for p in all_players:
    possible_players = []
    for page in range(1, 5):
        res = (requests.get(f"https://www.balldontlie.io/api/v1/players?search={p['first_name']}&search={p['last_name']}&page={page}"))
        res.raise_for_status()
        possible_players += res.json()['data']
    for pp in possible_players:
        f_name = pp['first_name']
        l_name = pp['last_name']
        if (f_name == p['first_name'] and l_name == p['last_name']):
            # player is found
            needed_players.append(pp)
            

In [120]:
# STEP 2 - GET ALL GAMES FOR THIS SEASON FOR EACH PLAYER

player_stats = {}

for p in needed_players:
    all_games = []
    full_name = f"{p['first_name']} {p['last_name']}"
    for page in range(1, 4):
        res = (requests.get(f"https://www.balldontlie.io/api/v1/stats?player_ids[]={p['id']}&seasons[]={current_season}&page={page}"))
        res.raise_for_status()
        if full_name in player_stats:
            player_stats[full_name] += res.json()['data']
        else:
            player_stats[full_name] = res.json()['data']


In [114]:
# HELPER FUNCTIONS
all_stat_attributes = ["ast", "blk", "reb", "pts", "fg3a", "fg3m", "dreb", "fg3_pct", "fg_pct", "fga", "fgm", "ft_pct", "fta", "ftm", "oreb", "pf", "stl", "turnover"]

def removeUnplayedGames(df):
    return df[(df["ast"] != 0) 
              | (df["blk"] != 0) 
              | (df["reb"] != 0) 
              | (df["stl"] != 0) 
              | (df["pts"] != 0) 
              | (df["fg3a"] != 0) 
              | (df["fg3m"] != 0) 
              | (df["dreb"] != 0) 
              | (df["fg3_pct"] != 0) 
              | (df["fga"] != 0) 
              | (df["ftm"] != 0) 
              | (df["oreb"] != 0)
              | (df["pf"] != 0) 
              | (df["stl"] != 0) 
              | (df["turnover"] != 0)]



In [115]:
# SINGLE PARLAY - SINGLE PLAYER
# Be able to see ALL games where this player met a SINGLE stat

# Based on FanDuel
# pts 10-35 (increments of 5)
# reb 4-16 (increments of 2)
# ast 2-12 (increments of 2)
# fg3m 1-5 (increments of 1)

parlay_options = {
    "pts": [7.5, 8, 8.5, 10, 10.5, 11, 12, 13, 14, 15, 15.5, 16.5, 17.5, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 30.5, 31.5, 35, 35.5, 40],
    'reb': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12.5, 14, 16],
    "ast": [2, 2.5, 3, 4, 5, 6, 7, 8, 10, 12],
    "fg3m": [1, 2, 3, 4, 5]
}

# df = dataframe
# stat = "pts, reb, ast, fg3m"
# player = "*first_name + last_name* E.g: Stephen Curry"
def filterSingleParlay(df, stat, player):
    for amount in parlay_options[stat]:
        filtered_df = df[(df[stat] >= amount)]
        
        fraction = len(filtered_df) / len(df)
        percentage = round(100 * fraction, 2)
        print(f"{player} had {len(filtered_df)} / {len(df)} games with {amount}+ {stat}: {percentage}%")
#         print(filtered_df)
        

In [116]:
# MULTIPLE PARLAY - SINGLE PLAYER #
# Be able to see ALL games where this player met MULTIPLE stats

# df = dataframe
# stats = {"pts": 15, "ast": 2, "fg3m": 2}
# player = "*first_name + last_name* E.g: Stephen Curry"
def filterMultipleParlay(df, stats, player):
    filtered_expression = ""
    for s in stats:
        stat = s
        amount = stats[s]
        filtered_expression += f"{s} >= {amount} & "
    filtered_expression = filtered_expression[:-2]
    filtered_df = df.query(filtered_expression)
    
    fraction = len(filtered_df) / len(df)
    percentage = round(100 * fraction, 2)
#     print(f"{player} had {len(filtered_df)}/{len(df)} games with {filtered_expression}: {percentage}%")
#     print(filtered_df)
    return {"filtered": filtered_df, "unfiltered": df}

        
    

In [130]:
# Analyzing Single Player
visible_attributes = ["id", "date", "ast", "blk", "reb", "stl", "pts", "fg3a", "fg3m"]
needed_stats = ["ast", "reb", "pts", "fg3m"]

# sp_sp = single player and single parlay
# sp_mp = single player and multiple parlay

def sp_sp(player_full_name, stat):
    df = pd.DataFrame(player_stats[player_full_name])
    df['date'] = df['game'].apply(lambda x: x.get('date'))
    df = removeUnplayedGames(df)
    df = df[visible_attributes]
    sorted_df = df.sort_values(by="date", ascending=True)
    filterSingleParlay(sorted_df, stat, player_full_name)

def sp_mp(player_full_name, parlays):
    df = pd.DataFrame(player_stats[player_full_name])
    df['date'] = df['game'].apply(lambda x: x.get('date'))
    df = removeUnplayedGames(df)
    df = df[visible_attributes]
    sorted_df = df.sort_values(by="date", ascending=True)
    return filterMultipleParlay(sorted_df, parlays ,player_full_name)

for player in all_players:
    full_name = player['first_name'] + " " + player['last_name']
#     sp_sp(full_name, _stat)
    df_stats = sp_mp(full_name, {_stat: player["stat_value"]})
    filtered_len = len(df_stats['filtered']) 
    unfiltered_len = len(df_stats['unfiltered']) 
    over_prob = round(100 * (filtered_len / unfiltered_len))
    under_prob = round(100 - over_prob)
    print(f"{full_name} {_stat}")
    print(f"O {player['stat_value']} = {over_prob}% || U {player['stat_value']} = {under_prob}%")
    print("--------------------------------------")
    
# sp_mp("Devin Booker", { "reb": 3.5, "ast": 4.5})

Anthony Edwards fg3m
O 3.5 = 26% || U 3.5 = 74%
--------------------------------------
Ja Morant fg3m
O 1.5 = 51% || U 1.5 = 49%
--------------------------------------
Desmond Bane fg3m
O 2.5 = 62% || U 2.5 = 38%
--------------------------------------
Jaren Jackson Jr. fg3m
O 1.5 = 49% || U 1.5 = 51%
--------------------------------------
Dillon Brooks fg3m
O 1.5 = 43% || U 1.5 = 57%
--------------------------------------
Mike Conley fg3m
O 1.5 = 49% || U 1.5 = 51%
--------------------------------------


In [80]:
# Analyzing Multiple Players
player_parlays = {
    "Devin Booker": {"reb": 3.5, "ast": 4.5},
#     "Terance Mann": { "pts": 13.5}
}


# go through each player to find the games for their parlays
filtered_players_games = {} # {[player]: dataframe}
for player in player_parlays:
    filtered_players_games[player] = sp_mp(player, player_parlays[player])

# merge all the games together based on the game (we are using date)
filtered_players_games = list(filtered_players_games.values()) # reformat the obj into array
merged_df = pd.DataFrame(filtered_players_games[0])
for i in range(1, len(filtered_players_games)):
    new_df =  pd.DataFrame(filtered_players_games[i])
    merged_df = merged_df.merge(new_df, on="date", how="inner")

# get all the games where these players played together in general (we are using date)
all_players_games = {}
for player in player_parlays:
    all_players_games[player] = player_stats[player]
all_players_games2 = list(all_players_games.values())


merged_df2 = pd.DataFrame(all_players_games2[0])
merged_df2['date'] = merged_df2['game'].apply(lambda x: x.get('date'))
merged_df2 = removeUnplayedGames(merged_df2)
for i in range(1, len(all_players_games2)):
    new_df =  pd.DataFrame(all_players_games2[i])
    new_df['date'] = new_df['game'].apply(lambda x: x.get('date'))
    new_df = removeUnplayedGames(new_df)
    merged_df2 = merged_df2.merge(new_df, on='date', how='inner')
merged_df2 = merged_df2.sort_values(by="date", ascending=True)

print('---------------------------------------------------------------------------')
fraction = len(merged_df) / len(merged_df2)
percentage = round(100 * fraction, 2)
print(f"These {len(filtered_players_games)} players have played {len(merged_df)}/{len(merged_df2)} games together with these stats: {percentage}%")
print(merged_df['date'])


KeyError: 'Devin Booker'

In [350]:
# BET SIZE BASED ON RISK PERCENTAGE - KELLY CRITERION FORMULA (f = (bp - q) / b)
# f is the fraction of the bankroll to bet (fraction of my total money)
# b is the odds of winning expressed as a decimal, with 1 representing even odds
# p is the probability of winning (in decimal)
# q is the probability of losing (1 - p)

# variables I need to change
total_money = 45.73
bookmaker_odds = 114
p = .5476

# variables I leave alone
b = (abs(bookmaker_odds) / 100) + 1
q = 1 - p

f = (b * p - q) / b
print(f"According to Kelly Criterion, you should bet ${f * total_money}")

According to Kelly Criterion, you should bet $15.374340523364483


In [None]:
# The closer the odds are proportionally, the better
