In [1]:
import requests
import pandas as pd
import time

from datetime import date
from datetime import timedelta

# Show every column when a wide DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [2]:
from functions import make_request
from functions import get_recent_games
from functions import clean_stats
from functions import aggregate_stats
from functions import get_stats

In [58]:
def get_team_code_map(df=False):
    """
    Look up the teams through the balldontlie "teams" endpoint.

    Parameters
    ----------
    df : bool, default False
        When truthy, return the team table itself instead of the lookup dictionary.

    Returns
    -------
    pandas.DataFrame or dict
        With ``df`` truthy: a frame indexed by team id with the columns
        "city", "abbreviation", "full_name" and "name".
        Otherwise: a dictionary mapping each of those values (lower-cased), and
        the team id written as a string, to the team id. When two teams share
        a value, the team that comes later in the table wins.
    """
    teams = make_request("teams", record_path="data")
    teams = teams[["id", "city", "abbreviation", "full_name", "name"]].set_index("id")

    lookup = {}
    for team_id, team_fields in teams.iterrows():
        # Every lower-cased field of the row resolves to this team's id.
        for alias in team_fields.str.lower().values:
            lookup[alias] = team_id
        # "1" -> 1: lets people type the numeric team code into a text box.
        lookup[str(team_id)] = team_id

    return teams if df else lookup

In [75]:
# df=True asks for the team table (a DataFrame indexed by team id) rather than the lookup dict.
team_codes = get_team_code_map(df=True)
team_codes

Unnamed: 0_level_0,city,abbreviation,full_name,name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Atlanta,ATL,Atlanta Hawks,Hawks
2,Boston,BOS,Boston Celtics,Celtics
3,Brooklyn,BKN,Brooklyn Nets,Nets
4,Charlotte,CHA,Charlotte Hornets,Hornets
5,Chicago,CHI,Chicago Bulls,Bulls
6,Cleveland,CLE,Cleveland Cavaliers,Cavaliers
7,Dallas,DAL,Dallas Mavericks,Mavericks
8,Denver,DEN,Denver Nuggets,Nuggets
9,Detroit,DET,Detroit Pistons,Pistons
10,Golden State,GSW,Golden State Warriors,Warriors


In [50]:
# Get a pandas dataframe of the 20 most recent home games for the following team
# NOTE: this code is not used. It's a template for what is now get_recent_games() in functions.py
home_team_id = 1

# date.isoformat() always yields zero-padded yyyy-mm-dd; formatting year/month/day
# through an f-string would give e.g. 2021-1-5 for single-digit months and days.
current_day = date.today()
today = current_day.isoformat()                                   # end of the query window
one_year_ago = (current_day - timedelta(days=365)).isoformat()    # start of the query window

res = make_request("games", record_path="data", params={"end_date": today,
                                                        "start_date": one_year_ago,
                                                        "team_ids[]": [home_team_id],
                                                        "page": "1",
                                                        "per_page": "100"})

res = res.sort_values("date", ascending=False)       # newest games first
res = res[res["home_team.id"].eq(home_team_id)]      # keep only games the team played at home

# TODO: drop rows whose "time" is not an empty string (meaning the game is still in progress)

# Take the first 20 rows directly; no empty frame + DataFrame.append (removed in pandas 2.0) is needed.
recent_games = res.head(20).copy()

In [51]:
# Display the home games selected in the previous cell (at most 20 rows, newest first).
recent_games

Unnamed: 0,id,date,home_team_score,period,postseason,season,status,time,visitor_team_score,home_team.id,home_team.abbreviation,home_team.city,home_team.conference,home_team.division,home_team.full_name,home_team.name,visitor_team.id,visitor_team.abbreviation,visitor_team.city,visitor_team.conference,visitor_team.division,visitor_team.full_name,visitor_team.name
38,473700,2021-11-27T00:00:00.000Z,90,4,False,2021,Final,,99,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,20,NYK,New York,East,Atlantic,New York Knicks,Knicks
46,473663,2021-11-22T00:00:00.000Z,113,4,False,2021,Final,,101,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,21,OKC,Oklahoma City,West,Northwest,Oklahoma City Thunder,Thunder
35,473652,2021-11-20T00:00:00.000Z,115,4,False,2021,Final,,105,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,4,CHA,Charlotte,East,Southeast,Charlotte Hornets,Hornets
56,473621,2021-11-17T00:00:00.000Z,110,4,False,2021,Final,,99,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,2,BOS,Boston,East,Atlantic,Boston Celtics,Celtics
43,473607,2021-11-15T00:00:00.000Z,129,4,False,2021,Final,,111,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,22,ORL,Orlando,East,Southeast,Orlando Magic,Magic
55,473603,2021-11-14T00:00:00.000Z,120,4,False,2021,Final,,100,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,17,MIL,Milwaukee,East,Central,Milwaukee Bucks,Bucks
52,473528,2021-11-04T00:00:00.000Z,98,4,False,2021,Final,,116,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,29,UTA,Utah,West,Northwest,Utah Jazz,Jazz
47,473505,2021-11-01T00:00:00.000Z,118,4,False,2021,Final,,111,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,30,WAS,Washington,East,Southeast,Washington Wizards,Wizards
44,473454,2021-10-25T00:00:00.000Z,122,4,False,2021,Final,,104,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,9,DET,Detroit,East,Central,Detroit Pistons,Pistons
34,473424,2021-10-21T00:00:00.000Z,113,4,False,2021,Final,,87,1,ATL,Atlanta,East,Southeast,Atlanta Hawks,Hawks,7,DAL,Dallas,West,Southwest,Dallas Mavericks,Mavericks


In [82]:
# Team ids are passed as strings: 22 is the Orlando Magic and 3 is the Brooklyn Nets
# (see the team and game tables displayed above).
# NOTE(review): presumably the first argument is the home team and the second the away
# team, matching the unpacked names; confirm against get_recent_games in functions.py.
game_ids_home, game_ids_away = get_recent_games("22","3")

In [47]:
def get_stats(game_ids_home, game_ids_away):
    """
    This function makes requests to the balldontlie API for stats from specific games
    and turns them into the single input row the model expects.

    NOTE: this notebook-local definition shadows the get_stats imported from
    functions earlier in the notebook.

    The arguments for this function should be:
    1. a list of the 20 most recent game ids for the home team
    2. a list of the 20 most recent game ids for the away team

    The order matters. Putting the away team as the first argument and home team as the
    second will produce inaccurate results, because the first list is filtered on
    "game.home_team_id" and the second on "game.visitor_team_id".

    The function returns a Numpy array of shape (1, 54) that the model is expecting as
    input: the 18 averaged home stats, the 18 averaged away stats, then home minus away
    for each stat, all in the order of stats_cols.
    """
    stats_cols = ["ast", "blk", "dreb", "fg3_pct", "fg3a", "fg3m", "fg_pct", "fga", "fgm",
                  "ft_pct", "fta", "ftm", "oreb", "pf", "pts", "reb", "stl", "turnover"]

    def format_params(game_ids):
        """
        Format query parameters in a format the balldontlie API accepts
        e.g. ?game_ids[]=345686&game_ids[]=234356&game_ids[]=3456356...
        """
        params = [("game_ids[]", game_id) for game_id in game_ids]
        # NOTE(review): a single page of 100 rows is requested; whether make_request
        # follows pagination is not visible here. Confirm that every player row for
        # all requested games is actually retrieved.
        params.append(("per_page", 100))
        return params

    def team_averages(game_ids, side_column):
        """
        Return a pandas Series with the mean of each stat in stats_cols for the team
        whose id is found in side_column ("game.home_team_id" or "game.visitor_team_id").
        """
        raw = make_request("stats", record_path="data", params=format_params(game_ids))
        cleaned = clean_stats(raw)
        # Keep only the players who played for the team of interest. The explicit copy
        # avoids handing a slice of `cleaned` to aggregate_stats; the notebook run
        # emitted a SettingWithCopyWarning. NOTE(review): confirm this is where the
        # warning originates.
        own_players = cleaned[cleaned["team.id"].eq(cleaned[side_column])].copy()
        team_totals = aggregate_stats(own_players)   # individual player stats -> team stats
        return team_totals[stats_cols].mean()        # drop non-stat columns, then average

    stats_home = team_averages(game_ids_home, "game.home_team_id")
    stats_away = team_averages(game_ids_away, "game.visitor_team_id")

    # The difference must be taken while both Series still share the same labels.
    stats_diff = stats_home - stats_away

    # Rename the labels and put it all together
    stats_home.index = "home_" + stats_home.index
    stats_away.index = "away_" + stats_away.index
    stats_diff.index = "diff_" + stats_diff.index

    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    stats = pd.concat([stats_home, stats_away, stats_diff])
    model_input = stats.values.reshape(1, -1)

    return model_input

In [83]:
# Build the model input row; the home team's game ids go first, the away team's second.
stats = get_stats(game_ids_home, game_ids_away)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [84]:
# Display the array returned by get_stats (a single row).
stats

array([[ 2.00000000e+01,  4.30000000e+00,  3.22500000e+01,
         2.81646105e+01,  3.36000000e+01,  1.06000000e+01,
         3.98489493e+01,  8.64000000e+01,  3.56500000e+01,
         4.60851507e+01,  2.01000000e+01,  1.54500000e+01,
         9.35000000e+00,  1.74500000e+01,  9.73500000e+01,
         4.16000000e+01,  7.05000000e+00,  1.33000000e+01,
         2.16500000e+01,  5.00000000e+00,  3.49500000e+01,
         2.67874977e+01,  3.39500000e+01,  1.29500000e+01,
         4.06367328e+01,  8.42500000e+01,  3.82500000e+01,
         4.24293569e+01,  2.13000000e+01,  1.75500000e+01,
         8.60000000e+00,  1.98500000e+01,  1.07000000e+02,
         4.35500000e+01,  6.40000000e+00,  1.32500000e+01,
        -1.65000000e+00, -7.00000000e-01, -2.70000000e+00,
         1.37711285e+00, -3.50000000e-01, -2.35000000e+00,
        -7.87783536e-01,  2.15000000e+00, -2.60000000e+00,
         3.65579383e+00, -1.20000000e+00, -2.10000000e+00,
         7.50000000e-01, -2.40000000e+00, -9.65000000e+0

### Testing the model

In [7]:
import pickle

In [8]:
# Load the saved model from disk. The context manager closes the file handle,
# which a bare open() passed straight to pickle.load would leave open.
# NOTE: pickle.load can execute arbitrary code; only load a model.sav you trust.
with open("model.sav", "rb") as model_file:
    model = pickle.load(model_file)

In [86]:
# Predict the class label for the single input row built by get_stats.
prediction = model.predict(stats)
prediction

array([0])

In [92]:
# Probability the model assigns to the class at index 0 for the first (only) input row.
# NOTE(review): which outcome index 0 stands for is not shown here; confirm against the model's class ordering.
model.predict_proba(stats)[0][0]

0.7484022927067175