# Overview
The goal of this project is to predict the final score of an NBA playoff game based on the previous scoring activity of the teams playing on a given day. The included code is meant to be run on the same day that the playoff game takes place.

In [1]:
#Install the nba_api to pull down nba stats
# pip install nba_api

In [2]:
#Importing Packages
import pandas as pd
import pprint
from nba_api.stats.endpoints import playercareerstats, leaguegamefinder
from nba_api.live.nba.endpoints import scoreboard

In [3]:
#Hardcoding important season dates
# These are plain 'YYYY-MM-DD' strings. Later cells compare them directly
# against the GAME_DATE column with >=, <= and >, so the comparisons are
# lexicographic string comparisons.
# NOTE(review): this assumes GAME_DATE is also a 'YYYY-MM-DD' string - confirm.
# Lower bound for "this season" and for the is_regular_season flag
season_start_date = '2023-10-24'
# Upper bound for is_regular_season / is_past_trade_deadline; games dated after it are flagged is_post_season
season_end_date = '2024-04-14'
# Lower bound for the is_past_trade_deadline flag
trade_deadline_date = '2024-02-08'

In [4]:
# Pull in the current day's games from the NBA API and store the values as a list
def get_todays_games():
    """
    Fetches the live NBA scoreboard and returns the games it lists.
    -
    Input:
    None
    -
    Output:
    list of dictionaries : the ['scoreboard']['games'] entry of the
    scoreboard payload, one dictionary per game
    """
    scoreboard_payload = scoreboard.ScoreBoard().get_dict()
    return scoreboard_payload['scoreboard']['games']

In [5]:
def get_game_index(games_list=None):
    """
    Asks the user which game (from a list of games) they would like to view.
    -
    Input:
    games_list : list of dictionaries, optional
        Games to choose from. Each entry must provide
        ['homeTeam']['teamName'] and ['awayTeam']['teamName'].
        When omitted, the current day's games are pulled with get_todays_games().
    -
    Output:
    selection_index : int
        Zero-based position of the chosen game within the list of games.
    -
    Raises:
    ValueError : when there are no games to choose from. Without this check
        the prompt loop below could never accept any input and would run forever.
    """
    # Pull in the current day's games from the NBA API unless the caller supplied them
    if games_list is None:
        games_list = get_todays_games()

    # With no games, no selection can ever be valid, so fail fast instead of looping
    if not games_list:
        raise ValueError("No games are available to select from.")

    # Present the matchup names to the user as a 1-based menu
    print("Select a game:")
    for menu_number, game in enumerate(games_list, start=1):
        matchup_name = f"{game['homeTeam']['teamName']} vs {game['awayTeam']['teamName']}"
        print(f"{menu_number}. {matchup_name}")

    # Keep prompting until the user enters a number that maps to a listed game
    while True:
        selection = input("Enter the number of the game you're interested in: ")
        try:
            # Convert the 1-based menu number into a 0-based list index
            selection_index = int(selection) - 1
        except ValueError:
            print("Invalid input. Please enter a valid number.")
            continue

        if 0 <= selection_index < len(games_list):
            return selection_index
        print("Invalid selection. Please enter a number within the range.")

In [6]:
def get_game_information(user_index, games_list=None):
    """
    Creates a dictionary that contains information about the user requested game
    -
    Input:
    user_index : int
        Zero-based position of the requested game in the list of games.
    games_list : list of dictionaries, optional
        Games to look the index up in. Each entry must provide 'teamId' and
        'teamTricode' under both ['homeTeam'] and ['awayTeam'].
        When omitted, the current day's games are pulled with get_todays_games().
    -
    Output:
    team_id_dict : dictionary
        'home_team_id' / 'away_team_id' plus the four head-to-head MATCHUP
        strings (both "vs." and "@" forms, in both team orders).
    -
    Raises:
    IndexError : when user_index is outside the list of games.
    """
    # Pull in the current day's games from the NBA API unless the caller supplied them
    if games_list is None:
        games_list = get_todays_games()

    #Get the relevant game based on the user's selection
    game = games_list[user_index]

    # The MATCHUP column is matched against these strings, and it identifies both
    # teams by tricode (e.g. "BOS vs. IND", "IND @ BOS"). Mixing a tricode with a
    # team name (e.g. "BOS vs. Pacers") can therefore never match a row.
    home_code = game['homeTeam']['teamTricode']
    away_code = game['awayTeam']['teamTricode']

    team_id_dict = {
        'home_team_id' : game['homeTeam']['teamId'],
        'away_team_id' : game['awayTeam']['teamId'],
        'h2h_home_team_home_matchup_string' : f"{home_code} vs. {away_code}",
        'h2h_home_team_away_matchup_string' : f"{away_code} vs. {home_code}",
        'h2h_away_team_home_matchup_string' : f"{home_code} @ {away_code}",
        'h2h_away_team_away_matchup_string' : f"{away_code} @ {home_code}"
        }

    return team_id_dict

In [7]:
def get_game_dataframes(user_index):
    """
    Builds one DataFrame holding the games played by both teams of the
    selected matchup, restricted to rows dated on or after season_start_date.
    -
    Input:
    user_index : int
    -
    Output:
    matchup_df : Pandas DataFrame
    """
    # Look up the team ids for the selected game
    game_info = get_game_information(user_index)

    def fetch_team_games(team_id):
        # First DataFrame returned by the LeagueGameFinder endpoint for this team
        finder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
        return finder.get_data_frames()[0]

    home_games_all = fetch_team_games(game_info['home_team_id'])
    away_games_all = fetch_team_games(game_info['away_team_id'])

    # Keep only games dated on or after the season start, for each team
    home_games_season = home_games_all.loc[home_games_all['GAME_DATE'] >= season_start_date]
    away_games_season = away_games_all.loc[away_games_all['GAME_DATE'] >= season_start_date]

    # Outer merge with no explicit keys: pandas joins on every shared column,
    # so rows from both frames are kept
    combined_games = home_games_season.merge(away_games_season, how='outer')

    # Return the combined frame with a fresh 0-based index
    return combined_games.reset_index(drop=True)

In [8]:
def create_dataframe_categories(user_index, matchup_df):
    """
    Adds boolean category columns to the matchup DataFrame.
    The columns are written onto the DataFrame that was passed in, and that
    same DataFrame is returned.
    These categories help to isolate games of interest.
    -
    Input:
    user_index : int
    matchup_df : Pandas DataFrame
    -
    Output:
    matchup_df : Pandas DataFrame
    """
    # Look up the head-to-head matchup strings for the selected game
    game_info = get_game_information(user_index)

    game_dates = matchup_df['GAME_DATE']

    # Dated within [season_start_date, season_end_date], both ends inclusive
    matchup_df['is_regular_season'] = game_dates.between(season_start_date, season_end_date)

    # Dated within [trade_deadline_date, season_end_date], both ends inclusive
    matchup_df['is_past_trade_deadline'] = game_dates.between(trade_deadline_date, season_end_date)

    # MATCHUP equals any of the four head-to-head strings
    h2h_keys = (
        'h2h_home_team_home_matchup_string',
        'h2h_home_team_away_matchup_string',
        'h2h_away_team_home_matchup_string',
        'h2h_away_team_away_matchup_string',
    )
    h2h_strings = [game_info[key] for key in h2h_keys]
    matchup_df['is_h2h_matchup'] = matchup_df['MATCHUP'].isin(h2h_strings)

    # Dated strictly after season_end_date
    matchup_df['is_post_season'] = game_dates > season_end_date

    return matchup_df

In [9]:
def create_team_stats_dict(user_index, matchup_df):
    """
    Averages the scoring and shooting columns of the matchup DataFrame per team.
    -
    Input:
    user_index : int (not used by this function)
    matchup_df : Pandas DataFrame
    -
    Output:
    team_stats_df : Pandas DataFrame with one row per TEAM_ID and the columns
    TEAM_ID, AVG_PTS, AVG_FGA, AVG_FGM, AVG_FG3A, AVG_FG3M
    """
    stat_columns = ['PTS', 'FGA', 'FGM', 'FG3A', 'FG3M']

    return (
        matchup_df[['TEAM_ID'] + stat_columns]
        # Mean of every stat column for each team
        .groupby('TEAM_ID')
        .mean()
        # Label each stat column as an average
        .rename(columns={column: f"AVG_{column}" for column in stat_columns})
        # Move TEAM_ID back to a regular column with a 0-based index
        .reset_index()
    )

In [10]:
#Example of UI for selecting aggregated DataFrame
# 1. Prompt for one of the listed games; the result is a 0-based index
user_index = get_game_index()
# 2. Build the combined games DataFrame for the two teams in that game
matchup_df = get_game_dataframes(user_index)
# 3. Add the is_regular_season / is_past_trade_deadline / is_h2h_matchup / is_post_season flags
matchup_df = create_dataframe_categories(user_index, matchup_df)

# 4. Per-team averages; as the last expression of the cell, this is what gets displayed
create_team_stats_dict(user_index, matchup_df)

Select a game:
1. Celtics vs Pacers
Enter the number of the game you're interested in: 1


Unnamed: 0,TEAM_ID,AVG_PTS,AVG_FGA,AVG_FGM,AVG_FG3A,AVG_FG3M
0,1610612738,119.43617,89.361702,43.478723,42.010638,16.234043
1,1610612754,121.857143,92.020408,46.581633,35.102041,13.112245
