In [1]:
# Load modules
import pandas as pd
import numpy as np

In [2]:
# Read the four input tables in one pass: hustle stats and team game stats
# for the 2018-2019 season, then the same pair for the 2020-2021 season.
season_2018, team_2018, season_2020, team_2020 = map(
    pd.read_csv,
    (
        "Data/season18_19.csv",
        "Data/team18_19.csv",
        "Data/season20_21.csv",
        "Data/team20_21.csv",
    ),
)

In [3]:
# Attach the 2018-2019 shot-attempt columns (FGA, FG3A) from the team table to
# the hustle table, keeping only rows whose (GAME_ID, TEAM_ID) is in both.
all_2018 = pd.merge(
    left=season_2018,
    right=team_2018[['GAME_ID', 'TEAM_ID', 'FGA', 'FG3A']],
    on=['GAME_ID', 'TEAM_ID'],
    how='inner',
)

In [4]:
# Attach the 2020-2021 shot-attempt columns (FGA, FG3A) from the team table to
# the hustle table, keeping only rows whose (GAME_ID, TEAM_ID) is in both.
all_2020 = pd.merge(
    left=season_2020,
    right=team_2020[['GAME_ID', 'TEAM_ID', 'FGA', 'FG3A']],
    on=['GAME_ID', 'TEAM_ID'],
    how='inner',
)

In [5]:
# Stack the two seasons row-wise into a single table. Each season keeps its
# own row labels here (ignore_index=False is the pandas default).
all_data = pd.concat(objs=[all_2018, all_2020], axis=0, ignore_index=False)

In [6]:
# Two-point attempts = all field-goal attempts minus three-point attempts
all_data['FG2'] = all_data['FGA'] - all_data['FG3A']

# The concatenated table still carries each season's own row labels, so labels
# repeat. Renumber the rows 0..n-1.
#   * drop=True discards the old labels instead of storing them in a stray
#     'index' column.
#   * Reassigning (rather than inplace=True) keeps this cell safe to re-run:
#     the in-place, non-dropping form adds another column on every run and
#     eventually raises because 'level_0' already exists.
all_data = all_data.reset_index(drop=True)

In [7]:
# Build the per-team features for every game by comparing each team with its
# opponent. all_data must hold exactly two consecutive rows per game (one per
# team): the row at position 2k is paired with the row at position 2k + 1.
# Values are read by position, so the result does not depend on row labels.
n = all_data.shape[0]

# Validate the pairing up front. Without this check a single missing row would
# shift every later pair and silently compare teams from different games.
if n % 2 != 0:
    raise ValueError(
        "all_data has {} rows; expected an even number (two rows per game)".format(n)
    )
game_ids = all_data['GAME_ID'].to_numpy()
unpaired = np.flatnonzero(game_ids[0::2] != game_ids[1::2])
if unpaired.size:
    first_bad = 2 * int(unpaired[0])
    raise ValueError(
        "rows at positions {} and {} belong to different games; all_data must "
        "hold two consecutive rows per GAME_ID".format(first_bad, first_bad + 1)
    )


def _opponent_values(values):
    """Return a copy of ``values`` with every consecutive pair swapped.

    Position 2k receives the value from 2k + 1 and vice versa, so each row
    ends up holding its opponent's value. ``values`` must have even length.
    """
    swapped = np.empty_like(values)
    swapped[0::2] = values[1::2]
    swapped[1::2] = values[0::2]
    return swapped


def _difference_and_ratio(frame, column):
    """Compare one stat column between the two teams of every game.

    Returns a ``(difference, ratio)`` pair of lists, one entry per row:
      * difference - the team's value minus its opponent's value
      * ratio      - the team's share of the game total (team + opponent),
                     or 0 when the game total is 0
    """
    own = frame[column].to_numpy()
    opponent = _opponent_values(own)
    game_total = own + opponent
    # Divide only where the total is non-zero; every other slot keeps the 0.0
    # the output buffer was initialised with.
    share = np.divide(
        own,
        game_total,
        out=np.zeros(len(own), dtype=float),
        where=game_total != 0,
    )
    return (own - opponent).tolist(), share.tolist()


def _contest_rate(frame, contested_column, attempts_column):
    """Return, per row, the team's contested shots divided by the opponent's attempts.

    No zero guard is applied: a zero denominator gives inf/nan under numpy
    division, as the scalar division this replaces did.
    """
    contested = frame[contested_column].to_numpy()
    opponent_attempts = _opponent_values(frame[attempts_column].to_numpy())
    return (contested / opponent_attempts).tolist()


# Winner flag: 1 for the team that won, 0 for the team that lost. The second
# row of a pair wins only when it strictly outscored the first row, so equal
# scores credit the first row.
points = all_data['PTS'].to_numpy()
second_row_won = points[0::2] < points[1::2]
winner_flags = np.empty(n, dtype=int)
winner_flags[0::2] = ~second_row_won
winner_flags[1::2] = second_row_won
winner = winner_flags.tolist()

# Difference and share-of-game-total for each hustle statistic
deflections_difference, deflections_ratio = _difference_and_ratio(all_data, 'DEFLECTIONS')
charges_difference, charges_ratio = _difference_and_ratio(all_data, 'CHARGES_DRAWN')
screen_difference, screen_ratio = _difference_and_ratio(all_data, 'SCREEN_ASSISTS')
screen_pts_difference, screen_pts_ratio = _difference_and_ratio(all_data, 'SCREEN_AST_PTS')
loose_difference, loose_ratio = _difference_and_ratio(all_data, 'LOOSE_BALLS_RECOVERED')
off_box_difference, off_box_ratio = _difference_and_ratio(all_data, 'OFF_BOXOUTS')
def_box_difference, def_box_ratio = _difference_and_ratio(all_data, 'DEF_BOXOUTS')
box_rbs_difference, box_rbs_ratio = _difference_and_ratio(all_data, 'BOX_OUT_PLAYER_TEAM_REBS')

# Contested shot rate overall, and for 2- and 3-pointers individually: the
# team's contests divided by the shots its opponent attempted
contested_shot_rate = _contest_rate(all_data, 'CONTESTED_SHOTS', 'FGA')
contest_2_rate = _contest_rate(all_data, 'CONTESTED_SHOTS_2PT', 'FG2')
contest_3_rate = _contest_rate(all_data, 'CONTESTED_SHOTS_3PT', 'FG3A')
    

In [8]:
# Collect the identifying columns and every engineered feature into one
# mapping (column name -> values), then turn it into the modelling data frame.
d = {
    'GAME_ID': all_data['GAME_ID'],
    'TEAM_NAME': all_data['TEAM_NAME'],
    'PTS': all_data['PTS'],
    'WINNER': winner,
    'DEFLECTIONS_RATIO': deflections_ratio,
    'DEFLECTIONS_DIFF': deflections_difference,
    'CONTEST_RATE': contested_shot_rate,
    'CONTEST_RATE_2': contest_2_rate,
    'CONTEST_RATE_3': contest_3_rate,
    'CHARGES_RATIO': charges_ratio,
    'CHARGES_DIFF': charges_difference,
    'SCREEN_AST_RATIO': screen_ratio,
    'SCREEN_AST_DIFF': screen_difference,
    'SCREEN_AST_PTS_RATIO': screen_pts_ratio,
    'SCREEN_AST_PTS_DIFF': screen_pts_difference,
    'LOOSE_RATIO': loose_ratio,
    'LOOSE_DIFFERENCE': loose_difference,
    'OFF_BOXOUT_RATIO': off_box_ratio,
    'OFF_BOXOUT_DIFF': off_box_difference,
    'DEF_BOXOUT_RATIO': def_box_ratio,
    'DEF_BOXOUT_DIFF': def_box_difference,
    'BOXOUT_TM_RBS_RATIO': box_rbs_ratio,
    'BOXOUT_TM_RBS_DIFF': box_rbs_difference,
}
out = pd.DataFrame(data=d)

In [9]:
# Save the model-fitting table as CSV in the working directory. The row index
# is written too (index=True is the pandas default), as the first column.
out.to_csv(path_or_buf="model_data.csv", index=True)