# HexCourtVision Preprocessing

This notebook preprocesses NBA game data, cleaning and preparing it for subsequent analysis. The preprocessing includes data cleaning, alignment of player and ball positions, distance calculations between players and the ball, and identification of significant game events such as possession changes, shots, and passes. Run the cells below in order, following the comments in each one, to transform raw game data into a structured and analyzable format.

In [1]:
# Key of the game to process; change this value to process a different game.
GAME_KEY = "20151228SACGSW"

In [2]:
# pandas must be imported here: this cell calls pd.set_option and no other
# cell in the notebook brings `pd` into scope, so a fresh kernel would
# otherwise fail with NameError before any processing starts.
import pandas as pd

from ml_nba.preprocessing.process_game import process_game

# Lift the row limit so displayed DataFrames are shown with all of their rows.
pd.set_option('display.max_rows', None)  # For all rows

# First, process the raw game and event data
# NOTE(review): the saved output of this cell ends in KeyError: 'EVENT_NUM',
# apparently raised from inside this call -- confirm the event data for
# GAME_KEY has the expected columns before relying on the later cells.
game_df = process_game(GAME_KEY)

[{'player_id': 2571, 'team_id': 1610612744, 'first_name': 'Leandro', 'last_name': 'Barbosa', 'jersey_number': '19', 'position': 'G'}, {'player_id': 2733, 'team_id': 1610612744, 'first_name': 'Shaun', 'last_name': 'Livingston', 'jersey_number': '34', 'position': 'G'}, {'player_id': 2738, 'team_id': 1610612744, 'first_name': 'Andre', 'last_name': 'Iguodala', 'jersey_number': '9', 'position': 'G-F'}, {'player_id': 101106, 'team_id': 1610612744, 'first_name': 'Andrew', 'last_name': 'Bogut', 'jersey_number': '12', 'position': 'C'}, {'player_id': 201574, 'team_id': 1610612744, 'first_name': 'Jason', 'last_name': 'Thompson', 'jersey_number': '1', 'position': 'F-C'}, {'player_id': 201575, 'team_id': 1610612744, 'first_name': 'Brandon', 'last_name': 'Rush', 'jersey_number': '4', 'position': 'F-G'}, {'player_id': 201578, 'team_id': 1610612744, 'first_name': 'Marreese', 'last_name': 'Speights', 'jersey_number': '5', 'position': 'F-C'}, {'player_id': 201939, 'team_id': 1610612744, 'first_name': 'S

KeyError: 'EVENT_NUM'

In [None]:
from ml_nba.preprocessing.extract_dho_candidates import (
    extract_dho_candidates,
)

# Step 2: run candidate extraction for the selected game.
# NOTE: the candidates must be labeled manually before they can be used as
# training data.
dho_candidates = extract_dho_candidates(GAME_KEY)

In [None]:
from ml_nba.preprocessing.persist_processed_game import (
    persist_processed_game,
)

# Step 3: with the candidates extracted and labeled, persist the relevant
# game events.
# NOTE(review): overwrite=True presumably replaces any previously persisted
# data for this game -- confirm before re-running on labeled games.
persist_processed_game(GAME_KEY, overwrite=True)

In [None]:
from ml_nba.preprocessing.generate_dho_feature_vectors import (
    generate_dho_feature_vectors,
)

# Step 4: build one feature vector per candidate; these vectors are the
# input to the classification models.
generate_dho_feature_vectors(GAME_KEY)

##### The code below can be used to visualize any game event (either inline in the notebook or saved as a .gif)

In [None]:
from ml_nba.preprocessing.utilities.DataLoader import DataLoader
from ml_nba.visualization.AnimationUtil import AnimationUtil

# Number of the event to visualize; change it to animate a different event.
EVENTNUM = 1

# Optional visualization: uncomment the lines below to animate EVENTNUM.
# A separate variable name is used so the `game_df` assigned by the
# process_game cell earlier in the notebook is not overwritten.
#raw_game_df = DataLoader.load_raw_game(GAME_KEY)
#animator = AnimationUtil(raw_game_df)

# Show the animation inline in the notebook ...
#animator.display_animation(EVENTNUM)
# ... or save it as a .gif named after the game key and event number.
#animator.save_animation(EVENTNUM, f'{GAME_KEY}-{EVENTNUM}.gif')