# Basketball Anomaly Dataset

Intended for use with both Synth and ADMERCS.

This notebook builds on our earliest idea for a demo dataset: one that contains typical contextual anomalies which should be easy for MERCS to detect.



# Preliminaries

In [1]:
# (Optional) Black code formatter for JupyterLab; install it with `pip install nb_black`.
# NOTE(review): in the classic Jupyter Notebook the extension is loaded under a slightly
# different name (presumably `nb_black` — confirm against the nb_black README).
%load_ext lab_black

## Imports

In [2]:
import nba_api
from nba_api.stats import endpoints
from nba_api.stats.static import players, teams

import pandas as pd
import numpy as np

# Wide frames are displayed in full: no limit on the number of columns shown.
pd.options.display.max_columns = None

In [3]:
from nba_anomaly_generator.data import (
    get_team_roster_dataframe,
    get_team_plyr_stats_dataframe,
)

# Static Data

In [4]:
def _first_by_full_name(records, full_name):
    """Return the first record whose ``"full_name"`` equals ``full_name``.

    Args:
        records: iterable of dict-like records that have a ``"full_name"`` key.
        full_name: name to look for; the comparison is exact and case sensitive.

    Returns:
        The first matching record.

    Raises:
        IndexError: if no record matches. The exception type is the same one the
            previous ``[...][0]`` lookup raised, but the message now names the
            value that could not be found.
    """
    match = next((rec for rec in records if rec["full_name"] == full_name), None)
    if match is None:
        raise IndexError(f"no record with full_name == {full_name!r}")
    return match


# Player records from nba_api's static data (iterated as a sequence of dicts,
# despite the `_dict` suffix in the name).
player_dict = players.get_players()
bron = _first_by_full_name(player_dict, "LeBron James")
bron_id = bron["id"]

# Same lookup for the team id.
teams_dict = teams.get_teams()
LAL = _first_by_full_name(teams_dict, "Los Angeles Lakers")
LAL_id = LAL["id"]

# Testing Data Functions

## Data Retrieval

Retrieving data from API.

In [5]:
# tiny test
team_id = LAL_id
plyr_id = bron_id
season_id = "2018-19"

In [6]:
# Roster for the team chosen in the parameter cell. `team_id` is used instead of
# the hard-coded `LAL_id` so that this call and the stats call below always
# query the same team when the parameters are changed.
df_team_roster = get_team_roster_dataframe(
    team_id=team_id, season_id=season_id, timeout_s=50
)

df_team_roster.head()

Unnamed: 0,TeamID,SEASON,LeagueID,PLAYER,NUM,POSITION,HEIGHT,WEIGHT,BIRTH_DATE,AGE,EXP,SCHOOL,PLAYER_ID,SEASON_ID
0,1610612747,2018,0,Kyle Kuzma,0,F,6-9,220,"JUL 24, 1995",23.0,1,Utah,1628398,2018-19
1,1610612747,2018,0,Kentavious Caldwell-Pope,1,G,6-5,205,"FEB 18, 1993",26.0,5,Georgia,203484,2018-19
2,1610612747,2018,0,Lonzo Ball,2,G,6-6,190,"OCT 27, 1997",21.0,1,UCLA,1628366,2018-19
3,1610612747,2018,0,Josh Hart,3,G,6-5,215,"MAR 06, 1995",24.0,1,Villanova,1628404,2018-19
4,1610612747,2018,0,Alex Caruso,4,G,6-5,186,"FEB 28, 1994",25.0,1,Texas A&M,1627936,2018-19


In [7]:
# Roster columns joined with per-player statistics for the selected team and
# season (see the column list in the output below).
stats_request = dict(team_id=team_id, season_id=season_id, timeout_s=30)
df = get_team_plyr_stats_dataframe(**stats_request)
df.head()

Unnamed: 0,TeamID,SEASON,LeagueID,PLAYER,NUM,POSITION,HEIGHT,WEIGHT,BIRTH_DATE,AGE,EXP,SCHOOL,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1610612747,2018,0,Kyle Kuzma,0,F,6-9,220,"JUL 24, 1995",23.0,1,Utah,1628398,2018-19,0,1610612747,LAL,23.0,70,68,2314.0,496,1087,0.456,128,422,0.303,188,250,0.752,60,322,382,178,41,26,133,170,1308
1,1610612747,2018,0,Kentavious Caldwell-Pope,1,G,6-5,205,"FEB 18, 1993",26.0,5,Georgia,203484,2018-19,0,1610612747,LAL,26.0,82,23,2035.0,325,756,0.43,151,435,0.347,137,158,0.867,48,190,238,110,73,13,65,137,938
2,1610612747,2018,0,Lonzo Ball,2,G,6-6,190,"OCT 27, 1997",21.0,1,UCLA,1628366,2018-19,0,1610612747,LAL,21.0,47,45,1423.0,185,456,0.406,75,228,0.329,20,48,0.417,54,197,251,255,69,19,103,114,465
3,1610612747,2018,0,Josh Hart,3,G,6-5,215,"MAR 06, 1995",24.0,1,Villanova,1628404,2018-19,0,1610612747,LAL,24.0,67,22,1715.0,189,464,0.407,92,274,0.336,55,80,0.688,35,213,248,93,64,40,58,147,525
4,1610612747,2018,0,Alex Caruso,4,G,6-5,186,"FEB 28, 1994",25.0,1,Texas A&M,1627936,2018-19,0,1610612747,LAL,25.0,25,4,531.0,77,173,0.445,24,50,0.48,51,64,0.797,20,47,67,77,24,9,42,54,229


# Filter Columns

In [8]:
# Columns kept for the demo dataset, grouped by theme. The order is significant:
# it becomes the column order of the filtered frame.
# Available in the stats frame but not selected: TEAM_ID, EXP, OREB, DREB, TOV, PF.
COLUMNS_OF_INTEREST = (
    # season / team context
    ["SEASON_ID", "TEAM_ABBREVIATION"]
    # player identity and physical / biographical attributes
    + ["PLAYER_ID", "PLAYER", "NUM", "POSITION"]
    + ["HEIGHT", "WEIGHT", "BIRTH_DATE", "PLAYER_AGE"]
    # GP / GS / MIN
    + ["GP", "GS", "MIN"]
    # made / attempted / ratio triples (e.g. FG_PCT matches FGM / FGA in the output)
    + ["FGM", "FGA", "FG_PCT"]
    + ["FG3M", "FG3A", "FG3_PCT"]
    + ["FTM", "FTA", "FT_PCT"]
    # remaining per-player totals
    + ["REB", "AST", "STL", "BLK", "PTS"]
)

In [9]:
# Keep only the selected columns, in the order given by COLUMNS_OF_INTEREST.
# A missing column raises KeyError, exactly as plain `df[...]` selection does.
df = df.loc[:, COLUMNS_OF_INTEREST]
df.head()

Unnamed: 0,SEASON_ID,TEAM_ABBREVIATION,PLAYER_ID,PLAYER,NUM,POSITION,HEIGHT,WEIGHT,BIRTH_DATE,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,REB,AST,STL,BLK,PTS
0,2018-19,LAL,1628398,Kyle Kuzma,0,F,6-9,220,"JUL 24, 1995",23.0,70,68,2314.0,496,1087,0.456,128,422,0.303,188,250,0.752,382,178,41,26,1308
1,2018-19,LAL,203484,Kentavious Caldwell-Pope,1,G,6-5,205,"FEB 18, 1993",26.0,82,23,2035.0,325,756,0.43,151,435,0.347,137,158,0.867,238,110,73,13,938
2,2018-19,LAL,1628366,Lonzo Ball,2,G,6-6,190,"OCT 27, 1997",21.0,47,45,1423.0,185,456,0.406,75,228,0.329,20,48,0.417,251,255,69,19,465
3,2018-19,LAL,1628404,Josh Hart,3,G,6-5,215,"MAR 06, 1995",24.0,67,22,1715.0,189,464,0.407,92,274,0.336,55,80,0.688,248,93,64,40,525
4,2018-19,LAL,1627936,Alex Caruso,4,G,6-5,186,"FEB 28, 1994",25.0,25,4,531.0,77,173,0.445,24,50,0.48,51,64,0.797,67,77,24,9,229


In [10]:
# Write the filtered frame to `lal.csv` in the current working directory.
# NOTE(review): the row index is written too (pandas' default, made explicit
# here), so the file starts with an unnamed index column. Pass index=False
# instead if downstream readers should not see that column — confirm with them.
df.to_csv("lal.csv", index=True)