In [1]:
import pandas as pd

In [2]:
# Relative paths of the processed CSV inputs; both are handed to load_data below.
player_data = "../data/processed/player_data.csv"
team_data = "../data/processed/team_data.csv"

def load_data(team_loc, player_loc):
    """Read the team-level and player-level CSV files into DataFrames.

    Args:
        team_loc: Location of the team CSV (anything ``pd.read_csv`` accepts).
        player_loc: Location of the player CSV (anything ``pd.read_csv`` accepts).

    Returns:
        A ``(team_frame, player_frame)`` tuple, in that order.
    """
    # The team file is read first, then the player file.
    team_frame = pd.read_csv(team_loc)
    player_frame = pd.read_csv(player_loc)
    return team_frame, player_frame

# Read both CSVs once; load_data returns (team, player) in that order, hence the unpacking order.
team_df, player_df = load_data(team_loc=team_data, player_loc=player_data)

In [46]:
# Distinct team names found in the team-level frame. Later cells pass this list to
# `.isin(...)` to tell team-attributed rows apart from individual-player rows.
# Sorted so the list is identical on every kernel run (iterating a set of strings is
# not order-stable between interpreter sessions); missing names are dropped so the
# sort never has to compare NaN with a string.
NBA_TEAMS = sorted(team_df["teamName"].dropna().unique())

In [147]:
# Directory that the `to_csv` export cells in this notebook write into.
# Nothing here creates it, so it must already exist when those cells run.
ROOT_DATA_DIR = "../data/insights"

### Team Data
This data has one row per team per game in the 2022-2023 season and tracks the calls from that game's L2M (Last Two Minute) report.
> `teamName` is the team that benefits from "callsReceived" or "errorsInFavor" for a given row.

In [3]:
# Bare last expression: Jupyter renders the team-level frame as a table.
team_df

Unnamed: 0.1,Unnamed: 0,teamName,opponent,game,gameId,gameDate,pointsScored,pointsAllowed,outcome,callsReceived,errorsInFavor,posessionsInFavor
0,0,76ers,Nuggets,76ers vs Nuggets,22200741,"January 28, 2023",126,119,WON,1,1,0
1,1,Nuggets,76ers,Nuggets @ 76ers,22200741,"January 28, 2023",119,126,LOSS,0,0,0
2,2,Pistons,Rockets,Pistons vs Rockets,22200743,"January 28, 2023",114,117,LOSS,4,0,0
3,3,Rockets,Pistons,Rockets @ Pistons,22200743,"January 28, 2023",117,114,WON,0,0,0
4,4,Spurs,Suns,Spurs vs Suns,22200748,"January 28, 2023",118,128,LOSS,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
549,549,Thunder,Timberwolves,Thunder @ Timberwolves,22200010,"October 19, 2022",108,115,LOSS,0,1,0
550,550,Suns,Mavericks,Suns vs Mavericks,22200013,"October 19, 2022",107,105,WON,2,0,0
551,551,Mavericks,Suns,Mavericks @ Suns,22200013,"October 19, 2022",105,107,LOSS,0,1,0
552,552,Kings,Trail Blazers,Kings vs Trail Blazers,22200014,"October 19, 2022",108,115,LOSS,7,1,0


### Player Data
This data is a scrape of each individual L2M report, tracking the specific call, decision and players involved in the last two minutes of NBA games. 
> `disadvantagedPlayer` is not always a "victim". In the case of an `Incorrect Call` the committingPlayer is disadvantaged

In [4]:
# Bare last expression: Jupyter renders the player-level (per-call) frame as a table.
player_df

Unnamed: 0.1,Unnamed: 0,id,committingPlayer,disadvantagedPlayer,callType,decision,time,teamInPosession,difficulty,comment
0,0,1535-2110,P.J. Tucker,Nikola Jokic,Foul: Personal,Correct Non-Call,01:59,Nuggets,Observable,Tucker (PHI) and Jokic (DEN) engage and diseng...
1,1,1535-2111,P.J. Tucker,Nikola Jokic,Foul: Personal,Correct Non-Call,01:54,Nuggets,Observable,Tucker (PHI) establishes a legal guarding posi...
2,2,1535-2112,P.J. Tucker,Nikola Jokic,Foul: Shooting,Correct Non-Call,01:52.6,Nuggets,Observable,Tucker (PHI) brings his left arm forward sligh...
3,3,1536-2113,Aaron Gordon,Joel Embiid,Foul: Shooting,Correct Non-Call,01:35.8,76ers,Observable,Gordon (DEN) jumps vertically and incidental b...
4,4,1537-2114,James Harden,Kentavious Caldwell-Pope,Foul: Shooting,Correct Non-Call,01:28.1,Nuggets,Observable,Harden (PHI) legally contests Caldwell-Pope&ap...
...,...,...,...,...,...,...,...,...,...,...
6828,6828,1545-2191,Josh Hart,Kevin Huerter,Foul: Shooting,Correct Non-Call,00:16.5,Kings,Observable,Hart (POR) legally contests Huerter?s (SAC) ju...
6829,6829,1546-2192,De'Aaron Fox,Damian Lillard,Foul: Personal,Correct Call,00:14.1,Trail Blazers,Observable,Fox (SAC) commits a take foul on Lillard (POR).
6830,6830,1547-2193,Jerami Grant,Malik Monk,Foul: Personal,Correct Call,00:11.4,Kings,Observable,Grant (POR) places two hands on Fox (SAC) and ...
6831,6831,1547-2194,Anfernee Simons,Kevin Huerter,Foul: Shooting,Correct Non-Call,00:09.4,Kings,Observable,Simons (POR) maintains a legal guarding positi...


### Exploring Team Data

In [111]:
# Sum `errorsInFavor` per team, order the totals from largest to smallest, and
# turn the result into a two-column frame (teamName, errorsInFavor).
aggregated_favorable_errors = (
    team_df
    .groupby("teamName")["errorsInFavor"]
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)


### Which teams have benefited from the most errors?

In [113]:
# Show up to 32 rows of the per-team totals (already sorted, highest first).
# NOTE(review): 32 looks chosen to be at least the number of teams so no row is cut off; confirm.
aggregated_favorable_errors.head(32)

Unnamed: 0,teamName,errorsInFavor
0,Mavericks,22
1,Cavaliers,21
2,Lakers,20
3,76ers,20
4,Pacers,19
5,Heat,18
6,Thunder,18
7,Celtics,17
8,Warriors,17
9,Hornets,17


In [115]:
# An error in favor of `teamName` is an error against its `opponent`, so summing
# `errorsInFavor` by opponent gives the errors each team suffered. The columns are
# then relabeled so this frame lines up with the per-team "in favor" totals.
victim_teams = (
    team_df
    .groupby("opponent")["errorsInFavor"]
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={"errorsInFavor": "errorsAgainst", "opponent": "teamName"})
)


### Which teams have experienced the most incorrect calls go against them? 

In [117]:
# Show up to 32 rows of the per-team "errors against" totals (highest first).
# NOTE(review): 32 looks chosen to be at least the number of teams so no row is cut off; confirm.
victim_teams.head(32)

Unnamed: 0,teamName,errorsAgainst
0,Mavericks,26
1,Nuggets,23
2,Kings,21
3,Pacers,20
4,Heat,17
5,76ers,17
6,Nets,16
7,Hornets,16
8,Celtics,16
9,Knicks,16


In [118]:
# Join the "in favor" and "against" totals on team name (outer join, so a team that
# appears on only one side is kept), add errorDelta = in favor - against, and order
# the teams from most negative delta upwards with a fresh 0..n-1 index.
combined = (
    aggregated_favorable_errors
    .merge(victim_teams, how="outer", on="teamName")
    .assign(errorDelta=lambda frame: frame["errorsInFavor"] - frame["errorsAgainst"])
    .sort_values(by="errorDelta")
    .reset_index(drop=True)
)

### Examining the difference between favorable and unfavorable calls shows which teams have experienced the largest delta.

In [119]:
# Show up to 32 rows of the merged frame, sorted by errorDelta ascending (most negative first).
# NOTE(review): 32 looks chosen to be at least the number of teams so no row is cut off; confirm.
combined.head(32)

Unnamed: 0,teamName,errorsInFavor,errorsAgainst,errorDelta
0,Nuggets,14,23,-9
1,Kings,14,21,-7
2,Jazz,7,14,-7
3,Nets,11,16,-5
4,Mavericks,22,26,-4
5,Bucks,10,13,-3
6,Timberwolves,12,15,-3
7,Suns,9,11,-2
8,Bulls,11,13,-2
9,Knicks,14,16,-2


In [149]:
# Persist the per-team error summary under ROOT_DATA_DIR.
# `index` is left at its default, so the row index is written as an extra unnamed first column.
# NOTE(review): presumably the same pattern produced the "Unnamed: 0" columns in the input CSVs; confirm.
combined.to_csv(f"{ROOT_DATA_DIR}/team_errors.csv")

## Exploring Player Data

In [124]:
# Keep only the "Incorrect Non-Call" rows, count them per disadvantaged player
# (non-null `callType` entries), order the counts from highest to lowest, and
# expose the count as a `callCount` column.
aggregated_disadvantaged_player_decisions = (
    player_df
    .loc[player_df["decision"] == "Incorrect Non-Call"]
    .groupby(["disadvantagedPlayer", "decision"])["callType"]
    .count()
    .sort_values(ascending=False)
    .reset_index(name="callCount")
)


In [135]:
# Some rows carry a team name in `disadvantagedPlayer` instead of a person.
# Split the aggregate into individual players (re-indexed 0..n-1) and team rows
# (original index kept).
players_only = (
    aggregated_disadvantaged_player_decisions
    .loc[lambda frame: ~frame["disadvantagedPlayer"].isin(NBA_TEAMS)]
    .reset_index(drop=True)
)
teams_only = aggregated_disadvantaged_player_decisions.loc[
    lambda frame: frame["disadvantagedPlayer"].isin(NBA_TEAMS)
]


### The top 50 most disadvantaged players - the players who have had the most Incorrect Non-Calls go against them

In [137]:
# First 50 rows of the player-only counts; the frame is already sorted by count, highest first.
players_only.head(50)

Unnamed: 0,disadvantagedPlayer,decision,callCount
0,Donovan Mitchell,Incorrect Non-Call,6
1,De'Aaron Fox,Incorrect Non-Call,5
2,Shai Gilgeous-Alexander,Incorrect Non-Call,5
3,Lauri Markkanen,Incorrect Non-Call,4
4,Nikola Jokic,Incorrect Non-Call,4
5,Scottie Barnes,Incorrect Non-Call,4
6,DeMar DeRozan,Incorrect Non-Call,4
7,Tyrese Haliburton,Incorrect Non-Call,4
8,Jalen Green,Incorrect Non-Call,3
9,Joel Embiid,Incorrect Non-Call,3


In [150]:
# Persist the per-player Incorrect Non-Call counts under ROOT_DATA_DIR.
# `index` is left at its default, so the row index is written as an extra unnamed first column.
players_only.to_csv(f"{ROOT_DATA_DIR}/player_incorrect_non_calls.csv")

In [145]:
# Count "Incorrect Call" rows per committing player (non-null `callType` entries),
# highest count first. This intermediate frame keeps the `callType` column name.
aggregated_committing_player_decisions = (
    player_df
    .loc[player_df["decision"] == "Incorrect Call"]
    .groupby(["committingPlayer", "decision"])["callType"]
    .count()
    .sort_values(ascending=False)
    .reset_index()
)
# Drop rows whose "player" is really a team name, expose the count as `callCount`,
# and renumber the rows 0..n-1.
committing_players_only = (
    aggregated_committing_player_decisions
    .loc[lambda frame: ~frame["committingPlayer"].isin(NBA_TEAMS)]
    .rename(columns={"callType": "callCount"})
    .reset_index(drop=True)
)

### The top 50 most disadvantaged players - the players who have had the most Incorrect Calls called against them

In [146]:
# First 50 rows of the per-player Incorrect Call counts; already sorted by count, highest first.
committing_players_only.head(50)

Unnamed: 0,committingPlayer,decision,callCount
0,Jusuf Nurkic,Incorrect Call,2
1,Jimmy Butler,Incorrect Call,2
2,Shai Gilgeous-Alexander,Incorrect Call,2
3,Kenyon Martin Jr.,Incorrect Call,1
4,Klay Thompson,Incorrect Call,1
5,Lauri Markkanen,Incorrect Call,1
6,LeBron James,Incorrect Call,1
7,Mikal Bridges,Incorrect Call,1
8,Paul George,Incorrect Call,1
9,Al Horford,Incorrect Call,1


In [None]:
# Persist the per-player Incorrect Call counts alongside the other exports.
# The target directory is ROOT_DATA_DIR, the same constant the other export cells use.
# `index` is left at its default, so the row index is written as an extra unnamed first column.
committing_players_only.to_csv(f"{ROOT_DATA_DIR}/player_incorrect_fouls.csv")