In [1]:
import pandas as pd
import networkx as nx
from nba_api.stats.endpoints import playerdashptpass, commonteamroster
from nba_api.stats.static import teams
import logging as log

# Configure the root logger: INFO and above, timestamped "time - level - message" lines
log.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=log.INFO,
)

In [2]:
def get_team_id(abbreviation):
    """
    Look up the ID of the NBA team whose abbreviation matches exactly.

    :param abbreviation: Team abbreviation, compared with ``==`` against each
        team's 'abbreviation' entry (so the match is case-sensitive).
    :return: The 'id' of the first matching team, or None when no team matches
        or the lookup raises (the error is logged).
    """
    try:
        matching_ids = (
            entry['id']
            for entry in teams.get_teams()
            if entry['abbreviation'] == abbreviation
        )
        # First match wins; the default covers "no such abbreviation".
        return next(matching_ids, None)
    except Exception as err:
        log.error(f"Error in get_team_id: {err}")
        return None

In [3]:
def get_team_roster(team_id, season):
    """
    Fetch a team's roster for one season via the CommonTeamRoster endpoint.

    :param team_id: ID of the NBA team.
    :param season: Season passed through to the endpoint unchanged.
    :return: The endpoint's ``common_team_roster`` data frame, or an empty
        DataFrame when the request or conversion raises (the error is logged).
    """
    try:
        endpoint = commonteamroster.CommonTeamRoster(team_id=team_id, season=season)
        roster_frame = endpoint.common_team_roster.get_data_frame()
    except Exception as err:
        log.error(f"Error getting roster for team ID {team_id}: {err}")
        return pd.DataFrame()
    return roster_frame

In [4]:
def get_player_passing_data(player_id, team_id, season):
    """
    Fetch one player's passing dashboard via the PlayerDashPtPass endpoint.

    :param player_id: ID of the player.
    :param team_id: ID of the player's team.
    :param season: Season passed through to the endpoint unchanged.
    :return: The first data frame returned by the endpoint, or an empty
        DataFrame when the request or conversion raises (the error is logged).
    """
    try:
        endpoint = playerdashptpass.PlayerDashPtPass(
            player_id=player_id,
            team_id=team_id,
            season=season,
        )
        # Only the first of the endpoint's frames is used.
        first_frame = endpoint.get_data_frames()[0]
    except Exception as err:
        log.error(f"Error getting passing data for player ID {player_id}: {err}")
        return pd.DataFrame()
    return first_frame

In [5]:
def get_team_passing_data(team_id, season):
    """
    Get passing data for all players in a team for a given season.

    The roster is fetched with ``get_team_roster`` and each player's passing
    frame with ``get_player_passing_data``; the per-player frames are stacked
    into a single DataFrame with a fresh 0..n-1 index.

    :param team_id: ID of the NBA team.
    :param season: The season for which to retrieve the data.
    :return: DataFrame with the passing data of all team players. An empty
        DataFrame is returned when the roster is empty or no player yielded
        any rows.
    """
    players = get_team_roster(team_id, season)
    if players.empty:
        # Roster lookup failed or returned no rows: nothing to fetch.
        return pd.DataFrame()

    # Read IDs straight from the column: iterrows() upcasts each row to one
    # dtype and can turn integer IDs into floats.
    player_frames = [
        get_player_passing_data(player_id, team_id, season)
        for player_id in players['PLAYER_ID'].tolist()
    ]

    # Skip players whose lookup failed or returned nothing, then concatenate
    # once instead of re-copying a growing frame on every iteration.
    player_frames = [frame for frame in player_frames if not frame.empty]
    if not player_frames:
        return pd.DataFrame()
    return pd.concat(player_frames, ignore_index=True)

In [6]:
def collect_nba_passing_data(season):
    """
    Gather passing data for every team returned by ``teams.get_teams()``.

    :param season: The NBA season for which to collect data.
    :return: Dictionary mapping each team's abbreviation to the result of
        ``get_team_passing_data`` for that team, in the order teams are listed.
    """
    passing_by_team = {}
    for franchise in teams.get_teams():
        franchise_id, abbreviation = franchise['id'], franchise['abbreviation']
        passing_by_team[abbreviation] = get_team_passing_data(franchise_id, season)
        # One progress line per team.
        log.info(f"Collected data for team: {abbreviation}")
    return passing_by_team

In [7]:
def calculate_centrality_for_team(team_data, team_abbreviation):
    """
    Build a team's passing graph and return per-player centrality measures.

    Every distinct 'PLAYER_NAME_LAST_FIRST' value becomes a node and every row
    contributes an edge from that player to the row's 'PASS_TO' value. The
    graph is an undirected ``nx.Graph``.

    :param team_data: DataFrame containing team's passing data.
    :param team_abbreviation: Abbreviation of the team.
    :return: DataFrame produced by ``create_centrality_df``, or an empty
        DataFrame if anything raises (the error is logged).
    """
    try:
        pass_graph = nx.Graph()

        # Nodes first, so passers are inserted in order of first appearance.
        pass_graph.add_nodes_from(team_data['PLAYER_NAME_LAST_FIRST'].unique())

        # One edge per row, linking the passer to the receiver.
        pass_graph.add_edges_from(
            (record['PLAYER_NAME_LAST_FIRST'], record['PASS_TO'])
            for _, record in team_data.iterrows()
        )

        return create_centrality_df(pass_graph, team_abbreviation)
    except Exception as err:
        log.error(f"Error in calculate_centrality_for_team: {err}")
        return pd.DataFrame()

In [8]:
def create_centrality_df(G, team_abbreviation):
    """
    Create a DataFrame of centrality measures from a graph.

    Degree, betweenness and closeness centrality are computed with networkx
    and combined into one row per node. Each score is looked up by node key,
    so the columns stay aligned with 'PLAYER_NAME_LAST_FIRST' even if the three
    result dictionaries were to iterate in different orders.

    :param G: NetworkX graph object.
    :param team_abbreviation: Abbreviation of the team.
    :return: DataFrame with columns 'PLAYER_NAME_LAST_FIRST', 'DEGREE',
        'BETWEENNESS', 'CLOSENESS' and 'TEAM' (the abbreviation repeated on
        every row).
    """
    degree_centrality = nx.degree_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)

    # Row order follows the degree-centrality dictionary's key order.
    players = list(degree_centrality)

    return pd.DataFrame({
        'PLAYER_NAME_LAST_FIRST': players,
        'DEGREE': [degree_centrality[player] for player in players],
        'BETWEENNESS': [betweenness_centrality[player] for player in players],
        'CLOSENESS': [closeness_centrality[player] for player in players],
        'TEAM': team_abbreviation
    })

In [9]:
# Example usage: collect passing data for every team in one season
season = '2022-23'
try:
    nba_passing_data = collect_nba_passing_data(season)
except Exception as e:
    log.error(f"Error in collecting NBA passing data: {e}")
    # Fall back to an empty mapping so cells that read nba_passing_data neither
    # hit a NameError nor silently reuse a stale value from an earlier run.
    nba_passing_data = {}


2023-11-16 22:35:23,604 - INFO - Collected data for team: ATL
2023-11-16 22:35:31,305 - INFO - Collected data for team: BOS
2023-11-16 22:35:37,659 - INFO - Collected data for team: CLE
2023-11-16 22:35:44,124 - INFO - Collected data for team: NOP
2023-11-16 22:35:51,185 - INFO - Collected data for team: CHI
2023-11-16 22:35:57,300 - INFO - Collected data for team: DAL
2023-11-16 22:36:05,304 - INFO - Collected data for team: DEN
2023-11-16 22:36:12,040 - INFO - Collected data for team: GSW
2023-11-16 22:36:20,564 - INFO - Collected data for team: HOU
2023-11-16 22:36:28,226 - INFO - Collected data for team: LAC
2023-11-16 22:36:33,541 - INFO - Collected data for team: LAL
2023-11-16 22:36:39,079 - INFO - Collected data for team: MIA
2023-11-16 22:36:44,914 - INFO - Collected data for team: MIL
2023-11-16 22:36:51,132 - INFO - Collected data for team: MIN
2023-11-16 22:36:56,091 - INFO - Collected data for team: BKN
2023-11-16 22:37:01,342 - INFO - Collected data for team: NYK
2023-11-

In [9]:
# Hand the collected per-team passing data and a target file name to save_data_to_excel
# (presumably writes an Excel workbook — the helper's body is not shown here).
# NOTE(review): save_data_to_excel is not defined in any cell visible in this notebook —
# confirm it is defined or imported before this cell so a fresh-kernel run does not fail.
excel_file_name = 'nba_passing_data.xlsx'
save_data_to_excel(nba_passing_data, excel_file_name)

Saved data for team: ATL
Saved data for team: BOS
Saved data for team: CLE
Saved data for team: NOP
Saved data for team: CHI
Saved data for team: DAL
Saved data for team: DEN
Saved data for team: GSW
Saved data for team: HOU
Saved data for team: LAC
Saved data for team: LAL
Saved data for team: MIA
Saved data for team: MIL
Saved data for team: MIN
Saved data for team: BKN
Saved data for team: NYK
Saved data for team: ORL
Saved data for team: IND
Saved data for team: PHI
Saved data for team: PHX
Saved data for team: POR
Saved data for team: SAC
Saved data for team: SAS
Saved data for team: OKC
Saved data for team: TOR
Saved data for team: UTA
Saved data for team: MEM
Saved data for team: WAS
Saved data for team: DET
Saved data for team: CHA


In [10]:
# Combine centrality data for all teams into a single DataFrame.
# Per-team frames are collected first and concatenated once; growing a frame
# with pd.concat inside the loop re-copies every earlier row on each pass.
team_centrality_frames = []

for team_abbreviation, team_data in nba_passing_data.items():
    team_centrality_df = calculate_centrality_for_team(team_data, team_abbreviation)
    # Teams whose centrality calculation failed come back empty; skip them.
    if not team_centrality_df.empty:
        team_centrality_frames.append(team_centrality_df)

# pd.concat raises on an empty list, so keep the "no data" case explicit.
if team_centrality_frames:
    all_teams_centrality = pd.concat(team_centrality_frames, ignore_index=True)
else:
    all_teams_centrality = pd.DataFrame()

# Display the combined DataFrame
log.info("Combined Centrality Data for All Teams")
print(all_teams_centrality)

2023-11-16 23:19:55,013 - INFO - Combined Centrality Data for All Teams


    PLAYER_NAME_LAST_FIRST    DEGREE  BETWEENNESS  CLOSENESS TEAM
0        Williams, Donovan  0.200000     0.000000   0.540541  ATL
1          Fernando, Bruno  0.500000     0.012640   0.666667  ATL
2           Johnson, Jalen  0.900000     0.022630   0.909091  ATL
3           Forrest, Trent  0.850000     0.019752   0.869565  ATL
4           Holiday, Aaron  0.950000     0.058595   0.952381  ATL
..                     ...       ...          ...        ...  ...
608             Jones, Kai  0.894737     0.017053   0.904762  CHA
609       Washington, P.J.  0.842105     0.005585   0.863636  CHA
610       McDaniels, Jalen  0.736842     0.002492   0.791667  CHA
611        Gabriel, Wenyen  0.052632     0.000000   0.487179  CHA
612         Plumlee, Mason  0.631579     0.002127   0.730769  CHA

[613 rows x 5 columns]


In [15]:
def most_heliocentric_player(df, centrality_metric='DEGREE'):
    """
    Find the most central player on the team with the widest centrality spread.

    A team's spread is the difference between the highest and lowest value of
    ``centrality_metric`` among its rows. The team with the largest spread is
    selected, and the player with the highest value on that team is reported.
    Only the requested metric column (plus 'TEAM' and 'PLAYER_NAME_LAST_FIRST')
    is read from ``df``.

    :param df: DataFrame containing centrality data for NBA players.
    :param centrality_metric: The centrality metric to consider ('DEGREE', 'BETWEENNESS', 'CLOSENESS').
    :return: A tuple ``(team, player)``: the team with the largest spread and
        the 'PLAYER_NAME_LAST_FIRST' of its top player. Ties on the player's
        value are resolved in favour of the row that appears first in ``df``.
    :raises ValueError: If ``centrality_metric`` is not one of the supported
        names, or if ``df`` has no rows.
    """
    # Validate before touching the data so a bad metric name always surfaces as
    # ValueError rather than as a KeyError from the DataFrame.
    if centrality_metric not in ('DEGREE', 'BETWEENNESS', 'CLOSENESS'):
        raise ValueError("Invalid centrality metric. Choose from 'DEGREE', 'BETWEENNESS', 'CLOSENESS'.")

    if df.empty:
        raise ValueError("Cannot determine the most heliocentric player: the centrality DataFrame is empty.")

    # Spread (max - min) of the requested metric within each team.
    metric_by_team = df.groupby('TEAM')[centrality_metric]
    spread_by_team = metric_by_team.max() - metric_by_team.min()

    # Determine the team with the greatest spread in the specified centrality measure
    max_spread_team = spread_by_team.idxmax()

    # Identify the player with the highest centrality on that team; a stable
    # sort keeps tied players in their original order, so the result is deterministic.
    team_rows = df[df['TEAM'] == max_spread_team]
    top_player = team_rows.sort_values(centrality_metric, ascending=False, kind='stable').iloc[0]

    return max_spread_team, top_player['PLAYER_NAME_LAST_FIRST']

# Example usage: report the widest-spread team and its top player for one metric
centrality_metric = 'DEGREE'
team, player = most_heliocentric_player(
    all_teams_centrality,
    centrality_metric=centrality_metric,
)
print(
    f"Team with the most unequal {centrality_metric} centrality: {team}",
    f"Top player in this team ({centrality_metric} centrality): {player}",
    sep="\n",
)


Team with the most unequal DEGREE centrality: DET
Top player in this team (DEGREE centrality): Hayes, Killian
