# Notebook to create graph of players and teams

The resulting graph is a bipartite graph with players on one side and teams on the other.
Teams are additionally separated by season: each team node represents one team in one season and is named `<team>-<year>`.

Importing all relevant libraries

In [41]:
import csv
import networkx as nx
import numpy as np

Define csv file path

In [42]:
# Path to the input CSV of player statistics, relative to the notebook's working directory.
filepath = '../data/NBA Player Stats(1950 - 2022).csv'

Create a graph

In [43]:
# Directed graph: the loading cell adds edges pointing from a player node to a team-season node.
graph = nx.DiGraph()

Read the CSV and add players and team-seasons to the graph

In [44]:
# Build the graph from the stats CSV: one 'player' node per player name, one
# 'team-season' node per team and year, and a player -> team-season edge that
# carries the points per game for that row.
# Columns read by index: 2 = season year, 3 = player name, 6 = team code,
# 7 = games played, 31 = total points.
# newline='' is the csv module's recommended way to open files for csv.reader.
with open(filepath, 'r', encoding="utf8", newline='') as file:
    csvreader = csv.reader(file)
    next(csvreader, None)  # skip the header row (tolerates an empty file)
    for row in csvreader:
        year = row[2]
        name = row[3]
        team = row[6]
        if team == 'TOT':
            # ignore TOT, meaning player played for multiple teams in the same season
            continue
        # Parse the stats fresh for every row. An empty cell counts as 0 so that
        # a value from the previous row (a different player) is never reused.
        games_played = float(row[7]) if row[7] != '' else 0.0
        total_points = float(row[31]) if row[31] != '' else 0.0
        # With zero games there is nothing to divide by; the total is kept as is.
        points_per_game = total_points
        if games_played != 0:
            points_per_game = round(total_points / games_played, 2)
        # handle rebrands
        if team == 'SEA':
            team = 'OKC'
        # Only team-season nodes are created; no season-independent team node is built.
        team_node_name = team + "-" + year
        if not graph.has_node(team_node_name):
            graph.add_node(team_node_name, type='team-season')
        if not graph.has_node(name):
            graph.add_node(name, type='player')
        # ppg is stored as a string; the star-player cell converts it back with float().
        graph.add_edge(name, team_node_name, ppg=str(points_per_game))

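Identify star players: compute each player's average points per game over all of their team-season edges and select the players above the 97th percentile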

In [45]:
# Calculate each player's career average points per game as the unweighted
# mean of the 'ppg' attribute over all of that player's outgoing edges
# (one edge per team-season the player appears in).
ppgs = {}
for player, node_data in graph.nodes(data=True):
    if node_data['type'] != 'player':
        continue
    # Collect into a list and use the builtin sum(); the accumulator is no longer
    # named `sum`, which would shadow the builtin for every later cell.
    edge_ppgs = [float(attr['ppg']) for _, _, attr in graph.edges(player, data=True)]
    if not edge_ppgs:
        # A player node without edges has no average; skip it instead of dividing by zero.
        continue
    ppgs[player] = round(sum(edge_ppgs) / len(edge_ppgs), 2)

# Star players are those strictly above this percentile of career averages.
desired_percentile = 97
threshold = np.percentile(list(ppgs.values()), desired_percentile)
print('threshold for star player:', threshold)
star_players = {k: v for k, v in ppgs.items() if v > threshold}
print('found', len(star_players), 'star players')

threshold for star player: 17.427999999999994
found 135 star players


Mark players as star players in the graph

In [46]:
# Tag every player node with a 'star_player' attribute holding the string
# 'True' or 'False', depending on membership in star_players.
for node_name, node_attrs in graph.nodes(data=True):
    if node_attrs['type'] != 'player':
        continue
    node_attrs['star_player'] = str(node_name in star_players)

Display network info and save the file

In [47]:
# Print a summary of the graph, count nodes per type, and export to Pajek format.
print(graph)
player_nodes = [
    node_name
    for node_name, node_attrs in graph.nodes(data=True)
    if node_attrs['type'] == 'player'
]
# Nothing is collected here, so the 'teams' count below is always 0.
team_nodes = []
season_nodes = [
    node_name
    for node_name, node_attrs in graph.nodes(data=True)
    if node_attrs['type'] == 'team-season'
]
print('players =', len(player_nodes))
print('teams =', len(team_nodes))
print('seasons =', len(season_nodes))
nx.write_pajek(graph, '../data/graph.net', encoding='utf-8')

Graph with 6059 nodes and 25732 edges
players = 4486
teams = 0
seasons = 1573
