In [5]:
import networkx as nx
import numpy as np
import pandas as pd

In [6]:
# Load the pairwise edge list and the per-player aggregate stats.
# The aggregate stats are indexed by the playerId column.
df = pd.read_csv(
    "../../data/forwards/forwards_edgelist_corsi.csv",
)
players_df = pd.read_csv(
    "../../data/forwards/aggregate_forwards.csv",
    header=0,
    index_col='playerId',
)

In [7]:
# Build a directed graph with two edges per player pairing.
# Nodes are players; a link joins two players that played together.
# Each direction is built as its own DiGraph and the two are composed into G:
#   playerId1 -> playerId2 carries the cf_inf_on2 column as its weight
#   playerId2 -> playerId1 carries the cf_inf_on1 column as its weight
# The renamed frames get their own names so that `df` itself is left untouched
# and re-running this cell always rebuilds the same graph.
edges_1_to_2 = df.rename(columns={'cf_inf_on2': 'weight'})
G0 = nx.from_pandas_edgelist(edges_1_to_2, 'playerId1', 'playerId2', ['weight'], create_using=nx.DiGraph)

edges_2_to_1 = df.rename(columns={'cf_inf_on1': 'weight'})
G1 = nx.from_pandas_edgelist(edges_2_to_1, 'playerId2', 'playerId1', edge_attr='weight', create_using=nx.DiGraph)

G = nx.compose(G0, G1)

In [8]:
# Add data to nodes that may be useful for visualization
def copy_attr_to_nodes(G, df, attr_name):
    """Map every node of G to its value in column `attr_name` of `df`.

    Each node is looked up as an index label of `df`; when a label matches
    several rows, the value from the first matching row is used.

    Returns a dict {node: value}, keyed in the iteration order of G.nodes.
    Raises KeyError if a node is not present in the index of `df`.
    """
    return {
        node_id: df.loc[[node_id]][attr_name].values[0]
        for node_id in G.nodes
    }

In [9]:
# Attach each player's name to its node as the "playerName" attribute
player_names = copy_attr_to_nodes(G=G, df=players_df, attr_name="playerName")
nx.set_node_attributes(G, values=player_names, name="playerName")

In [10]:
# Export the graph as a GEXF file so it can be opened in Gephi for easy visualization.
# The relative path means the file lands in the current working directory.
nx.write_gexf(G, 'forwards.gexf')

In [11]:
# Report the size of the composed graph: node count and edge count
num_nodes, num_edges = G.number_of_nodes(), G.number_of_edges()
for label, count in (
    ("Number of nodes: ", num_nodes),
    ("Number of edges: ", num_edges),
):
    print(label, count)

Number of nodes:  802
Number of edges:  7114


In [12]:
# Outgoing influence per player.
#   total_out_influence     - sum of the weights on the player's outgoing links
#   average_corsi_influence - that sum divided by the number of outgoing links;
#                             used to see which players had the greatest effect
#                             on their linemates
out_degrees = G.out_degree(weight="weight")
# Unweighted out-degree (number of outgoing links) for every node, fetched once
out_link_counts = dict(G.out_degree())
for player_id, total_out_weight in out_degrees:
    n_out_links = out_link_counts[player_id]
    # A node without outgoing links has no defined average; store NaN rather than
    # raising ZeroDivisionError part-way through and leaving players_df half-filled.
    avg_out_weight = total_out_weight / n_out_links if n_out_links else float("nan")
    players_df.loc[player_id, "average_corsi_influence"] = avg_out_weight
    players_df.loc[player_id, "total_out_influence"] = total_out_weight

# NOTE(review): this is the same path players_df was read from, so the input
# file is overwritten in place with the two new columns - confirm this is intended.
output_file = '../../data/forwards/aggregate_forwards.csv'
players_df.to_csv(output_file)

In [14]:
# Spearman rank correlations between all numeric stats, saved to CSV.
# The correlations with average and total influence are the ones of main interest.
(
    players_df
    .corr(method='spearman', numeric_only=True)
    .to_csv("../../data/forwards/correlations.csv")
)

In [20]:
# Numeric assortativity between nodes
def ordinal_stat_assortativity(G, players_df, stat):
    """Return the numeric assortativity coefficient of G with respect to `stat`.

    The `stat` column of `players_df` is first copied onto the nodes of G as a
    node attribute named `stat` (so G is modified by this call), and
    `nx.numeric_assortativity_coefficient` is then evaluated on that attribute.
    """
    nx.set_node_attributes(G, copy_attr_to_nodes(G, players_df, stat), stat)
    return nx.numeric_assortativity_coefficient(G, attribute=stat)

# Print the assortativity coefficient of each stat, one per line, in this order
for stat_name in (
    'onIce_corsiPercentage',
    'on_off_corsi_diff',
    'OnIce_F_goals_per60',
    'I_F_dZoneGiveaways_per60',
    'I_F_giveaways_per60',
    'shotsBlockedByPlayer_per60',
    'I_F_hits_per60',
    'I_F_takeaways_per60',
):
    print(ordinal_stat_assortativity(G, players_df, stat_name))

0.3926470240386087
0.3023187808008627
0.43906941019581985
0.21659358921311042
0.20768090493916566
0.12043422114686607
0.21176270513776924
0.1503037837128194
