In [2]:
import networkx as nx
import numpy as np
import pandas as pd

In [6]:
# Build the directed linemate graph for forwards: 2 edges per pairing.
#
# Every row of the edge list describes one pairing and yields two directed edges:
#   playerId1 -> playerId2, weighted by 'cf_inf_on2'
#   playerId2 -> playerId1, weighted by 'cf_inf_on1'
# NOTE(review): presumably 'cf_inf_onN' is the Corsi influence exerted on player N --
# confirm against the script that generates the edge list.

# Read in data
df = pd.read_csv("../../data/forwards/forwards_edgelist_corsi.csv")

# Create a network of nodes which are players; the links are players that played together.
# We create 2 graphs, one for each relationship direction, and then compose them for our total graph.

# Direction 1: expose 'cf_inf_on2' under the attribute name 'weight'.
df = df.rename(columns={'cf_inf_on2': 'weight'})
G0 = nx.from_pandas_edgelist(df, 'playerId1', 'playerId2', edge_attr='weight', create_using=nx.DiGraph)

# Direction 2: park the first weight column under 'notweight' so that 'cf_inf_on1'
# can take over the name 'weight' without producing two columns with the same label.
df = df.rename(columns={'weight': 'notweight', 'cf_inf_on1': 'weight'})
G1 = nx.from_pandas_edgelist(df, 'playerId2', 'playerId1', edge_attr='weight', create_using=nx.DiGraph)

# Union of both directions. If the same directed edge exists in both graphs,
# nx.compose keeps the attributes of the second graph (G1).
G = nx.compose(G0, G1)

# Add player names to nodes
players_df = pd.read_csv("../../data/forwards/aggregate_forwards.csv", index_col='playerId', header=0)

# One name per playerId; if an id occurs on several rows the first row is used.
name_lookup = players_df.loc[~players_df.index.duplicated(keep="first"), "playerName"]

# Report every unknown player at once instead of failing on the first lookup.
missing_players = [player for player in G.nodes if player not in name_lookup.index]
if missing_players:
    raise KeyError(f"playerId(s) in the edge list but not in aggregate_forwards.csv: {missing_players}")

player_names = {player: name_lookup.at[player] for player in G.nodes}
nx.set_node_attributes(G, player_names, "playerName")

# output a gephi file for ez visualization
nx.write_gexf(G, 'forwards.gexf')

In [7]:
# Number of nodes and edges
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
print("Number of nodes: ", num_nodes)
print("Number of edges: ", num_edges)

# Average outgoing link weight. This determines which players had the greatest effect on their linemates.
# The total (summed) outgoing weight is stored as well.
out_degrees = G.out_degree(weight="weight")   # (node, sum of outgoing edge weights)
out_link_counts = dict(G.out_degree())        # node -> number of outgoing edges
for player_id, total_out_weight in out_degrees:
    link_count = out_link_counts[player_id]
    # A node without outgoing edges has no defined average; record NaN instead of dividing by zero.
    avg_out_weight = total_out_weight / link_count if link_count else float("nan")
    players_df.loc[player_id, "average_corsi_influence"] = avg_out_weight
    players_df.loc[player_id, "total_out_influence"] = total_out_weight

# NOTE: this overwrites the same CSV that players_df was loaded from, now with the
# two influence columns added.
output_file = '../../data/forwards/aggregate_forwards.csv'
players_df.to_csv(output_file)

# Outgoing link weight correlations.
# NOTE(review): the printed matrix uses the default method (Pearson) while the saved
# matrix uses Spearman -- confirm that showing one and storing the other is intended.
print(players_df.corr(numeric_only=True))
players_df.corr(numeric_only=True, method='spearman').to_csv("../../data/forwards/correlations.csv")


Number of nodes:  802
Number of edges:  7114
                                  games_played   icetime    shifts  gameScore  \
games_played                          1.000000  0.988493  0.699727   0.624954   
icetime                               0.988493  1.000000  0.704565   0.674590   
shifts                                0.699727  0.704565  1.000000   0.810992   
gameScore                             0.624954  0.674590  0.810992   1.000000   
onIce_xGoalsPercentage                0.202116  0.209697  0.276793   0.339170   
...                                        ...       ...       ...        ...   
I_F_dZoneShiftStarts_per60            0.136322  0.106885  0.074618  -0.032548   
I_F_neutralZoneShiftStarts_per60      0.122820  0.117837  0.064724   0.047276   
I_F_flyShiftStarts_per60             -0.437606 -0.457112 -0.421671  -0.422875   
average_corsi_influence               0.128407  0.154460  0.134968   0.252533   
total_out_influence                   0.325855  0.360362  0.2107