In [62]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community
import pandas as pd

# Read data

In [47]:
# Load the node table. newline='' is the mode the csv module documents for
# reader input, so quoted fields containing line breaks are parsed correctly.
with open('UCF Lake Nona Medical Center - Nodes.csv', 'r', newline='') as nodecsv:
    nodereader = csv.reader(nodecsv)
    next(nodereader, None)  # Skip the header row (no error if the file is empty)
    # csv.reader yields [] for blank lines; drop them so the n[0] lookup below cannot fail
    nodes = [row for row in nodereader if row]

node_names = [n[0] for n in nodes]  # First column of each row is used as the node name

In [48]:
# Load the edge list. Only rows whose first column is exactly "Hyperlink" are kept,
# and columns 1 and 2 of each kept row become the two endpoints of an edge
# (presumably source and destination URL -- confirm against the export format).
with open('UCF Lake Nona Medical Center - Internal Edges.csv', 'r', newline='') as edgecsv:
    edgereader = csv.reader(edgecsv)
    # The "Hyperlink" filter already rejects the header row, so no slice is applied
    # afterwards: slicing the filtered list would discard the first real edge.
    # The length check skips blank or truncated rows instead of raising IndexError.
    edges = [
        tuple(row[1:3])
        for row in edgereader
        if len(row) >= 3 and row[0] == "Hyperlink"
    ]

# Build Network

In [49]:
# Undirected graph: nodes are registered first (so pages without any
# "Hyperlink" edge still appear), then the edges are added.
G = nx.Graph()
G.update(edges=edges, nodes=node_names)

In [50]:
# Build a node -> degree mapping and store it on the graph as the 'degree' node attribute
degree_dict = {node: deg for node, deg in G.degree()}
nx.set_node_attributes(G, values=degree_dict, name='degree')

# Metrics

In [51]:
# len() of a networkx graph is its node count
n_nodes = len(G)
print("Number of Nodes:", n_nodes)

Number of Nodes: 260


In [52]:
# Graph.size() with no weight argument is the number of edges
n_edges = G.size()
print("Number of Edges:", n_edges)

Number of Edges: 5765


In [53]:
# Mean degree: total of all node degrees divided by the number of nodes
avg_degree = sum(deg for _, deg in G.degree()) / len(G)
print("Average Degree:", avg_degree)

Average Degree: 44.34615384615385


In [54]:
# For every source node take the longest shortest-path distance to any node it
# can reach, then take the largest of those values. Unreachable pairs are simply
# absent from the per-source distance dicts, so this also works when G is
# disconnected.
diameter = max(
    max(distances.values())
    for _source, distances in nx.shortest_path_length(G)
)
print("Diameter of Connected Components:", diameter)

Diameter of Connected Components: 5


In [55]:
# Density: share of all possible node pairs that are actually joined by an edge
density = nx.density(G)
print("Network density:", density)

Network density: 0.17122067122067122


In [56]:
# Transitivity: fraction of connected triples of nodes that close into a triangle,
# measured over the whole graph
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)

Triadic closure: 0.6208305048136262


In [57]:
# Per-node centrality scores, each returned as a node -> score dict.
# Both calls use the library defaults.
# NOTE(review): eigenvector centrality is computed on the whole graph; if G is
# disconnected the scores may not be meaningful across components -- confirm.
betweenness_dict = nx.betweenness_centrality(G) 
eigenvector_dict = nx.eigenvector_centrality(G)

In [58]:
# Attach both centrality scores to the nodes so they travel with the graph object
nx.set_node_attributes(G, values=betweenness_dict, name='betweenness')
nx.set_node_attributes(G, values=eigenvector_dict, name='eigenvector')

In [59]:
# Rank (node, score) pairs from highest to lowest betweenness.
# sorted() is stable, so tied scores keep their original dict order.
sorted_betweenness = sorted(
    betweenness_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

print("Top 10 nodes by betweenness centrality:")
for entry in sorted_betweenness[:10]:
    print(entry)

Top 10 nodes by betweenness centrality:
('https://ucflakenonamedicalcenter.com/patients/insurance.dot', 0.17796385825339084)
('https://ucflakenonamedicalcenter.com/legal/index.dot', 0.10722249130639364)
('https://ucflakenonamedicalcenter.com/covid-19/covid-19-vaccine-information/', 0.08159806991855742)
('https://ucflakenonamedicalcenter.com/covid-19/latest-updates/staying-connected-from-a-distance.dot', 0.07759143167008295)
('https://ucflakenonamedicalcenter.com/', 0.0616651317558549)
('https://ucflakenonamedicalcenter.com/professionals/employees/', 0.05533957283840775)
('https://ucflakenonamedicalcenter.com/legal/', 0.04226387335322465)
('https://ucflakenonamedicalcenter.com/covid-19/covid-19-vaccine-information/covid-19-vaccine-faqs.dot', 0.03950841164091848)
('https://ucflakenonamedicalcenter.com/covid-19/', 0.034196298526578135)
('https://ucflakenonamedicalcenter.com/patients/florida-pricing-transparency.dot', 0.025229728775075354)


In [60]:
# Rank (node, score) pairs from highest to lowest eigenvector centrality.
# sorted() is stable, so tied scores keep their original dict order.
sorted_eigenvector = sorted(
    eigenvector_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

print("Top 10 nodes by eigenvector centrality:")
for entry in sorted_eigenvector[:10]:
    print(entry)

Top 10 nodes by eigenvector centrality:
('https://ucflakenonamedicalcenter.com/covid-19/', 0.12093040226236088)
('https://ucflakenonamedicalcenter.com/covid-19/latest-updates/enhanced-safety-protections.dot', 0.12081164248896978)
('https://ucflakenonamedicalcenter.com/covid-19/covid-19-vaccine-information/', 0.12073856653691616)
('https://ucflakenonamedicalcenter.com/covid-19/visitor-policy.dot', 0.12059456247547833)
('https://ucflakenonamedicalcenter.com/legal/index.dot', 0.12027709648488526)
('https://ucflakenonamedicalcenter.com/patients/insurance.dot', 0.1201648341882133)
('https://ucflakenonamedicalcenter.com/professionals/employees/', 0.11992098704756803)
('https://ucflakenonamedicalcenter.com/patients/registration.dot', 0.11988218394817408)
('https://ucflakenonamedicalcenter.com/about/legal/er-wait-times.dot', 0.11986940256879423)
('https://ucflakenonamedicalcenter.com/professionals/physicians/', 0.11986648272907549)


# Function to calculate network metrics based on manifest

In [85]:
def _read_node_names(path):
    """Return the first column of every non-blank data row of a node CSV (header skipped)."""
    with open(path, 'r', newline='') as nodecsv:
        nodereader = csv.reader(nodecsv)
        next(nodereader, None)  # Drop the header row; tolerate an empty file
        # csv.reader yields [] for blank lines, which would break row[0]
        return [row[0] for row in nodereader if row]


def _read_hyperlink_edges(path):
    """Return (column 1, column 2) tuples for each edge-CSV row whose first column is "Hyperlink"."""
    with open(path, 'r', newline='') as edgecsv:
        edgereader = csv.reader(edgecsv)
        # The "Hyperlink" filter already rejects the header row, so nothing is
        # sliced off afterwards (slicing would discard the first real edge).
        # The length check skips blank or truncated rows.
        return [
            tuple(row[1:3])
            for row in edgereader
            if len(row) >= 3 and row[0] == "Hyperlink"
        ]


def _summarize_graph(G):
    """Return the whole-graph metrics reported by network_metrics as a dict."""
    n_nodes = G.number_of_nodes()
    total_degree = sum(degree for _, degree in G.degree())
    return {
        'n_nodes': n_nodes,
        'n_edges': G.number_of_edges(),
        # An empty graph has no average degree; report 0 instead of dividing by zero
        'avg_degree': total_degree / n_nodes if n_nodes else 0,
        # Largest shortest-path distance between any two mutually reachable nodes;
        # default=0 covers a graph with no nodes
        'diameter': max(
            (max(distances.values()) for _, distances in nx.shortest_path_length(G)),
            default=0,
        ),
        'density': nx.density(G),
        'triadic_closure': nx.transitivity(G),
    }


def network_metrics(manifest):
    """Compute summary network metrics for every network listed in a manifest.

    For each manifest row an undirected graph is built from the node CSV
    ("Node_File") and the "Hyperlink" rows of the edge CSV ("Edges_File").
    Node degree is stored as the 'degree' node attribute and the graph is
    written to a GEXF file.

    Parameters
    ----------
    manifest : pandas.DataFrame
        Must contain the columns "Node_File" and "Edges_File" holding CSV paths.
        Any index is accepted; rows are processed in their stored order.

    Returns
    -------
    pandas.DataFrame
        A copy of ``manifest`` with the columns n_nodes, n_edges, avg_degree,
        diameter, density and triadic_closure added (or overwritten).

    Side effects
    ------------
    Writes one ``<prefix>.gexf`` file per row, where ``<prefix>`` is the text of
    "Node_File" before its first "-" (kept verbatim, including any trailing space).
    """
    output_df = manifest.copy()

    metric_names = ['n_nodes', 'n_edges', 'avg_degree', 'diameter', 'density', 'triadic_closure']
    per_network = []

    # Iterate over the column values directly so the result does not depend on
    # the manifest having a default 0..n-1 index.
    for node_file, edges_file in zip(manifest["Node_File"], manifest["Edges_File"]):
        G = nx.Graph()
        G.add_nodes_from(_read_node_names(node_file))
        G.add_edges_from(_read_hyperlink_edges(edges_file))

        # Stored on the graph so the degree is exported with the GEXF file
        nx.set_node_attributes(G, dict(G.degree()), 'degree')

        per_network.append(_summarize_graph(G))

        nx.write_gexf(G, node_file.split("-")[0] + ".gexf")

    # Assign whole columns at once so pandas picks a fitting dtype per metric,
    # rather than writing floats cell-by-cell into integer-initialised columns.
    for name in metric_names:
        output_df[name] = [metrics[name] for metrics in per_network]

    return output_df

In [86]:
# Read the list of networks to process, compute their metrics, and save the
# resulting table next to the notebook.
manifest = pd.read_excel('network_manifest.xlsx')
metrics_table = network_metrics(manifest)
metrics_table.to_excel("network_metrics.xlsx")