# Network construction example

This notebook illustrates how to construct networkx graphs (one per region and year) from the physician network data and how to compute a few summary measures on them.

# Preliminaries

In [1]:
# load some packages
import pandas as pd
import networkx as nx
import numpy as np
import config # edit data path in here

# Load the data

In [2]:
# read the gzip-compressed edge list into edges_df
edges_path = config.DATA_PATH + "data/network_panel_undirected_local_hsa_edges.csv.gz"
edges_df = pd.read_csv(edges_path)

### edges_df codebook
* __year__ is the year of observation
* __hsanum__ is the identifier for the region
* __npi_a__ is the unique identifier for physician a
* __npi_b__ is the unique identifier for physician b
* __a2b__ is the number of patients who were referred from physician a to physician b (could be used for edge weighting)
* __b2a__ is the number of patients who were referred from physician b to physician a (could be used for edge weighting)

# Functions

In [3]:
def build_network(edges_df, hsanum, year):
    """Create a networkx graph given a dataframe of edges, a region, and year.

    Parameters
    ----------
    edges_df : pandas.DataFrame
        Edge list with columns ``hsanum``, ``year``, ``npi_a``, ``npi_b``,
        ``a2b`` and ``b2a``.
    hsanum
        Region identifier; only rows whose ``hsanum`` equals it are used.
    year
        Year of observation; only rows whose ``year`` equals it are used.

    Returns
    -------
    networkx.Graph
        Undirected, non-multi graph with one edge per selected
        (``npi_a``, ``npi_b``) pair, carrying ``a2b`` and ``b2a`` as edge
        attributes.

    Raises
    ------
    AssertionError
        If the constructed graph is directed or is a multigraph.
    """
    # keep only the edges observed in the requested region and year
    in_region_year = (edges_df.hsanum == hsanum) & (edges_df.year == year)

    # populate an undirected graph; create_using states the graph type
    # explicitly instead of relying on the function's default
    G = nx.from_pandas_edgelist(df=edges_df[in_region_year],
                                source="npi_a",
                                target="npi_b",
                                edge_attr=["a2b",
                                           "b2a"],
                                create_using=nx.Graph())

    # sanity check
    assert G.is_directed() is False
    assert G.is_multigraph() is False

    # return
    return G

# Build the networks

In [4]:
# one row per distinct (region, year) pair; network measures are added later
region_year_cols = ["hsanum", "year"]
df = edges_df.loc[:, region_year_cols].drop_duplicates()

In [5]:
# build the networks
# Partition edges_df once by (region, year) so each graph is built from its
# own rows, rather than filtering the full edge list again for every row of df.
graphs_by_key = {
    (hsanum, year): build_network(edges_df=group, hsanum=hsanum, year=year)
    for (hsanum, year), group in edges_df.groupby(["hsanum", "year"], sort=False)
}

# attach the graph belonging to each region X year row
df = df.assign(G=df.apply(lambda row: graphs_by_key[(row["hsanum"], row["year"])],
                          axis=1))

In [6]:
# summary measures to compute for every graph: column name -> networkx function
graph_measures = {
    "nnodes": nx.number_of_nodes,
    "nedges": nx.number_of_edges,
    "density": nx.density,
    "degree_assortativity": nx.degree_assortativity_coefficient,
}

# evaluate each measure on the G column and append the results as new columns
df = df.assign(**{column: df.G.apply(measure)
                  for column, measure in graph_measures.items()})

In [7]:
# display the region X year table with its graphs and measures
df

Unnamed: 0,hsanum,year,G,nnodes,nedges,density,degree_assortativity
0,1001,2014,"(1750540035, 1982684999, 1245433648, 184138472...",98,1464,0.308016,-0.429306
1464,1002,2014,"(1376504977, 1700952751, 1679545131, 171008073...",37,413,0.620120,-0.276127
1877,1003,2014,"(1104821099, 1679847776, 1689769739, 173018496...",25,143,0.476667,-0.350290
2020,1004,2014,"(1700811072, 1952375354, 1699838185, 172006773...",117,2421,0.356764,-0.410506
4441,1006,2014,"(1710935457, 1801811799, 1194718759, 128562782...",8,14,0.500000,-0.458333
...,...,...,...,...,...,...,...
39139836,53021,2017,"(1295896751, 1700914306, 1295282457, 133653985...",50,743,0.606531,-0.232525
39140579,53023,2017,"(1114969698, 1447256888, 1164494241, 171095916...",9,34,0.944444,-0.283019
39140613,53024,2017,"(1326146846, 1427291491, 1427153873, 142733473...",7,21,1.000000,
39140634,53025,2017,"(1336218890, 1457352452, 1861494460, 190229213...",10,45,1.000000,


# Compute Curvature Filtration

In [18]:
# subsample
# fixed seed so the same 200 region X year networks are drawn on every run
sample1 = df.sample(n=200, random_state=1)

# persist the subsample (graphs included) for later use
sample1.to_pickle("patient_network_sample1.pkl")

# preview goes last so the cell actually displays it
sample1.head()

In [19]:
# draw a second subsample of 200 region X year networks;
# fixed seed so this draw is reproducible on every run
sample2 = df.sample(n=200, random_state=2)

# persist the subsample (graphs included) for later use
sample2.to_pickle("patient_network_sample2.pkl")

# preview goes last so the cell actually displays it
sample2.head()