# Import

In [1]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
import re
import plotly.express as px
from jupyterthemes import jtplot
jtplot.style()

# Read in

In [2]:
# Load the two CSV inputs from the neo4j data folder.
#   hosp_geo: supplies the to_npi / address_hospital / lng_hospital / lat_hospital columns used below.
#   hc:       community assignments; the communityId, npi and organization columns are used below.
hosp_geo = pd.read_csv('../data/neo4j/hosp_geo.csv')
hc = pd.read_csv('../data/neo4j/hosp_simple_communities.csv')

# Investigate the communities derived from Neo4J

Here we group by community and count the NPIs in each group. Because the data came from a Neo4j export, where every node is distinct, this is implicitly a distinct count.

There are only 13 actual communities; all the other communities have no NPI associated with them.

In [3]:
# Size of every community: number of non-null npi values per communityId,
# ordered from the largest community to the smallest.
hc_counts = (
    hc.groupby('communityId')['npi']
    .count()
    .rename('count')
    .reset_index()
    .sort_values(by='count', ascending=False)
)
hc_counts.head(15)

Unnamed: 0,communityId,count
127,3302,1898
126,3300,926
109,2725,853
169,5231,637
0,859,501
178,5871,329
110,2727,327
1,861,221
2,862,121
125,3299,43


Let's only keep the communities with NPIs in them.

In [4]:
# communityIds that contain at least one NPI
real_coms = hc_counts[hc_counts['count'].gt(0)]['communityId']

# Restrict the community assignments to those non-empty communities.
hc_clean = hc.loc[hc['communityId'].isin(real_coms)]

For the sake of combining the communities data with our hosp_geo data, let's create separate community tables for the providers and the hospitals.

In [5]:
# Split on the organization column: rows that have an organization go to the
# hospital table, rows without one go to the provider table.
hosp_coms = hc_clean[hc_clean['organization'].notna()]
prov_coms = hc_clean[hc_clean['organization'].isna()]

These tables will also be useful for adding properties to the Neo4j graph, so we save them to disk.

In [6]:
# Write both splits to disk; index = False keeps the DataFrame index out of the files.
# NOTE(review): npi is displayed as a float column in the merged table further down, so it is
# likely written with a trailing ".0" here - confirm that matches what the graph import expects.
hosp_coms.to_csv('../data/neo4j/hosp_simple_coms.csv', index = False)
prov_coms.to_csv('../data/neo4j/prov_simple_coms.csv', index = False)

Count the NPIs in each community, then merge those counts into the hosp_coms table on communityId.

In [7]:
# Number of NPI rows in each remaining community (count() ignores null npi values).
coms_counts = (
    hc_clean.groupby('communityId')['npi']
    .count()
    .reset_index()
    .rename(columns={'npi': 'community_count'})
)

# Attach each community's size to its hospital rows, largest community first.
# The join key is spelled out so that a column name the two frames happen to share
# can never silently become part of the key; validate asserts that every
# communityId occurs exactly once on the counts side.
hosp_coms_counts = (
    coms_counts
    .merge(hosp_coms, on='communityId', validate='one_to_many')
    .sort_values('community_count', ascending=False)
)

Now also merge in the addresses, latitudes, and longitudes from hosp_geo.

The result is a summary table of all the hospitals in the Nashville CBSA together with their respective communityId, count, and address details.

In [8]:
# One row per distinct (NPI, address, longitude, latitude) combination found in hosp_geo.
hosp_addresses = (
    hosp_geo
    .loc[:, ['to_npi', 'address_hospital', 'lng_hospital', 'lat_hospital']]
    .drop_duplicates()
)

In [11]:
# Inner-join the community table to the address table. Hospitals are matched by NPI,
# which is named npi on the left side and to_npi on the right side.
hospital_coms = pd.merge(
    hosp_coms_counts,
    hosp_addresses,
    how='inner',
    left_on='npi',
    right_on='to_npi',
)

In [12]:
# Show the merged hospital / community / address table as the cell's output.
hospital_coms

Unnamed: 0,communityId,community_count,npi,name,organization,to_npi,address_hospital,lng_hospital,lat_hospital
0,3302,1898,1396882000.0,,VANDERBILT UNIVERSITY MEDICAL CENTER,1396882205,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",-86.801524,36.142499
1,3302,1898,1558409000.0,,VANDERBILT UNIVERSITY MEDICAL CENTER,1558408633,"1601 23RD AVE S, NASHVILLE, TN 37212",-86.804185,36.138489
2,3302,1898,1598738000.0,,VANDERBILT STALLWORTH REHABILITATION HOSPITAL LP,1598738205,"2201 CHILDRENS WAY, NASHVILLE, TN 37212",-86.802498,36.138306
3,3300,926,1497827000.0,,MIDDLE TENNESSEE HOSPITALIST,1497827364,"3443 DICKERSON PK, SUITE 680, NASHVILLE, TN 37207",-86.750047,36.245245
4,3300,926,1023055000.0,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",1023055126,"2300 PATTERSON ST, NASHVILLE, TN 37203",-86.810282,36.153671
5,3300,926,1649576000.0,,CRESCENT MEDICAL GROUP PLLC,1649576414,"1412 COUNTY HOSPITAL RD, NASHVILLE, TN 37218",-86.84793,36.182484
6,3300,926,1679924000.0,,"CURAHEALTH NASHVILLE, LLC",1679924005,"1412 COUNTY HOSPITAL RD, NASHVILLE, TN 37218",-86.84793,36.182484
7,3300,926,1932146000.0,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",1932146032,"2300 PATTERSON ST, NASHVILLE, TN 37203",-86.810282,36.153671
8,3300,926,1265487000.0,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",1265487193,"313 N MAIN ST, ASHLAND CITY, TN 37015",-87.065626,36.277115
9,3300,926,1992776000.0,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",1992776405,"200 STONECREST BLVD, SMYRNA, TN 37167",-86.564678,35.975763
