# Visualizing Service Data

---
Create a geographic heatmap of service data. Use it to visually determine where patients are coming from.

### Import libraries

In [1]:
import pandas as pd
import gmplot
from IPython.display import display

### Read the data

In [3]:
# Load the combined service records; the first CSV column becomes the row index.
raw_data = pd.read_csv("combined.csv", index_col=0)

# Preview the first three records
raw_data.head(n=3)

Unnamed: 0,DOS,MRN_COMB,SERVICE,SITE,ZIP,lat,long
0,02/12/2018,,ep,Traverse City,49601.0,44.272229,-85.478806
1,10/05/2015,,ep,Toledo,43450.0,41.407624,-83.496357
2,06/29/2018,,ep,Toledo,49267.0,41.766931,-83.698657


In [8]:
# Pair each row's latitude with its longitude as a (lat, long) tuple
raw_data['coordinates'] = [
    (lat, lng) for lat, lng in zip(raw_data['lat'], raw_data['long'])
]
raw_data['coordinates'].head(n=5)

0    (44.2722287, -85.47880649999998)
1           (41.4076239, -83.4963567)
2    (41.76693119999999, -83.6986568)
3    (41.4646868, -83.99558789999998)
4    (40.8587757, -83.91089680000002)
Name: coordinates, dtype: object

In [9]:
# Distinct site names, in order of first appearance in the data
siteList = list(pd.unique(raw_data['SITE']))
siteList

['Traverse City',
 'Toledo',
 'Grand Rapids',
 'Jackson',
 'Marquette',
 'Petoskey',
 'Alpena',
 'St. Joe Ann Arbor',
 'West Branch',
 'Kalamazoo']

In [10]:
# Distinct service codes, in order of first appearance in the data
serviceList = list(pd.unique(raw_data['SERVICE']))
serviceList

['ep', 'cath', 'echo', 'mri', 'srg']

### Functions

In [26]:
def checkData(dataFrame):
    """Count rows per unique (lat, long) pair and preview the result.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Must contain ``lat`` and ``long`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per distinct coordinate with the columns ``lat``, ``long``
        and ``DOS``, where ``DOS`` holds the number of input rows found at
        that coordinate.
    """
    # size() counts the rows in each group directly, so the tally no longer
    # depends on an arbitrary column (previously DOS) being non-null. The
    # result column keeps the name "DOS" so existing consumers still work.
    aggregated = dataFrame.groupby(['lat', 'long']).size().reset_index(name="DOS")

    # Check the first 5 rows
    display(aggregated.head(n=5))
    # info() prints its report itself and returns None; wrapping it in
    # display() only added a stray "None" to the cell output.
    aggregated.info()
    return aggregated

In [24]:
def mapper(data, name):
    """Render a Google Maps heatmap of ``data`` and save it as an HTML file.

    Parameters
    ----------
    data : pandas.DataFrame
        Provides the point coordinates through its ``lat`` and ``long``
        columns.
    name : str
        Path of the HTML file to write. A missing parent folder is created.
    """
    import os  # local import: only needed to prepare the output folder

    # draw() writes the HTML file at `name`; without the folder in place the
    # per-site / per-service exports fail on a fresh checkout.
    folder = os.path.dirname(name)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Fetch map, center around point of interest (lat 43.0, lng -85.0, zoom 6)
    gmap = gmplot.GoogleMapPlotter(43.0, -85.0, 6)

    # Overlay our datapoints onto the map.
    # NOTE(review): only the coordinates are passed, so any per-coordinate
    # count column in `data` does not influence the heat intensity.
    # NOTE(review): `threshold` may not be accepted by newer gmplot releases;
    # confirm against the installed version.
    gmap.heatmap(
        lats=data['lat'],
        lngs=data['long'],
        threshold=100,
        radius=10,
        gradient=None,
        opacity=0.7,
        dissipating=True,
    )

    # Draw map
    gmap.draw(name)

In [16]:
def naming(name, folder):
    """Return the output path ``<folder>/<name>.html``.

    Both arguments are converted with ``str()`` before being joined.
    """
    return "/".join((str(folder), str(name) + ".html"))

### Check Data

In [29]:
# Summarise the records and export one heatmap per site (all services),
# then one heatmap per service (all sites).
for column, values, folder in (
    ('SITE', siteList, "allServiceOneLocation"),
    ('SERVICE', serviceList, "oneServiceAllLocation"),
):
    for value in values:
        print(value)
        data = checkData(raw_data[raw_data[column] == value])
        mapper(data, naming(value, folder))

Traverse City


Unnamed: 0,lat,long,DOS
0,41.885177,-84.059029,2
1,42.464344,-83.405644,5
2,42.465601,-83.095185,22
3,42.689883,-82.551047,1
4,43.683324,-85.499419,3


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 3 columns):
lat     30 non-null float64
long    30 non-null float64
DOS     30 non-null int64
dtypes: float64(2), int64(1)
memory usage: 800.0 bytes


None

Toledo


Unnamed: 0,lat,long,DOS
0,33.024321,-96.674504,2
1,36.125926,-83.826088,1
2,40.692802,-83.826088,3
3,40.719979,-84.154066,15
4,40.858776,-83.910897,5


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 3 columns):
lat     55 non-null float64
long    55 non-null float64
DOS     55 non-null int64
dtypes: float64(2), int64(1)
memory usage: 1.4 KB


None

Grand Rapids


Unnamed: 0,lat,long,DOS
0,33.594176,-117.573064,1
1,41.363469,-87.470303,3
2,42.005794,-86.519865,9
3,42.315121,-84.375233,3
4,42.327996,-85.569563,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 3 columns):
lat     88 non-null float64
long    88 non-null float64
DOS     88 non-null int64
dtypes: float64(2), int64(1)
memory usage: 2.1 KB


None

Jackson


Unnamed: 0,lat,long,DOS
0,41.893345,-84.626982,2
1,41.964595,-84.259486,6
2,42.100355,-84.616515,1
3,42.109989,-84.206799,4
4,42.134827,-84.511732,7


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 3 columns):
lat     22 non-null float64
long    22 non-null float64
DOS     22 non-null int64
dtypes: float64(2), int64(1)
memory usage: 608.0 bytes


None

Marquette


Unnamed: 0,lat,long,DOS
0,33.361596,-79.89688,7
1,34.058126,-101.934807,2
2,41.382257,-82.583495,3
3,41.975165,-85.62293,8
4,42.260941,-85.612648,3


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 3 columns):
lat     51 non-null float64
long    51 non-null float64
DOS     51 non-null int64
dtypes: float64(2), int64(1)
memory usage: 1.3 KB


None

Petoskey


Unnamed: 0,lat,long,DOS
0,42.06605,-88.016741,3
1,42.434457,-83.656121,33
2,42.464947,-83.003907,2
3,42.592855,-83.517682,1
4,42.597659,-82.9394,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 3 columns):
lat     37 non-null float64
long    37 non-null float64
DOS     37 non-null int64
dtypes: float64(2), int64(1)
memory usage: 968.0 bytes


None

Alpena


Unnamed: 0,lat,long,DOS
0,40.883077,-85.497418,5
1,41.846175,-83.698657,2
2,42.304722,-83.481942,1
3,42.464947,-83.003907,1
4,42.466205,-82.944778,2


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 3 columns):
lat     16 non-null float64
long    16 non-null float64
DOS     16 non-null int64
dtypes: float64(2), int64(1)
memory usage: 464.0 bytes


None

St. Joe Ann Arbor


Unnamed: 0,lat,long,DOS
0,41.948052,-83.400304,1
1,41.971748,-83.826088,1
2,42.080867,-83.656121,2
3,42.168921,-83.826088,2
4,42.189436,-83.485691,12


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 3 columns):
lat     21 non-null float64
long    21 non-null float64
DOS     21 non-null int64
dtypes: float64(2), int64(1)
memory usage: 584.0 bytes


None

West Branch


Unnamed: 0,lat,long,DOS
0,44.076893,-84.480261,3
1,44.325768,-83.464355,1
2,44.462692,-84.647912,4


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
lat     3 non-null float64
long    3 non-null float64
DOS     3 non-null int64
dtypes: float64(2), int64(1)
memory usage: 152.0 bytes


None

Kalamazoo


Unnamed: 0,lat,long,DOS
0,33.594176,-117.573064,1
1,42.180573,-85.571502,1
2,42.274377,-85.210127,1
3,42.293696,-84.752448,1
4,42.295611,-85.663917,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
lat     8 non-null float64
long    8 non-null float64
DOS     8 non-null int64
dtypes: float64(2), int64(1)
memory usage: 272.0 bytes


None

In [52]:
# Overall map: every service at every location in a single heatmap
allData = checkData(raw_data)

# Map centred on lat 43.0, lng -85.0 at zoom level 6
gmap = gmplot.GoogleMapPlotter(43.0, -85.0, 6)

# Overlay our datapoints onto the map
gmap.heatmap(
    lats=allData['lat'],
    lngs=allData['long'],
    radius=19,
    opacity=0.7,
    dissipating=True,
)

# Write the map to an HTML file in the working directory
gmap.draw('allServicesAllLocations.html')

Unnamed: 0,lat,long,DOS
0,33.024321,-96.674504,2
1,33.361596,-79.89688,7
2,33.594176,-117.573064,2
3,34.058126,-101.934807,2
4,36.125926,-83.826088,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 287 entries, 0 to 286
Data columns (total 3 columns):
lat     287 non-null float64
long    287 non-null float64
DOS     287 non-null int64
dtypes: float64(2), int64(1)
memory usage: 6.8 KB


None