# Creating Deck.GL visualizations using pydeck

### Installations

In [1]:
#!pip install pydeck

Collecting pydeck
  Downloading pydeck-0.7.1-py2.py3-none-any.whl (4.3 MB)
[K     |████████████████████████████████| 4.3 MB 3.3 MB/s eta 0:00:01


Installing collected packages: pydeck
Successfully installed pydeck-0.7.1


### Imports

In [15]:
import pandas as pd
import geopandas as gpd
import pydeck as pdk
import urllib, json, requests
import numpy as np
import math

In [5]:
# Opening JSON file
f = open('gz_2010_us_050_00_500k.json', encoding = "ISO-8859-1")
 
# returns JSON object as a dictionary
dataGeo = json.load(f)

# Closing file
f.close()

# Mapping the prevalences and the health score to the geojson based on FIPS

In [7]:
counties = pd.read_csv("uscounties.csv")

In [9]:
for i in range(len(dataGeo['features'])):
    fips = dataGeo['features'][i]['properties']['GEO_ID'][-5:].lstrip("0")
    dataGeo['features'][i]['properties']["FIPS"] = fips

### Loading and manipulating each dataset to include FIPS of each county

In [10]:
ff = pd.read_csv("Fast_Food_Restaurants_US.csv")

In [11]:
diabetes = pd.read_excel("IHME_county_data_Diabetes_NATIONAL.xlsx", sheet_name="Total", header=1)

In [12]:
obesity = pd.read_excel("IHME_county_data_LifeExpectancy_Obesity_PhysicalActivity_NATIONAL.xlsx", sheet_name="Obesity")

In [13]:
le = pd.read_excel("IHME_county_data_LifeExpectancy_Obesity_PhysicalActivity_NATIONAL.xlsx", sheet_name="Life Expectancy")

In [14]:
ff["FIPS"] = 0
for i in range(len(ff)):
    lat = ff.latitude[i]
    long = ff.longitude[i]
    with urllib.request.urlopen("https://geo.fcc.gov/api/census/area?lat=" + str(lat) + "&lon=" + str(long) + "&format=json") as url:
        data = json.loads(url.read().decode())
    ff["FIPS"][i] = json.loads(json.dumps(data, indent=4, sort_keys=True))["results"][0]['county_fips']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ff["FIPS"][i] = json.loads(json.dumps(data, indent=4, sort_keys=True))["results"][0]['county_fips']


### Clean and merge each health issue dataset with fast food data and county data

In [16]:
ff = ff.drop("Unnamed: 0", axis=1)

In [17]:
diabetes = diabetes[diabetes.FIPS > 60].reset_index(drop=True)

In [18]:
diabetes["DiabetesPrevalence"] = (diabetes["Prevalence, 2012, Females"] + diabetes["Prevalence, 2012, Males"])/2

In [19]:
diabetes = diabetes[["Location", "FIPS", "DiabetesPrevalence"]]

In [20]:
diabetesFF = ff.merge(diabetes, left_on="FIPS", right_on="FIPS")

In [21]:
diabetesFFC = diabetesFF.merge(counties, left_on='FIPS', right_on='county_fips')

In [22]:
temp2 = diabetesFFC.groupby("county_fips").agg({'latitude': 'mean', 'longitude': 'mean', 'name':'count', 'DiabetesPrevalence':'mean', 'population':'mean'})
temp2["PopPerFF"] = temp2.population / temp2.name

In [23]:
temp2 = temp2.reset_index()
temp2.DiabetesPrevalence = round(temp2.DiabetesPrevalence,2)
diabetesVis2 = temp2[["county_fips", "DiabetesPrevalence", "PopPerFF"]]

In [24]:
obesity["ObesityPrevalence"] = (obesity["Male obesity  prevalence, 2011* (%)"] + obesity["Female obesity prevalence, 2011* (%)"])/2

In [25]:
obesity = obesity[["State", "County", "ObesityPrevalence"]][~obesity.County.isna()].reset_index(drop=True)

In [26]:
obesity["CoSt"] = obesity["County"] + ", " + obesity["State"]
counties["CoSt"] = counties["county"] + ", " + counties["state_name"]

In [27]:
obesity = obesity.merge(counties, left_on="CoSt", right_on="CoSt")

In [28]:
obesityFF = ff.merge(obesity, left_on="FIPS", right_on="county_fips")

In [29]:
temp = obesityFF.groupby("county_fips").agg({'latitude': 'mean', 'longitude': 'mean', 'name':'count', 'ObesityPrevalence':'mean', 'population':'mean'})

In [30]:
temp["PopPerFF"] = temp.population / temp.name
temp = temp.reset_index()
temp.ObesityPrevalence = round(temp.ObesityPrevalence,2)
obesityVis2 = temp[["county_fips", "ObesityPrevalence", "PopPerFF"]]

In [31]:
le["LifeExpectancy"] = (le["Male life expectancy, 2010 (years)"] + le["Female life expectancy, 2010 (years)"])/2

In [32]:
le = le[["State", "County", "LifeExpectancy"]][~le.County.isna()].reset_index(drop=True)

In [33]:
le["CoSt"] = le["County"] + ", " + le["State"]
le = le.merge(counties, left_on="CoSt", right_on="CoSt")

In [34]:
leFF = ff.merge(le, left_on="FIPS", right_on="county_fips")

In [35]:
temp3 = leFF.groupby("county_fips").agg({'latitude': 'mean', 'longitude': 'mean', 'name':'count', 'LifeExpectancy':'mean', 'population':'mean'})


In [36]:
temp3["PopPerFF"] = temp3.population / temp3.name
temp3 = temp3.reset_index()
temp3.LifeExpectancy = round(temp3.LifeExpectancy,2)
temp3.PopPerFF = temp3.PopPerFF.astype(int)
leVis2 = temp3[["county_fips", "LifeExpectancy", "PopPerFF"]]

### Now place data into geojson to call in deck.gl

In [37]:
for i in range(0,len(dataGeo['features'])):
    geoFIPS = dataGeo['features'][i]['properties']['FIPS']
    if(len(diabetesVis2.DiabetesPrevalence[diabetesVis2.county_fips == int(geoFIPS)])>0):
        dataGeo['features'][i]['properties']['DiabetesPrevalence'] = diabetesVis2.DiabetesPrevalence[diabetesVis2.county_fips == int(geoFIPS)].item()
    else:
        dataGeo['features'][i]['properties']['DiabetesPrevalence'] = np.nan
    if(len(obesityVis2.ObesityPrevalence[obesityVis2.county_fips == int(geoFIPS)])>0):
        dataGeo['features'][i]['properties']['ObesityPrevalence'] = obesityVis2.ObesityPrevalence[obesityVis2.county_fips == int(geoFIPS)].item()
    else:
        dataGeo['features'][i]['properties']['ObesityPrevalence'] = np.nan
    if(len(leVis2.LifeExpectancy[leVis2.county_fips == int(geoFIPS)])>0):
        dataGeo['features'][i]['properties']['LifeExpectancy'] = leVis2.LifeExpectancy[leVis2.county_fips == int(geoFIPS)].item()
        dataGeo['features'][i]['properties']['PopulationPerFastFoodRestaurant'] = leVis2.PopPerFF[leVis2.county_fips == int(geoFIPS)].item()
    else:
        dataGeo['features'][i]['properties']['LifeExpectancy'] = np.nan
        dataGeo['features'][i]['properties']['PopulationPerFastFoodRestaurant'] = np.nan
    

## Create all 3 Deck.GL visualizations for Visualization #2

### Drop NA values

In [57]:
with open('testing.json', 'w') as f:
    json.dump(dataGeo, f)

In [58]:
t = gpd.read_file("testing.json")

In [59]:
outT = t[~t.LifeExpectancy.isna()].reset_index(drop=True)

### Get data ranges to manually choose color

In [298]:
255/(leVis2.LifeExpectancy.max() - leVis2.LifeExpectancy.min())

17.171717171717177

In [300]:
leVis2.LifeExpectancy.min()

68.4

### Create Deck.GL Visualization

In [69]:
DATA_URL = "https://raw.githubusercontent.com/visgl/deck.gl-data/master/examples/geojson/vancouver-blocks.json"

INITIAL_VIEW_STATE = pdk.ViewState(
  latitude=40,
  longitude=-98,
  zoom=3,
  max_zoom=16,
  pitch=45,
  bearing=0
)

polygon = pdk.Layer(
    'PolygonLayer',
    stroked=False,
    # processes the data as a flat longitude-latitude pair
    get_polygon='-',
    get_fill_color=[0, 0, 0, 20]
)

geojson = pdk.Layer(
    'GeoJsonLayer',
    outT,
    opacity=0.8,
    stroked=False,
    filled=True,
    extruded=True,
    wireframe=False,
    get_elevation='PopulationPerFastFoodRestaurant*5',
    get_fill_color='[255-((LifeExpectancy-68.4)*17.17), (LifeExpectancy-68.4)*17.17, 0]',
    get_line_color=[255, 255, 255],
    pickable=True
)



r = pdk.Deck(
    layers=[polygon, geojson],
    initial_view_state=INITIAL_VIEW_STATE,
    tooltip={
        'html': '<b> {NAME} {LSAD}  </b><br><b>Population per Fast Food Restaurant:</b> {PopulationPerFastFoodRestaurant} <br> <b>Life Expectancy:</b> {LifeExpectancy}',
        'style': {
            'color': 'white'
        }
    })

r.to_html("deckGLLEVis2.html")

### Drop NA values

In [46]:
outT2 = t[~t.ObesityPrevalence.isna()].reset_index(drop=True)

### Get data ranges to manually choose color

In [295]:
255/(obesityVis2.ObesityPrevalence.max() - obesityVis2.ObesityPrevalence.min())

8.225806451612904

In [296]:
obesityVis2.ObesityPrevalence.min()

19.6

### Create Deck.GL Visualization

In [70]:
DATA_URL = "https://raw.githubusercontent.com/visgl/deck.gl-data/master/examples/geojson/vancouver-blocks.json"

INITIAL_VIEW_STATE = pdk.ViewState(
  latitude=40,
  longitude=-98,
  zoom=3,
  max_zoom=16,
  pitch=45,
  bearing=0
)

polygon = pdk.Layer(
    'PolygonLayer',
    stroked=False,
    # processes the data as a flat longitude-latitude pair
    get_polygon='-',
    get_fill_color=[0, 0, 0, 20]
)

geojson = pdk.Layer(
    'GeoJsonLayer',
    outT2,
    opacity=0.8,
    stroked=False,
    filled=True,
    extruded=True,
    wireframe=False,
    get_elevation='PopulationPerFastFoodRestaurant*5',
    get_fill_color='[(ObesityPrevalence-19.6)*8.2, 255-((ObesityPrevalence-19.6)*8.2), 0]',
    get_line_color=[255, 255, 255],
    pickable=True
)

r = pdk.Deck(
    layers=[polygon, geojson],
    initial_view_state=INITIAL_VIEW_STATE,
    tooltip={
        'html': '<b> {NAME} {LSAD}  </b><br><b>Population per Fast Food Restaurant:</b> {PopulationPerFastFoodRestaurant} <br> <b>Obesity Prevalence:</b> {ObesityPrevalence}%',
        'style': {
            'color': 'white'
        }
    })

r.to_html("deckGLObesVis2.html")

### Drop NA values

In [48]:
outT3 = t[~t.DiabetesPrevalence.isna()].reset_index(drop=True)

### Get data ranges to manually choose color

In [38]:
255/(diabetesVis2.DiabetesPrevalence.max() - diabetesVis2.DiabetesPrevalence.min())

16.547696301103176

In [39]:
diabetesVis2.DiabetesPrevalence.min()

8.87

### Create Deck.GL Visualization

In [71]:
DATA_URL = "https://raw.githubusercontent.com/visgl/deck.gl-data/master/examples/geojson/vancouver-blocks.json"

INITIAL_VIEW_STATE = pdk.ViewState(
  latitude=40,
  longitude=-98,
  zoom=3,
  max_zoom=16,
  pitch=45,
  bearing=0
)

polygon = pdk.Layer(
    'PolygonLayer',
    stroked=False,
    # processes the data as a flat longitude-latitude pair
    get_polygon='-',
    get_fill_color=[0, 0, 0, 20]
)

geojson = pdk.Layer(
    'GeoJsonLayer',
    outT3,
    opacity=0.8,
    stroked=False,
    filled=True,
    extruded=True,
    wireframe=False,
    get_elevation='PopulationPerFastFoodRestaurant*5',
    get_fill_color='[(DiabetesPrevalence-8.87)*16.5, 255-((DiabetesPrevalence-8.87)*16.5), 0]',
    get_line_color=[255, 255, 255],
    pickable=True
)

r = pdk.Deck(
    layers=[polygon, geojson],
    initial_view_state=INITIAL_VIEW_STATE,
    tooltip={
        'html': '<b> {NAME} {LSAD}  </b><br><b>Population per Fast Food Restaurant:</b> {PopulationPerFastFoodRestaurant} <br> <b>Diabetes Prevalence:</b> {DiabetesPrevalence}%',
        'style': {
            'color': 'white'
        }
    })

r.to_html("deckGLDiabVis2.html")

# Vis 3

### Load the health scores data (created in HealthScoreCreate.csv)

In [63]:
healthScores = pd.read_csv("HealthScores.csv").drop("Unnamed: 0", axis=1)

### Get the ranges to manually choose color

In [64]:
healthScores.OverallHealthScore.describe()

count    9750.000000
mean        0.602702
std         0.066648
min         0.304854
25%         0.554771
50%         0.601553
75%         0.649865
max         0.948513
Name: OverallHealthScore, dtype: float64

In [65]:
255/(0.948513-0.304854)

396.17250749232124

### Final cleaning of the data

In [66]:
healthScores['OverallHealthScore'] = round(healthScores.OverallHealthScore * 100,2)

In [67]:
healthScores["PrevalenceDiab"] = round(healthScores.PrevalenceDiab,2)
healthScores["PrevalenceObes"] = round(healthScores.PrevalenceObes,2)
healthScores["LE"] = round(healthScores.LE,2)

### Create Deck.GL visualization

In [72]:
# Define a layer to display on a map
layer = pdk.Layer(
    "ScatterplotLayer",
    healthScores,
    pickable=True,
    opacity=0.8,
    stroked=True,
    filled=True,
    radius_scale=6,
    radius_min_pixels=4,
    radius_max_pixels=4,
    line_width_min_pixels=1,
    get_position=['longitude','latitude'],#"coordinates",
    get_radius=100,
    #get_fill_color=[255, 140, 0],
    get_fill_color="[255-((OverallHealthScore-30.4854)*3.9617), (OverallHealthScore-30.4854)*3.9617, 0]",
    get_line_color=[0, 0, 0, 0],
)

# Set the viewport location
view_state = pdk.ViewState(latitude=40, longitude=-98, zoom=3, bearing=0, pitch=0)

# Render
r = pdk.Deck(layers=[layer], initial_view_state=view_state, tooltip={
        'html': '<b> {name} </b><br><b>Address:</b> {address} <br><b>City:</b> {city} <br> <b>State:</b> {province} <br><b>Diabetes Prevalence:</b> {PrevalenceDiab}% <br><b>Obesity Prevalence:</b> {PrevalenceObes}% <br><b>Life Expectancy:</b> {LE} years<br><b>Health Score:</b> {OverallHealthScore}',
        'style': {
            'color': 'white'
        }
    })
r.to_html("deckGLVis3.html")