In [None]:
import pydeck as pdk
from presentation import widen_margins

# Helper imported from the talk's local `presentation` module.
# NOTE(review): presumably adjusts the notebook page margins for presenting — confirm against that module.
widen_margins()

# pydeck: High scale geospatial visualization in Python

You can find this Notebook and the others associated with this talk at: https://github.com/ajduberstein/jupytercon2020

Follow the installation instructions before diving into this demo: https://pydeck.gl/installation.html

### Use case: Detecting click farms

Fraudsters might set up account registration “click farms” to create accounts that they can then resell.

These user accounts might take advantage of first time user promotions (e.g. “Sign up and get your first order free”)

![img](https://video-images.vice.com/_uncategorized/1564604987916-image.jpeg?resize=500:*)

Logging GPS location of sign-up from a mobile app is one way to combat this kind of fraud. Below we see a map of signup locations in Ningbo, China.

In [None]:
# Synthetic sign-up locations, fetched by URL rather than loaded into Python
SIGNUPS_URL = 'https://raw.githubusercontent.com/ajduberstein/geo_datasets/master/synthetic_signups.csv'

# Flat, north-up camera over the sign-up locations
view = pdk.ViewState(
    latitude=29.87540918861734,
    longitude=121.68547038460048,
    bearing=0,
    pitch=0,
    zoom=9.029756252609365,
)

# One dot per sign-up row, positioned from the CSV's `x` / `y` columns
layer = pdk.Layer(
    'ScatterplotLayer',
    data=SIGNUPS_URL,
    get_position=['x', 'y'],
    radius_min_pixels=1,
    pickable=True,
    auto_highlight=True,
    stroked=True,
    get_line_color=[255, 255, 255],
    get_fill_color=[200, 0, 200, 170],
    get_radius=350,
)

deck = pdk.Deck(
    layers=[layer],
    initial_view_state=view,
    map_style=pdk.map_styles.DARK_NO_LABELS,
    tooltip=None,
)
deck.show()

In [None]:
# Seven-step RGB ramp, light to dark.
# Color palette from ColorBrewer2: https://colorbrewer2.org/
COLOR_RANGE = [
    [254, 217, 118], [254, 178, 76], [253, 141, 60], [252, 78, 42],
    [227, 26, 28], [189, 0, 38], [128, 0, 38],
]

# Same sign-up data as the scatterplot, drawn as extruded hexagons instead of dots
layer = pdk.Layer(
    'HexagonLayer',
    data=SIGNUPS_URL,
    get_position=['x', 'y'],
    elevation_scale=50,
    stroked=True,
    opacity=0.6,
    pickable=True,
    auto_highlight=True,
    color_range=COLOR_RANGE,
    elevation_range=[0, 1000],
    extruded=True,
    material=False,
    coverage=1,
)

# Reuses the `view` defined for the scatterplot map
deck = pdk.Deck(
    layers=[layer],
    initial_view_state=view,
    map_style=pdk.map_styles.SATELLITE,
    tooltip={'text': 'Number of sign-ups in hexagon: {elevationValue}'},
)
deck.show()

## How does pydeck work?

pydeck is a wrapper around [deck.gl](https://deck.gl), which is a JavaScript library for high scale geospatial visualization built at Uber.

### High volumes of data

In [None]:
from IPython.display import IFrame
# Embed the deck.gl scatterplot example page in the notebook
IFrame('https://deck.gl/examples/scatterplot-layer/', width=1400, height=700)

### Update on high volume data

In [None]:
# Embed the deck.gl data-filter-extension example page in the notebook
IFrame('https://deck.gl/examples/data-filter-extension/', width=1400, height=700)

In [None]:
from IPython.display import IFrame

# Embed the deck.gl playground page.
# See the API docs: https://deck.gl/docs/api-reference/json/overview
IFrame('https://deck.gl/playground', width=1400, height=700)

# Under the hood

### Beneath the hood, most of the heavy lifting of pydeck is done by two JavaScript libraries:

```
pydeck  <-  Python library that writes out a config for a deck.gl visualization in JSON
  |
  +---- @deck.gl/json  <-  JavaScript library that converts a JSON config to a deck.gl visualization
  |
  +---- @deck.gl/jupyter-widget  <-  Connects visualization to Jupyter, Streamlit, Colab
```

### The goal is that JavaScript is abstracted away, so that you only have to understand Python to use most of deck.gl.

In [None]:
import pprint
import json

# Print the JSON config currently held by the deck's Jupyter widget.
# `deck` is whichever Deck was built last; when the cells run in order that is
# the sign-ups HexagonLayer map.
# Copy the JSON from the signups visualization into the IFrame above
print(deck.deck_widget.json_input)

### Like its parent library, pydeck aims to be interactive and easy-to-update

In [None]:
from datetime import datetime
import time

import ipywidgets
import pandas as pd

# CSV of whale observations; the code below reads its `timestamp`, `tag_ident`,
# `long` and `lat` columns
WHALES = 'https://raw.githubusercontent.com/ajduberstein/migration_data/master/whales.csv'

# Downloaded into a DataFrame (not passed by URL) so rows can be filtered per day later
df = pd.read_csv(WHALES)

def day_of_year_to_display(d: int) -> str:
    """Turn a day-of-year number into a "Month DD" label.

    Args:
        d: Day of the year, where 1 is January 1.

    Returns:
        The full month name and zero-padded day, e.g. ``'January 01'``.
        No year is supplied to the parser, so the mapping follows
        ``strptime``'s default year.
    """
    parsed = datetime.strptime(f'{d}', '%j')
    return f'{parsed:%B %d}'

def to_day_of_year(d: str) -> int:
    """Return the day of the year (1-366) for a timestamp string.

    Args:
        d: Timestamp formatted as ``'%Y-%m-%d %H:%M:%S'``,
            e.g. ``'2014-03-01 12:00:00'``.

    Returns:
        The ordinal day within that timestamp's year; January 1 is 1.

    Raises:
        ValueError: If ``d`` does not match the expected format.
    """
    # Read tm_yday instead of formatting with '%-j': the '-' flag is a
    # platform-specific strftime extension and is rejected on Windows.
    return datetime.strptime(d, '%Y-%m-%d %H:%M:%S').timetuple().tm_yday

# Derive a day-of-year column from the raw timestamp strings
df['day'] = df['timestamp'].apply(to_day_of_year)

# Give every tagged individual its own random color
color_lookup = pdk.data_utils.assign_random_colors(df['tag_ident'])
df['c'] = df['tag_ident'].apply(lambda tag: color_lookup.get(tag))
# Keep only the columns the layer and tooltip read
df = df[['c', 'tag_ident', 'day', 'long', 'lat']]

scatter = pdk.Layer(
    'ScatterplotLayer',
    data=df,
    get_position=['long', 'lat'],
    auto_highlight=True,
    pickable=True,
    radius_min_pixels=3,
    get_radius=100,
    get_fill_color='c',
)

view = pdk.ViewState(
    latitude=35.163421085393786,
    longitude=-128.40947805316154,
    bearing=0,
    pitch=0,
    zoom=3.275700834082031,
)

deck = pdk.Deck(
    layers=[scatter],
    initial_view_state=view,
    tooltip={'text': '{tag_ident} on day {day}'},
)

# Heading widget shown above the map; run() and display_all() overwrite its value
ts = ipywidgets.HTML(value='<h1>Blue whale migratory paths</h1>')

display(ts)
display(deck.show())

def run():
    """Animate the whale map one day at a time.

    For each day number from 1 through 365, the layer's data is replaced by
    the rows of ``df`` whose ``day`` lies in the 5-day window ending on that
    day, the deck is refreshed, the heading is set to that date, and the loop
    pauses for 0.1 seconds.

    Reads the notebook-level ``df``, ``deck`` and ``ts``. None of them is
    rebound here, so no ``global`` declaration is needed.
    """
    for day_number in range(1, 366):
        days = df['day']
        # Half-open window (day_number - 5, day_number]
        in_window = (days > day_number - 5) & (days <= day_number)
        deck.layers[0].data = df[in_window]
        deck.update()
        ts.value = f'<h1>{day_of_year_to_display(day_number)}</h1>'
        time.sleep(0.1)

def display_all():
    """Reset the heading and show every row of ``df`` on the map again.

    Mutates the notebook-level ``ts`` and ``deck`` in place. Neither name is
    rebound, so no ``global`` declaration is needed.
    """
    ts.value = '<h1>Blue whale migratory paths</h1>'
    whale_layer = deck.layers[0]
    whale_layer.data = df
    deck.update()

In [None]:
# Play the day-by-day animation, then put the full data set back on the map
run()
display_all()

# How to use pydeck: Line by line

In [None]:
import geopandas as gpd
import pandas as pd

# 2014 UK accidents CSV
UK_ACCIDENTS_DATA = "https://raw.githubusercontent.com/ajduberstein/geo_datasets/master/uk-accidents-2014.csv"

# Load into pandas and preview the first rows
df = pd.read_csv(UK_ACCIDENTS_DATA)
df.head()

## Basic example: Plotting a scatter plot

In [None]:
import pydeck as pdk

UK_ACCIDENTS_DATA = "https://raw.githubusercontent.com/ajduberstein/geo_datasets/master/uk-accidents-2014.csv"

df = pd.read_csv(UK_ACCIDENTS_DATA)

# Define a layer to display on a map.
# See the deck.gl ScatterplotLayer docs for more info:
# https://deck.gl/docs/api-reference/layers/scatterplot-layer
layer = pdk.Layer(
    "ScatterplotLayer",
    data=df,
    get_position=["lng", "lat"],
    auto_highlight=True,
    # Smallest a point is allowed to get; here, at least one pixel
    radius_min_pixels=1,
    # RGBA color value of point's fill
    get_fill_color=[255, 139, 51, 140],
    # Radius of the point in meters
    get_radius=100,
)

# Set the viewport location.
# A tool like https://duberste.in/viewport_finder/ finds these values quickly.
view_state = pdk.ViewState(
    longitude=-3.2765753,
    latitude=54.7023545,
    zoom=4.482198792800248,
    pitch=0,
    bearing=0,
)

# Render
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    map_provider='google_maps',
    map_style='satellite',
)
r.to_html()

In [None]:
# Embed the viewport finder tool referenced in the cell above
IFrame('https://duberste.in/viewport_finder/', height=700, width=1000)

## Aggregating to hexagon

In [None]:
import pydeck as pdk

# Same data set as before, but handed to the layer as a URL instead of a DataFrame
UK_ACCIDENTS_DATA = "https://raw.githubusercontent.com/ajduberstein/geo_datasets/master/uk-accidents-2014.csv"

# Define a layer to display on a map.
# See the deck.gl HexagonLayer docs for more info:
# https://deck.gl/docs/api-reference/aggregation-layers/hexagon-layer
layer = pdk.Layer(
    "HexagonLayer",
    data=UK_ACCIDENTS_DATA,
    get_position=["lng", "lat"],
    auto_highlight=True,
    elevation_scale=50,
    pickable=True,
    elevation_range=[0, 3000],
    extruded=True,
    coverage=1,
)

# Set the viewport location.
# A tool like https://duberste.in/viewport_finder/ finds these values quickly.
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36,
)

# Render
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    map_style=pdk.map_styles.DARK_NO_LABELS,
    tooltip={'html': '<b>Number of observations:</b> {elevationValue}'},
)
r.show()


## Plotting GeoJSON with geopandas

In [None]:
HURRICANES_URL = 'https://raw.githubusercontent.com/ajduberstein/geo_datasets/master/historical-hurricanes.geo.json'

# Read the GeoJSON and keep only the columns used by the maps below
gdf = gpd.read_file(HURRICANES_URL)[['name', 'geometry', 'year', 'maximum_sustained_wind']]
gdf.head()

In [None]:
# A GeoDataFrame can be passed straight to a GeoJsonLayer; no position
# column has to be named because the geometry travels with the data.
layers = pdk.Layer(
    "GeoJsonLayer",
    data=gdf,
    pickable=True,  # Data can be selected
    stroked=False,  # Data shouldn't have an outline
    filled=True,  # Polygons should be filled
    line_width_scale=20,
    line_width_min_pixels=2,
    # Expression string built from each feature's `maximum_sustained_wind`
    get_line_color='[maximum_sustained_wind, 0, maximum_sustained_wind, 200]',
    get_radius=10,
    get_line_width=1,
)

# Tooltip template; the {placeholders} name columns of `gdf`
TOOLTIP_HTML = (
    '<b>{name}</b><br />'
    '<b>Year:</b> {year}<br />'
    '<b>Wind speed:</b> {maximum_sustained_wind} MPH'
)

deck = pdk.Deck(
    layers=[layers],
    map_style=pdk.map_styles.LIGHT,
    tooltip={'html': TOOLTIP_HTML},
)
deck.show()

## Plotting data over time

In [None]:
import time
import ipywidgets

# Same GeoJsonLayer configuration as the previous map; its data is swapped
# year by year in the next cell.
layer = pdk.Layer(
    "GeoJsonLayer",
    data=gdf,
    pickable=True,  # Data can be selected
    stroked=False,  # Data shouldn't have an outline
    filled=True,    # Polygons should be filled
    line_width_scale=20,
    line_width_min_pixels=2,
    get_line_color='[maximum_sustained_wind, 0, maximum_sustained_wind, 200]',
    get_radius=10,
    get_line_width=1,
)
deck = pdk.Deck(
    layers=[layer],
    tooltip={'text': '{name} in {year}'},
    map_style=pdk.map_styles.LIGHT,
)

# Empty HTML widget above the map; the animation loop writes the current year into it
year = ipywidgets.HTML()
display(year)
display(deck.show())

# Cast `year` to int32 so its min/max can be used as range() bounds in the next cell
gdf['year'] = gdf['year'].astype('int32')

In [None]:
# Step through every year in the data, showing only that year's rows
first_year, last_year = gdf['year'].min(), gdf['year'].max()
for current_year in range(first_year, last_year + 1):
    deck.layers[0].data = gdf[gdf['year'] == current_year]
    deck.update()
    year.value = f'<b>{current_year}</b>'
    time.sleep(0.1)