# Interactive Plotting of Open Flight Data with Datashader

In [17]:
import os
import bokeh
import bokeh.plotting as plotting
# from bokeh.io import save
from bokeh.models import WMTSTileSource
from bokeh.palettes import Category20, Category20b, Set3
plotting.output_notebook()
# output_file('bokeh')
import datashader.transfer_functions as tf
import datashader as ds
from datashader.colors import viridis
from datashader.bokeh_ext import create_ramp_legend, create_categorical_legend
from bokeh.io import show
from datashader.bokeh_ext import InteractiveImage
from datashader.utils import lnglat_to_meters as webm
import dask
import dask.dataframe as dd
# from dask.diagnostics import ProgressBar
# ProgressBar().register()
import pandas as pd
import numpy as np
from functools import partial
import datetime as dt
# from datashader.bokeh_ext import create_categorical_legend
from sklearn.externals.joblib import Memory
# Disk-backed memoization cache (only referenced by the commented-out
# `@memory.cache` decorator on create_image).
memory = Memory(location='/tmp', verbose=0)
# `np.warnings` is just the standard-library `warnings` module re-exported by
# NumPy, and newer NumPy releases no longer expose it. Use the stdlib module
# directly so this cell keeps working after a NumPy upgrade.
import warnings
# Deliberately silence all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')

In [18]:
# Record the versions of the main libraries used by this run so the captured
# outputs below can be tied to a specific environment.
print(f'Pandas version: {pd.__version__}')
print(f'Bokeh version: {bokeh.__version__}')
print(f'Datashader version: {ds.__version__}')
print(f'dask version: {dask.__version__}')

Pandas version: 0.23.4
Bokeh version: 0.13.0
Datashader version: 0.6.8
dask version: 0.17.4


In [19]:
# import datetime as dt
# import logging
# now = dt.datetime.now().strftime('%m-%d-%y_%H%M%S')
# logging.basicConfig(filename=now+".log", level=logging.INFO, format='%(asctime)s %(message)s')

In [20]:
%%time
# main_ops =  ['Southwest', 'American', 'Delta', 'SkyWest', 'Air Canada', 'Alaska', 
#         'Virgin', 'United','JetBlue', 'Spirit', 'Frontier', 'Wells Fargo', 
#         'WestJet','Private','British Airways', 'Aeroflot','Republic','Qantas',
#         'Air France','Lufthansa','Jetstar','Wizz','Compass', 'Aeroméxico']

# Directory holding the ADS-B parquet extract. Defaults to the original
# location; set the ADSB_DIR environment variable to run the notebook on
# another machine without editing this cell.
h5_dir = os.environ.get('ADSB_DIR', r'c:\adsb')
parq_file = os.path.join(h5_dir, '2018_Feb.parq')
# dask dataframe over the parquet data; rows are only materialized by the
# `.compute()` call in the bounding-box filtering cell.
df = dd.read_parquet(parq_file)

Wall time: 7.95 ms


In [21]:
# Named bounding boxes, each laid out as ((x_min, x_max), (y_min, y_max)).
# The filtering cell unpacks a box in exactly this order and compares the
# values against the `x` / `y` columns of the flight data.
# Units are presumably Web Mercator meters (to line up with the tile
# basemaps) -- TODO confirm against how the `x` / `y` columns were produced.
MaxBounds = ((-20048966.10, 20048966.10), (-20026376.39, 20026376.39))
WholeWorld = ((-20_037_508, 20_037_508), (-7_670_608, 13_971_466))
TwoBounds = ((-20_000_000, 20_000_000), (-20_000_000, 20_000_000))
USA_CONUS = ((-13884029, -7453304), (2698291, 6455972))
WesternEuro = ((-1181114, 4270391), (3000000, 8081620))
Germany = ((709336, 1600000), (6026907, 7270000))
Chicago = (( -9828281, -9717659), (5096658, 5161298))
Chinatown = (( -9759210, -9754583), (5137122, 5139825))
NewYorkCity = (( -8280656, -8175066), (4940514, 4998954))
LosAngeles = ((-13195052, -13114944), (3979242, 4023720))
Houston = ((-10692703, -10539441), (3432521, 3517616))
Austin = ((-10898752, -10855820), (3525750, 3550837))
NewOrleans = ((-10059963, -10006348), (3480787, 3510555))
Atlanta = ((-9507853,-9274873), (3927030, 4069506))
Southeast = ((-10_126_000, -8_903_000), (3_429_000, 4_217_000))

In [22]:
%%time
# Region to extract. NOTE: the cache file name below hard-codes the "-SE"
# suffix, so if `box` is changed the suffix must be changed too, otherwise a
# stale Southeast pickle is silently loaded for the new region.
box = Southeast
pickle_name = f'{os.path.basename(parq_file)}-SE.p'
pickle_path = os.path.join(os.getcwd(), 'data', pickle_name)
if os.path.exists(pickle_path):
    # Fast path: reuse the subset cached by a previous run.
    # NOTE: unpickling executes arbitrary code; only load pickles that this
    # notebook wrote itself.
    df_se = pd.read_pickle(pickle_path)
else:
    # Create the cache directory up front so a missing ./data folder fails
    # (or is fixed) before the expensive dask computation, not after it.
    os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
    (x1, x2), (y1, y2) = box
    # Strictly-inside filter on both axes, then materialize as pandas.
    df_se = df[(df.x > x1) & (df.x < x2) & (df.y > y1) & (df.y < y2)].compute()
    df_se = df_se.dropna(subset=['x', 'y'], how='any')
    df_se.to_pickle(pickle_path)

Wall time: 477 ms


In [23]:
# color_mapper26 = Category20[20] + Category20b[20][2::3]
# color_mapper = {v:c for v,c in zip(main_ops + ['Other'], color_mapper26)}

In [24]:
# Display the number of rows in the in-memory subset `df_se`, formatted with
# thousands separators.
f'Row Count: {len(df_se):,}'

'Row Count: 17,213,158'

In [25]:
# l = ['United States', 'Canada', 'United Kingdom', 'Germany', 'China',  'Japan', 'Australia', 'Other']
# color_mapper = Set3[len(l)]

In [26]:
# Tile basemap URL templates, passed to WMTSTileSource(url=...) in base_plot.
# source: https://leaflet-extras.github.io/leaflet-providers/preview/
# Note the Esri services order the path as {z}/{y}/{x}, the others as {z}/{x}/{y}.
# NOTE(review): several templates carry the {s} (subdomain) and {r} (retina)
# placeholders from the Leaflet provider list -- confirm the tile source
# expands them before using those URLs, or substitute concrete values.
Esri_NatGeoWorldMap = 'https://server.arcgisonline.com/ArcGIS/rest/services/NatGeo_World_Map/MapServer/tile/{z}/{y}/{x}'
Esri_OceanBasemap = 'https://server.arcgisonline.com/ArcGIS/rest/services/Ocean_Basemap/MapServer/tile/{z}/{y}/{x}'
CartoDB_Positron = 'https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_all/{z}/{x}/{y}{r}.png'
CartoDB_Voyager = 'https://cartodb-basemaps-{s}.global.ssl.fastly.net/rastertiles/voyager/{z}/{x}/{y}{r}.png'
OpenStreetMap_Mapnik = 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png'
OpenTopoMap = 'https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png'
Hydda_Full = 'https://{s}.tile.openstreetmap.se/hydda/full/{z}/{x}/{y}.png'
Esri_WorldStreetMap = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}'
Esri_WorldTopoMap = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}'
Esri_WorldImagery = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}'

In [27]:
def spread(pts):
    """Return the (x, y) extents of a box given as ((x0, x1), (y0, y1)).

    Each extent is the second element of the pair minus the first.
    """
    x_pair = pts[0]
    y_pair = pts[1]
    x_extent = x_pair[1] - x_pair[0]
    y_extent = y_pair[1] - y_pair[0]
    return (x_extent, y_extent)
def ratio(pts):
    """Return the x-extent divided by the y-extent of the box ``pts``.

    ``pts`` has the same ((x0, x1), (y0, y1)) layout that ``spread`` accepts.
    """
    x_extent, y_extent = spread(pts)
    return x_extent / y_extent

In [28]:
def base_plot(xrange, yrange, plot_width=int(850), plot_height=int(500),
              tools='pan,wheel_zoom,zoom_in,zoom_out,reset',
              bok_cir = True, tile_url=CartoDB_Positron):
    """Create a borderless Bokeh figure with a semi-transparent tile basemap.

    Parameters
    ----------
    xrange, yrange : initial x / y ranges handed to the figure.
    plot_width, plot_height : figure size passed to ``plotting.figure``.
    tools : comma-separated Bokeh tool names.
    bok_cir : when true, a small red circle glyph is added before the tiles.
    tile_url : URL template given to ``WMTSTileSource``.

    Returns
    -------
    The configured figure.
    """
    # All border settings are zeroed so the figure fills its frame.
    border_options = dict(min_border=0, min_border_left=0, min_border_right=0,
                          min_border_top=0, min_border_bottom=0)
    fig = plotting.figure(tools=tools,
                          plot_width=plot_width, plot_height=plot_height,
                          x_range=xrange, y_range=yrange,
                          outline_line_color=None,
                          **border_options)

    # Hide the chart furniture: no axes, no grid lines.
    fig.match_aspect = True
    fig.axis.visible = False
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None

    if bok_cir:
        # NOTE(review): no `source` is passed, so the "x"/"y" field names have
        # no columns to resolve against here -- confirm this glyph is intended.
        fig.circle(x="x", y="y", color='red', size=2, alpha=0.4)

    # The basemap is added after the optional glyph and faded to 40% opacity.
    basemap = fig.add_tile(WMTSTileSource(url=tile_url))
    basemap.alpha = 0.4
    return fig

In [32]:
# @memory.cache
def create_image(x_range, y_range, plot_width, plot_height, 
                 source=df, cat=None, thresh=None, spread=False, color_mapper = None):
    """Rasterize ``source`` over the given ranges and return the shaded image.

    Parameters
    ----------
    x_range, y_range : (min, max) pairs describing the viewport.
    plot_width : canvas width in pixels.
    plot_height : accepted for callback compatibility but ignored; the height
        is recomputed from ``plot_width`` and the viewport aspect ratio.
    source : dataframe with ``x`` and ``y`` columns to aggregate.
    cat : optional column name; when given, points are counted per category
        (``ds.count_cat``) instead of a plain point count.
    thresh : optional number; aggregate cells that are not greater than it
        are dropped via ``agg.where`` before shading.
    spread : when true, ``tf.dynspread`` is applied to the shaded image.
        (This flag shadows the module-level ``spread`` helper inside this
        function only; ``ratio`` still resolves the helper globally.)
    color_mapper : optional color key for categorical shading. When omitted,
        datashader's own default color key is used.

    Returns
    -------
    The image produced by ``tf.shade`` (optionally spread).
    """
#     logging.info(f'Create Image - {x_range}, {y_range}')
    start = dt.datetime.now()

    # Derive the height from the width so the raster keeps the viewport's
    # aspect ratio; the incoming plot_height is intentionally overridden.
    plot_height = int(plot_width / ratio((x_range, y_range)))
#     logging.info(f'Create Image - {plot_width}, {plot_height}')
    cvs = ds.Canvas(plot_width, plot_height, x_range, y_range)

    if cat is not None:
        agg = cvs.points(source, 'x', 'y',  ds.count_cat(cat))
    else:
        agg = cvs.points(source, 'x', 'y')

    if thresh is not None:
        agg = agg.where(agg > thresh)

    # Only forward color_key when the caller supplied one. Passing
    # color_key=None explicitly would override datashader's default palette
    # and break categorical shading when `cat` is set without a color_mapper.
    shade_options = {'cmap': viridis}
    if color_mapper is not None:
        shade_options['color_key'] = color_mapper
    img = tf.shade(agg, **shade_options)

    if spread:
        img = tf.dynspread(img, threshold=0.5, max_px=4)

    elapsed = dt.datetime.now() - start
    print(f"Image creating time: {elapsed.total_seconds()}s")
#     logging.info(f'Image Generated ({elapsed.total_seconds()}s)')

    return img

def image_callback(xr, yr, w, h, **kwargs):
    """Forward a viewport (ranges plus pixel size) to ``create_image``.

    ``xr``/``yr`` are the x and y ranges, ``w``/``h`` the width and height;
    any extra keyword arguments are passed through unchanged.
    """
#     logging.info(f'Callback')
    viewport = (xr, yr, w, h)
    return create_image(*viewport, **kwargs)

In [33]:
# Demo notes: show with the Southeast and Atlanta boxes.
# Start with a low threshold and move up to 50.
# Build the figure over the Southeast box; bok_cir=False skips the circle
# glyph, and the Esri street map is used as the basemap.
p = base_plot(*Southeast, bok_cir=False, tile_url=Esri_WorldStreetMap)
# Pre-bind the in-memory subset and rendering options, leaving only the
# (xr, yr, w, h) viewport arguments for the caller to supply.
cb = partial(image_callback, source=df_se, thresh=50, spread=True)
# Kept as the last expression so the result is rendered as the cell output.
# Presumably InteractiveImage re-invokes `cb` with the current ranges and
# size on pan/zoom -- confirm against the datashader.bokeh_ext docs.
InteractiveImage(p, cb)

Image creating time: 0.320141s
