# Using bokeh to visualize on a map
*Author: Chris*

In [1]:
# Install the bokeh library (uncomment and run if it is not already installed)
# %pip install bokeh

In [1]:
# Load data collection libraries
import numpy as np
import pandas as pd

# Load visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from bokeh.tile_providers import CARTODBPOSITRON
import bokeh

%matplotlib inline

In [2]:
# Load the tweet dataset; the first CSV column is used as the row index.
tweets_csv = './datasets/finalized_learned_tweets_df.csv'
df = pd.read_csv(tweets_csv, index_col=0)
df.head()

Unnamed: 0,id,text,timestamp,city,state_id,state_name,county_name,lat,lng,score_serious,score_non_serious,serious_blackout
0,1079527340617723904,I bought a portable cell charger. It stays cha...,2018-12-30 23:59:09,Arlington,MA,Massachusetts,Middlesex,42.4186,-71.1638,0.623218,0.447429,1.0
1,1079527053555392513,The filthy scum media @cnn didn’t show the REA...,2018-12-30 23:58:00,Blaine,ME,Maine,Aroostook,46.499,-67.8688,0.623218,0.447429,0.0
2,1079523909312098305,Massive power outage hits southern Zim http://...,2018-12-30 23:45:31,Groton Long Point,CT,Connecticut,New London,41.3145,-72.0087,0.623218,0.447429,0.0
3,1079522345134538752,Massive power outage hits southern Zim https:/...,2018-12-30 23:39:18,Shelburne,VT,Vermont,Chittenden,44.3759,-73.2265,0.623218,0.447429,0.0
4,1079522317284237312,A major power outage this afternoon is impacti...,2018-12-30 23:39:11,North Woodstock,NH,New Hampshire,Grafton,44.0364,-71.6895,0.623218,0.447429,1.0


### Drop all the columns that are unnecessary for plotting.

In [3]:
# Keep only what the maps need (state code, county name, coordinates and the
# blackout flag) and give those columns short names.
# Renaming by label instead of assigning a positional list to df.columns means
# a change in the CSV's column order cannot silently mislabel a column, and
# avoiding inplace=True keeps the whole step a single expression.
df = (
    df
    .drop(columns=['id', 'text', 'timestamp', 'city', 'state_name',
                   'score_serious', 'score_non_serious'])
    .rename(columns={'state_id': 'state',
                     'county_name': 'name',
                     'serious_blackout': 'blackout'})
)

In [4]:
# Preview the first rows to confirm which columns remain and their new names.
df.head()

Unnamed: 0,state,name,lat,lng,blackout
0,MA,Middlesex,42.4186,-71.1638,1.0
1,ME,Aroostook,46.499,-67.8688,0.0
2,CT,New London,41.3145,-72.0087,0.0
3,VT,Chittenden,44.3759,-73.2265,0.0
4,NH,Grafton,44.0364,-71.6895,1.0


In [5]:
# Build a per-county key by prefixing the county name with its state code, so
# that identically named counties in different states stay distinct.
df['index'] = df['state'].str.cat(df['name'])
df.head()

Unnamed: 0,state,name,lat,lng,blackout,index
0,MA,Middlesex,42.4186,-71.1638,1.0,MAMiddlesex
1,ME,Aroostook,46.499,-67.8688,0.0,MEAroostook
2,CT,New London,41.3145,-72.0087,0.0,CTNew London
3,VT,Chittenden,44.3759,-73.2265,0.0,VTChittenden
4,NH,Grafton,44.0364,-71.6895,1.0,NHGrafton


In [6]:
# Download bokeh's sample datasets into the local bokeh data directory; the
# download includes US_Counties, which supplies the county outlines used for
# the county map later in this notebook.
# The trailing semicolon keeps the notebook from echoing a return value.
bokeh.sampledata.download();

Using data directory: /Users/sungwonlee/.bokeh/data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3182088 bytes)
   3182088 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (5148539 bytes)
   5148539 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 [100.00%]
Downloading: gapminder_life_expectancy.csv (73243 bytes)
     73243 [100.00%]
Download

## Plot 1: Outage Cities on a Google Map

In [7]:
# Import libraries
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap

In [8]:
# Partition the tweets on the blackout flag (1 = flagged, 0 = not flagged).
is_blackout = df['blackout'].eq(1)
is_not_blackout = df['blackout'].eq(0)
df_blackout = df.loc[is_blackout]
df_non_blackout = df.loc[is_not_blackout]

In [17]:
import os

# Write the rendered map to a standalone HTML file.
output_file("gmap_Cities.html")

# Read the Google Maps API key from the environment instead of hardcoding it:
# a key stored in the notebook is exposed to anyone who can read the file.
google_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to a Google Maps "
        "API key before running this cell."
    )

# Centre a road map on lat 44, lng -70 at zoom level 6.
map_options = GMapOptions(lat=44, lng=-70, map_type="roadmap", zoom=6)

# Toolbar tools offered alongside the map.
TOOLS = "pan,wheel_zoom,reset,hover,save"

# Plot the base map.
p = gmap(google_api_key, map_options, title="New England Power Outage by City", tools=TOOLS)

# Tweets flagged as a serious blackout: opaque red circles.
source1 = ColumnDataSource(
    data=dict(lat=df_blackout['lat'],
              lon=df_blackout['lng'])
)

p.circle(x="lon", y="lat", size=5, fill_color="red", line_color="red", fill_alpha=1.0, source=source1, legend='Serious Blackout')

# Remaining tweets: smaller, translucent gray circles so they do not dominate
# the map.
source2 = ColumnDataSource(
    data=dict(lat=df_non_blackout['lat'],
              lon=df_non_blackout['lng'])
)

p.circle(x="lon", y="lat", size=3, fill_color="gray", line_color="gray", fill_alpha=0.4, source=source2, legend='Irrelevant')

# Render the figure (written to the output file configured above).
show(p)

## Plot 2: County Interactive Outage Map

In [10]:
# Import libraries
from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure

from bokeh.sampledata.us_counties import data as counties
# Reverse the palette order on a copy: slicing leaves bokeh's shared Viridis6
# object untouched, whereas .reverse() flips it in place for every user of the
# module (and would fail outright if the palette were an immutable tuple).
palette = palette[::-1]

In [11]:
# Count the blackout-flagged tweets per county key and hold the counts in a
# one-column frame indexed by that key.
county_df = df_blackout['index'].value_counts().to_frame(name='num_occurrence')
county_df.head()

Unnamed: 0,num_occurrence
MAPlymouth,25
MAMiddlesex,23
MAWorcester,21
MAEssex,17
MABarnstable,14


In [12]:
# bokeh's sample data describes US counties; keep only those in the six
# New England states (the sample data uses lower-case state codes).
new_england = {'ma', 'me', 'vt', 'nh', 'ct', 'ri'}
counties = {
    code: county
    for code, county in counties.items()
    if county["state"] in new_england
}

# One row per county: name, upper-cased state code, and the outline stored as
# lists of longitudes and latitudes.
bokeh_counties_df = pd.DataFrame(
    {
        "name": [entry["name"] for entry in counties.values()],
        "state": [entry["state"].upper() for entry in counties.values()],
        "longitude": [entry["lons"] for entry in counties.values()],
        "latitude": [entry["lats"] for entry in counties.values()],
    },
    columns=["name", "state", "longitude", "latitude"],
)

# Index the rows by state code + county name, the same key format used for
# the tweet frame's 'index' column.
bokeh_counties_df.index = bokeh_counties_df['state'] + bokeh_counties_df['name']
bokeh_counties_df.head()

Unnamed: 0,name,state,longitude,latitude
CTFairfield,Fairfield,CT,"[-73.34681, -73.33126, -73.31111, -73.31047, -...","[41.47794, 41.47249, 41.46844, 41.46881, 41.46..."
CTHartford,Hartford,CT,"[-72.49726, -72.49732, -72.50388, -72.50643, -...","[41.85876, 41.8585, 41.82747, 41.8155, 41.8109..."
CTLitchfield,Litchfield,CT,"[-72.94389, -72.95357, -72.94779, -72.9375, -7...","[41.88481, 41.85409, 41.84317, 41.83734, 41.82..."
CTMiddlesex,Middlesex,CT,"[-72.74704, -72.75218, -72.75214, -72.73448, -...","[41.5263, 41.57889, 41.5789, 41.59541, 41.6029..."
CTNew Haven,New Haven,CT,"[-72.74704, -72.74704, -72.73766, -72.72259, -...","[41.5263, 41.52579, 41.45431, 41.42709, 41.433..."


In [13]:
# Attach the per-county tweet counts to the outline frame (left join on the
# shared state+county index). Any num_occurrence column left over from an
# earlier run is dropped first so the cell can be re-executed; joining a
# second time would otherwise raise because the column names overlap.
bokeh_counties_df = (
    bokeh_counties_df
    .drop(columns='num_occurrence', errors='ignore')
    .join(county_df)
)

# Counties with no matching tweets come out of the join as NaN; count the
# nulls per column.
bokeh_counties_df.isnull().sum()

name              0
state             0
longitude         0
latitude          0
num_occurrence    4
dtype: int64

In [14]:
# Replace the missing values left by the join with 0, so counties without any
# matching tweets carry a count of zero.
bokeh_counties_df = bokeh_counties_df.fillna(0)
bokeh_counties_df.head()

Unnamed: 0,name,state,longitude,latitude,num_occurrence
CTFairfield,Fairfield,CT,"[-73.34681, -73.33126, -73.31111, -73.31047, -...","[41.47794, 41.47249, 41.46844, 41.46881, 41.46...",9.0
CTHartford,Hartford,CT,"[-72.49726, -72.49732, -72.50388, -72.50643, -...","[41.85876, 41.8585, 41.82747, 41.8155, 41.8109...",11.0
CTLitchfield,Litchfield,CT,"[-72.94389, -72.95357, -72.94779, -72.9375, -7...","[41.88481, 41.85409, 41.84317, 41.83734, 41.82...",6.0
CTMiddlesex,Middlesex,CT,"[-72.74704, -72.75218, -72.75214, -72.73448, -...","[41.5263, 41.57889, 41.5789, 41.59541, 41.6029...",2.0
CTNew Haven,New Haven,CT,"[-72.74704, -72.74704, -72.73766, -72.72259, -...","[41.5263, 41.52579, 41.45431, 41.42709, 41.433...",5.0


In [15]:
# Import bokeh sample data of US states and counties
from bokeh.sampledata.us_counties import data as counties

In [16]:
# Flip the imported palette again (it was already reversed once right after it
# was imported). NOTE(review): `palette` is not passed to the color mapper
# below, which uses its own explicit list of hex colors.
palette.reverse()

# Write the rendered map to a standalone HTML file.
output_file("gmap_County.html")

# Restrict bokeh's county sample data to the six New England states.
new_england_states = {'ma', 'me', 'vt', 'nh', 'ct', 'ri'}
counties = {
    code: county
    for code, county in counties.items()
    if county["state"] in new_england_states
}

# Take outlines, names and counts from the one joined frame so each polygon is
# guaranteed to be paired with its own county's name and count, instead of
# relying on two separately built sequences happening to share an order.
county_xs = bokeh_counties_df['longitude'].tolist()
county_ys = bokeh_counties_df['latitude'].tolist()
county_names = bokeh_counties_df['name'].tolist()
num_occurrence = bokeh_counties_df['num_occurrence'].tolist()

# Map tweet counts to colors on a log scale, light (few) to dark (many).
# Palette reference: https://bokeh.pydata.org/en/latest/docs/reference/palettes.html
# NOTE(review): counties without tweets have a count of 0; confirm the log
# mapper colors them as intended.
color_mapper = LogColorMapper(palette=['#FBFCBF', '#FD9F6C', '#DD4968', '#8C2980', '#3B0F6F', '#000003'])

# Column data for the patches glyph: one entry per county.
data = dict(
    x=county_xs,
    y=county_ys,
    name=county_names,
    rate=num_occurrence,
)

# Toolbar tools offered alongside the figure.
TOOLS = "pan,wheel_zoom,reset,hover,save"

# Set up the figure: no axes, and hover tooltips showing the county name,
# its tweet count and the cursor position.
p = figure(
    title="New England Power Outage by County (aggregated)", tools=TOOLS,
    x_axis_location=None, y_axis_location=None,
    tooltips=[
        ("(Name)", "@name"), ("(Number of occurrence)", "@rate times"), ("(Long, Lat)", "($x, $y)")
    ])
p.grid.grid_line_color = 'white'
p.hover.point_policy = "follow_mouse"

# Draw one patch per county, filled according to its tweet count.
p.patches('x', 'y', source=data,
          fill_color={'field': 'rate', 'transform': color_mapper},
          fill_alpha=0.7, line_color="black", line_width=0.5)

show(p)