In [None]:
# Import packages

import folium
import requests
import geocoder
import pandas as pd
import simplejson as json
import string

from folium import Map, Marker, GeoJson, LayerControl
from ediblepickle import checkpoint

%matplotlib inline

In [None]:
# Read API keys from file

def _read_secret(path):
    """Return the contents of the file at `path` with surrounding whitespace stripped."""
    with open(path) as handle:
        return handle.read().strip()

wmata_key = _read_secret("secrets/.wmata")
walkscore_key = _read_secret("secrets/.walkscore")

# Folium: A Leaflet Wrapper for Python

What can Folium, and by extension Leaflet, do to help you explore the structure of cities?

1. [Live bus tracking](#Bus-tracker)
1. [Neighborhood choropleths](#Neighborhood-choropleths)
1. [Walk Score&reg; along bus routes](#Walk-Score)

[Leaflet](http://leafletjs.com) is currently one of the most popular JavaScript libraries for mapping.
* Interactive through web browser
* Flexibility of JavaScript
* Mobile-friendly
* Great API
* Open source

[Python](https://www.python.org) is a great language for the web. Using the same language, you can
* Get data from APIs or scrape the web with libraries like [requests](http://docs.python-requests.org) and [Beautiful Soup](http://www.crummy.com/software/BeautifulSoup/bs4/)
* Process and analyze data with [NumPy, SciPy](https://docs.scipy.org), and [Pandas](http://pandas.pydata.org)
* Model relationships with state-of-the-art machine learning library [Scikit-learn](http://scikit-learn.org)
* Build a web framework for your project with [Flask](http://flask.pocoo.org) and [Jinja2](http://jinja.pocoo.org)
* Host your application with anything from [Heroku](http://www.heroku.com) to [Google App Engine](https://developers.google.com/appengine).

Python's continuing utility is due to its *ecosystem*, not its syntax or speed. For example, according to Wikipedia, TensorFlow was the fastest growing deep learning framework in fall 2016. The second fastest was a high-level Python library called Keras which can seamlessly plug into TensorFlow's architecture.

There's a similar relationship between Leaflet.js and the Python package Folium.

## Folium

Folium uses the Leaflet API to allow users to write Python code to generate and manipulate interactive JavaScript maps. This also allows for drawing those maps in Jupyter notebooks.

### Interactivity

You can drag, zoom, click, and hover. More generally you can provide **input** and get **output**, even if that output requires backend calculations. JavaScript was designed to ferry information between the frontend and the backend seamlessly.

* More seamless than redrawing maps
* Answer questions in real time
* Simultaneous exploration and explanation

All this makes for a good tool.

### Documentation

* [GitHub](https://github.com/python-visualization/folium)
* [Docs and API](http://python-visualization.github.io/folium/index.html)
* [Examples](https://github.com/python-visualization/folium/tree/master/examples)

## Bus tracker

The WMATA API is free to use. Learn more [here](https://developer.wmata.com).

* Investigate real time positions compared to scheduled locations to say something about actual vs. ideal transit
* Look at how connections between regions vary throughout the day

Interactivity:
* Input: location of interest, search radius
* Output: locations and information of buses in the area

### Geocoder

An alternative to calling, e.g., the OpenStreetMap API directly, [geocoder](http://geocoder.readthedocs.io) provides a wrapper around a number of popular geocoding services.

In [None]:
# Geocode a free-text place name; `loc` is reused by the map cells below
location = 'Dupont Circle'
loc = geocoder.google(location)
# print() with parentheses runs on both Python 2 and Python 3
print(loc.latlng)

In [None]:
# Maps are hierarchical objects

start_point = loc.latlng
bus_map = Map(location=start_point, zoom_start=15)

# Pin the geocoded address at the map center
center_marker = Marker(location=start_point,
                       popup=loc.address,
                       icon=folium.Icon(color='blue'))
bus_map.add_child(center_marker)
# bus_map.add_child(GeoJson(loc.geojson))
bus_map

In [None]:
# Saving maps

# bus_map.save('bus_map.html')

In [None]:
# Set general WMATA parameters

# One shared session so WMATA requests are retried up to twice
wmata_adapter = requests.adapters.HTTPAdapter(max_retries=2)
session = requests.Session()
session.mount('https://api.wmata.com', wmata_adapter)

# Sent with every WMATA request
headers = dict(api_key=wmata_key)

# Passed as the `Radius` parameter of the bus positions request
radius = '1000'

### Checkpointing

Caching and checkpointing are crucial for dealing with APIs sustainably and respectfully. You should never hit an endpoint twice for the same data.

[Edible Pickle](https://pypi.python.org/pypi/ediblepickle) is a checkpointing tool that allows you to save the expensive results of a function so that it need not be run again if that result is already present.

In the following cell, setting `refresh = True` will make the function get current data instead of relying on the cache.

In [None]:
# Call API for bus locations

bus_endpoint = 'https://api.wmata.com/Bus.svc/json/jBusPositions'

@checkpoint(key = string.Template('{0}x{1}_radius{2}.buslist'), work_dir = 'cache/', refresh = False)
def get_buses(lat, lon, radius):
    """
    Fetch the bus positions reported around a point.

    `lat`, `lon` and `radius` (in meters) are sent as the `Lat`, `Lon`
    and `Radius` query parameters of the WMATA bus positions endpoint.

    Returns the `BusPositions` entry of the JSON response.
    Raises ValueError when the HTTP status is anything other than 200.
    """
    # A 'RouteID' entry (e.g. 'B12') can be added here to filter by route
    query = dict(Lat=lat, Lon=lon, Radius=radius)

    reply = session.get(bus_endpoint, params=query, headers=headers)
    if reply.status_code != 200:
        raise ValueError("Response status not 200")
    return reply.json()['BusPositions']

In [None]:
# Bus positions around the geocoded location, using the search radius set above
bus_list = get_buses(loc.lat, loc.lng, radius)
# buses_in_the_area = len(bus_list)

In [None]:
# Example response element: the first entry of the bus list

bus_list[0]

In [None]:
# Draw every reported bus as a small triangle labelled with its route and headsign
for bus in bus_list:
    label = 'Route %s to %s' % (bus['RouteID'], bus['TripHeadsign'])
    triangle = folium.features.RegularPolygonMarker(
        location=[bus['Lat'], bus['Lon']],
        popup=label,
        number_of_sides=3,
        radius=10,
        weight=1,
        fill_opacity=0.8)
    bus_map.add_child(triangle)
bus_map

### Exercises

* Use the WMATA bus routes endpoint to identify a bus's next stop, and use math to rotate its triangle to point in the right direction
* Overlay trains in the area on the map

## Neighborhood choropleths

* Visualize metrics of interest that have different values for each region
* Overlay metrics to perform an "and" query

Interactivity:
* Input: neighborhood shape information, a value for each neighborhood to plot
* Output: neighborhood regions highlighted on map, colored by value

In [None]:
# Base map for the neighborhood layers, centered on the geocoded location
nh_map = Map(location = loc.latlng,
             zoom_start = 13,
             tiles = 'Stamen Toner')

### GeoJSON

A file format that combines geographical data with associated JSON attributes. You can find or create these datasets in a variety of ways. This particular dataset comes from [this GitHub repository](https://github.com/benbalter/dc-maps/tree/master/maps).

In [None]:
# Parse the GeoJSON file and keep a handle on its list of features
with open('geojson/neighborhood-composition.geojson') as fin:
    gjdata = json.load(fin)
nhoods = gjdata['features']
# Show the first feature as an example
nhoods[0]

In [None]:
# Create Pandas DataFrame

# One row per feature, taken from its `properties`; columns follow the
# sorted property names of the first feature
property_rows = [feature['properties'] for feature in nhoods]
column_names = sorted(nhoods[0]['properties'])
nhdata = pd.DataFrame(property_rows, columns=column_names)

In [None]:
# Preview the first rows of the table
nhdata.head()

In [None]:
# Using Pandas to create derived variables

pop90 = nhdata['POP90']
area = nhdata['AREA_']
# Derived column: POP90 divided element-wise by AREA_
nhdata['Density'] = pop90.div(area)
nhdata.describe()

### Colormaps

Check out [ColorBrewer](http://colorbrewer2.org) for advice about coloring for cartography.

In [None]:
# Set up colormaps to represent the range of values

from branca.colormap import linear

# Each scale runs from the column minimum to a fraction of the column
# maximum (max / 1.5 for POP90, max / 2 for POVRATE)
popcolors = linear.GnBu.scale(
    nhdata['POP90'].min(),
    nhdata['POP90'].max() / 1.5)

povcolors = linear.PuRd.scale(
    nhdata['POVRATE'].min(),
    nhdata['POVRATE'].max() / 2)

# print() with parentheses runs on both Python 2 and Python 3
print(popcolors(1000))

povcolors

In [None]:
# Adds a caption to the map that shows the color scale

# popcolors.caption = 'Population Scale'
# popcolors.add_to(nh_map)

In [None]:
def style_by_population(feature):
    """Dashed black outline, filled with the `popcolors` color of the feature's POP90 value."""
    return {
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillColor': popcolors(feature['properties']['POP90']),
    }

population_layer = GeoJson(gjdata, name='population', style_function=style_by_population)
population_layer.add_to(nh_map)

In [None]:
def style_by_poverty_rate(feature):
    """Dashed black outline, filled with the `povcolors` color of the feature's POVRATE value."""
    return {
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillColor': povcolors(feature['properties']['POVRATE']),
    }

poverty_layer = GeoJson(gjdata, name='poverty rate', style_function=style_by_poverty_rate)
poverty_layer.add_to(nh_map)

In [None]:
# Attach a layer control to the neighborhood map
LayerControl().add_to(nh_map)

In [None]:
# Colormaps can be changed on the fly

nh_map

### More succinct choropleths

This [example notebook](http://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/GeoJSON_and_choropleth.ipynb) goes through some other techniques for creating choropleths with additional functionality all within one choropleth method.

### Exercises

* Come up with a standard way of setting colormap thresholds for different variable distributions
* Get a shapefile from the DC OpenData API and [convert it to GeoJSON](https://ben.balter.com/2013/06/26/how-to-convert-shapefiles-to-geojson-for-use-on-github/) using [ogr2ogr](https://ogre.adc4gis.com) or a similar tool

## Walk Score

A measure of how dependent an address is on having a car. For example, areas that require cars are more expensive to live in than they seem. An [API](https://www.walkscore.com/professional/api.php) is available.

<a href = "https://www.walkscore.com/how-it-works/"><img src = 'https://cdn.walk.sc/images/api-logo.gif' style = "float: left; margin: 0px 10px 0px 0px" /></a>
[84](https://www.walkscore.com/how-it-works/)
<a href = "https://www.redfin.com/how-walk-score-works"><img src = 'https://cdn.walk.sc/images/api-more-info.gif' style = "float: right; margin: 0px 810px 0px 0px" /></a>

* Investigate how transit routes lie on top of underlying people - what's between the bus stops?
* Is the purpose of a bus route to increase "walkability" for populations?

Interactivity:
* Input: bus route
* Output: line along the shape of the route, colored by score

In [None]:
# Get Metrobus route data from WMATA

route_endpoint = 'https://api.wmata.com/Bus.svc/json/jRouteDetails'
date = '2017-08-01'  # Changing this constant will likely require hitting the API

def get_route_shape(route, date=None):  # eg. 'L2', 'YYYY-MM-DD'
    """
    Fetch the path of a bus route from the WMATA route details endpoint.

    Parameters
    ----------
    route : route identifier sent as `RouteID`, e.g. 'L2'.
    date : optional 'YYYY-MM-DD' string sent as `Date`; left out of the
        request when falsy.

    Returns
    -------
    The `Shape` entry of the response's `Direction0` element, or of
    `Direction1` when `Direction0` is missing or null.

    Raises
    ------
    ValueError
        If the HTTP status is not 200, or if neither direction has content.
    """
    params = {'RouteID': route}
    if date:
        params['Date'] = date
    response = session.get(route_endpoint, params=params, headers=headers)
    if response.status_code != 200:
        raise ValueError("Error: Response status not 200")

    details = response.json()
    # One of the two directions can come back as null, so fall back to
    # Direction1 rather than failing on a null Direction0.
    direction = details.get('Direction0') or details.get('Direction1')
    if not direction:
        raise ValueError("Error: Route details contain no direction data")
    return direction['Shape']

# for stop in route_shape:
#     Marker(location=[stop['Lat'], stop['Lon']], popup=str(stop['SeqNum'])).add_to(m)

In [None]:
# Get Walkscore data - 500 ft grid resolution

"""
http://api.walkscore.com/score?format=json&
address=1119%8th%20Avenue%20Seattle%20WA%2098101&lat=47.6085&
lon=-122.3295&transit=1&bike=1&wsapikey=<YOUR-WSAPIKEY>
"""

walkscore_endpoint = 'http://api.walkscore.com/score'

def get_walkscore(pin):
    """
    Query the Walk Score API for a (lat, lon) pair.

    The pair is reverse-geocoded to supply the `address` parameter, and
    the transit and bike flags are both set to '1'.

    Returns the decoded JSON response, or None when the HTTP status is not 200.
    """
    lat, lon = pin[0], pin[1]
    street_address = geocoder.google(pin, method='reverse').address
    query = dict(format='json',
                 wsapikey=walkscore_key,
                 lat=lat,
                 lon=lon,
                 transit='1',
                 bike='1',
                 address=street_address)
    reply = requests.get(walkscore_endpoint, params=query)
    return reply.json() if reply.status_code == 200 else None

In [None]:
@checkpoint(key = string.Template('{0}_scores_{1}.panda'), work_dir = 'cache/', refresh = False)
def get_route_scores(route, date):
    """
    Collect Walk Score results for every point along a bus route.

    Looks up the route shape, queries Walk Score once per point, and
    returns a DataFrame with the columns `pin`, `walk_score`,
    `bike_score` and `transit_score`, in that order. A score is recorded
    as -1 when the lookup returned nothing or the value is absent from
    the response.
    """
    missing = -1

    def score_triplet(payload):
        # (walk, transit, bike) scores for one Walk Score response
        if not payload:
            return missing, missing, missing
        return (payload.get('walkscore', missing),
                payload.get('transit', {}).get('score', missing),
                payload.get('bike', {}).get('score', missing))

    pins = [(point['Lat'], point['Lon']) for point in get_route_shape(route, date)]
    triplets = [score_triplet(get_walkscore(pin)) for pin in pins]

    scores = pd.DataFrame({'pin': pins,
                           'walk_score': [walk for walk, _, _ in triplets],
                           'transit_score': [transit for _, transit, _ in triplets],
                           'bike_score': [bike for _, _, bike in triplets]})
    return scores[['pin', 'walk_score', 'bike_score', 'transit_score']]

In [None]:
# Example response from Walk Score API, requested for the geocoded location

test = get_walkscore(loc.latlng)

In [None]:
# Show the example Walk Score response
test

In [None]:
# Pick a route and build its table of scores with get_route_scores
# Cached data for: L2, V5, E4, W4, 38B, 70
# Maps here: https://www.wmata.com/schedules/maps/

route = 'L2'

df = get_route_scores(route, date)

In [None]:
# print() with parentheses runs on both Python 2 and Python 3
print(df.shape)
df.head()

In [None]:
# Base map for the route overlay, centered on the geocoded location
score_map = Map(location = loc.latlng, zoom_start = 12, tiles = 'Stamen Terrain')

In [None]:
# Draw the route as a line colored by the walk_score column
route_points = df['pin']
route_walk_scores = df['walk_score']
layer_name = u'Route %s Walk Score\u00ae' % route

color_line = folium.features.ColorLine(route_points,
                                       colors=route_walk_scores,
                                       colormap=['y', 'orange', 'r'],
                                       weight=6,
                                       name=layer_name)
color_line.add_to(score_map)

In [None]:
# This adds the population layer back in

def style_population_overlay(feature):
    """Dashed black outline, filled with the `popcolors` color of the feature's POP90 value."""
    return {
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillColor': popcolors(feature['properties']['POP90']),
    }

population_overlay = GeoJson(gjdata, name='population', style_function=style_population_overlay)
population_overlay.add_to(score_map)

In [None]:
# Attach a layer control to the score map
LayerControl().add_to(score_map)

In [None]:
# Display the score map with the layers added above
score_map

In [None]:
# score_map.save("score_map.html")

### Exercises

* Use a for loop to add all colorlines to the map
* Overlay Walk Score fluctuations with census block or block group populations
* Cluster bus routes by "purpose" based on how much time they spend in highly walkable areas

## Call to Action

"There's a lot of energy behind open source."

Go learn, go do, go share!

&copy; Ariel M'ndange-Pfupfu 2017