# DATA 608: HW 4

Data: New York City tree census
https://data.cityofnewyork.us/Environment/2015-Street-Tree-Census-Tree-Data/pi5s-9p35

Task: build a dash app for an arborist studying the health of various tree species (as defined by the variable 'spc_common') across each borough (as defined by the variable 'boroname'). This arborist would like to answer the following two questions for each species and in each borough:

1. What proportion of trees are in good, fair, or poor health according to the 'health' variable?
2. Are stewards (steward activity measured by the 'steward' variable) having an impact on the health of trees?

Deployment: optional (EC). Instructions for deploying the app to Heroku are here: https://dash.plot.ly/deployment

You can read more about paging using the Socrata API [here](https://dev.socrata.com/docs/paging.html)

In [1]:
import pandas as pd
import numpy as np

In [2]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

In [3]:
#firstfive_url = 'https://data.cityofnewyork.us/resource/nwxe-4ae8.json?$limit=5&$offset=0'
#nextfive_url = 'https://data.cityofnewyork.us/resource/nwxe-4ae8.json?$limit=5&$offset=5'
#'https://api-url.com/?query with spaces'.replace(' ', '%20')

In [5]:
# Species lookup: one row per common species name with its tree count.
# (A filter such as &$where=boroname='Bronx' could be inserted before the
# $group clause to restrict the lookup to one borough.)
treenames_url = ''.join([
    'https://data.cityofnewyork.us/resource/nwxe-4ae8.json?',
    '$select=spc_common,count(tree_id)',
    '&$group=spc_common',
]).replace(' ', '%20')
treenames = pd.read_json(treenames_url)

In [6]:
# Preview only the first rows instead of rendering the whole species table.
treenames.head(10)

Unnamed: 0,count_tree_id,spc_common
0,273,American beech
1,7975,American elm
2,1081,American hophornbeam
3,1517,American hornbeam
4,46,American larch
5,13530,American linden
6,183,Amur cork tree
7,2197,Amur maackia
8,2049,Amur maple
9,328,arborvitae


In [7]:
# Borough lookup: one row per borough name with its tree count.
# (A filter such as &$where=boroname='Bronx' could be inserted before the
# $group clause to restrict the lookup to one borough.)
boronames_url = ''.join([
    'https://data.cityofnewyork.us/resource/nwxe-4ae8.json?',
    '$select=boroname,count(tree_id)',
    '&$group=boroname',
]).replace(' ', '%20')
boronames = pd.read_json(boronames_url)

In [8]:
# Distinct borough names present in the query result.
boronames.boroname.unique()

array([u'Bronx', u'Brooklyn', u'Manhattan', u'Queens', u'Staten Island'],
      dtype=object)

In [9]:
#reference: https://hub.mybinder.org/user/4quantoss-dashintro-soatf1u4/notebooks/notebooks/Tutorial.ipynb
from IPython import display
import os
def show_app(app, port=9999,
             width=700,
             height=350,
             offline=False,
             in_binder=None):
    """Embed a Dash app in the cell output and start its server.

    Writes an "open in new window" link plus an iframe pointing at the app
    into the notebook output through ``display.display_html`` and then hands
    control to ``app.run_server``.

    Parameters
    ----------
    app
        The Dash application to serve.
    port
        Port passed to ``run_server``; also used to build the displayed URL.
    width, height
        Dimensions written into the iframe tag.
    offline
        When true, the app's css and scripts config are switched to
        ``serve_locally``.
    in_binder
        Whether to build a hub.mybinder.org proxy URL. When ``None`` this is
        decided by whether ``JUPYTERHUB_SERVICE_PREFIX`` is set in the
        environment.

    Returns
    -------
    Whatever ``app.run_server`` returns.
    """
    if in_binder is None:
        in_binder = 'JUPYTERHUB_SERVICE_PREFIX' in os.environ

    if not in_binder:
        url = 'http://localhost:%d' % port
    else:
        # Behind the hub proxy the app is told which path prefix requests use.
        base_prefix = '{}proxy/{}/'.format(os.environ['JUPYTERHUB_SERVICE_PREFIX'], port)
        app.config.requests_pathname_prefix = base_prefix
        url = 'https://hub.mybinder.org{}'.format(base_prefix)

    iframe_template = '<a href="{url}" target="_new">Open in new window</a><hr><iframe src="{url}" width={width} height={height}></iframe>'
    iframe = iframe_template.format(url=url, width=width, height=height)
    display.display_html(iframe, raw=True)

    if offline:
        for assets in (app.css, app.scripts):
            assets.config.serve_locally = True

    return app.run_server(debug=False,  # needs to be false in Jupyter
                          host='0.0.0.0',
                          port=port)

In [24]:
# References:
# https://dash.plot.ly/
# https://github.com/4QuantOSS/DashIntro
# https://hub.mybinder.org/user/4quantoss-dashintro-soatf1u4/notebooks/notebooks/Tutorial.ipynb
# https://dash.plot.ly/interactive-graphing
# https://dev.socrata.com/docs/queries/where.html


#data
# Dropdown choices. A grouped query result can contain a row with no name
# (read by pandas as NaN); drop such rows so NaN never becomes a dropdown
# option that the callbacks would then try to splice into a query string.
treenames_list = treenames['spc_common'].dropna().unique()
boroughs_list = boronames['boroname'].dropna().unique()


# app
# Single-page Dash app: the assignment text, two dropdowns (species and
# borough) and two graphs whose figures are produced by the callbacks
# registered on app_simple further down.
app_simple = dash.Dash()

# The layout is one top-level Div. The component ids set here ('trees_id',
# 'borough_id', 'q1_healthGraph', 'q2_stewardGraph') are the ids the callback
# Input/Output declarations refer to.
app_simple.layout = html.Div(children=[
    # Page heading and task description.
    html.H1(children='DATA 608 HW 4: NYC Tree Census'),
    
    html.H3(children='Task:'),

    html.Div(children='''
        Build a dash app for an arborist studying the health of various tree species (as defined by the variable 
        'spc_common') across each borough (as defined by the variable 'borough'). This arborist would like to answer 
        the following two questions for each species and in each borough:
        '''),
    
    # The two questions, rendered as a Markdown numbered list.
    dcc.Markdown('''

1. What proportion of trees are in good, fair, or poor health according to the 'health' variable?
2. Are stewards (steward activity measured by the 'steward' variable) having an impact on the health of trees?       
    
    '''),
    
        # Label for the species dropdown.
        dcc.Markdown('''
        
*Select Tree Species:*
        
        '''),
    
    # Single-select species dropdown; options come from treenames_list and
    # the initial selection is "American beech".
    dcc.Dropdown(id='trees_id',
     options=[{'label': i, 'value': i} for i in treenames_list],
    multi=False,
    value="American beech"
),
    
        # Label for the borough dropdown.
        dcc.Markdown('''
        
*Select Borough:*
        
        '''),
    
    # Single-select borough dropdown; options come from boroughs_list and
    # the initial selection is "Bronx".
    dcc.Dropdown(id='borough_id',
     options=[{'label': i, 'value': i} for i in boroughs_list],
    multi=False,
    value="Bronx"
),
    
    # Graph placeholders: no figure is set here, only the ids.
    dcc.Graph(
        id='q1_healthGraph'
            ),
    dcc.Graph(
        id='q2_stewardGraph'
            )
])



@app_simple.callback(
    dash.dependencies.Output('q1_healthGraph', 'figure'),
    [dash.dependencies.Input('borough_id', 'value'),
     dash.dependencies.Input('trees_id', 'value')])
def update_q1Graph(borough_filter, tree_filter):
    """Build the Q1 bar chart: share of trees in each health category.

    Queries the tree-census endpoint for the selected species and borough,
    grouped by ``health``, and plots each category's count as a percentage
    of all trees returned by the query.

    Parameters
    ----------
    borough_filter : str or None
        Current value of the ``borough_id`` dropdown.
    tree_filter : str or None
        Current value of the ``trees_id`` dropdown.

    Returns
    -------
    dict
        Plotly figure dictionary for ``q1_healthGraph``. If either dropdown
        is empty, or the query returns no usable rows, the figure has no
        traces and its title says why.

    Raises
    ------
    Whatever ``pd.read_json`` raises when the request itself fails.
    """
    # Imported locally (Python 3 spelling first, Python 2 fallback) so the
    # callback needs nothing beyond the notebook's existing imports.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    def placeholder_figure(note):
        # Same title and axes as the real chart, but without any traces.
        return {
            'data': [],
            'layout': {
                'title': 'Q1: Tree Health (' + note + ')',
                'xaxis': {'title': 'Health'},
                'yaxis': {'title': '% of Trees'},
            }
        }

    def soql_text(value):
        # SoQL escapes a single quote inside a text literal by doubling it;
        # the result is percent-encoded so it is safe inside the URL.
        return quote(value.replace("'", "''").encode('utf-8'), safe='')

    # A cleared dropdown passes None; there is nothing to query then.
    if not tree_filter or not borough_filter:
        return placeholder_figure('select a species and a borough')

    #read in data
    query_template = ('https://data.cityofnewyork.us/resource/nwxe-4ae8.json?' +
        '$select=spc_common, boroname, health, count(tree_id)' +
        "&$where=spc_common='{species}' AND boroname='{borough}'" +
        '&$group=spc_common,boroname, health').replace(' ', '%20')
    soql_url = query_template.format(species=soql_text(tree_filter),
                                     borough=soql_text(borough_filter))
    trees = pd.read_json(soql_url)

    # No matching trees gives a frame without the columns used below.
    if trees.empty or 'health' not in trees.columns or 'count_tree_id' not in trees.columns:
        return placeholder_figure('no trees found')

    # Display order and bar colour per category; any label other than
    # Poor/Fair is ranked last and drawn green.
    health_rank = {'Poor': 1, 'Fair': 2}
    health_color = {'Poor': 'red', 'Fair': 'yellow'}

    #assign values
    trees = trees.assign(sort_order=trees['health'].apply(lambda h: health_rank.get(h, 3)))
    trees = trees.assign(color_assign=trees['health'].apply(lambda h: health_color.get(h, 'green')))

    #get ratio (percentage of all trees returned for this species and borough)
    tree_sum = trees['count_tree_id'].sum()
    trees = trees.assign(ratio=trees['count_tree_id'] / tree_sum * 100)

    #sort
    trees = trees.sort_values(by=['sort_order'])

    #visualize
    figure = {
        'data': [
            {'x': trees['health'], 'y': trees['ratio'], 'type': 'bar',
             'marker': {'color': trees['color_assign']}},
        ],
        'layout': {
            'title': 'Q1: Tree Health',
            'xaxis': {'title': 'Health'},
            'yaxis': {'title': '% of Trees'},
        }
    }
    return figure



@app_simple.callback(
    dash.dependencies.Output('q2_stewardGraph', 'figure'),
    [dash.dependencies.Input('borough_id', 'value'),
     dash.dependencies.Input('trees_id', 'value')])
def update_q2Graph(borough_filter, tree_filter):
    """Build the Q2 bubble chart: steward activity against tree health.

    Queries the tree-census endpoint for the selected species and borough,
    grouped by ``health`` and ``steward``, and draws one marker per
    (steward level, health level) pair. Marker size is proportional to that
    pair's share of all trees returned by the query; marker colour follows
    the health level.

    Parameters
    ----------
    borough_filter : str or None
        Current value of the ``borough_id`` dropdown.
    tree_filter : str or None
        Current value of the ``trees_id`` dropdown.

    Returns
    -------
    dict
        Plotly figure dictionary for ``q2_stewardGraph``. If either dropdown
        is empty, or the query returns no usable rows, the figure has no
        traces and its title says why.

    Raises
    ------
    Whatever ``pd.read_json`` raises when the request itself fails.
    """
    # Imported locally (Python 3 spelling first, Python 2 fallback) so the
    # callback needs nothing beyond the notebook's existing imports.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    # x positions for the steward levels. '4orMore' gets its own position so
    # its markers no longer sit on top of the '3or4' ones.
    steward_labels = ['None', '1or2', '3or4', '4orMore']
    steward_position = {'None': 0, '1or2': 1.5, '3or4': 3.5, '4orMore': 4.5}

    layout = {
        'title': 'Q2: Steward Activity vs. Health',
        'xaxis': {'title': 'Steward Activity (Avg. # Stewards)',
                  # Label the ticks with the dataset's own category names.
                  'tickvals': [steward_position[label] for label in steward_labels],
                  'ticktext': steward_labels},
        'yaxis': {'title': 'Health (Poor=1, Fair=2, Good=3)'},
    }

    def placeholder_figure(note):
        # Same axes as the real chart, but without any traces.
        empty_layout = dict(layout)
        empty_layout['title'] = 'Q2: Steward Activity vs. Health (' + note + ')'
        return {'data': [], 'layout': empty_layout}

    def soql_text(value):
        # SoQL escapes a single quote inside a text literal by doubling it;
        # the result is percent-encoded so it is safe inside the URL.
        return quote(value.replace("'", "''").encode('utf-8'), safe='')

    # A cleared dropdown passes None; there is nothing to query then.
    if not tree_filter or not borough_filter:
        return placeholder_figure('select a species and a borough')

    #read in data
    query_template = ('https://data.cityofnewyork.us/resource/nwxe-4ae8.json?' +
        '$select=spc_common, boroname, health, steward, count(tree_id)' +
        "&$where=spc_common='{species}' AND boroname='{borough}'" +
        '&$group=spc_common,boroname, health, steward').replace(' ', '%20')
    soql_url = query_template.format(species=soql_text(tree_filter),
                                     borough=soql_text(borough_filter))
    trees = pd.read_json(soql_url)

    # No matching trees gives a frame without the columns used below.
    required_columns = ('health', 'steward', 'count_tree_id')
    if trees.empty or any(col not in trees.columns for col in required_columns):
        return placeholder_figure('no trees found')

    # y position and marker colour per health level; any label other than
    # Poor/Fair is placed at 3 and drawn green.
    health_rank = {'Poor': 1, 'Fair': 2}
    health_color = {'Poor': 'red', 'Fair': 'yellow'}

    #create categories
    trees = trees.assign(sort_order_health=trees['health'].apply(lambda h: health_rank.get(h, 3)))
    # Unrecognised steward labels keep the previous fallback position of 3.5.
    trees = trees.assign(sort_order_steward=trees['steward'].apply(lambda s: steward_position.get(s, 3.5)))
    tree_sum = trees['count_tree_id'].sum()
    trees = trees.assign(ratio=trees['count_tree_id'] / tree_sum)
    trees = trees.assign(color_assign=trees['health'].apply(lambda h: health_color.get(h, 'green')))

    #visualize
    figure = {
        'data': [
            {'x': trees['sort_order_steward'], 'y': trees['sort_order_health'], 'mode': 'markers',
             'marker': {'size': trees['ratio'] * 200, 'color': trees['color_assign']},
             }
        ],
        'layout': layout
    }
    return figure



In [25]:
# Embed the app in the cell output and start its server with show_app's
# defaults (port 9999, 700x350 iframe). NOTE(review): the server log suggests
# this call keeps running until interrupted, so keep it as the last cell.
show_app(app_simple)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:9999/ (Press CTRL+C to quit)
127.0.0.1 - - [14/Oct/2018 15:13:58] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:13:59] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:13:59] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:00] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:00] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:01] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:02] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:02] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:03] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:03] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:54] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:55] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2018 15:14:55] "GET /_dash-lay