## DATA 608 – Knowledge and Visual Analytics: Assignment 4
### Amber Ferger
### 10/18/2020


**Assignment:** In this module we’ll be looking at data from the New York City tree census: 
https://data.cityofnewyork.us/Environment/2015-Street-Tree-Census-Tree-Data/uvpi-gqnh

This data is collected by volunteers across the city, and is meant to catalog information about every single tree in the city.

Build a dash app for an arborist studying the health of various tree species (as defined by the variable ‘spc_common’) across each borough (defined by the variable ‘borough’). This arborist would like to answer the following two questions for each species and in each borough:
1. What proportion of trees are in good, fair, or poor health according to the ‘health’ variable?
2. Are stewards (steward activity measured by the ‘steward’ variable) having an impact on the health of trees?

In [None]:
##### libraries
import pandas as pd
import numpy as np

In [None]:
# Distinct borough names: select boroname and group on it.
borough_url = 'https://data.cityofnewyork.us/resource/nwxe-4ae8.json?' + '&'.join([
    '$select=boroname',
    '$group=boroname',
])
borough_url = borough_url.replace(' ', '%20')  # percent-encode any spaces
borough_df = pd.read_json(borough_url)

# Distinct species names: select spc_common and group on it. The comparison
# against 'NaN' is the original author's filter for rows without a species.
tree_url = 'https://data.cityofnewyork.us/resource/nwxe-4ae8.json?' + '&'.join([
    '$select=spc_common',
    "$where=spc_common!='NaN'",
    '$group=spc_common',
])
tree_url = tree_url.replace(' ', '%20')  # percent-encode any spaces
tree_df = pd.read_json(tree_url)

In [None]:
#################### dash application
import dash
import dash_table
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go

import flask
import pandas as pd
import time
import os
import plotly.express as px

def _as_options(values):
    """Turn a list of values into Dropdown options whose label equals the value."""
    return [{'label': item, 'value': item} for item in values]


def _build_layout(borough_names, species_names):
    """Assemble the page: heading, two dropdowns, and the two graphs.

    Args:
        borough_names: Values offered by the 'boroname' dropdown.
        species_names: Values offered by the 'treename' dropdown.

    Returns:
        The root ``html.Div`` holding every component, in display order.
    """
    borough_picker = dcc.Dropdown(
        id='boroname',
        options=_as_options(borough_names),
        value='Bronx',
        style={'width': '300px'}
    )
    species_picker = dcc.Dropdown(
        id='treename',
        options=_as_options(species_names),
        value='American beech',
        style={'width': '300px'}
    )

    # Each question is a caption followed by the graph a callback fills in.
    health_question = html.Div(
        children='What proportion of trees are in good, fair, or poor health according to the ‘health’ variable?')
    steward_question = html.Div(
        children='Are stewards (steward activity measured by the ‘steward’ variable) having an impact on the health of trees?')

    page = [
        html.H1('Tree Health by Borough'),
        html.Div(children="Select a Borough and a tree type:"),
        borough_picker,
        species_picker,
        html.Br(),
        health_question,
        dcc.Graph(id='prop-health'),
        html.Br(),
        steward_question,
        dcc.Graph(id='health-graph'),
    ]
    return html.Div(page, className="container")


# Flask server hosting the Dash app; the secret key comes from the
# 'secret_key' environment variable and falls back to the literal 'secret'.
server = flask.Flask('app')
server.secret_key = os.environ.get('secret_key', 'secret')

app = dash.Dash('app', server=server)

# Turn off local serving of Dash's script assets.
app.scripts.config.serve_locally = False

app.layout = _build_layout(
    borough_df['boroname'].tolist(),
    tree_df['spc_common'].tolist(),
)

#############################################################
def _health_counts_url(boroname, treename):
    """Build the census query URL that counts trees per health status.

    Args:
        boroname: Borough name used in the ``boroname`` filter.
        treename: Common species name used in the ``spc_common`` filter.

    Returns:
        The percent-encoded request URL as a string.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from urllib.parse import quote, urlencode

    def soql_text(value):
        # SoQL string literals escape an embedded single quote by doubling it,
        # so names containing an apostrophe no longer break the query.
        return "'" + str(value).replace("'", "''") + "'"

    query = {
        '$select': 'health,count(tree_id) as count_by_health',
        '$where': ('spc_common=' + soql_text(treename)
                   + ' and boroname=' + soql_text(boroname)),
        '$group': 'health',
    }
    # quote (rather than the default quote_plus) encodes spaces as %20;
    # '$' is kept literal so the parameter names stay $select/$where/$group.
    return ('https://data.cityofnewyork.us/resource/nwxe-4ae8.json?'
            + urlencode(query, safe='$', quote_via=quote))


@app.callback(Output('prop-health', 'figure'),
              [Input('boroname', 'value'),
               Input('treename', 'value')])
def table_update(boroname, treename):
    """Draw the bar chart of tree counts per health status.

    Args:
        boroname: Value of the 'boroname' dropdown (None when cleared).
        treename: Value of the 'treename' dropdown (None when cleared).

    Returns:
        A bar figure with one bar per health status (ordered Poor, Fair,
        Good) showing the tree count, with each status's share of the total
        available as hover data. An empty titled figure is returned when a
        dropdown is cleared or the query yields no rows.
    """
    chart_title = 'Trees by Health Status'

    # A cleared dropdown delivers None; there is nothing to query.
    if not boroname or not treename:
        return go.Figure(layout={'title': {'text': chart_title}})

    health_df = pd.read_json(_health_counts_url(boroname, treename))
    # An empty response yields a frame with no columns; reindex guarantees
    # the two columns used below exist before they are accessed.
    health_df = health_df.reindex(columns=['health', 'count_by_health'])
    if health_df.empty:
        return go.Figure(layout={'title': {'text': chart_title}})

    health_df['prop_total'] = health_df['count_by_health'] / health_df['count_by_health'].sum()

    # Ordered categories make the bars sort Poor -> Fair -> Good.
    health_df['health'] = pd.Categorical(health_df['health'], ["Poor", "Fair", "Good"])
    health_df = health_df.sort_values(['health'])

    return px.bar(health_df, x='health', y='count_by_health',
                  hover_data=['prop_total'], title=chart_title)

def _steward_health_url(boroname, treename):
    """Build the census query URL that counts trees per steward level and health.

    Args:
        boroname: Borough name used in the ``boroname`` filter.
        treename: Common species name used in the ``spc_common`` filter.

    Returns:
        The percent-encoded request URL as a string.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from urllib.parse import quote, urlencode

    def soql_text(value):
        # SoQL string literals escape an embedded single quote by doubling it,
        # so names containing an apostrophe no longer break the query.
        return "'" + str(value).replace("'", "''") + "'"

    query = {
        '$select': 'steward,health,count(tree_id) as count_by_steward_health',
        '$where': ('spc_common=' + soql_text(treename)
                   + ' and boroname=' + soql_text(boroname)),
        '$group': 'steward,health',
    }
    # quote (rather than the default quote_plus) encodes spaces as %20;
    # '$' is kept literal so the parameter names stay $select/$where/$group.
    return ('https://data.cityofnewyork.us/resource/nwxe-4ae8.json?'
            + urlencode(query, safe='$', quote_via=quote))


@app.callback(Output('health-graph', 'figure'),
              [Input('boroname', 'value'),
               Input('treename', 'value')])
def graph_update(boroname, treename):
    """Draw the bar chart relating steward activity to tree health.

    For every health status, each steward level's bar segment is that
    level's tree count divided by the total tree count for the status.

    Args:
        boroname: Value of the 'boroname' dropdown (None when cleared).
        treename: Value of the 'treename' dropdown (None when cleared).

    Returns:
        A bar figure with health on the x axis, coloured by steward level.
        An empty titled figure is returned when a dropdown is cleared or the
        query yields no rows.
    """
    chart_title = "Tree Health with respect to Stewards"

    # A cleared dropdown delivers None; there is nothing to query.
    if not boroname or not treename:
        return go.Figure(layout={'title': {'text': chart_title}})

    merged_df = pd.read_json(_steward_health_url(boroname, treename))
    # An empty response yields a frame with no columns; reindex guarantees
    # the columns used below exist before they are accessed.
    merged_df = merged_df.reindex(columns=['steward', 'health', 'count_by_steward_health'])
    if merged_df.empty:
        return go.Figure(layout={'title': {'text': chart_title}})

    # Per-health totals are the steward/health counts summed over steward,
    # so no second request is needed. Rows without a health value get no total.
    health_totals = merged_df.groupby('health')['count_by_steward_health'].sum()
    merged_df['count_by_health'] = merged_df['health'].map(health_totals)
    merged_df['prop_health'] = merged_df['count_by_steward_health'] / merged_df['count_by_health']

    # Ordered categories fix the bar and legend order.
    merged_df['steward'] = pd.Categorical(merged_df['steward'], ["None", "1or2", "3or4", "4orMore"])
    merged_df['health'] = pd.Categorical(merged_df['health'], ["Poor", "Fair", "Good"])
    merged_df = merged_df.sort_values(["health", "steward"])

    return px.bar(merged_df, x="health", y="prop_health", color="steward",
                  title=chart_title,
                  labels={'health': "Tree Health",
                          'prop_health': "Proportion of trees cared for by stewards"})


# Start the app's server only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run_server()
