# general_demographics-dash.ipynb

### CSc-59867 - Senior Design - Prof. Etemadpour

* Purpose: Present an interactive visualization with the NYC census demographics dataset from NYC Open Data
* Date started: 2021-04-29
* Authors: Xin Chen, Ian S. McBride, Lifu Tao

In [None]:
from jupyter_dash import JupyterDash

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

When running in JupyterHub or Binder, call the `JupyterDash.infer_jupyter_proxy_config` function to detect the proxy configuration.

In [None]:
# JupyterDash.infer_jupyter_proxy_config()

### Load and preprocess data

In [None]:
# Helper for displaying CD numbers
# Maps the leading digit of a three-character community district (CD) code
# to the name of its borough.
cd_boro_dict = {
    1: 'Manhattan',
    2: 'Bronx',
    3: 'Brooklyn',
    4: 'Queens',
    5: 'Staten Island',
}


def _split_cd(cd):
    """Validate a CD code and split it into its borough and district parts.

    Args:
        cd: A CD code (int or str) whose string form is exactly three
            characters: one borough digit followed by a two-character
            district number.

    Returns:
        A ``(borough_name, district_number)`` tuple, where the district
        number is the last two characters of the code as a string.

    Raises:
        ValueError: If the code is not three characters long, or if its
            first character is not a key of ``cd_boro_dict``.
    """
    cd_str = str(cd)
    if len(cd_str) != 3:
        raise ValueError('incorrect length of CD')
    try:
        boro = cd_boro_dict[int(cd_str[0])]
    except (KeyError, ValueError):
        # KeyError: digit with no borough; ValueError: prefix is not a digit.
        # Both are reported the same way instead of leaking int()'s message.
        raise ValueError('incorrect borough prefix in CD') from None
    return boro, cd_str[1:]


def cd_to_name(cd):
    """Return a display name such as ``'Manhattan CD-09'`` for a CD code.

    Raises:
        ValueError: If ``cd`` is not a valid three-character CD code.
    """
    boro, cd_num = _split_cd(cd)
    return f'{boro} CD-{cd_num}'


def cd_to_borough(cd):
    """Return the borough name (e.g. ``'Queens'``) for a CD code.

    Raises:
        ValueError: If ``cd`` is not a valid three-character CD code.
    """
    boro, _ = _split_cd(cd)
    return boro

In [None]:
# Load dataset
# The 'cd' column becomes the index, so every remaining column is a feature.
df = pd.read_csv(
    'data/general_demographics/nyc_demographics_2010_by_cd-preprocessed.csv',
    index_col='cd',
)
# Scale data
# Rescale every column with StandardScaler (fitted on this same data).
scaler = StandardScaler()
df[df.columns] = scaler.fit_transform(df[df.columns])

# Group labels
# Built as lists rather than lazy `map` iterators: an iterator is exhausted
# after its first use, which would leave these names empty if the DataFrame
# below is rebuilt or the labels are read again in a later cell.
classes = [cd_to_borough(cd) for cd in df.index.to_list()]
cds = [cd_to_name(cd) for cd in df.index.to_list()]

# Run t-SNE
# random_state is fixed so repeated runs use the same seed.
n_components = 2
RS = 123
tsne_results = TSNE(random_state=RS, n_components=n_components).fit_transform(df)
# One row per CD: the two embedding coordinates plus its display labels.
df_tsne = pd.DataFrame(
    data={
        'x': tsne_results[:, 0],
        'y': tsne_results[:, 1],
        'class': classes,
        'cd': cds,
    }
)

### Construct the app and callbacks

In [None]:
# Dash application object for this notebook.
app = JupyterDash(__name__)

# # Create server variable with Flask server object for use with gunicorn
# server = app.server

# Page layout, top to bottom: a placeholder panel, a dropdown, the scatter
# plot, and a text area ('cd-name') that is filled in by a callback.
app.layout = html.Div(children=[
    # Placeholder panel
    html.Div(children=['Stub content'], style={}),
    # Dropdown (its value is passed to the scatter callback)
    html.Div(
        children=[
            dcc.Dropdown(
                id='scatter-dropdown',
                options=[
                    {'label': 'a', 'value': 'a'},
                    {'label': 'b', 'value': 'b'},
                ],
                value='a',
            ),
        ],
        style={},
    ),
    # Scatter plot; hoverData is pre-seeded so the hover callback has a
    # value to show before the user hovers over any point.
    html.Div(
        children=[
            dcc.Graph(
                id='scatter',
                hoverData={'points': [{'customdata': ['Manhattan CD-09']}]},
            ),
        ],
        style={},
    ),
    # Output area for the name of the hovered CD
    html.Div(id='cd-name', style={}),
])

@app.callback(
    dash.dependencies.Output('scatter', 'figure'),
    [dash.dependencies.Input('scatter-dropdown', 'value')],
)
def create_scatter(dropdown):
    """Build the scatter figure of the t-SNE embedding.

    Args:
        dropdown: Current value of the 'scatter-dropdown' component. It
            triggers the callback but is not used to build the figure.

    Returns:
        A Plotly Express scatter of ``df_tsne`` coloured by 'class', with
        the 'cd' label used as hover title and carried as custom data.
    """
    scatter_options = {
        'x': 'x',
        'y': 'y',
        'color': 'class',
        'hover_name': 'cd',
        # Exposed to the hover callback through each point's customdata.
        'custom_data': ['cd'],
    }
    figure = px.scatter(df_tsne, **scatter_options)
    return figure

@app.callback(
    dash.dependencies.Output('cd-name', 'children'),
    [dash.dependencies.Input('scatter', 'hoverData')])
def update_y_timeseries(hoverData):
    """Show the label of the scatter point currently hovered.

    Args:
        hoverData: The 'hoverData' property of the 'scatter' graph; expected
            to be a dict with a 'points' list whose entries carry the CD
            label as the first 'customdata' item.

    Returns:
        The first 'customdata' value of the first hovered point.

    Raises:
        dash.exceptions.PreventUpdate: If there is no hover payload or it
            contains no points, so the current text is left unchanged
            instead of the callback failing.
    """
    points = (hoverData or {}).get('points')
    if not points:
        raise dash.exceptions.PreventUpdate
    return points[0]['customdata'][0]

In [None]:
# Start the app. Exactly one launch mode is active; the alternatives are
# kept commented out for reference.
# app.run_server() # Requires opening a new browser tab to view the app
# app.run_server(mode="inline") # Display the app in a notebook cell
app.run_server(mode="jupyterlab") # Display app in JupyterLab tab