# general_demographics-dash.ipynb

### CSc-59867 - Senior Design - Prof. Etemadpour

* Purpose: Present an interactive visualization with the NYC census demographics dataset from NYC Open Data
* Date started: 2021-04-29
* Authors: Xin Chen, Ian S. McBride, Lifu Tao

In [None]:
from jupyter_dash import JupyterDash

In [None]:
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

When running in JupyterHub or Binder, call the `JupyterDash.infer_jupyter_proxy_config()` function to detect the proxy configuration.

In [None]:
# JupyterDash.infer_jupyter_proxy_config()

### Load and preprocess data

In [None]:
# Helper for displaying CD numbers
# Maps the leading digit of a three-character CD code to its borough name.
cd_boro_dict = {
    1: 'Manhattan',
    2: 'Bronx',
    3: 'Brooklyn',
    4: 'Queens',
    5: 'Staten Island',
}


def _split_cd(cd):
    """Validate a CD code and split it into its two parts.

    Args:
        cd: CD code as a string or integer; ``str(cd)`` must be exactly
            three characters, the first being a key of ``cd_boro_dict``.

    Returns:
        Tuple ``(borough_name, district_number)`` where ``district_number``
        is the last two characters of the code, kept as a string.

    Raises:
        ValueError: If the code is not three characters long, or its first
            character is not a known borough digit.
    """
    cd_str = str(cd)
    if len(cd_str) != 3:
        raise ValueError('incorrect length of CD')
    try:
        boro = cd_boro_dict.get(int(cd_str[0]))
    except ValueError:
        # Non-numeric prefix: report it as a bad borough prefix rather than
        # leaking int()'s "invalid literal" message.
        boro = None
    if not boro:
        raise ValueError('incorrect borough prefix in CD')
    return boro, cd_str[1:]


def cd_to_name(cd):
    """Return a display name such as ``'Manhattan CD-09'`` for a CD code.

    Raises:
        ValueError: If ``cd`` is not a valid three-character CD code.
    """
    boro, cd_num = _split_cd(cd)
    return f'{boro} CD-{cd_num}'


def cd_to_borough(cd):
    """Return the borough name for a CD code.

    Raises:
        ValueError: If ``cd`` is not a valid three-character CD code.
    """
    boro, _ = _split_cd(cd)
    return boro

In [None]:
# Names of columns for the different demographic categories
# Each category key maps to the list of column names that belong to it.
demographics_categories = {
    # Age brackets
    'age': [
        'age-under-5-years',
        'age-5-to-9-years',
        'age-10-to-14-years',
        'age-15-to-19-years',
        'age-20-to-24-years',
        'age-25-to-44-years',
        'age-45-to-64-years',
        'age-65-years-and-over',
    ],
    # Number of persons per housing unit
    'housing_unit_size': [
        'housing-unit-size-1-person',
        'housing-unit-size-2-person',
        'housing-unit-size-3-person',
        'housing-unit-size-4-person',
        'housing-unit-size-5-persons-and-over',
    ],
    # Tenure of occupied housing units
    'housing_renter_owner': ['housing-units-occupied-renter', 'housing-units-occupied-owner'],
    # Living arrangement
    'persons_in_housing_type': [
        'persons-living-in-group-quarters',
        'persons-living-in-family-households',
        'persons-living-in-nonfamily-household',
    ],
    # Race / Hispanic origin
    'race': [
        'race-white-nonhispanic',
        'race-black-nonhispanic',
        'race-asian-and-pacific-islander-nonhispanic',
        'race-other-nonhispanic',
        'race-two-or-more-races-nonhispanic',
        'race-hispanic-origin',
    ],
    # Sex
    'sex': ['sex-female', 'sex-male'],
}

# Human-readable plot title for each category key above
demographics_plot_titles = {
    'age': 'Age groups by CD',
    'housing_unit_size': 'Housing unit size by CD',
    'housing_renter_owner': 'Housing units rented or owned by CD',
    'persons_in_housing_type': 'Household type by CD',
    'race': 'Race by CD',
    'sex': 'Sex by CD',
}

In [None]:
# Load dataset
# The 'cd' column becomes the index and is converted to strings so that
# CD codes can be handled as text by the helpers and callbacks.
df = pd.read_csv(
    'data/general_demographics/nyc_demographics_2010_by_cd-preprocessed.csv',
    index_col='cd',
)
df.index = df.index.astype(str)

# Scale data
# Standardised copy of every column, keeping the original index and column labels.
scaler = StandardScaler()
df_scaled = pd.DataFrame(
    scaler.fit_transform(df),
    index=df.index,
    columns=df.columns,
)

# Group labels
# Parallel lists, one entry per row of `df`, in row order.
cd_nums = df.index.to_list()
cd_names = [cd_to_name(cd) for cd in cd_nums]
classes = [cd_to_borough(cd) for cd in cd_nums]
# Every borough name, in the order declared in `cd_boro_dict`.
classes_unique = [*cd_boro_dict.values()]

# Create external legend for scatter
# One coloured bullet per borough, in `classes_unique` order.
# Dash `style` dictionaries take camelCased CSS property names
# (e.g. `fontSize`, not `font-size`).
colors = px.colors.qualitative.Plotly
scatter_legend_items = [
    html.Li(
        [
            html.Span(
                '●',
                style={
                    # Wrap around the palette if there are more classes than colours
                    'color': colors[i % len(colors)],
                    'fontSize': 20,
                    'verticalAlign': 'middle',
                },
            ),
            html.Span(
                f' {class_name}',
                style={'verticalAlign': 'middle'},
            ),
        ],
        style={
            'margin': '-10px 0 0',
        },
    )
    for i, class_name in enumerate(classes_unique)
]
scatter_legend_html = html.Div([
    html.Div('Borough'),
    html.Ul(
        scatter_legend_items,
        style={'listStyle': 'none'},
        className='ml-3 p-0',
    ),
])

### Construct the app and callbacks

In [None]:
# Dash application using the Bootstrap stylesheet from dash-bootstrap-components
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# # Create server variable with Flask server object for use with gunicorn
# server = app.server

# Page layout: a single row with a wide plots column (scatter above bar chart)
# and a narrow column holding the legends and controls.
app.layout = html.Div(
    className='container',
    children=[
        dbc.Row(children=[
            # Plots column
            dbc.Col(
                sm=9,
                md=9,
                children=[
                    html.Div(
                        children='General demographics scatter (2-component t-SNE)',
                        className='h4',
                    ),
                    dcc.Graph(
                        id='scatter',
                        hoverData={'points': [{'customdata': ['109']}]},
                        style={'height': '45vh'},
                    ),
                    # Filled in by the `display_bar_title` callback
                    html.Div(id='bar-title', className='h4'),
                    dcc.Graph(id='bar', style={'height': '40vh'}),
                ],
            ),
            # Legends and controls column
            dbc.Col(
                sm=3,
                md=3,
                children=[
                    scatter_legend_html,
                    # Filled in by the `display_perplexity` callback
                    html.Div(id='scatter-slider-title'),
                    dcc.Slider(
                        id='scatter-slider',
                        min=5,
                        max=50,
                        step=1,
                        value=30,
                        marks={'5': '5', '50': '50'},
                    ),
                    html.Div(children='Demographic category', className='h6'),
                    dcc.Dropdown(
                        id='bar-dropdown',
                        options=[
                            {'label': category, 'value': category}
                            for category in demographics_categories
                        ],
                        value='age',
                    ),
                    # Placeholder text, replaced by the `create_bar_legend` callback
                    html.Div(children='Todo', id='bar-legend'),
                ],
            ),
        ]),
    ],
)

@app.callback(
    dash.dependencies.Output('bar', 'figure'),
    [dash.dependencies.Input('scatter', 'selectedData'),
     dash.dependencies.Input('bar-dropdown', 'value'),
    ])
def create_bar(selectedData, feature_category):
    """Build the stacked percentage bar chart for the chosen category.

    Args:
        selectedData: Value of the scatter graph's ``selectedData`` property.
            When it is empty or carries no points, every CD is plotted.
            Otherwise the first ``customdata`` entry of each point is used
            as the CD code.
        feature_category: Key of ``demographics_categories`` picked in the
            dropdown.

    Returns:
        A Plotly figure with one bar per CD, where each column of the
        category is expressed as a percentage of that CD's row total.
    """
    # Identify what cds to plot; a selection without any points is treated
    # the same as no selection so the chart never ends up empty.
    selected_points = (selectedData or {}).get('points')
    if selected_points:
        selected_cd_nums = [point['customdata'][0] for point in selected_points]
    else:
        selected_cd_nums = cd_nums

    # Lookup feature names
    feat_cols = demographics_categories[feature_category]

    # Normalize by hand: each row is divided by its own total and scaled to 100.
    # Subsetting first avoids copying the whole dataset on every callback.
    df_feat = df.loc[selected_cd_nums, feat_cols]
    df_feat = df_feat.div(df_feat.sum(axis=1), axis=0).mul(100).round(1)
    df_feat['cd_name'] = [cd_to_name(cd) for cd in df_feat.index]

    # Plot
    fig = px.bar(
        df_feat,
        color_discrete_sequence=colors,
        x=df_feat.index,
        y=feat_cols,
        hover_name='cd_name',
        labels={
            'cd': 'Community District (CD)',
            'value': 'Percentage of population',
            'variable': 'Demographic',
        },
    )
    fig.update_layout(
        margin={'r': 0, 'b': 0, 'l': 0, 't': 0},
        # The legend is rendered outside the figure by `create_bar_legend`
        showlegend=False,
        xaxis_tickangle=-45,
    )

    return fig

@app.callback(
    dash.dependencies.Output('bar-legend', 'children'),
    [dash.dependencies.Input('bar-dropdown', 'value')])
def create_bar_legend(feature_category):
    """Build the external legend for the bar chart.

    Args:
        feature_category: Key of ``demographics_categories`` picked in the
            dropdown.

    Returns:
        An ``html.Div`` holding a heading and one coloured square per
        column of the category, in the same order as
        ``demographics_categories[feature_category]``.
    """
    features = demographics_categories[feature_category]
    # Dash `style` dictionaries take camelCased CSS property names.
    legend_items = [
        html.Li(
            [
                html.Span(
                    '■',
                    style={
                        # Wrap around the palette, as Plotly Express does when
                        # it runs out of colours in `color_discrete_sequence`.
                        'color': colors[i % len(colors)],
                        'fontSize': 20,
                        'verticalAlign': 'middle',
                    },
                ),
                html.Span(
                    f' {feature}',
                    style={'verticalAlign': 'middle'},
                ),
            ],
            style={'margin': '-10px 0 0'},
        )
        for i, feature in enumerate(features)
    ]
    legend_html = html.Div([
        html.Div('Demographic'),
        html.Ul(
            legend_items,
            style={'listStyle': 'none'},
            className='ml-3 p-0',
        ),
    ])
    return legend_html

@app.callback(
    dash.dependencies.Output('scatter', 'figure'),
    [dash.dependencies.Input('scatter-slider', 'value')])
def create_scatter(perplexity):
    """Build the t-SNE scatter plot of all CDs.

    Args:
        perplexity: t-SNE perplexity taken from the slider.

    Returns:
        A Plotly scatter figure with one point per CD, coloured by borough
        and carrying the CD code as ``customdata``.
    """
    # Run t-SNE on the scaled data; the seed is fixed via `random_state`.
    n_components = 2
    RS = 123
    tsne_results = TSNE(
        random_state=RS,
        n_components=n_components,
        perplexity=perplexity,
    ).fit_transform(df_scaled)
    df_tsne = pd.DataFrame(
        data={
            'x': tsne_results[:, 0],
            'y': tsne_results[:, 1],
            'class': classes,
            'cd_name': cd_names,
            'cd_num': cd_nums,
        }
    )

    fig = px.scatter(
        df_tsne,
        color='class',
        custom_data=['cd_num'],
        labels={'class': 'Borough', 'x': 'X component', 'y': 'Y component'},
        hover_name='cd_name',
        hover_data={'class': False, 'x': ':.2f', 'y': ':.2f'},
        color_discrete_sequence=colors,
        # Pin each borough to the colour used by the external legend
        # (position in `classes_unique`), independent of the row order
        # of the dataset.
        color_discrete_map=dict(zip(classes_unique, colors)),
        x='x', y='y',
    )
    fig.update_layout(
        margin={'r': 0, 'b': 0, 'l': 0, 't': 0},
        # The legend is rendered outside the figure (`scatter_legend_html`)
        showlegend=False,
        xaxis_tickangle=-45,
    )

    return fig

@app.callback(
    dash.dependencies.Output('bar-title', 'children'),
    [dash.dependencies.Input('bar-dropdown', 'value')],
)
def display_bar_title(feature_category):
    """Return the bar-chart heading for the category picked in the dropdown."""
    heading = demographics_plot_titles[feature_category]
    return heading

@app.callback(
    dash.dependencies.Output('scatter-slider-title', 'children'),
    [dash.dependencies.Input('scatter-slider', 'value')],
)
def display_perplexity(value):
    """Return the caption showing the perplexity currently set on the slider."""
    caption = f'Current t-SNE perplexity: {value}'
    return caption

In [None]:
# Start the app; the commented-out calls below are alternative display modes.
app.run_server() # Requires opening a new browser tab to view the app
# app.run_server(mode='inline') # Display app in notebook cell
# app.run_server(mode='jupyterlab') # Display app in JupyterLab tab