In [1]:
#| warning: false
#| code-fold: true
#import library
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from dash import dash_table
import plotly.express as px
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import geopandas as gpd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
from IPython.display import display

# Make plotly use the "notebook" renderer by default.
pio.renderers.default = "notebook"

# Load the inputs. The metro CSV is read twice on purpose: `df` is normalised
# below, while `original_df` keeps the raw values for display.
us_states = gpd.read_file("../final_data/tl_2020_us_state.shp")
df = pd.read_csv("../final_data/final_data.csv")
original_df = pd.read_csv("../final_data/final_data.csv")

# Dash application object.
app = dash.Dash(__name__)

# Metrics rescaled to the 0-1 range.
columns_to_normalize = [
    'Personal Income',
    'Median Rent',
    'Median Sale Price',
    'Walkability',
    'Unemployment',
    'Purchasing Power',
    'Violent Crime Rate',
    'Non-Violent Crime Rate',
]

# Metrics where a lower raw value is the better one.
columns_lower_better = [
    'Median Rent',
    'Unemployment',
    'Violent Crime Rate',
    'Non-Violent Crime Rate',
]

# Min-max scale, then flip the "lower is better" metrics so that a higher
# normalised value is always the better one.
scaler = MinMaxScaler()
df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])
df[columns_lower_better] = 1 - df[columns_lower_better]

# Marker colour for each political affiliation.
color_scale = dict(
    Democrat="blue",
    Republican="red",
    Independent="yellow",
    Nonpartisan="grey",
)

# Display names (with units) for the raw metric columns.
new_column_name = {
    'Personal Income': 'Median Personal Income (USD)',
    'Median Rent': 'Median Rent (USD)',
    'Median Sale Price': "Median Home Sale Price (USD)",
    'Purchasing Power': 'Purchasing Power (per $100)',
    'Violent Crime Rate': 'Violent Crime Rate (per 100k)',
    'Non-Violent Crime Rate': 'Non-Violent Crime Rate (per 100k)',
    'Unemployment': 'Unemployment Rate',
}
original_df = original_df.rename(columns=new_column_name)

# Attach the raw, renamed metrics to the normalised frame. Columns that `df`
# already carries under the same name are dropped first so the merge does not
# produce duplicates.
merge_df = original_df.drop(
    columns=['Walkability', 'Latitude', 'Longitude',
             'Political Affiliation', 'Average Yearly Temperature(F)'],
)
df = df.merge(merge_df, on='Region', how='left')

## Introduction


The interactive graph below lets you rate how important each factor is to you on a scale from 0 to 10, with 10 being the most important. It then generates a weighted score for each metropolitan area, with higher scores indicating a better match to your preferences. All values that contribute to the weighted score and rank are normalized from 0-1, ensuring that features with larger raw values don't disproportionately influence the result. Values where lower is better, such as unemployment and crime rates, are subtracted from 1 before being calculated in the weighted sum score. Hover over each area to reveal the region's name, rank, and weighted sum based on your preferences!

The bigger the circle, the better the city is for you!

**So what are you waiting for? Find your next adventure now!**

<div>

> **How to use this viz**<br>
> Adjust the sliders to reflect your preferences. The graph will update in real-time to reflect your choices. Hover over each area to see the region's name, rank, and weighted sum based on your preferences.<br>
> N.B. You can use the dropdown menu to color code the areas based on the actual values of any of the available features.

</div>

In [2]:
app = dash.Dash(__name__)

# (label shown to the user, component id read by the callback) for each
# importance slider, in the order they appear in the control panel.
IMPORTANCE_SLIDERS = [
    ("Low Rent", "Median Rent"),
    ("High Income", "Personal Income"),
    ("High Market Value for Houses", "Median Sale Price"),
    ("Great Walkability", "Walkable"),
    ("Low Unemployment", "Unemployment"),
    ("High Purchasing Power", "Purchasing Power"),
    ("Low Violent Crime Rate", "Crime Rate"),
]

# Columns that are not offered in the "Variable to Visualize" dropdown:
# coordinates and the normalised score columns.
HIDDEN_FROM_DROPDOWN = [
    'Latitude',
    'Longitude',
    'Personal Income',
    'Median Rent',
    'Median Sale Price',
    'Walkability',
    'Unemployment',
    'Purchasing Power',
    'Violent Crime Rate',
    'Non-Violent Crime Rate',
]


def _importance_slider(label, slider_id, default=5):
    """Return the ``[Label, Slider]`` pair for one importance setting.

    Parameters
    ----------
    label : str
        Text shown above the slider.
    slider_id : str
        Component id; the callback reads the slider value through it.
    default : int
        Initial slider position. It must be one of the integer stops 0-10;
        the previous default of 0.5 was not reachable with ``step=1`` and so
        could never be restored once the slider had been moved.
    """
    return [
        html.Label(label, style={"color": "black"}),
        dcc.Slider(
            id=slider_id,
            min=0,
            max=10,
            step=1,
            value=default,
            marks={i: str(i) for i in range(11)},
            tooltip={"always_visible": False, "placement": "bottom"},
        ),
    ]


# Left-hand control panel: the seven sliders followed by the colour dropdown.
control_panel = [
    component
    for label, slider_id in IMPORTANCE_SLIDERS
    for component in _importance_slider(label, slider_id)
]
control_panel += [
    html.Label("Variable to Visualize", style={"color": "black"}),
    dcc.Dropdown(
        id="variable",
        options=[{"label": i, "value": i} for i in df.columns if i not in HIDDEN_FROM_DROPDOWN],
        # NOTE(review): the initial selection is whatever column comes first
        # in `df`; confirm that this is the intended default colouring.
        value=df.columns[0]
    ),
]

# Define the layout of the app: controls on the left, map on the right.
app.layout = html.Div([
    html.H1("Adjust by how important each category is to you", style={"color": "black"}),
    html.Div([
        html.Div(control_panel, style={"width": "30%", "float": "left"}),
        html.Div([
            dcc.Graph(id="city-graph")
        ], style={"width": "70%", "float": "right"}),
    ]),
])

# Define callback to update the graph based on user input
@app.callback(
    Output("city-graph", "figure"),
    [Input("Median Rent", "value"),
     Input("Personal Income", "value"),
     Input("Median Sale Price","value"),
     Input("Walkable","value"),
     Input("Unemployment","value"),
     Input("Purchasing Power","value"),
     Input("Crime Rate","value"),
     Input("variable", "value")])
def update_graph(importance_median_rent, importance_personal_income, importance_median_sale_price, importance_walkable, importance_unemployment, importance_purchasing_power, importance_crime, selected_variable):
    """Rebuild the map from the slider weights and the selected colour variable.

    The positional parameters follow the order of the ``Input`` list above:
    the first seven are the slider values (0-10) and the last one is the
    column chosen in the "variable" dropdown.

    Returns
    -------
    plotly figure
        A ``scatter_geo`` of the metro areas where the marker size is the
        transformed weighted score and the hover text shows region, rank and
        score.
    """
    # Normalised column -> weight chosen by the user.
    weights = {
        "Median Rent": importance_median_rent,
        "Personal Income": importance_personal_income,
        "Median Sale Price": importance_median_sale_price,
        "Walkability": importance_walkable,
        "Unemployment": importance_unemployment,
        "Purchasing Power": importance_purchasing_power,
        "Violent Crime Rate": importance_crime,
    }
    weighted_sum = sum(df[column] * weight for column, weight in weights.items())

    # Compress with log1p and then cube for better visual separation.
    # log1p is used instead of log because log is negative for sums below 1
    # and -inf when every slider is 0; the cube keeps that sign and a negative
    # or infinite marker size is rejected by plotly. log1p is >= 0 for any
    # non-negative sum and is monotonic, so the ranking is unaffected.
    score = np.log1p(weighted_sum) ** 3

    # Work on a per-request copy: the callback can run concurrently for
    # several sessions, so the shared module-level `df` must not be mutated.
    scored = df.assign(
        Weighted_Sum=score,
        # Rank of the score for the tool tip (1 = best match).
        Rank=score.rank(ascending=False, method='dense').astype(int),
    )

    # Plotly scatter plot; `color_scale` is the module-level mapping from
    # political affiliation to marker colour.
    fig = px.scatter_geo(scored, lat='Latitude', lon='Longitude', scope='usa', size="Weighted_Sum", color=selected_variable,
                         color_discrete_map=color_scale, custom_data=["Region", "Weighted_Sum", "Rank"])

    # The previous mapbox_* layout settings were dropped: this is a geo figure,
    # so they had no effect.
    fig.update_layout(title="Best Metro Areas by Personal Livability Score")

    fig.update_traces(hovertemplate='<b>Region:</b> %{customdata[0]}<br>Rank: %{customdata[2]}<br>Weighted Sum: %{customdata[1]:.3f}<extra></extra>')

    return fig

# Run the app
if __name__ == "__main__":
    app.run_server(debug=False,port=8052)


---------

### A closer look at each metric<br>

While the chart above may seem complex with its array of options and data, the straightforward dropdown menu below simplifies the process. It displays a chart with the actual metrics for each feature and sorts the regions from highest to lowest value. The 'better' value will only sometimes be on the left; most people don't prefer higher unemployment or crime rates. This allows you to focus on what matters to you, feature by feature, in a user-friendly manner.  


<div>

> **How to use this viz**<br>
> Select a feature from the dropdown menu to see the actual values for each metropolitan area. The chart will sort the regions from highest to lowest value for the selected feature. <br>
> You can also hover over each bar to see the actual value for the selected feature.

</div>

In [3]:
app = dash.Dash(__name__)

# Layout: a metric selector above a bar chart of that metric per region.
app.layout = html.Div([
    dcc.Dropdown(
        id="variable",
        options=[{"label": i, "value": i} for i in original_df.columns if i not in ['Latitude', 'Longitude', 'Region', 'Political Affiliation']],
        value='Median Personal Income (USD)',
        # Without this the selection can be cleared, which sends None to the
        # callback and makes `sort_values(None)` raise.
        clearable=False,
    ),
    dcc.Graph(id="bar-chart"),
])

@app.callback(
    Output("bar-chart", "figure"),
    [Input("variable", "value")]
)
def update_graph(variable):
    """Return a bar chart of `variable` per region, sorted from highest to lowest.

    Parameters
    ----------
    variable : str
        Name of the `original_df` column selected in the dropdown.
    """
    bar_chart = px.bar(original_df.sort_values(variable, ascending=False), x='Region', y=variable)
    return bar_chart

if __name__ == "__main__":
    # debug is off, matching the first app in this notebook: the debug
    # reloader and dev tools are not wanted when serving from a notebook.
    app.run_server(debug=False)


----------

## Economic data<br>

Here are some economic indicators for each metropolitan region. The dropdown menu lets you choose what area to investigate. The four charts show the actual Median Personal Income, Median Rent, Median Home Sale Price, and Purchasing Power for the selected area compared to the average of all regions. These features can be handy when comparing multiple job offers, to make sure a higher salary still leaves you better off after relocating. A ten percent raise that requires moving from Kansas City to San Francisco could mean a lower quality of life based on what you can afford in the two regions.



In [4]:
# `iplot` is what the following cells call to display their figures.
from plotly.offline import init_notebook_mode, iplot
# NOTE: `go` is already bound in the first cell (to plotly.graph_objects);
# this re-import rebinds the same name.
import plotly.graph_objs as go

# Initialise plotly's offline notebook mode before any `iplot` call.
init_notebook_mode(connected=True)

<div>

> **How to use this viz**<br>
> Select a metropolitan area from the dropdown menu to see the actual values for each economic feature. The 4 charts will show the selected area's Median Personal Income, Median Rent, Median Home Sale Price, and Purchasing Power compared to the average of all regions.<br>
> You can also hover over each bar to see the actual value for the selected feature.

</div>

In [5]:
regions = original_df['Region'].unique()

attributes = ['Median Personal Income (USD)', 'Median Rent (USD)', 'Median Home Sale Price (USD)', 'Purchasing Power (per $100)']

fig = make_subplots(rows=1, cols=4, subplot_titles=attributes)

# Average of each attribute over all regions (the "Metro average" bar).
metro_average = {attribute: original_df[attribute].mean() for attribute in attributes}

# One bar trace per (region, attribute); attribute k goes to subplot column k.
# Only the traces of the first region start out visible.
first_region = regions[0]
for region in regions:
    region_rows = original_df[original_df['Region'] == region]
    for subplot_col, attribute in enumerate(attributes, start=1):
        bar = go.Bar(
            x=[region, 'Metro average'],
            y=[region_rows[attribute].values[0], metro_average[attribute]],
            name=region,
            visible=(region == first_region),
        )
        fig.add_trace(bar, row=1, col=subplot_col)

# One dropdown entry per region; selecting it shows exactly the traces that
# carry that region's name.
region_buttons = [
    dict(
        label=region,
        method="update",
        args=[{"visible": [(region == trace.name) for trace in fig.data]}],
    )
    for region in regions
]

region_menu = dict(
    buttons=region_buttons,
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0,
    xanchor="left",
    y=1.6,
    yanchor="top",
)

fig.update_layout(updatemenus=[region_menu], height=400, width=1200, showlegend=False)

# Show the figure
iplot(fig)

----------

## Multiple Trace Radar Chart<br>

The following dropdown menu lets you view a radar chart comparing each region's values to the averages of all regions. The chart again uses the normalized data from the first plot, where higher is always better. The further away from the center a point is, the better the value is. The larger the area, the better the region is overall. This chart is an intuitive way to compare what region you want to investigate to the average of all the regions without overwhelming your visual cortex. 

<div>

> **How to use this viz**<br>
> Select a metropolitan area from the dropdown menu to see a radar chart comparing the selected area's values to the averages of all regions.<br>
> You can also rotate the chart at your convenience.

</div>

In [6]:
# Radar axes: the columns at positions 2-12 of `df`, minus the ones listed in
# `remove_elements`.
# NOTE(review): the positional slice depends on the column order of the input
# CSV; presumably it is meant to select the normalised metrics - confirm.
remove_elements = ['Latitude', 'Longitude', 'Political Affiliation', 'Average Yearly Temperature(F)']
categories = df.columns[2:13]
mask = ~categories.isin(remove_elements)
categories = categories[mask]

fig = go.Figure()

# One polygon per region; only the first region starts out visible.
first_region = regions[0]
for region in regions:
    region_values = df.loc[df['Region'] == region, categories].to_numpy()[0]
    region_trace = go.Scatterpolar(
        r=region_values,
        theta=categories,
        fill='toself',
        name=region,
        visible=(region == first_region),
    )
    fig.add_trace(region_trace)

# Reference polygon: the mean of every category over all regions.
average_values = df[categories].mean()
average_trace = go.Scatterpolar(
    r=average_values,
    theta=categories,
    fill='toself',
    name='Average',
)
fig.add_trace(average_trace)

# One dropdown entry per region; the selected region is shown together with
# the 'Average' trace, which always stays visible.
region_buttons = [
    dict(
        label=region,
        method="update",
        args=[{"visible": [(region == trace.name or 'Average' == trace.name) for trace in fig.data]}],
    )
    for region in regions
]

region_menu = dict(
    buttons=region_buttons,
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0,
    xanchor="left",
    y=1.5,
    yanchor="top",
)

fig.update_layout(
    updatemenus=[region_menu],
    polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
    showlegend=True,
)

# Show the figure
iplot(fig)

-------------

## A closer look at crime

<div>

For some people, money cannot buy happiness. They worry less about material needs and more about feeling safe and secure. Below are plots showing the violent and non-violent crime rates for each region. The crime rates used in these plots and everywhere else on the page are self-reported by law enforcement agencies with jurisdiction over the area. Ask yourself if you trust the people who originate the data before looking too closely at it. 

> **How to use this viz**<br>
> All values for violent and non-violent crime per 100,000 people are shown in the chart below. You can hover over each point to see the actual value for each metropolitan area. They are sorted from highest to lowest value for each crime type.

</div>

In [7]:
# Sort the regions by each crime rate (ascending).
df_sorted_violent = df.sort_values('Violent Crime Rate (per 100k)')
df_sorted_non_violent = df.sort_values('Non-Violent Crime Rate (per 100k)')

# One row, two columns: violent on the left, non-violent on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Violent Crime Rate (per 100k)", "Non-Violent Crime Rate (per 100k)"))

# (sorted frame, rate column, trace name) for each subplot column, left to right.
lollipop_panels = [
    (df_sorted_violent, 'Violent Crime Rate (per 100k)', 'Violent Crime Rate'),
    (df_sorted_non_violent, 'Non-Violent Crime Rate (per 100k)', 'Non-Violent Crime Rate'),
]

for subplot_col, (sorted_frame, rate_column, trace_name) in enumerate(lollipop_panels, start=1):
    # Lollipop heads: one marker per region.
    heads = go.Scatter(
        y=sorted_frame['Region'],
        x=sorted_frame[rate_column],
        mode='markers',
        name=trace_name,
        marker=dict(size=10),
    )
    fig.add_trace(heads, row=1, col=subplot_col)

    # Lollipop stems: a separate line trace from 0 to the rate for every region.
    for region_name, rate in zip(sorted_frame['Region'], sorted_frame[rate_column]):
        stem = go.Scatter(
            y=[region_name, region_name],
            x=[0, rate],
            mode='lines',
            showlegend=False,
        )
        fig.add_trace(stem, row=1, col=subplot_col)


fig.update_layout(title_text="Crime Rates by Region", width=1400, height=900, showlegend=False)

iplot(fig)