# Introduction


# Demographic Insights

## Geographic 

In [15]:
import pandas as pd
import altair as alt
import seaborn as sns
import plotly.express as px
from vega_datasets import data
import matplotlib.pyplot as plt

In [7]:
# Load the cleaned dataset; later cells read it through `df`.
clean_csv_path = '../data/clean_data.csv'
df = pd.read_csv(clean_csv_path)

In [9]:
# Seven-stop hex color ramp passed to the map color scales.
custom_palette = [
    '#00072D',
    '#0A2472',
    '#0E6BA8',
    '#A6E1FA',
    '#99ABC5',
    '#8B748F',
    '#6F0624',
]

In [8]:
# Per-state mean of each selected metric, with the state identified by its
# two-letter postal abbreviation instead of its full name.
state_abbr = dict(
    [
        ('Alabama', 'AL'),
        ('California', 'CA'),
        ('Colorado', 'CO'),
        ('Florida', 'FL'),
        ('Georgia', 'GA'),
        ('Mississippi', 'MS'),
        ('New York', 'NY'),
        ('Oregon', 'OR'),
        ('Utah', 'UT'),
        ('Vermont', 'VT'),
        ('Virginia', 'VA'),
    ]
)

num_cols = df[['State', 'GPA', 'WorkExp', 'TestScore', 'WritingScore', 'VolunteerLevel']]
state_means = num_cols.groupby('State').mean().reset_index()

# Replace the full state name with its abbreviation. Dropping first and then
# assigning means 'State' ends up as the final column, after the averages.
# State names missing from `state_abbr` map to NaN.
avg_df = (
    state_means
    .drop(columns=['State'])
    .assign(State=state_means['State'].map(state_abbr))
)

In [12]:
# Choropleth of the per-state averages. GPA is drawn initially; the dropdown
# built below swaps the plotted values for any other column.
fig = px.choropleth(
    avg_df,
    locations=avg_df['State'],
    locationmode="USA-states",
    scope="usa",
    color=avg_df['GPA'],
    color_continuous_scale=custom_palette,
    labels={"GPA": "Selected Variable"},
    hover_data={"State": True, "GPA": True},
)

# One 'update' button per column except the last one; each button replaces
# the trace's z values with that column.
dropdown = [
    {'label': col, 'method': 'update', 'args': [{'z': [avg_df[col]]}]}
    for col in avg_df.columns[:-1]
]

variable_menu = {'buttons': dropdown, 'direction': 'down', 'showactive': True}
fig.update_layout(
    updatemenus=[variable_menu],
    title='Choropleth Map of Average Selected Variable',
)
fig.update_coloraxes(colorbar_title=dict(text='Selected Variable'))

fig.show()

In [17]:
# Percentage of each decision's rows that come from each state.
# Rows are counted with size() rather than a non-null count of GPA, so a row
# with a missing GPA is still included in its state's tally.
decision_state = (
    df.groupby(['Decision', 'State'])
    .size()
    .reset_index(name='StateCount')
)
decision_state['DecisionCount'] = decision_state.groupby('Decision')['StateCount'].transform('sum')
decision_state['Rate'] = decision_state['StateCount'] / decision_state['DecisionCount'] * 100

# Attach a numeric id per state name, taken from the vega reference table.
# The table is loaded once and reused for both columns instead of being
# fetched twice. State names absent from the table map to NaN.
state_ref = data.population_engineers_hurricanes()
state_id_dict = dict(zip(state_ref["state"], state_ref["id"]))
decision_state["StateID"] = decision_state["State"].map(state_id_dict)

# Per-decision subsets used as lookup tables by the maps.
admit_states = decision_state[decision_state['Decision'] == "Admit"]
decline_states = decision_state[decision_state['Decision'] == "Decline"]

In [26]:
states = alt.topo_feature('https://raw.githubusercontent.com/vega/vega-datasets/master/data/us-10m.json', 'states')
# NOTE(review): selection_multi / add_selection are deprecated in Altair 5 in
# favour of selection_point / add_params — confirm the installed Altair
# version before migrating. The selection is attached to the charts but is
# not referenced by any encoding condition in this cell.
click = alt.selection_multi(fields = ["State"])


def _build_rate_map(rates, title):
    """Build a layered US map colored by the ``Rate`` column of ``rates``.

    The chart has two geoshape layers over the same state outlines:
    a colored layer (full opacity where ``Rate`` is valid, 0.2 otherwise)
    and a grey layer (opacity 0.2 where ``Rate`` is missing, hidden
    otherwise). The grey layer has no lookup of its own; its ``Rate`` test
    relies on the lookup applied to the combined chart.

    Parameters
    ----------
    rates : pandas.DataFrame
        Lookup table joined to the topology on ``StateID``; all of its
        columns are pulled in. The encodings reference ``State`` and
        ``Rate``.
    title : str
        Title set on the colored layer.

    Returns
    -------
    alt.LayerChart
        The interactive layered chart.
    """
    def rate_lookup():
        # Fresh LookupData per transform, joining on StateID and exposing
        # every column of `rates`.
        return alt.LookupData(rates, "StateID", list(rates.columns))

    has_rate = 'isValid(datum.Rate)'

    existing_states = (
        alt.Chart(states)
        .mark_geoshape(stroke='black')
        .encode(
            color=alt.Color("Rate:Q", scale=alt.Scale(range=custom_palette)),
            tooltip=["State:N", "Rate:Q"],
            opacity=alt.condition(has_rate, alt.value(1), alt.value(0.2)),
        )
        .transform_lookup(lookup="id", from_=rate_lookup())
        .properties(width=333, height=200, title=title)
        .add_selection(click)
        .project(type="albersUsa")
        .interactive()
    )

    missing_states = (
        alt.Chart(states)
        .mark_geoshape(fill="grey", stroke="white")
        .encode(opacity=alt.condition(has_rate, alt.value(0), alt.value(0.2)))
        .add_selection(click)
        .project(type="albersUsa")
    )

    return (
        (existing_states + missing_states)
        .encode(tooltip=["State:N", "Rate:Q"])
        .transform_lookup(lookup="id", from_=rate_lookup())
        .interactive()
    )


admit_map = _build_rate_map(admit_states, "Admission Rates by State")
decline_map = _build_rate_map(decline_states, "Rejection Rates by State")

admit_map | decline_map

## Gender

# Machine Learning

In [19]:
%%html
<img src="../website/images/decision_pairplot.png" alt="Decision pair plot">

# Conclusions