To generate the CSV data files needed by index.html, run the cells below in order.

In [9]:
# Imports
import pandas as pd
from vega_datasets import data
import altair as alt
# Raw hate crimes data
hatecrimes = pd.read_csv("hate_crime.csv")

# Population table: discard rows with missing values, then rename the
# columns so they are easier to merge on later
population = (
    pd.read_csv("population.csv")
    .dropna()
    .rename(columns={"Label (Grouping)": "state", "Total": "population"})
)

In [None]:
def filter_dataset(bias_type):
    """
    Filter hate crimes by bias type, compute per-capita rates, and export.

    Reads the module-level ``hatecrimes`` and ``population`` DataFrames and
    writes the per-state result to ``<bias_type>.csv``.

    Args:
        bias_type (str): pattern matched against the ``bias_desc`` column
            using ``str.contains`` (regex syntax is honoured), or "All" to
            keep every record. Also used as the output file name stem.
    """
    # If bias type set, filter. na=False treats rows with a missing
    # bias_desc as non-matching; without it the mask would contain NaN,
    # which pandas refuses to use for boolean indexing.
    if bias_type != "All":
        matches = hatecrimes['bias_desc'].str.contains(bias_type, na=False)
        return_dataset = hatecrimes[matches]
    else:
        return_dataset = hatecrimes

    # Counting hatecrimes by state IDs
    state_ids = data.population_engineers_hurricanes(usecols=['state', 'id'])
    hatecrimes_w_ids = return_dataset.merge(state_ids, left_on='state_name', right_on='state')
    state_counts = hatecrimes_w_ids.groupby(['id'], as_index=False).size()
    # Note: right join to account for those with 0 hate crimes
    state_counts = state_counts.merge(state_ids, how='right', on='id')
    # Only the count column can be missing after the right join
    state_counts["size"] = state_counts["size"].fillna(0)

    # Re-adding state name and population, adding percapita column
    # (incidents per 100,000 residents, rounded to 2 decimals)
    state_counts_w_pop = state_counts.merge(population, on="state")
    state_counts_w_pop["percap"] = (state_counts_w_pop["size"]/state_counts_w_pop["population"])*100000
    state_counts_w_pop["percap"] = state_counts_w_pop["percap"].round(2)

    # FIPS ID need leading zeros
    state_counts_w_pop["id"] = state_counts_w_pop["id"].astype(str).str.pad(width=2, side='left', fillchar='0')

    state_counts_w_pop.to_csv(bias_type + ".csv")

In [11]:
# Bias categories to export, one CSV per entry
other_biases = [
    "All",
    "Anti-White",
    "Anti-Black",
    "Anti-Hispanic",
    "Anti-Jewish",
    "Anti-Asian",
    "Anti-Female",
]
for b in other_biases:
    filter_dataset(b)

In [None]:
# Special process for Anti-LGBT Crime -- have to combine lots of categories
searchfor = ["Gay", "Lesbian", "Transgender", "Bisexual"]
# Joining the keys with "|" builds a regex alternation, so a row matches when
# its bias_desc contains any one of them; na=False makes rows with a missing
# bias_desc count as non-matching instead of yielding an unusable NaN mask.
lgbtcrimes = hatecrimes[hatecrimes["bias_desc"].str.contains('|'.join(searchfor), na=False)]
# NOTE: Used the following SO link to help with multiple search keys
# https://stackoverflow.com/questions/26577516/how-to-test-if-a-string-contains-one-of-the-substrings-in-a-list-in-pandas

# Doing state/percap conversions
state_ids = data.population_engineers_hurricanes(usecols=['state', 'id'])
hatecrimes_w_ids = lgbtcrimes.merge(state_ids, left_on='state_name', right_on='state')
state_counts = hatecrimes_w_ids.groupby(['id'], as_index=False).size()
# Right join so that states with 0 matching hate crimes are kept (with a
# count of 0) rather than silently dropped from the exported file
state_counts = state_counts.merge(state_ids, how='right', on='id')
state_counts["size"] = state_counts["size"].fillna(0)
state_counts_w_pop = state_counts.merge(population, on="state")
# Per-capita rate: incidents per 100,000 residents, rounded to 2 decimals
state_counts_w_pop["percap"] = (state_counts_w_pop["size"]/state_counts_w_pop["population"])*100000
state_counts_w_pop["percap"] = state_counts_w_pop["percap"].round(2)
# FIPS ID need leading zeros
state_counts_w_pop["id"] = state_counts_w_pop["id"].astype(str).str.pad(width=2, side='left', fillchar='0')
state_counts_w_pop.to_csv("Anti-LGBT.csv")