In [None]:
import pandas as pd
import altair as alt
import vega_datasets
# Turn off Altair's row-count guard on embedded datasets so the full
# hate-crime table can be passed straight to alt.Chart.
# NOTE(review): this embeds every row in each chart spec, so saved notebook
# size grows with the data -- confirm that is acceptable.
alt.data_transformers.disable_max_rows()

In [None]:
# Read the raw hate-crime incident table; the path is relative to the notebook.
hate_crime_csv = "../data/hate_crime.csv"
hatecrimes = pd.read_csv(hate_crime_csv)

# Use the FiveThirtyEight look for every chart rendered below.
alt.themes.enable('fivethirtyeight')

In [None]:
# GRAPH 1 -- HEATMAP (SEASONALITY)
# One rectangle per (day-of-month, month) pair, shaded by how many incidents
# fall on that calendar day across all rows of the dataset.
seasonality_heatmap = (
    alt.Chart(hatecrimes, title="Seasonality of Hate Crimes")
    .mark_rect()
    .encode(
        x=alt.X('date(incident_date):O', title="Date"),
        y=alt.Y('month(incident_date):O', title="Month"),
        color=alt.Color('count()', title="Incidents", scale=alt.Scale(scheme='orangered')),
    )
)
seasonality_heatmap.display()

This heatmap demonstrates the seasonality of hate crimes by showing the days and months in which hate crimes surge. We see here, across the 30+ years represented in this dataset, hate crimes are concentrated in the middle of September/end of October, and are less frequent in December. I hope to use this graph in my final narrative to show when hate crimes are most likely to occur.

In [None]:
# GRAPH 2 -- LINE CHART (ALL TIME)
# Total incidents per reporting year for the whole dataset.
# The count is encoded as quantitative (:Q). Encoding it as nominal (:N) would
# treat every yearly total as a separate category, so the y-axis would not be
# a numeric scale and the line's height would not reflect the magnitudes.
alt.Chart(hatecrimes, title='Hate Crimes Over Time').mark_line().encode(
        alt.X('data_year:O').title("Year"),
        alt.Y('count():Q').title("Hate Crime Incidents"),
        color=alt.value("#ad2e03")
    ).display()

This simple line chart demonstrates the prevalence of hate crimes over time -- specifically from 1991-2023. We see a particular surge in 2001, as well as a steady increase since 2018. I hope to use this chart in the final narrative as a simple way to understand the changing rate of hate crimes over time.

In [None]:
# Cleaning pandas 'bias description' variable
#hatecrimes.groupby(['bias_desc']).size().sort_values(ascending=False).head(10)
# Most popular hate crimes are anti Black, Jewish, White, Gay, Hispanic, Other Race, LGBT, Asian, Multi-Race, Lesbian

In [None]:
# GRAPH 3 -- LINE CHART (BIAS)
# Bias categories to plot; hard-coded from an earlier exploratory
# groupby on `bias_desc` (most frequent categories over the full dataset).
top10 = [
    "Anti-Black or African American",
    "Anti-Jewish",
    "Anti-White",
    "Anti-Gay (Male)",
    "Anti-Hispanic or Latino",
    "Anti-Other Race/Ethnicity/Ancestry",
    "Anti-Lesbian, Gay, Bisexual, or Transgender (Mixed Group)",
    "Anti-Asian",
    "Anti-Multiple Races, Group",
    "Anti-Lesbian (Female)",
]
# Keep only incidents whose bias is one of the listed categories.
hatecrimestop10 = hatecrimes.loc[hatecrimes['bias_desc'].isin(top10)]

# One line per bias category. The count is encoded as quantitative (:Q) so the
# y-axis is a numeric scale; a nominal (:N) count would be laid out as
# unordered categories and make the lines incomparable.
alt.Chart(hatecrimestop10, title="Hate Crimes by Bias Over Time").mark_line().encode(
        alt.X('data_year:O').title("Year"),
        alt.Y('count():Q').title("Incidents"),
        alt.Color('bias_desc:N', legend=alt.Legend(labelLimit=300)).title("Crime Bias").scale(scheme='category10')
    ).display()

This line graph demonstrates the prevalence of hate crimes over time, broken down by the "bias" of the crime. We can see that Anti-Black crimes are the most prevalent type of hate crime every year, while Anti-Jewish hate crimes have steadily risen since 2021. I hope to use this graph to demonstrate how hate crimes affect different populations at different rates, and how those rates change over time.
Note: There are over 300 types of bias represented in this dataset. For this graph, only the ten biases with the highest number of hate crime incidents were depicted.

In [7]:
from vega_datasets import data

# Reference the "states" layer of the US 10m TopoJSON file by URL, for use as
# the geographic base of the map.
us_topojson_url = data.us_10m.url
states = alt.topo_feature(us_topojson_url, 'states')

In [8]:
# Load only the state-name and id columns of this sample dataset; it is used
# below as a lookup table from state name to the `id` the map joins on.
state_id_columns = ['state', 'id']
pop = data.population_engineers_hurricanes(usecols=state_id_columns)

In [9]:
# Attach each incident's state id by matching `state_name` to the lookup
# table's `state` column (default inner join: rows without a match are dropped).
hatecrimes_w_ids = pd.merge(hatecrimes, pop, left_on='state_name', right_on='state')

# Count incidents per state id; the result has the columns `id` and `size`.
incidents_by_state = hatecrimes_w_ids.groupby(['id'], as_index=False)
state_counts = incidents_by_state.size()


In [None]:
# GRAPH 4 -- US MAP
# State outlines from the US 10m TopoJSON file.
states = alt.topo_feature(data.us_10m.url, feature='states')

# Join the per-state incident counts onto the shapes by `id`, then shade each
# state by its `size` (the incident count column).
lookup_fields = state_counts.columns.tolist()
us_map = (
    alt.Chart(states, title="Hate Crimes Across the U.S.")
    .mark_geoshape()
    .encode(
        color=alt.Color('size:Q', scale=alt.Scale(scheme='browns'), title="Hate Crime Incidents"),
    )
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(state_counts, 'id', lookup_fields),
    )
    .project(type='albersUsa')
    .properties(width=500, height=300)
)
us_map

This choropleth map illustrates the prevalence of hate crimes across the United States, coloring each state by its total number of hate crime incidents from 1991-2023. We see California has by far the highest number of hate crimes, with New York/New Jersey also showing higher totals. I hope to use this map to demonstrate not only where hate crimes occur, but address and possibly dispel stereotypes about hate crimes being concentrated in "conservative" areas of the country.

In [None]:
# GRAPH 5 -- BAR CHART (OFFENDER RACE)
# Horizontal bars: one per offender race, ordered longest-first via sort('-x').
# The count is encoded as quantitative (:Q) so bar length is proportional to
# the number of incidents and the '-x' sort orders by that numeric value; a
# nominal (:N) count would place each total as an unordered category.
alt.Chart(hatecrimes, title="Hate Crimes by Offender Race").mark_bar().encode(
        alt.Y('offender_race:O', axis=alt.Axis(labelLimit=200)).title("Offender Race").sort('-x'),
        alt.X('count():Q').title("Hate Crime Incidents"),
        color=alt.value("#ad2e03")
    ).display()

This bar chart demonstrates the total number of hate crime incidents grouped by offender race. We see that most hate crimes are committed by white individuals, but there is a lot of missing data ("Unknown" offender race). I want to use this graph to complement the previous bar chart that shows the most prevalent biases in hate crimes.

In [9]:
# Restrict the data to incidents reported in 2019 through 2023.
years = list(range(2019, 2024))
is_recent_year = hatecrimes['data_year'].isin(years)
hatecrimes1924 = hatecrimes.loc[is_recent_year]

In [10]:
# Exploratory ranking of the ten most frequent biases in the recent subset.
# It is not the last expression in the cell, so its result is not displayed.
hatecrimes1924.groupby(['bias_desc']).size().sort_values(ascending=False).head(10)

# Bias categories to keep for the recent-years chart (hard-coded).
top10_rec = [
    "Anti-Black or African American",
    "Anti-Jewish",
    "Anti-White",
    "Anti-Gay (Male)",
    "Anti-Hispanic or Latino",
    "Anti-Other Race/Ethnicity/Ancestry",
    "Anti-Lesbian, Gay, Bisexual, or Transgender (Mixed Group)",
    "Anti-Asian",
    "Anti-Multiple Races, Group",
    "Anti-Transgender",
]
in_recent_top10 = hatecrimes1924['bias_desc'].isin(top10_rec)
hatecrimestop10_rec = hatecrimes1924.loc[in_recent_top10]


In [None]:
# GRAPH 6 - LINE GRAPH (RECENT EVENTS)
# Monthly incident counts for the selected recent biases, one line per bias.
main_c = alt.Chart(hatecrimestop10_rec, title="Top Hate Crimes From 2019-2023").mark_line().encode(
        alt.X('yearmonth(incident_date):O').title('Month'),
        alt.Y('count()').title('Hate Crime Incidents'),
        alt.Color('bias_desc:N', legend=alt.Legend(labelLimit=300)).title("Bias")
    )

# Annotations for events that coincide with spikes. Each label is anchored to
# the event's actual date and uses the SAME x encoding (yearmonth, ordinal) as
# the line chart, so the layers share one x scale and the label sits over the
# right month. Hard-coded numeric x positions on a separate quantitative scale
# cannot be reconciled with the ordinal month axis when the charts are layered.
# `label_height` is a hand-picked y position (in incident counts) for each label.
# NOTE(review): assumes `incident_date` in the data is in a format the renderer
# parses as a date (e.g. ISO YYYY-MM-DD) -- confirm against the CSV.
events = pd.DataFrame({
    'incident_date': ['2020-05-25', '2021-03-16', '2023-10-07'],
    'label_height': [500, 150, 300],
    'event': ["Murder of George Floyd", "2021 Atlanta Spa Shootings", "Hamas Attacks Israel"],
})

# Axis titles are repeated here so the layered chart keeps a single, clean
# title per axis instead of merging in the annotation field names.
text = alt.Chart(events).mark_text().encode(
        alt.X('yearmonth(incident_date):O').title('Month'),
        alt.Y('label_height:Q').title('Hate Crime Incidents'),
        text='event:N'
    )

main_c + text

I view this graph as central to my narrative -- demonstrating how large-scale political events lead to an increase in targeted violence. This graph shows occurrences of hate crimes throughout the past 5 years, focusing on the most prevalent biases in that timeframe. I've annotated the graph to mark notable sociopolitical events that may help explain sudden increases in hate crimes against certain groups.

In [None]:
# GRAPH 7 - SCATTERPLOT?
# Keep only incidents with at least one individual victim.
has_individual_victims = hatecrimes['total_individual_victims'] > 0
hatecrimes_people = hatecrimes.loc[has_individual_victims]

# One point per incident: offender count on x, individual-victim count on y.
offenders_vs_victims = (
    alt.Chart(hatecrimes_people, title='Number of Offenders vs. Number of Victims')
    .mark_circle()
    .encode(
        x=alt.X('total_offender_count', title="Total Offenders"),
        y=alt.Y('total_individual_victims', title="Total Victims"),
        color=alt.value("#ad2e03"),
    )
)
offenders_vs_victims.display()

This scatterplot focuses on the strange relationship between total offenders and total victims in hate crimes. Among crimes with at least 1 individual victim (i.e., not just property crime), most incidents appear to have fewer than 10 total offenders and fewer than 20 total victims. However, the events with the highest number of individual victims (above 60) appear to be largely perpetrated by individual offenders. Conversely, the hate crimes perpetrated by large groups (50+ offenders) appear to have fewer than average victims. I hope to use this graph in my final narrative to provide insight on "who" is perpetrating hate crimes -- individuals vs. large collected groups.

In [None]:
# GRAPH 8 - BAR CHART (LOCATIONS)
# Exploratory ranking of the ten most frequent locations. It is not the last
# expression in the cell, so its result is not displayed.
hatecrimes.groupby(['location_name']).size().sort_values(ascending=False).head(10)

# Location categories to plot (hard-coded).
top_locs = [
    "Residence/Home",
    "Highway/Road/Alley/Street/Sidewalk",
    "Other/Unknown",
    "School/College",
    "Parking/Drop Lot/Garage",
    "Church/Synagogue/Temple/Mosque",
    "Commercial/Office Building",
    "Restaurant",
    "School-Elementary/Secondary",
    "Bar/Nightclub",
]
hatecrimestop10_loc = hatecrimes.loc[hatecrimes['location_name'].isin(top_locs)]

# Horizontal bars ordered longest-first via sort('-x'). The count is encoded
# as quantitative (:Q) so bar length is proportional to the number of incidents
# and the '-x' sort orders by that numeric value; a nominal (:N) count would
# place each total as an unordered category.
alt.Chart(hatecrimestop10_loc, title="Top Locations of Hate Crimes").mark_bar().encode(
        alt.Y('location_name:O', axis=alt.Axis(labelLimit=200)).title("Location of Incident").sort('-x'),
        alt.X('count():Q').title("Hate Crime Incidents"),
        color=alt.value("#ad2e03")
    ).display()

Similar to the previous bar chart, this chart demonstrates the most common locations at which hate crimes occur. Far and away the most common is a personal residence, followed by roads and highways. While there are many cases of unknown location, I hope to use this graph to further focus on the "where" of hate crimes.