In [None]:
import pandas as pd
import altair as alt
import vega_datasets
# Turn off Altair's max-rows check so the charts below can be built
# directly from the full hate-crime DataFrame.
alt.data_transformers.disable_max_rows()

In [4]:
# Load the hate crime dataset that the charts below are built from.
hate_crime_csv = "../data/hate_crime.csv"
hatecrimes = pd.read_csv(hate_crime_csv)

In [3]:
# GRAPH 1 -- HEATMAP (SEASONALITY)
# Columns are the day-of-month of incident_date, rows are its month;
# the cell colour encodes how many records fall on that month/day pair.
seasonality_base = alt.Chart(hatecrimes, title="Seasonality of Hate Crimes")
seasonality_heatmap = seasonality_base.mark_rect().encode(
    x=alt.X('date(incident_date):O').title("Date"),
    y=alt.Y('month(incident_date):O').title("Month"),
    color=alt.Color('count()').title("Incidents").scale(scheme='orangered'),
)
seasonality_heatmap.display()

In [4]:
# GRAPH 2 -- LINE CHART (ALL TIME)
# One point per data_year; the height is the number of records in that year.
alt.Chart(hatecrimes, title='Hate Crimes Over Time').mark_line().encode(
        alt.X('data_year:O').title("Year"),
        # count() is a numeric aggregate, so it is encoded as quantitative (:Q).
        # Typing it as nominal (:N) would lay the counts out as discrete
        # categories instead of on a numeric scale.
        alt.Y('count():Q').title("Hate Crime Incidents"),
        color=alt.value("#ad2e03")
    ).display()

In [5]:
# Exploratory view: yearly record counts with one line per bias_desc value.
alt.Chart(hatecrimes).mark_line().encode(
        x='data_year:O',
        # count() must be quantitative (:Q); a nominal (:N) type would turn the
        # y axis into discrete categories rather than a numeric scale.
        y='count():Q',
        color='bias_desc:N'
    ).display()

In [None]:
# Cleaning pandas 'bias description' variable
#hatecrimes.groupby(['bias_desc']).size().sort_values(ascending=False).head(10)
# The most frequent hate crime biases are anti-Black, Jewish, White, Gay, Hispanic, Other Race, LGBT, Asian, Multi-Race, and Lesbian

In [6]:
# GRAPH 3 -- LINE CHART (BIAS)
# Hard-coded list of the ten bias categories to plot.
top10 = [
    "Anti-Black or African American",
    "Anti-Jewish",
    "Anti-White",
    "Anti-Gay (Male)",
    "Anti-Hispanic or Latino",
    "Anti-Other Race/Ethnicity/Ancestry",
    "Anti-Lesbian, Gay, Bisexual, or Transgender (Mixed Group)",
    "Anti-Asian",
    "Anti-Multiple Races, Group",
    "Anti-Lesbian (Female)",
]
# Keep only the rows whose bias_desc is one of the categories above.
hatecrimestop10 = hatecrimes.loc[hatecrimes['bias_desc'].isin(top10)]
alt.Chart(hatecrimestop10).mark_line().encode(
        alt.X('data_year:O').title("Year"),
        # count() is numeric, so encode it as quantitative (:Q); a nominal (:N)
        # axis would treat each distinct count as a separate category.
        alt.Y('count():Q').title("Incidents"),
        alt.Color('bias_desc:N').title("Crime Bias").scale(scheme='category10')
    ).display()

In [None]:
# Here we can see that Anti-Black hate crimes are far more prevalent than other hate crimes, but they decreased from 2022 to 2023. Anti-Jewish hate crimes, on the other hand, have risen since 2021.

In [5]:
from vega_datasets import data
# TopoJSON source for the US map; the 'states' feature is extracted from it.
us_topojson_url = data.us_10m.url
states = alt.topo_feature(us_topojson_url, feature='states')

In [6]:
# Load only the state name and id columns; they are used to attach an id
# to each hate crime record by state name.
state_id_columns = ['state', 'id']
pop = data.population_engineers_hurricanes(usecols=state_id_columns)

In [7]:
# Attach the state id to every record by matching state_name against pop's state column
# (default inner join: records whose state_name has no match in pop are dropped).
hatecrimes_w_ids = pd.merge(hatecrimes, pop, left_on='state_name', right_on='state')
# One row per state id, with the number of records in a 'size' column.
state_counts = hatecrimes_w_ids.groupby('id', as_index=False).size()


In [None]:
# GRAPH 4 -- US MAP
# Each state shape is coloured by the 'size' value looked up from state_counts via 'id'.
states = alt.topo_feature(data.us_10m.url, 'states')
lookup_fields = list(state_counts.columns)
counts_by_id = alt.LookupData(state_counts, 'id', lookup_fields)
incident_color = alt.Color('size:Q').scale(scheme='browns').title("Hate Crime Incidents")

hate_crime_map = (
    alt.Chart(states, title="Hate Crime Map")
    .mark_geoshape()
    .encode(incident_color)
    .transform_lookup(lookup='id', from_=counts_by_id)
    .project(type='albersUsa')
    .properties(width=500, height=300)
)
# Last expression of the cell, so the notebook renders the chart.
hate_crime_map

In [None]:
# Choropleth of total hate crime incidents by US state -- consider adjusting for population, since raw counts favor the most populous states

In [None]:
# GRAPH 5 -- BAR CHART (OFFENDER RACE)
# One horizontal bar per offender_race value, ordered by descending record count.
alt.Chart(hatecrimes).mark_bar().encode(
        alt.Y('offender_race:O', axis=alt.Axis(labelLimit=200)).title("Offender Race").sort('-x'),
        # The bar length must be quantitative (:Q): with a nominal (:N) type the
        # counts become discrete categories, so bars have no numeric length and
        # sort('-x') cannot order them by magnitude.
        alt.X('count():Q').title("Hate Crime Incidents")
    ).display()

In [21]:
# Restrict the data to records whose data_year is 2019-2023
# (note: the frame is named "1924" although 2024 is not included).
years = list(range(2019, 2024))
is_selected_year = hatecrimes['data_year'].isin(years)
hatecrimes1924 = hatecrimes.loc[is_selected_year]

In [22]:
# Ranking of bias categories within the 2019-2023 subset. The value is not
# displayed (it is not the cell's last expression); presumably it was used to
# pick the hard-coded list below -- verify if the data changes.
hatecrimes1924.groupby(['bias_desc']).size().sort_values(ascending=False).head(10)
top10_rec = [
    "Anti-Black or African American",
    "Anti-Jewish",
    "Anti-White",
    "Anti-Gay (Male)",
    "Anti-Hispanic or Latino",
    "Anti-Other Race/Ethnicity/Ancestry",
    "Anti-Lesbian, Gay, Bisexual, or Transgender (Mixed Group)",
    "Anti-Asian",
    "Anti-Multiple Races, Group",
    "Anti-Transgender",
]
# Keep only the 2019-2023 rows whose bias_desc is in the list above.
in_top10_rec = hatecrimes1924['bias_desc'].isin(top10_rec)
hatecrimestop10_rec = hatecrimes1924.loc[in_top10_rec]


In [None]:
# GRAPH 6 - LINE GRAPH (RECENT EVENTS)
# Monthly record counts for the selected bias categories, one line per category.
main_c = alt.Chart(hatecrimestop10_rec, title="Top Hate Crimes From 2019-2023").mark_line().encode(
        alt.X('yearmonth(incident_date):O').title('Month'),
        alt.Y('count()').title('Hate Crime Incidents'),
        color='bias_desc:N'
    )


def event_label(event_date, y, label):
    """Build a text layer that places `label` at the month of `event_date`.

    The layer uses the same `yearmonth(incident_date):O` x encoding as
    `main_c`, so when the two are layered the label sits on the matching month
    of the shared x axis instead of at an arbitrary numeric x position.

    Args:
        event_date: Date of the event (any value accepted by `pd.to_datetime`).
        y: Vertical position of the label, in the units of the shared y axis.
        label: Text to draw.

    Returns:
        An Altair text-mark chart that can be layered onto `main_c` with `+`.
    """
    anchor = pd.DataFrame({'incident_date': pd.to_datetime([event_date]), 'y': [y]})
    return alt.Chart(anchor).mark_text(text=label).encode(
        alt.X('yearmonth(incident_date):O'),
        alt.Y('y:Q')
    )


# Annotate particular events that saw a spike -- George Floyd's murder,
# the 2021 Atlanta spa shootings, Oct. 7. The y values are the label heights.
text1 = event_label('2020-05-25', 500, "Murder of George Floyd")
text2 = event_label('2021-03-16', 150, "2021 Atlanta Spa Shootings")
text3 = event_label('2023-10-07', 300, "Hamas Attacks Israel")

text = text3 + text2 + text1
# Last expression of the cell, so the notebook renders the layered chart.
main_c + text

In [None]:
# GRAPH 7 - SCATTERPLOT?
# Keep only the records that have at least one individual victim.
has_individual_victims = hatecrimes['total_individual_victims'] > 0
hatecrimes_people = hatecrimes.loc[has_individual_victims]

offenders_vs_victims = alt.Chart(
    hatecrimes_people, title='Number of Offenders vs. Number of Victims'
).mark_circle().encode(
    x=alt.X('total_offender_count').title("Total Offenders"),
    y=alt.Y('total_individual_victims').title("Total Victims"),
)
offenders_vs_victims.display()

In [None]:
# GRAPH 8 - BAR CHART (LOCATIONS)
# Ranking of location_name by record count. The value is not displayed (it is
# not the cell's last expression); presumably it was used to pick the
# hard-coded list below -- verify if the data changes.
hatecrimes.groupby(['location_name']).size().sort_values(ascending=False).head(10)
top_locs = [
    "Residence/Home",
    "Highway/Road/Alley/Street/Sidewalk",
    "Other/Unknown",
    "School/College",
    "Parking/Drop Lot/Garage",
    "Church/Synagogue/Temple/Mosque",
    "Commercial/Office Building",
    "Restaurant",
    "School-Elementary/Secondary",
    "Bar/Nightclub",
]
# Keep only the rows whose location_name is in the list above.
hatecrimestop10_loc = hatecrimes.loc[hatecrimes['location_name'].isin(top_locs)]

alt.Chart(hatecrimestop10_loc, title="Top Locations of Hate Crimes").mark_bar().encode(
        alt.Y('location_name:O', axis=alt.Axis(labelLimit=200)).title("Location of Incident").sort('-x'),
        # The bar length must be quantitative (:Q): with a nominal (:N) type the
        # counts become discrete categories, so bars have no numeric length and
        # sort('-x') cannot order them by magnitude.
        alt.X('count():Q').title("Hate Crime Incidents")
    ).display()