In [None]:
# Imports, setup
import pandas as pd
import altair as alt
import vega_datasets
# Lift Altair's row limit for embedded datasets so the full table can be
# passed straight to alt.Chart.
alt.data_transformers.disable_max_rows()

# Data, default theme
HATE_CRIME_CSV = "../data/hate_crime.csv"  # path is relative to this notebook
hatecrimes = pd.read_csv(HATE_CRIME_CSV)
alt.themes.enable('fivethirtyeight')

In [None]:
# GRAPH 1 -- LINE CHART (ALL TIME)
# One point per data_year; the y value is the number of rows (incidents)
# recorded for that year.
# The count is encoded as quantitative (:Q) so the y-axis is a continuous
# numeric scale. A nominal (:N) count is treated as a set of discrete
# category labels, which distorts the vertical spacing of the line.
alt.Chart(hatecrimes, title='Hate Crimes Over Time').mark_line().encode(
        alt.X('data_year:O').title("Year"),
        alt.Y('count():Q').title("Hate Crime Incidents"),
        color=alt.value("#ad2e03")
    ).display()
# Not many comments on this one

In [None]:
# GRAPH 2 -- LINE CHART (BY BIAS)
# NOTE: The "Top 10" biases were found in 'exploration' files
top10 = [
    "Anti-Black or African American",
    "Anti-Jewish",
    "Anti-White",
    "Anti-Gay (Male)",
    "Anti-Hispanic or Latino",
    "Anti-Other Race/Ethnicity/Ancestry",
    "Anti-Lesbian, Gay, Bisexual, or Transgender (Mixed Group)",
    "Anti-Asian",
    "Anti-Multiple Races, Group",
    "Anti-Lesbian (Female)",
]
# Keep only incidents whose bias description is one of the top 10.
hatecrimestop10 = hatecrimes.loc[hatecrimes['bias_desc'].isin(top10)]

# Legend order requested in critique; passed to the colour encoding's `sort`.
sorted_legend_list = [
    'Anti-Black or African American',
    'Anti-Jewish',
    'Anti-Gay (Male)',
    'Anti-White',
    'Anti-Hispanic or Latino',
    'Anti-Lesbian, Gay, Bisexual, or Transgender (Mixed Group)',
    'Anti-Asian',
    'Anti-Other Race/Ethnicity/Ancestry',
    'Anti-Multiple Races, Group',
    'Anti-Lesbian (Female)',
]

# The count is encoded as quantitative (:Q) so the y-axis is a continuous
# numeric scale; a nominal (:N) count would be drawn as discrete categories.
alt.Chart(hatecrimestop10, title="Hate Crimes by Bias Over Time").mark_line().encode(
        alt.X('data_year:O').title("Year"),
        alt.Y('count():Q').title("Incidents"),
        alt.Color('bias_desc:N', legend=alt.Legend(labelLimit=300), sort=sorted_legend_list).title("Crime Bias").scale(scheme='category10')
    ).properties(height=500).display()
# Main comment from critique was to sort legend so it appears in the order on the chart, potentially expand y axis to better see variation

In [None]:
# GRAPH 3 -- HEATMAP (SEASONALITY)
# Grid of day-of-month (x) by month (y); each cell is coloured by the number
# of incidents that fall on that calendar day.

# Previous way of coloring:
#alt.Color('count()').title("Incidents").scale(scheme='orangered')

# New conditional way of coloring: use the incident-count colour scale when
# the predicate holds, otherwise fall back to a flat gray.
# NOTE(review): the predicate reads `datum.count`; confirm that this is the
# field name Vega-Lite gives the count() aggregate, and that day/month cells
# with no rows produce a mark at all (otherwise nothing gets "grayed out").
incident_color = alt.Color('count()').title("Incidents").scale(scheme='orangered')
cell_color = alt.condition(
    'datum.count > 0',
    incident_color,     # color when true
    alt.value('gray'),  # color when false
)

heatmap = alt.Chart(hatecrimes, title="Seasonality of Hate Crimes").mark_rect().encode(
    alt.X('date(incident_date):O').title("Date"),
    alt.Y('month(incident_date):O').title("Month"),
    color=cell_color,
)

# Unfinished idea: outline a single cell with a rectangle layer.
# df = pd.DataFrame({'x': [30], 'y': ['Oct']})
# rectangle = alt.Chart(df).mark_rect(color='', stroke='black').encode(
#     x='x',
#     y='y'
# )
# heatmap + rectangle
heatmap
# Main critiques: "gray out" ones with no data, "highlight" seasons with lots of hate crimes

In [None]:
# Using Altair's default state/ID dataset
from vega_datasets import data

# State outlines for the map; built once and reused by the chart below.
states = alt.topo_feature(data.us_10m.url, feature='states')

# State name -> numeric id and population, used both to attach ids to the
# incident rows and to compute the per-capita rate.
pop = data.population_engineers_hurricanes(usecols=['state', 'id', 'population'])

# Default (inner) merge: incident rows whose state_name has no match in
# `pop` are dropped here.
hatecrimes_w_ids = hatecrimes.merge(pop, left_on='state_name', right_on='state')

# Counting hatecrimes by state IDs
state_counts = hatecrimes_w_ids.groupby(['id'], as_index=False).size()
state_counts_w_pop = state_counts.merge(pop, on='id')
# Incidents divided by the state's population figure from `pop`.
state_counts_w_pop["percap"] = state_counts_w_pop["size"] / state_counts_w_pop["population"]

# GRAPH 4 -- US MAP
# The colour encodes `percap`, so the legend is labelled as a per-capita
# figure rather than a raw incident count.
alt.Chart(states, title="Hate Crimes Across the U.S.").mark_geoshape().encode(
    alt.Color('percap:Q').scale(scheme='browns').title("Hate Crime Incidents per Capita")
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(state_counts_w_pop, 'id', list(state_counts_w_pop.columns))
).project(
    type='albersUsa'
).properties(
    width=500,
    height=300
)

In [None]:
# GRAPH 5 -- BAR CHART (OFFENDER RACE)
# Drop incidents with no recorded offender race. The explicit .copy() makes
# the filtered frame independent of `hatecrimes`, so the column assignment
# below does not raise pandas' SettingWithCopyWarning.
hatecrimes_race = hatecrimes.loc[hatecrimes['offender_race'] != "Not Specified"].copy()
# Clarify the "Multiple" category label (critique feedback).
hatecrimes_race["offender_race"] = hatecrimes_race["offender_race"].replace("Multiple", "Multiple Races")

# Percentages: `total` is the row count of the whole filtered table, each row
# contributes 1/total, and sum(pct) per race gives that race's share.
alt.Chart(hatecrimes_race, title=alt.Title("Hate Crimes by Offender Race", subtitle="Among hate crimes where race was specified")).transform_joinaggregate(
    total='count(*)'
).transform_calculate(pct = '1/datum.total').mark_bar().encode(
        alt.Y('offender_race:O', axis=alt.Axis(labelLimit=200)).title("Offender Race").sort('-x'),
        # NOTE(review): the 0.425 upper bound is hard-coded; confirm it still
        # covers the largest bar if the data changes.
        alt.X('sum(pct):Q').title("Hate Crime Incidents").scale(domain=(0,0.425)).axis(format='%'),
        color=alt.value("#ad2e03")
    ).display()
# Help converting to percentages here https://stackoverflow.com/questions/56358977/how-to-show-a-histogram-of-percentages-instead-of-counts-using-altair
# Main critique: clarify "multiple races", deal with not specified/unknown, possibly change to percentages

In [None]:
# GRAPH 6 -- LINE CHART (TOP BIASES BY MONTH) WITH EVENT ANNOTATIONS
# NOTE(review): `hatecrimestop10_rec` was used here without being defined in
# any cell, so a fresh Restart & Run All failed with a NameError. It is
# reconstructed from the chart title as the top-10-bias rows with data_year
# in 2019-2023 -- confirm this matches the filter that was originally used.
recent_years = (hatecrimestop10['data_year'] >= 2019) & (hatecrimestop10['data_year'] <= 2023)
hatecrimestop10_rec = hatecrimestop10.loc[recent_years]

main_c = alt.Chart(hatecrimestop10_rec, title="Top Hate Crimes From 2019-2023").mark_line().encode(
        alt.X('yearmonth(incident_date):T').title('Month'),
        alt.Y('count()').title('Hate Crime Incidents'),
        alt.Color('bias_desc:N', legend=alt.Legend(labelLimit=300), sort=sorted_legend_list).title("Bias")
    ).properties(height=500, width=1500)

# One row per annotated event: its date, its label, and the y value at which
# its marker dot is drawn.
events = pd.DataFrame({
    'incident_date': ['2020-05-25', '2021-03-16', '2023-10-07'],
    'label': ["Murder of George Floyd", "2021 Atlanta Spa Shootings", "Hamas Attacks Israel"],
    'y': [250, 125, 325],
})
# Pixel offsets for each label, in the same order as `events`. These are mark
# properties rather than encodings, so every label needs its own text layer.
label_offsets = [
    {'dx': -75, 'dy': 75},
    {'dx': -75, 'dy': 150},
    {'dx': -60},
]

text = alt.layer(*[
    alt.Chart(events.loc[[row], ['incident_date', 'label']]).mark_text(**offset).encode(
        alt.X('yearmonth(incident_date):T'),
        text='label:N')
    for row, offset in enumerate(label_offsets)
])

# Vertical gray rule at each event's month.
lines = alt.Chart(events).mark_rule(color='#808080').encode(
    x='yearmonth(incident_date):T'
)

# Black marker on each rule at the event's `y` value.
dots = alt.Chart(events).mark_point(color='#000000').encode(
        x='yearmonth(incident_date):T',
        y='y'
)

main_c + text + lines + dots
# Main critique: legend sorting, vertical lines/dots on important events
# Credit to Dorka for helping me clean up this section of my code :) <3

In [None]:
# GRAPH 7 - SCATTERPLOT?
# Offender count vs. victim count for incidents with at least one individual
# victim. Both axes share the same 0-50 domain, and clip=True hides any
# points that fall outside it.
has_victims = hatecrimes['total_individual_victims'] > 0
hatecrimes_people = hatecrimes.loc[has_victims]

axis_domain = (0, 50)
scatter_title = alt.Title(
    'Number of Offenders vs. Number of Victims',
    subtitle='Among Crimes with <50 Victims and Offenders',
)

alt.Chart(hatecrimes_people, title=scatter_title).mark_circle(clip=True).encode(
    alt.X('total_offender_count').title("Total Offenders").scale(domain=axis_domain),
    alt.Y('total_individual_victims').title("Total Victims").scale(domain=axis_domain),
    color=alt.value("#ad2e03"),
).display()
# Main critique: consider 'zooming in', deal with outliers, make sure axis is same on both sides

In [None]:
# Filtering to find most popular locations
# Show the 11 most frequent location_name values, largest first.
location_counts = hatecrimes.groupby(['location_name']).size()
display(location_counts.sort_values(ascending=False).head(11))

# Hand-picked list of the ten locations kept for the chart.
# NOTE(review): presumably the 11 shown above minus an "unknown"-style
# category -- confirm against the displayed counts.
top_locs = [
    "Residence/Home",
    "Highway/Road/Alley/Street/Sidewalk",
    "Government/Public Building",
    "School/College",
    "Parking/Drop Lot/Garage",
    "Church/Synagogue/Temple/Mosque",
    "Commercial/Office Building",
    "Restaurant",
    "School-Elementary/Secondary",
    "Bar/Nightclub",
]
in_top_locs = hatecrimes['location_name'].isin(top_locs)
hatecrimestop10_loc = hatecrimes.loc[in_top_locs]


In [None]:
# GRAPH 8 - BAR CHART (LOCATIONS)
# Horizontal bars, one per location, ordered by descending incident count.
# The count is encoded as quantitative (:Q) so bar lengths sit on a
# continuous numeric axis; a nominal (:N) count would be drawn as discrete
# category positions.
alt.Chart(hatecrimestop10_loc, title="Top Locations of Hate Crimes").mark_bar().encode(
        alt.Y('location_name:O', axis=alt.Axis(labelLimit=200)).title("Location of Incident").sort('-x'),
        alt.X('count():Q').title("Hate Crime Incidents"),
        color=alt.value("#ad2e03")
    ).display()

# Main critique: 'unknowns' here not relevant to story