In [1]:
import pandas as pd
import plotly.express as px

In [2]:
data = pd.read_csv('./dataset.csv')
# record_id, occurence_month, total_number_of_victims, total_number_of_individual_victims
# suspects_race_as_a_group, total_number_of_suspects, most_serious_ucr_type
# most_serious_bias, most_serious_bias_type,
#'total_number_of_individual_victims_adult', 'total_number_of_individual_victims_juvenile',
#'total_number_of_suspects_adult', 'total_number_of_suspects_juvenile'
trimmed_data = data[['record_id', 'occurence_month', 'total_number_of_individual_victims', 
                    'suspects_race_as_a_group', 'total_number_of_suspects', 
                    'most_serious_ucr_type', 'most_serious_bias', 'most_serious_bias_type',
                    'total_number_of_individual_victims_adult', 'total_number_of_individual_victims_juvenile',
                    'total_number_of_suspects_adult', 'total_number_of_suspects_juvenile'
                    ]].copy()

In [4]:
trimmed_data['most_serious_bias_type'] = trimmed_data['most_serious_bias_type'].apply(lambda x: 'Race' if x == 'Race/Ethnicity/Ancestry' else x)

In [5]:
trimmed_data['suspects_race_as_a_group'].value_counts()

White                               563
Unknown                             549
Black or African American           366
Hispanic                            117
Group of Multiple Races              49
Asian/Pacific Islander               41
Asian                                19
East Indian/Asian Indian             11
East Indian                           3
Group of Multiple Ethnicities         2
American Indian or Alaska Native      1
Name: suspects_race_as_a_group, dtype: int64

In [30]:
num_victims = trimmed_data[['occurence_month', 'total_number_of_individual_victims']].copy()
num_victims['occurence_year'] = num_victims['occurence_month'].apply(lambda x: x[:4])
num_victims = num_victims.groupby('occurence_year')['total_number_of_individual_victims'].sum().reset_index()

fig = px.line(num_victims, x="occurence_year", y='total_number_of_individual_victims', title='Hate Crime Victims Over Time',
              labels={'x': 'Date', 'Hate Crimes': 'Number of Hate Crimes'})
fig.show()

In [7]:
num_occurences = trimmed_data[['occurence_month', 'total_number_of_individual_victims']].copy()
num_occurences['occurence_year'] = num_occurences['occurence_month'].apply(lambda x: x[:4])
num_occurences = num_occurences.groupby('occurence_year')['total_number_of_individual_victims'].count().reset_index()

fig = px.line(num_occurences, x="occurence_year", y='total_number_of_individual_victims', title='Hate Crimes Over Time',
              labels={'x': 'Date', 'Hate Crimes': 'Number of Hate Crimes'})
fig.show()

In [8]:
types_of_crimes = trimmed_data[['record_id', 'most_serious_bias_type']].groupby('most_serious_bias_type').count().reset_index()
graph = px.bar(types_of_crimes, x='most_serious_bias_type', y='record_id')
graph.show()

In [9]:
grouped_data = trimmed_data[['occurence_month', 'most_serious_bias_type', 'most_serious_bias', 'total_number_of_individual_victims']].copy()
grouped_data['occurence_year'] = grouped_data['occurence_month'].apply(lambda x: x[:4])
grouped_data.drop(columns=['occurence_month']).groupby(['occurence_year', 'most_serious_bias_type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,most_serious_bias,total_number_of_individual_victims
occurence_year,most_serious_bias_type,Unnamed: 2_level_1,Unnamed: 3_level_1
2001,Gender Nonconforming,2,2
2001,Race,91,91
2001,Religion,23,23
2001,Sexual Orientation,50,50
2002,Gender Nonconforming,1,1
...,...,...,...
2023,Sexual Orientation,9,9
2024,Disability,1,1
2024,Race,7,7
2024,Religion,1,1


In [10]:
#2001-2004, 2005-2008, 2009-2012,2013-2016, 2017-2020, 2021-end
presidential_terms = {
    2005: "2001-2004: George W. Bush's 1st Term",
    2009: "2005-2008: George W. Bush's 2nd Term",
    2013: "2009-2012: Barack Obama's 1st Term",
    2017: "2013-2016: Barack Obama's 2nd Term",
    2021: "2017-2020: Donald Trump's Term",
    2025: "2021-2024: Biden's Term"
}

def sort_presidents(x):
    for k, v in presidential_terms.items():
        if int(x) < k:
            return v
    return

grouped_data['president_term'] = grouped_data['occurence_year'].apply(sort_presidents)

In [11]:
presidential_grouping = grouped_data.groupby(
    ['president_term', 'most_serious_bias_type']
)['total_number_of_individual_victims'].sum().reset_index()

In [12]:
pivoted_pg = presidential_grouping.pivot(index='most_serious_bias_type', columns='president_term', values='total_number_of_individual_victims').fillna(0)
pivoted_pg

president_term,2001-2004: George W. Bush's 1st Term,2005-2008: George W. Bush's 2nd Term,2009-2012: Barack Obama's 1st Term,2013-2016: Barack Obama's 2nd Term,2017-2020: Donald Trump's Term,2021-2024: Biden's Term
most_serious_bias_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Disability,1.0,2.0,0.0,0.0,0.0,1.0
Gender,0.0,2.0,0.0,2.0,0.0,2.0
Gender Nonconforming,16.0,26.0,13.0,12.0,14.0,13.0
Race,388.0,162.0,90.0,38.0,145.0,133.0
Religion,78.0,28.0,20.0,6.0,33.0,23.0
Sexual Orientation,246.0,202.0,91.0,53.0,68.0,42.0


In [57]:
pivoted_pg.to_csv('summed_victims_terms.csv')

In [45]:
anti_arab_islam = trimmed_data[(trimmed_data['most_serious_bias'] == 'Anti-Arab') | (trimmed_data['most_serious_bias'] == 'Anti-Islamic (Muslim)')]
anti_arab_islam['occurence_year'] = anti_arab_islam['occurence_month'].apply(lambda x: x[:4])
anti_arab_islam['total_number_of_individual_victims'] = anti_arab_islam['total_number_of_individual_victims'].fillna(0)
df = anti_arab_islam.groupby(['most_serious_bias', 'occurence_year'])['total_number_of_individual_victims'].sum().reset_index().fillna(0)
df.pivot(index='occurence_year', columns='most_serious_bias', values='total_number_of_individual_victims').fillna(0)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



most_serious_bias,Anti-Arab,Anti-Islamic (Muslim)
occurence_year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,0.0,7.0
2002,0.0,7.0
2003,8.0,4.0
2004,2.0,0.0
2005,7.0,0.0
2006,6.0,0.0
2007,2.0,1.0
2009,4.0,1.0
2010,3.0,0.0
2011,1.0,2.0


In [59]:
bias_breakdown = trimmed_data.copy()
bias_breakdown['occurence_year'] = bias_breakdown['occurence_month'].apply(lambda x: x[:4])
bias_breakdown = bias_breakdown.groupby(['most_serious_bias_type', 'most_serious_bias'])['total_number_of_individual_victims'].sum().reset_index()
bias_breakdown.pivot(index='most_serious_bias', columns='most_serious_bias_type', values='total_number_of_individual_victims').fillna(0)

most_serious_bias_type,Disability,Gender,Gender Nonconforming,Race/Ethnicity/Ancestry,Religion,Sexual Orientation
most_serious_bias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Anti-Arab,0.0,0.0,0.0,50.0,0.0,0.0
Anti-Asian,0.0,0.0,0.0,213.0,0.0,0.0
Anti-Bisexual,0.0,0.0,0.0,0.0,0.0,2.0
Anti-Black or African American,0.0,0.0,0.0,258.0,0.0,0.0
Anti-Catholic,0.0,0.0,0.0,0.0,3.0,0.0
Anti-Citizenship Status,0.0,0.0,0.0,2.0,0.0,0.0
Anti-Female,0.0,5.0,0.0,0.0,0.0,0.0
Anti-Gay (Male),0.0,0.0,0.0,0.0,0.0,498.0
Anti-Gender Non-Conforming,0.0,0.0,1.0,0.0,0.0,0.0
Anti-Hindu,0.0,0.0,0.0,0.0,1.0,0.0


In [11]:
victims_time = trimmed_data[['occurence_month', 'total_number_of_individual_victims']].copy()
victims_time['occurence_year'] = victims_time['occurence_month'].apply(lambda x: x[:4])
victims_time = victims_time.groupby('occurence_year')['total_number_of_individual_victims'].sum()
victims_time.to_csv('victims_over_time.csv')