In [2]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

In [3]:
#!pip install census

In [4]:
# Load the raw hate-crime export. low_memory=False turns off pandas' chunked
# parsing of the file.
raw_csv_path = "Resources/hate_crime.csv"
hate_crime_csv = pd.read_csv(raw_csv_path, low_memory=False)

# Wrap the parsed table in the DataFrame used by the rest of the notebook.
hate_crime_df = pd.DataFrame(data=hate_crime_csv)
hate_crime_df.head()

FileNotFoundError: [Errno 2] File b'Resources/hate_crime.csv' does not exist: b'Resources/hate_crime.csv'

### Reduce DF to include only necessary columns.

In [None]:
# Fields kept for the analysis; all other columns of the raw frame are left out.
columns = [
    'INCIDENT_ID',
    'DATA_YEAR',
    'INCIDENT_DATE',
    'PUB_AGENCY_NAME',
    'AGENCY_TYPE_NAME',
    'STATE_ABBR',
    'STATE_NAME',
    'POPULATION_GROUP_DESC',
    'TOTAL_OFFENDER_COUNT',
    'TOTAL_INDIVIDUAL_VICTIMS',
    'LOCATION_NAME',
    'BIAS_DESC',
    'VICTIM_TYPES',
]

# All rows, only the columns listed above.
reduced_hate_crime_df = hate_crime_df.loc[:, columns]

# Filter Data: 2009

In [None]:
# Keep only the incidents whose DATA_YEAR is 2009.
is_2009 = reduced_hate_crime_df['DATA_YEAR'] == 2009
reduced_hate_crime_2009_df = reduced_hate_crime_df.loc[is_2009]
reduced_hate_crime_2009_df.head()

In [None]:
# Count the 2009 incidents per state (most frequent first) ...
states2009 = reduced_hate_crime_2009_df['STATE_NAME'].value_counts(sort=True)

# ... and turn the counts into a two-column frame: state name, number of incidents.
state_hatecrime_2009_df = pd.DataFrame(states2009).reset_index()
state_hatecrime_2009_df.columns = ['State', 'Hate Crimes Committed']
state_hatecrime_2009_df.head()

In [None]:
# Census client setup: imports plus a client configured for year 2012.

from census import Census
# NOTE(review): `states` is not referenced in any visible cell — confirm it is still needed.
from us import states

# Census API key is imported from the local `config` module.
# Data not available for 2010 or 2011.
from config import api_key
c = Census(api_key, year=2012)
#c.acs5.tables()

In [None]:
# Request the ACS 5-year variables below for every state returned by the API.
acs_fields = (
    "NAME",
    "B19013_001E", "B19301_001E",
    "B23025_005E", "B23025_002E",
    "B17001_002E", "B17001_003E", "B17001_017E",
    "B17001A_002E", "B17001B_002E",
    "B01003_001E",
    "B02001_002E", "B02001_003E",
    "B15003_017E", "B15003_022E",
)
census_data = c.acs5.get(acs_fields, {'for': 'state:*'})

# Readable column labels for the raw ACS variable codes.
acs_labels = {
    "NAME": "State",
    "state": "State Number",
    "B19013_001E": "Median Household Income",
    "B19301_001E": "Per Capita Income",
    "B23025_005E": "Unemployment Count",
    "B23025_002E": "Labor Force Size",
    "B17001_002E": "Poverty Count",
    "B17001_003E": "Poverty: Male",
    "B17001_017E": "Poverty: Female",
    "B17001A_002E": "Poverty: White",
    "B17001B_002E": "Poverty: Black",
    "B01003_001E": "Total Population",
    "B02001_002E": "Population: White",
    "B02001_003E": "Population: Black",
    "B15003_017E": "Education: High School",
    "B15003_022E": "Education: Bachelors",
}
census_df = pd.DataFrame(census_data).rename(columns=acs_labels)

# Integer casts of the count columns that feed the derived rates.
total_population = census_df["Total Population"].astype(int)
poverty_count = census_df["Poverty Count"].astype(int)
unemployment_count = census_df["Unemployment Count"].astype(int)
labor_force_size = census_df["Labor Force Size"].astype(int)
high_school_count = census_df['Education: High School'].astype(int)

# Poverty Rate (%): Poverty Count / Total Population.
census_df["Poverty Rate"] = 100 * poverty_count / total_population

# Unemployment Rate (%): Unemployment Count / Labor Force Size.
census_df["Unemployment Rate"] = 100 * unemployment_count / labor_force_size

# Stored as "Share of Population with HS Diploma": 1 - (High School count / Total Population).
# NOTE(review): as written this is one minus the high-school count's share of the
# total population; confirm that matches the intended "at least a high school
# diploma" measure.
census_df['Share of Population with HS Diploma'] = 1 - high_school_count / total_population

# Keep only the columns used downstream, in display order.
kept_columns = [
    "State",
    "Total Population",
    "Median Household Income",
    "Per Capita Income",
    "Unemployment Rate",
    "Poverty Rate",
    "Share of Population with HS Diploma",
]
census_df = census_df[kept_columns]

census_df

Unnamed: 0,State,Total Population,Median Household Income,Per Capita Income,Unemployment Rate,Poverty Rate,Share of Population with HS Diploma
0,Alabama,4777326.0,43160.0,23587.0,10.188838,17.631035,0.828741
1,Alaska,711139.0,69917.0,32537.0,8.045603,9.369617,0.861117
2,Arizona,6410979.0,50256.0,25571.0,9.711096,16.767891,0.867727
3,Arkansas,2916372.0,40531.0,22007.0,8.521134,18.18386,0.808218
4,California,37325068.0,61400.0,29551.0,10.942019,14.976798,0.880476
5,Colorado,5042853.0,58244.0,31039.0,7.943789,12.569829,0.877578
6,Connecticut,3572213.0,69519.0,37807.0,9.11663,9.647129,0.83221
7,Delaware,900131.0,60119.0,29733.0,8.30616,11.182261,0.810472
8,District of Columbia,605759.0,64267.0,45004.0,10.410046,17.433666,0.887934
9,Florida,18885152.0,47309.0,26451.0,11.25482,15.287942,0.822082


In [9]:
# Load the 2009 GINI index table.
gini2009_csv = pd.read_csv('Resources/GINI2009.csv')

# Left-join the GINI table onto the census frame by state name.
census_df = pd.merge(left=census_df, right=gini2009_csv, on='State', how='left')

# Left-join the 2009 per-state hate-crime counts so every census row is kept.
clean_hate_crime_2009_df = pd.merge(
    left=census_df,
    right=state_hatecrime_2009_df,
    on="State",
    how="left",
)

In [10]:
# States with no reported hate crimes have NaN in 'Hate Crimes Committed' after
# the left merge; replace those with 0.
# Author's note: Hawaii is now the only state w/ no data (reported hate crimes).
values = {'Hate Crimes Committed': 0}
clean_hate_crime_2009_df = clean_hate_crime_2009_df.fillna(
    value=values,
)
#clean_hate_crime_2009_df

In [13]:
# Drop Puerto Rico and Hawaii from DF.
# Rows are selected by state name instead of by the hard-coded index labels 51 / 11:
# those labels depend on the order in which the Census API returns rows, and
# dropping by label raises KeyError when the cell is executed a second time.
excluded_states = ['Puerto Rico', 'Hawaii']
keep_row = ~clean_hate_crime_2009_df['State'].isin(excluded_states)

# .copy() yields an independent frame so later column assignments do not warn.
clean_hate_crime_2009_df = clean_hate_crime_2009_df.loc[keep_row].copy()

In [31]:
# Calculate 2009 Hate Crime Rate per 100,000 total population.
# https://oag.ca.gov/sites/all/files/agweb/pdfs/cjsc/prof10/formulas.pdf
# Note: Calculating rates for geographies of less than 100,000 will generate an inflated rate
# when compared to geographies with populations of 100,000 or more; therefore,
# rates are not calculated for geographies with populations of less than 100,000.

# Single source of truth for the rate column label, so the sort below cannot
# drift from the name used when the column is created.
rate_column = 'Hate Crime Rate per 100,000 Population'

clean_hate_crime_2009_df[rate_column] = 100000 * \
    clean_hate_crime_2009_df['Hate Crimes Committed'].astype(int) / \
    clean_hate_crime_2009_df['Total Population'].astype(int)

# Sort descending on Hate Crime Rate and renumber the rows.
# (The previous sort key 'Hate Crime Rate' is not a column of this frame and raised KeyError.)
clean_hate_crime_2009_df = (
    clean_hate_crime_2009_df
    .sort_values(by=rate_column, ascending=False)
    .reset_index(drop=True)
)

# Persist the merged 2009 table.
clean_hate_crime_2009_df.to_csv('hate_crime_socioeconomic_2009.csv')

# Last expression of the cell so the sorted frame is displayed.
clean_hate_crime_2009_df

# Filter Data: 2017

In [17]:
# Keep only the incidents whose DATA_YEAR is 2017.
is_2017 = reduced_hate_crime_df['DATA_YEAR'] == 2017
reduced_hate_crime_2017_df = reduced_hate_crime_df.loc[is_2017]
reduced_hate_crime_2017_df.head()

Unnamed: 0,INCIDENT_ID,DATA_YEAR,INCIDENT_DATE,PUB_AGENCY_NAME,AGENCY_TYPE_NAME,STATE_ABBR,STATE_NAME,POPULATION_GROUP_DESC,TOTAL_OFFENDER_COUNT,TOTAL_INDIVIDUAL_VICTIMS,LOCATION_NAME,BIAS_DESC,VICTIM_TYPES
186860,279156,2017,29-Oct-17,Anchorage,City,AK,Alaska,"Cities from 250,000 thru 499,999",1,1.0,Amusement Park,Anti-White,Individual
186861,194491,2017,22-Dec-17,Juneau,City,AK,Alaska,"Cities from 25,000 thru 49,999",1,1.0,Residence/Home,Anti-White,Individual
186862,194492,2017,24-Oct-17,Juneau,City,AK,Alaska,"Cities from 25,000 thru 49,999",0,,School-College/University,"Anti-Multiple Races, Group",Government
186863,194686,2017,4-May-17,State Troopers,State Police,AK,Alaska,"Non-MSA counties 100,000 or over",0,,Church/Synagogue/Temple/Mosque,Anti-Other Christian,Religious Organization
186864,280686,2017,31-Jan-17,Hoover,City,AL,Alabama,"Cities from 50,000 thru 99,000",0,1.0,Grocery/Supermarket,Anti-Hispanic or Latino,Individual


In [18]:
# Count the 2017 incidents per state (most frequent first) ...
states2017 = reduced_hate_crime_2017_df['STATE_NAME'].value_counts(sort=True)

# ... and turn the counts into a two-column frame: state name, number of incidents.
state_hatecrime_2017_df = pd.DataFrame(states2017).reset_index()
state_hatecrime_2017_df.columns = ['State', 'Hate Crimes Committed']
state_hatecrime_2017_df.head()

Unnamed: 0,State,Hate Crimes Committed
0,California,1094
1,New York,554
2,Washington,511
3,New Jersey,499
4,Michigan,457


In [19]:
# Rebind the Census client `c` to year 2016 (`Census` and `api_key` are imported in an earlier cell).

c = Census(api_key, year=2016)

In [20]:
# Request the ACS 5-year variables below for every state returned by the API.
# NOTE(review): this cell repeats the earlier census pull line for line; a shared
# function taking the client as a parameter would remove the duplication.
acs_fields = (
    "NAME",
    "B19013_001E", "B19301_001E",
    "B23025_005E", "B23025_002E",
    "B17001_002E", "B17001_003E", "B17001_017E",
    "B17001A_002E", "B17001B_002E",
    "B01003_001E",
    "B02001_002E", "B02001_003E",
    "B15003_017E", "B15003_022E",
)
census_data = c.acs5.get(acs_fields, {'for': 'state:*'})

# Readable column labels for the raw ACS variable codes.
acs_labels = {
    "NAME": "State",
    "state": "State Number",
    "B19013_001E": "Median Household Income",
    "B19301_001E": "Per Capita Income",
    "B23025_005E": "Unemployment Count",
    "B23025_002E": "Labor Force Size",
    "B17001_002E": "Poverty Count",
    "B17001_003E": "Poverty: Male",
    "B17001_017E": "Poverty: Female",
    "B17001A_002E": "Poverty: White",
    "B17001B_002E": "Poverty: Black",
    "B01003_001E": "Total Population",
    "B02001_002E": "Population: White",
    "B02001_003E": "Population: Black",
    "B15003_017E": "Education: High School",
    "B15003_022E": "Education: Bachelors",
}
census_df = pd.DataFrame(census_data).rename(columns=acs_labels)

# Integer casts of the count columns that feed the derived rates.
total_population = census_df["Total Population"].astype(int)
poverty_count = census_df["Poverty Count"].astype(int)
unemployment_count = census_df["Unemployment Count"].astype(int)
labor_force_size = census_df["Labor Force Size"].astype(int)
high_school_count = census_df['Education: High School'].astype(int)

# Poverty Rate (%): Poverty Count / Total Population.
census_df["Poverty Rate"] = 100 * poverty_count / total_population

# Unemployment Rate (%): Unemployment Count / Labor Force Size.
census_df["Unemployment Rate"] = 100 * unemployment_count / labor_force_size

# Stored as "Share of Population with HS Diploma": 1 - (High School count / Total Population).
# NOTE(review): as written this is one minus the high-school count's share of the
# total population; confirm that matches the intended "at least a high school
# diploma" measure.
census_df['Share of Population with HS Diploma'] = 1 - high_school_count / total_population

# Keep only the columns used downstream, in display order.
kept_columns = [
    "State",
    "Total Population",
    "Median Household Income",
    "Per Capita Income",
    "Unemployment Rate",
    "Poverty Rate",
    "Share of Population with HS Diploma",
]
census_df = census_df[kept_columns]

census_df.head()

Unnamed: 0,State,Total Population,Median Household Income,Per Capita Income,Unemployment Rate,Poverty Rate,Share of Population with HS Diploma
0,Alabama,4841164.0,44758.0,24736.0,8.240621,17.943329,0.828924
1,Alaska,736855.0,74444.0,34191.0,7.525813,9.883356,0.855916
2,Arizona,6728577.0,51340.0,26686.0,7.943898,17.323663,0.866442
3,Arkansas,2968472.0,42336.0,23401.0,6.831175,18.273071,0.810969
4,California,38654206.0,63783.0,31458.0,8.682885,15.533257,0.878971


In [21]:
# Load the 2017 GINI index table.
gini2017_csv = pd.read_csv('Resources/GINI2017.csv')

# Left-join the GINI table onto the census frame by state name.
census_df = pd.merge(left=census_df, right=gini2017_csv, on='State', how='left')

# Left-join the 2017 per-state hate-crime counts so every census row is kept.
clean_hate_crime_2017_df = pd.merge(
    left=census_df,
    right=state_hatecrime_2017_df,
    on="State",
    how="left",
)
#clean_hate_crime_2017_df

In [22]:
# States with no reported hate crimes have NaN in 'Hate Crimes Committed' after
# the left merge; replace those with 0.
# Author's note: Hawaii is now the only state w/ no data (reported hate crimes).
values = {'Hate Crimes Committed': 0}
clean_hate_crime_2017_df = clean_hate_crime_2017_df.fillna(
    value=values,
)
#clean_hate_crime_2017_df

In [25]:
# Drop Puerto Rico and Hawaii from DF.
# Rows are selected by state name instead of by the hard-coded index labels 51 / 11:
# those labels depend on the order in which the Census API returns rows, and
# dropping by label raises KeyError when the cell is executed a second time.
# NOTE(review): label 11 is assumed to be Hawaii, as in the 2009 section — confirm.
excluded_states = ['Puerto Rico', 'Hawaii']
keep_row = ~clean_hate_crime_2017_df['State'].isin(excluded_states)

# .copy() yields an independent frame so later column assignments do not warn.
clean_hate_crime_2017_df = clean_hate_crime_2017_df.loc[keep_row].copy()

In [30]:
# Calculate 2017 Hate Crime Rate per 100,000 total population.
# https://oag.ca.gov/sites/all/files/agweb/pdfs/cjsc/prof10/formulas.pdf
# Note: Calculating rates for geographies of less than 100,000 will generate an inflated rate
# when compared to geographies with populations of 100,000 or more; therefore,
# rates are not calculated for geographies with populations of less than 100,000.

# TODO: Check Washington, D.C. on calculation, or if this is a result of a massive
# increase in reported hate crimes.

# Single source of truth for the rate column label, so the sort below cannot
# drift from the name used when the column is created.
rate_column = 'Hate Crime Rate per 100,000 Population'

clean_hate_crime_2017_df[rate_column] = 100000 * \
    clean_hate_crime_2017_df['Hate Crimes Committed'].astype(int) / \
    clean_hate_crime_2017_df['Total Population'].astype(int)

# Sort descending on Hate Crime Rate and renumber the rows.
# (The previous sort key 'Hate Crimes per 100,000' is not a column of this frame and raised KeyError.)
clean_hate_crime_2017_df = (
    clean_hate_crime_2017_df
    .sort_values(by=rate_column, ascending=False)
    .reset_index(drop=True)
)

# Persist the merged 2017 table.
clean_hate_crime_2017_df.to_csv('hate_crime_socioeconomic_2017.csv')

# Last expression of the cell so the sorted frame is displayed.
clean_hate_crime_2017_df

## For BIAS_DESC

In [None]:
# Get list of all values in the BIAS_DESC column.

#reduced_hate_crime_df['BIAS_DESC'].values.tolist()

In [None]:
# Create new DF for type of hate crime.

#bias_type_df = pd.DataFrame(reduced_hate_crime_df['BIAS_DESC'].values.tolist(), columns=['Racially Motivated','Religiously Motivated', 'Sexually Motivated', 'Multiple',])