In [1]:
import pandas as pd
import numpy as np
import json
from pathlib import Path

# Project layout: the notebook's working directory is one level below the project root.
PROJ_ROOT = Path.cwd().resolve().parent
EXT_DIR = PROJ_ROOT.joinpath('data', 'external')

# Read the spreadsheet: the header row is at 0-based row 5, and only column 0 plus
# columns 2 through 8 are kept. The two unnamed leading columns become State and City.
hatecrime_df = pd.read_excel(
    EXT_DIR.joinpath('Table_13_Hate_Crime_Incidents_per_Bias_Motivation_and_Quarter_by_State_and_Agency_2017.xls'),
    header=5,
    usecols=[0, *range(2, 9)],
)
hatecrime_df = hatecrime_df.rename(columns={'Unnamed: 0': 'State', 'Unnamed: 2': 'City'})

In [2]:
# Forward-fill the State column so every row carries the most recent non-missing
# State value above it. Uses Series.ffill() because fillna(method=...) is deprecated
# in recent pandas releases.
hatecrime_df['State'] = hatecrime_df['State'].ffill()

In [3]:
# Keep only the rows that have a City value, then renumber the index from 0.
hatecrime_df = hatecrime_df[hatecrime_df['City'].notna()].reset_index(drop=True)

In [4]:
# Remove every '1', '2' and '3' character from the city names in a single pass
# (presumably footnote markers carried over from the spreadsheet — confirm).
# NOTE(review): the digits are removed wherever they occur, not only at the end of a name.
hatecrime_df['City'] = hatecrime_df['City'].str.replace(r'[123]', '', regex=True)

In [5]:
# Cast the Religion column to float; all other columns keep their dtypes.
hatecrime_df = hatecrime_df.astype({'Religion': 'float64'})

In [6]:
# Row-wise total across the numeric columns.
# Any existing 'Total' column is excluded first, so re-running this cell does not
# fold the previous total back into the new sum.
hatecrime_df['Total'] = (
    hatecrime_df
    .drop(columns='Total', errors='ignore')
    .sum(numeric_only=True, axis=1)
)

In [7]:
# Preview the first rows after cleaning instead of rendering the whole frame.
hatecrime_df.head(10)

Unnamed: 0,State,City,Race/ Ethnicity/ Ancestry,Religion,Sexual orientation,Disability,Gender,Gender identity,Total
0,Alabama,Hoover,5.0,1.0,0.0,0.0,0.0,0.0,6.0
1,Alabama,Ozark,2.0,0.0,0.0,0.0,0.0,0.0,2.0
2,Alabama,Jacksonville State University,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3,Alaska,Anchorage,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,Alaska,Juneau,2.0,0.0,0.0,0.0,0.0,0.0,2.0
5,Alaska,Alaska State Troopers,0.0,1.0,0.0,0.0,0.0,0.0,1.0
6,Arizona,Avondale,0.0,0.0,1.0,0.0,0.0,0.0,1.0
7,Arizona,Chandler,1.0,0.0,0.0,0.0,0.0,0.0,1.0
8,Arizona,Cottonwood,1.0,0.0,0.0,0.0,0.0,0.0,1.0
9,Arizona,El Mirage,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [8]:
# Locate the state mapping file relative to the project root instead of through a
# user-specific absolute path, so the notebook can run on other machines.
# NOTE(review): assumes PROJ_ROOT (set in the first cell) is the `xEII` directory that
# the original absolute path pointed into — confirm.
mappings = PROJ_ROOT / 'data' / 'mappings' / 'STATE_MAPPING.json'

In [9]:
# Read the mapping file as raw bytes and decode the JSON document into a Python object.
state_mappings = json.loads(Path(mappings).read_bytes())

In [10]:
# Display the loaded mapping to check its contents.
state_mappings

{'Alabama': 'AL',
 'Alaska': 'AK',
 'Arizona': 'AZ',
 'Arkansas': 'AR',
 'California': 'CA',
 'Colorado': 'CO',
 'Connecticut': 'CT',
 'Delaware': 'DE',
 'District of Columbia': 'DC',
 'Florida': 'FL',
 'Georgia': 'GA',
 'Hawaii': 'HI',
 'Idaho': 'ID',
 'Illinois': 'IL',
 'Indiana': 'IN',
 'Iowa': 'IA',
 'Kansas': 'KS',
 'Kentucky': 'KY',
 'Louisiana': 'LA',
 'Maine': 'ME',
 'Maryland': 'MD',
 'Massachusetts': 'MA',
 'Michigan': 'MI',
 'Minnesota': 'MN',
 'Mississippi': 'MS',
 'Missouri': 'MO',
 'Montana': 'MT',
 'Nebraska': 'NE',
 'Nevada': 'NV',
 'New Hampshire': 'NH',
 'New Jersey': 'NJ',
 'New Mexico': 'NM',
 'New York': 'NY',
 'North Carolina': 'NC',
 'North Dakota': 'ND',
 'Ohio': 'OH',
 'Oklahoma': 'OK',
 'Oregon': 'OR',
 'Pennsylvania': 'PA',
 'Rhode Island': 'RI',
 'South Carolina': 'SC',
 'South Dakota': 'SD',
 'Tennessee': 'TN',
 'Texas': 'TX',
 'Utah': 'UT',
 'Vermont': 'VT',
 'Virginia': 'VA',
 'Washington': 'WA',
 'West Virginia': 'WV',
 'Wisconsin': 'WI',
 'Wyoming': 'WY

In [12]:
# Swap each State value for its entry in state_mappings; values that are not keys
# of the mapping are left as they are.
hatecrime_df['State'] = hatecrime_df['State'].map(
    lambda state: state_mappings.get(state, state)
)

In [16]:
# Build a "City, State" label per row by joining the two columns with ', '.
hatecrime_df['citystate'] = hatecrime_df['City'].str.cat(hatecrime_df['State'], sep=', ')

In [17]:
# Preview the first rows, now including the citystate label, instead of rendering
# the whole frame.
hatecrime_df.head(10)

Unnamed: 0,State,City,Race/ Ethnicity/ Ancestry,Religion,Sexual orientation,Disability,Gender,Gender identity,Total,citystate
0,AL,Hoover,5.0,1.0,0.0,0.0,0.0,0.0,6.0,"Hoover, AL"
1,AL,Ozark,2.0,0.0,0.0,0.0,0.0,0.0,2.0,"Ozark, AL"
2,AL,Jacksonville State University,1.0,0.0,0.0,0.0,0.0,0.0,1.0,"Jacksonville State University, AL"
3,AK,Anchorage,1.0,0.0,0.0,0.0,0.0,0.0,1.0,"Anchorage, AK"
4,AK,Juneau,2.0,0.0,0.0,0.0,0.0,0.0,2.0,"Juneau, AK"
5,AK,Alaska State Troopers,0.0,1.0,0.0,0.0,0.0,0.0,1.0,"Alaska State Troopers, AK"
6,AZ,Avondale,0.0,0.0,1.0,0.0,0.0,0.0,1.0,"Avondale, AZ"
7,AZ,Chandler,1.0,0.0,0.0,0.0,0.0,0.0,1.0,"Chandler, AZ"
8,AZ,Cottonwood,1.0,0.0,0.0,0.0,0.0,0.0,1.0,"Cottonwood, AZ"
9,AZ,El Mirage,1.0,0.0,0.0,0.0,0.0,0.0,1.0,"El Mirage, AZ"


In [18]:
# Locate the interim metro table relative to the project root instead of through a
# user-specific absolute path, so the notebook can run on other machines.
# NOTE(review): assumes PROJ_ROOT (set in the first cell) is the `xEII` directory that
# the original absolute path pointed into — confirm.
metro_cities = PROJ_ROOT / 'data' / 'interim' / 'metro_df.csv'

In [21]:
# Load the metro table and pull its 'comb_metro' column out as a plain Python list.
metro_df = pd.read_csv(filepath_or_buffer=metro_cities)
cities = metro_df.loc[:, 'comb_metro'].to_list()

In [34]:
# Number of entries in the metro list. The former bare `cities` line displayed
# nothing because it was not the cell's last expression, so it has been removed.
len(cities)

106


In [32]:
# Map each citystate label that also appears in the metro list to its Total value.
# Membership is tested against a set, so each lookup is constant-time instead of a
# scan of the whole list. If a label occurs more than once, the last row wins, as
# it did in the original row-by-row loop.
metro_labels = set(cities)
hatecrime_dict = {
    label: total
    for label, total in zip(hatecrime_df['citystate'], hatecrime_df['Total'])
    if label in metro_labels
}

# How many metro labels found a match in the hate-crime table.
len(hatecrime_dict)

71

In [31]:
# Alternative construction: index the integer-cast Total column by citystate and
# convert it to a dict. Unlike hatecrime_dict, this is not filtered by the metro list.
hatecrime_df.set_index('citystate')['Total'].astype(int).to_dict()

#hatecrime_df.columns.dtype


{'Hoover, AL': 6,
 'Ozark, AL': 2,
 'Jacksonville State University, AL': 1,
 'Anchorage, AK': 1,
 'Juneau, AK': 2,
 'Alaska State Troopers, AK': 1,
 'Avondale, AZ': 1,
 'Chandler, AZ': 1,
 'Cottonwood, AZ': 1,
 'El Mirage, AZ': 1,
 'Gilbert, AZ': 3,
 'Glendale, AZ': 5,
 'Mesa, AZ': 5,
 'Page, AZ': 1,
 'Phoenix, AZ': 243,
 'Scottsdale, AZ': 4,
 'St. Johns, AZ': 2,
 'Tucson, AZ': 5,
 'Wickenburg, AZ': 1,
 'Yuma, AZ': 1,
 'University of Arizona, AZ': 3,
 'Maricopa, AZ': 3,
 'Mohave, AZ': 1,
 'Pima, AZ': 2,
 'Apache, AZ': 2,
 'Benton, AR': 1,
 'Fayetteville, AR': 1,
 'Fort Smith, AR': 4,
 'Boone, AR': 1,
 'Adelanto, CA': 2,
 'Alameda, CA': 2,
 'Albany, CA': 3,
 'Alhambra, CA': 2,
 'Aliso Viejo, CA': 1,
 'Anaheim, CA': 1,
 'Antioch, CA': 1,
 'Apple Valley, CA': 1,
 'Arcata, CA': 2,
 'Atwater, CA': 2,
 'Azusa, CA': 2,
 'Bakersfield, CA': 7,
 'Bell, CA': 1,
 'Bellflower, CA': 1,
 'Bell Gardens, CA': 1,
 'Belmont, CA': 1,
 'Benicia, CA': 1,
 'Berkeley, CA': 23,
 'Beverly Hills, CA': 2,
 'Brent

In [None]:
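# TODO: unfinished fragment — presumably meant to chain .replace(CITY_HATE_CRIME_MAPPING)
# onto a Series; confirm the mapping and the target column before using it.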