In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
from preprocess import clean_police_dataset
import shapely
from shapely.geometry import point

In [5]:
def tract_merger(pol_filepath, shapefile_path, target_crs='EPSG:26916'):
    '''
    Spatially join police homicide records onto census tract polygons.

    The police CSV is cleaned with ``clean_police_dataset``, each record is
    turned into a point from its longitude/latitude (treated as EPSG:4326),
    and both layers are reprojected to ``target_crs`` before the spatial join
    (geopandas ``sjoin`` defaults), with the tract polygons on the left.

    pol_filepath: (str) file path for the police homicide csv
    shapefile_path: (str) file path for the census tract polygons shapefile
    target_crs: (str) projected CRS both layers are converted to before the
        join; defaults to 'EPSG:26916', the value previously hard-coded

    returns:
    geo_joined_df: one row per matched (tract, homicide) pair, restricted to
        the police attribute columns plus the tract identifier, with
        'GEOIDFQ' renamed to 'GEO_ID', 'year' cast to int and 'zip' stripped
        of whitespace and cast to int.
        NOTE(review): the selected columns do not include 'geometry', so
        geopandas presumably hands back a plain pandas DataFrame here rather
        than a GeoDataFrame -- confirm against callers.
    '''
    keep_cols = [
        'GEOIDFQ', 'name', 'age', 'gender', 'race',
        'date', 'street_address', 'city', 'state', 'zip', 'county',
        'agency_responsible', 'ori', 'cause_of_death', 'circumstances',
        'disposition_official', 'officer_charged', 'news_urls',
        'signs_of_mental_illness', 'allegedly_armed', 'wapo_armed',
        'wapo_threat_level', 'wapo_flee', 'geography', 'encounter_type',
        'initial_reason', 'call_for_service', 'tract',
        'hhincome_median_census_tract', 'latitude', 'longitude',
        'pop_total_census_tract', 'pop_white_census_tract',
        'pop_black_census_tract', 'pop_native_american_census_tract',
        'pop_asian_census_tract', 'pop_pacific_islander_census_tract',
        'pop_other_multiple_census_tract', 'pop_hispanic_census_tract',
        'lat_long', 'month', 'day', 'year',
    ]

    gdf = gpd.read_file(shapefile_path)
    pol_df = clean_police_dataset(pol_filepath)
    pol_df['geometry'] = gpd.points_from_xy(pol_df['longitude'], pol_df['latitude'])
    pol_gdf = gpd.GeoDataFrame(pol_df, geometry='geometry', crs="EPSG:4326")

    # Both layers must share a CRS for the spatial join to be meaningful.
    joined = gpd.sjoin(gdf.to_crs(crs=target_crs), pol_gdf.to_crs(crs=target_crs))

    # rename() returns a new frame; the result has to be kept, otherwise the
    # 'GEO_ID' key that attr_merger merges on never exists. Working on the
    # returned copy also avoids assigning into a slice of `joined`.
    geo_joined_df = joined[keep_cols].rename(columns={'GEOIDFQ': 'GEO_ID'})
    geo_joined_df['year'] = geo_joined_df['year'].astype(int)
    # Vectorised equivalent of int(zip.strip()) per row; also well-behaved
    # when the join produced no rows.
    geo_joined_df['zip'] = geo_joined_df['zip'].str.strip().astype(int)

    return geo_joined_df

In [94]:
def attr_merger(geo_df, census_file_path, year):
    '''
    Attach census tract attributes to one year's worth of joined records.

    geo_df: DataFrame with at least a 'year' column and a 'GEO_ID' column
        (the tract identifier, i.e. 'GEOIDFQ' renamed to 'GEO_ID') to merge on
    census_file_path: file path for census tract demographic information,
        should be csv and contain a 'GEO_ID' column
    year: (int) the year of geo_df rows to keep; should correspond to the
        year the census file describes

    returns:
        merged_df: the rows of geo_df whose 'year' equals `year`, left-merged
        with the census table on 'GEO_ID' (rows without a census match keep
        NaN in the census columns)
    '''
    census_df = pd.read_csv(census_file_path)
    # Drop the row labelled 0 -- presumably the descriptive second header
    # line that census exports carry under the column codes; confirm against
    # the input files.
    census_df = census_df.drop(index=0)

    for col in census_df.columns:
        # The merge key must keep its original type: coercing an all-digit
        # identifier to float would make it incomparable with geo_df's key.
        if col == 'GEO_ID':
            continue
        try:
            census_df[col] = census_df[col].astype(float)
        except (ValueError, TypeError):
            # Best-effort conversion: non-numeric columns are left as read.
            continue

    year_rows = geo_df[geo_df['year'] == year]
    merged_df = pd.merge(year_rows, census_df, how='left', on='GEO_ID')
    return merged_df
