# Transform homicides

Transforms the homicide data into a GeoDataFrame and exports it as a shapefile.

In [10]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [11]:
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so it would also hide any
# deprecation or chained-assignment warnings raised by later cells —
# consider narrowing it to the specific warnings that are expected.
warnings.simplefilter("ignore")

In [2]:
# Restore all variables previously persisted with %store.
# NOTE(review): presumably this is where input_dir and output_dir come from,
# since they are used below but never assigned in this notebook — confirm.
%store -r

Read in all homicides

In [4]:
# Load the full homicides table from the shared input directory.
homicides_csv_path = os.path.join(input_dir, 'homicides.csv')
homicides = pd.read_csv(homicides_csv_path)

Parse out death year

In [5]:
# Parse the raw 'death date' column into pandas datetimes, replacing it in place.
parsed_death_dates = pd.to_datetime(homicides['death date'])
homicides['death date'] = parsed_death_dates

In [6]:
# Derive the calendar year of each death from the parsed date column.
death_dates = homicides['death date']
homicides['death_year'] = death_dates.dt.year

Trim down to the years we are tracking (2014 through 2018)

In [7]:
# Every calendar day (one midnight timestamp per day) in the tracked window,
# from the first day of 2014 through the last day of 2018 inclusive.
five_years = pd.date_range('1/1/2014', '12/31/2018', freq='D')

In [8]:
# Keep only the homicides whose death date falls on one of the tracked days.
# Dates are normalized to midnight before the membership test so that a record
# carrying a time-of-day component still matches the midnight stamps held in
# `five_years` instead of being silently dropped.
# `.copy()` makes the result an independent frame, so columns added to it
# later are not written into a slice of `homicides`.
in_tracked_window = homicides['death date'].dt.normalize().isin(five_years)
homicides_five_years = homicides[in_tracked_window].copy()

In [9]:
# Sanity check: number of non-null IDs remaining after the date filter.
homicides_five_years['ID'].count()

3224

Convert to GeoDataFrame

In [12]:
# Build one shapely Point per homicide from its x / y coordinate columns.
# Zipping the two columns avoids materializing every row as a Series, which is
# what a row-wise `apply(axis=1)` does.
homicides_five_years['geometry'] = [
    Point(point_x, point_y)
    for point_x, point_y in zip(
        homicides_five_years['incident point x'],
        homicides_five_years['incident point y'],
    )
]

In [13]:
# Promote the filtered frame to a GeoDataFrame, using its 'geometry' column as
# the active geometry and tagging it with EPSG:4269.
# The CRS is passed as an authority string because the {'init': 'epsg:...'}
# dict form is deprecated by pyproj / geopandas.
homicides_gdf = gpd.GeoDataFrame(
    homicides_five_years,
    geometry='geometry',
    crs='EPSG:4269',
)

Reproject into a feet-based coordinate system (EPSG:2229) so it matches the school buffer.

In [16]:
# Reproject the homicide points to EPSG:2229.
# The `epsg=` keyword is used instead of the deprecated {'init': 'epsg:...'}
# dict form of specifying a CRS.
homicides_gdf_feet = homicides_gdf.to_crs(epsg=2229)

Trim down to only the fields we need for analysis.

In [29]:
# Columns carried forward into the analysis; everything else is dropped.
analysis_columns = ['ID', 'slug', 'death date', 'death_year', 'geometry']

# Select those columns, then give the date column a name without a space.
trimmed_homicides = homicides_gdf_feet[analysis_columns].rename(
    columns={"death date": "death_date"}
)

Convert the date to a string because of shapefile field-type restrictions

In [30]:
# Replace the datetime column with its string representation.
death_date_text = trimmed_homicides['death_date'].astype(str)
trimmed_homicides['death_date'] = death_date_text

Output to file as a shapefile.

In [31]:
# Write the trimmed GeoDataFrame to the shared output directory.
shapefile_path = os.path.join(output_dir, "homicides-five-years.shp")
trimmed_homicides.to_file(shapefile_path)