# Merge Property Values with Crime Data

Ideally, the merge is based on both year and location. For each crime row, we want to add the value of the nearest property at the time of the crime and the average property value of the area around it, using the same radius as for the street lamps.

In [1]:
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')
from scipy.spatial.distance import cdist

In [2]:
# load the crime incidents and discard the first column (the stored index)
crime_incidents = pd.read_csv('data/crime_incidents.csv')
crime_incidents = crime_incidents.drop(columns=crime_incidents.columns[0])

# load the property sample the same way, again discarding the first column
property_df = pd.read_csv('data/property_sample.csv')
property_df = property_df.drop(columns=property_df.columns[0])

In [3]:
# keep only the coordinate columns of the property and crime dataframes
property_coords = property_df.loc[:, ['Latitude','Longitude']]
crime_coords    = crime_incidents.loc[:, ['Lat','Long']]

In [4]:
# get crime and property coordinates into (n, 2) numpy arrays
X = np.array(crime_coords).reshape(-1,2)
Y = np.array(property_coords).reshape(-1,2)

# assessed values as a plain positional array, so the lookups below do not
# depend on the index labels of property_df
property_values = property_df.AV_TOTAL.to_numpy(dtype=float)

# neighborhood radius, same value as used for the streetlamp density
# NOTE: this is compared against a Euclidean distance computed directly on
# the latitude/longitude columns, so it is expressed in coordinate units
radius = 0.01

# per-crime results; rows that cannot be matched stay NaN
closest_value    = np.full(len(X), np.nan)
neighborhood_avg = np.full(len(X), np.nan)

for i,row in enumerate(X):

    # calculate distance from this crime to all properties
    dists = cdist(row.reshape(1,-1), Y)[0]

    # a missing coordinate on either side produces a NaN distance; np.argmin
    # would return the position of that NaN, so such entries are excluded
    valid = ~np.isnan(dists)
    if not valid.any():
        continue

    # value of the nearest property with a usable distance
    closest_value[i] = property_values[np.nanargmin(dists)]

    # average value of all properties strictly inside the radius
    # (NaN distances compare False and are therefore never selected);
    # nanmean skips missing values, matching the pandas mean used before
    in_radius = dists < radius
    if in_radius.any():
        neighborhood_avg[i] = np.nanmean(property_values[in_radius])

# append both results to the crimes dataframe in one positional assignment
crime_incidents['closest_property_value'] = closest_value
crime_incidents['neighborhood_avg']       = neighborhood_avg

In [5]:
# write the enriched crimes table; the dataframe index is written as the
# first CSV column (pandas default, made explicit here)
crime_incidents.to_csv('data/crimes_with_property.csv', index=True)