In [1]:
from scripts import mapcalc_kde
import pandas as pd
import numpy as np
import dill

In [3]:
# Load every geocoded CSV into its own pandas DataFrame.
arrests = pd.read_csv("Django_old/clean_data/arrests_GIS.csv")
schools = pd.read_csv("Django_old/clean_data/school_list_GIS.csv")
groceries = pd.read_csv("Django_old/clean_data/grocerystore_list_GIS.csv")
vacancies = pd.read_csv("Django_old/clean_data/vacancies_GIS.csv")
museums = pd.read_csv("Django_old/clean_data/museums_GIS.csv")
parks = pd.read_csv("Django_old/clean_data/parks_GIS.csv")
liquor = pd.read_csv("Django_old/clean_data/Liquor_GIS.csv")
libraries = pd.read_csv("Django_old/clean_data/Libraries_GIS.csv")

# Restaurants need one cleaning step: only rows whose longitude is negative
# are kept; everything else is treated as a bad value and dropped.
restaurants_raw = pd.read_csv("Django_old/clean_data/restaurant_list_GIS.csv")
has_negative_longitude = restaurants_raw["Longitude"] < 0
restaurants = restaurants_raw[has_negative_longitude]

In [None]:
"""
Compute and dump kernels as dill files.
"""

bandwidth = None # Choose 'None' for Scott-type bandwidth determination
# NOTE(review): `bandwidth` is never passed to compute_kde below -- the call
# uses the literal 0.15 as its third argument. Confirm which value is intended
# before relying on the "dynamicBW" file names.

# datasets and their output names, in matching order
datas = [arrests, vacancies, groceries, restaurants, schools, museums, parks, liquor, libraries]
names = ['crime', 'vacancy', 'grocery', 'restaurant', 'schools', 'museums', 'parks', 'liquor', 'libraries']

# Compute one kernel per dataset from its Longitude/Latitude columns and
# dump it to dills/<name>_dynamicBW.dill.
for name, data in zip(names, datas):
    kernel = mapcalc_kde.compute_kde(data['Longitude'], data['Latitude'], 0.15)
    # Pickle streams are bytes, so the file must be opened in binary mode;
    # the `with` block guarantees the handle is flushed and closed.
    with open('dills/'+name+'_dynamicBW.dill', 'wb') as kernel_file:
        dill.dump(kernel, kernel_file)

In [4]:
# load the kernels
def _load_dill(path):
    """Return the object stored in the dill file at `path`.

    The file is opened in binary mode (pickle streams are bytes) and is
    closed as soon as loading finishes. Unpickling can execute arbitrary
    code, so only point this at dill files you produced yourself.
    """
    with open(path, 'rb') as dill_file:
        return dill.load(dill_file)

# NOTE: schools, museums, parks, liquor and libraries were DataFrames in the
# CSV-loading cell; from here on those names refer to the loaded kernels.
crime = _load_dill('dills/crime_dynamicBW.dill')
vacancy = _load_dill('dills/vacancy_dynamicBW.dill')
grocery = _load_dill('dills/grocery_dynamicBW.dill')
restaurant = _load_dill('dills/restaurant_dynamicBW.dill')
schools = _load_dill('dills/schools_dynamicBW.dill')
museums = _load_dill('dills/museums_dynamicBW.dill')
parks = _load_dill('dills/parks_dynamicBW.dill')
liquor = _load_dill('dills/liquor_dynamicBW.dill')
libraries = _load_dill('dills/libraries_dynamicBW.dill')

In [6]:
"""Define map and grid of points at which to compute kernel densities"""

# Bounding box shared by every map: longitude is the x axis, latitude the y axis.
lonmin, lonmax = -76.72, -76.52
latmin, latmax = 39.19, 39.38

# Samples per axis, so the full grid holds npts**2 points.
npts = 200

# Evenly spaced axis samples, both endpoints included.
x = np.linspace(lonmin, lonmax, num=npts)
y = np.linspace(latmin, latmax, num=npts)

# 'ij' indexing: X[i, j] == x[i] and Y[i, j] == y[j].
X, Y = np.meshgrid(x, y, indexing='ij')

# 2 x npts**2 array; row 0 holds the longitudes, row 1 the latitudes.
positions = np.vstack((X.ravel(), Y.ravel()))

# compute individual maps, one kde_map call per kernel on the shared x/y axes
def _map_for(kernel):
    """Call mapcalc_kde.kde_map for `kernel` using the notebook's x and y axes."""
    return mapcalc_kde.kde_map(x, y, kernel)

crime_map = _map_for(crime)
vacancy_map = _map_for(vacancy)
grocery_map = _map_for(grocery)
restaurant_map = _map_for(restaurant)
schools_map = _map_for(schools)
museums_map = _map_for(museums)
parks_map = _map_for(parks)
liquor_map = _map_for(liquor)
libraries_map = _map_for(libraries)

# Gather the per-dataset maps into a single DataFrame, one column per map.
# (Each map is expected to be a flattened array of length npts**2 -- TODO confirm
# against mapcalc_kde.kde_map.)
map_columns = {
    'crime': crime_map,
    'vacancy': vacancy_map,
    'grocery': grocery_map,
    'restaurant': restaurant_map,
    'schools': schools_map,
    'museums': museums_map,
    'parks': parks_map,
    'liquor': liquor_map,
    'libraries': libraries_map,
}
map_df = pd.DataFrame(map_columns)

# dump the whole dataframe
# Pickle streams are bytes, so the file is opened in binary mode; the `with`
# block makes sure the handle is flushed and closed once the dump completes.
with open('dills/map_df_dynamicBW.dill', 'wb') as map_df_file:
    dill.dump(map_df, map_df_file)