In [1]:
import datetime as dt
import json
import os
from collections import OrderedDict

import pandas as pd
from shapely.geometry import shape, Point

## Geo data

In [2]:
def get_geo_data(filename, path=r"../geodata/"):
    """Load a JSON/GeoJSON file and return the parsed object.

    Args:
        filename: Name of the file to read, relative to ``path``.
        path: Directory containing the file. Defaults to the notebook's
            ``../geodata/`` folder; a trailing separator is optional.

    Returns:
        Whatever ``json.load`` produces for the file contents.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the file is not valid JSON.
    """
    # os.path.join tolerates a directory given with or without a trailing
    # separator, unlike plain string concatenation.
    with open(os.path.join(path, filename)) as f:
        return json.load(f)

In [3]:
def get_neighborhood_data():
    """Build a lookup of neighborhood geometries.

    Reads 'neighborhoods.geojson' via get_geo_data and converts every
    feature's geometry with shapely's ``shape``.

    Returns:
        dict mapping each feature's 'nhood' property to its geometry object.
        If several features share the same 'nhood' value, the last one wins.
    """
    features = get_geo_data('neighborhoods.geojson')['features']
    return {
        feat['properties']['nhood']: shape(feat['geometry'])
        for feat in features
    }

In [4]:
def get_census_tract_data():
    """Build a lookup of census-tract geometries.

    Reads 'census-tracts.geojson' via get_geo_data and converts every
    feature's geometry with shapely's ``shape``.

    Returns:
        dict mapping each feature's 'tractce10' property to its geometry
        object. If several features share the same key, the last one wins.
    """
    features = get_geo_data('census-tracts.geojson')['features']
    return {
        feat['properties']['tractce10']: shape(feat['geometry'])
        for feat in features
    }

In [5]:
def get_police_district_data():
    """Build a lookup of police-district geometries.

    Reads 'police-districts.geojson' via get_geo_data and converts every
    feature's geometry with shapely's ``shape``.

    Returns:
        dict mapping each feature's 'district' property to its geometry
        object. If several features share the same key, the last one wins.
    """
    features = get_geo_data('police-districts.geojson')['features']
    return {
        feat['properties']['district']: shape(feat['geometry'])
        for feat in features
    }

In [6]:
def get_hist_police_district_data():
    """Build a lookup of police-district geometries from the 'hist' file.

    Reads 'hist-police-districts.geojson' via get_geo_data and converts every
    feature's geometry with shapely's ``shape``.

    Returns:
        dict mapping each feature's 'district' property to its geometry
        object. If several features share the same key, the last one wins.
    """
    features = get_geo_data('hist-police-districts.geojson')['features']
    return {
        feat['properties']['district']: shape(feat['geometry'])
        for feat in features
    }

## Robbery-Street

In [7]:
def open_raw_robbery_street(path=r"../rawdata/", filename=r"robbery-street.csv"):
    """Read the raw street-robbery CSV into a DataFrame.

    The 'date' and 'time' columns are parsed while reading, so they come back
    as ``datetime.date`` and ``datetime.time`` objects (object dtype) instead
    of strings.

    Args:
        path: Directory containing the CSV. Defaults to ``../rawdata/``; a
            trailing separator is optional.
        filename: Name of the CSV file. Defaults to ``robbery-street.csv``.

    Returns:
        pandas.DataFrame with the file's columns.

    Raises:
        ValueError: if a 'date' value is not in ``%Y-%m-%dT%H:%M:%S.%f`` form
            or a 'time' value is not in ``%H:%M`` form (raised by strptime).
    """
    conv = {
        # Timestamps carry a time part; only the calendar date is kept.
        'date': lambda x: dt.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%f").date(),
        'time': lambda x: dt.datetime.strptime(x, "%H:%M").time(),
    }

    return pd.read_csv(os.path.join(path, filename), converters=conv)

In [8]:
# Load the raw street-robbery records, print the column dtypes, and end the
# cell with a five-row preview.
df = open_raw_robbery_street()
print(df.dtypes)
df.head(5)

address       object
date          object
dayofweek     object
descript      object
time          object
x            float64
y            float64
dtype: object


Unnamed: 0,address,date,dayofweek,descript,time,x,y
0,300 Block of FAXON AV,2010-10-17,Sunday,"ROBBERY ON THE STREET, STRONGARM",19:30:00,-122.460291,37.720945
1,400 Block of NOE ST,2010-10-17,Sunday,"ROBBERY ON THE STREET, STRONGARM",21:15:00,-122.432901,37.761417
2,1500 Block of SLOAT BL,2010-10-18,Monday,ATTEMPTED ROBBERY ON THE STREET WITH A KNIFE,09:05:00,-122.489714,37.73395
3,SILVER AV / VIENNA ST,2010-10-18,Monday,"ROBBERY ON THE STREET, STRONGARM",18:07:00,-122.424198,37.728736
4,6900 Block of GEARY BL,2010-10-18,Monday,ATTEMPTED ROBBERY ON THE STREET WITH BODILY FORCE,17:55:00,-122.493845,37.779563


In [None]:
def xy_to_point(row):
    """Build a shapely Point from a row's 'x' and 'y' entries.

    Args:
        row: Any mapping-like object indexable by 'x' and 'y' (e.g. a
            DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns:
        ``Point(x, y)``.
    """
    x_coord, y_coord = row['x'], row['y']
    return Point(x_coord, y_coord)

# Cache for the default region lookups so the GeoJSON files are read and
# parsed once, not once per point. Re-running this cell resets the cache.
_REGION_LOOKUP_CACHE = {}


def _default_region_lookups():
    """Return the default {region_type: {region_id: polygon}} mapping, loading it on first use."""
    if 'lookups' not in _REGION_LOOKUP_CACHE:
        # Further region types exist but are currently disabled:
        #    ('tractce10' , get_census_tract_data()),
        #    ('police_district' , get_police_district_data()),
        #    ('hist_police_district' , get_hist_police_district_data())
        _REGION_LOOKUP_CACHE['lookups'] = OrderedDict([
            ('nhood', get_neighborhood_data()),
        ])
    return _REGION_LOOKUP_CACHE['lookups']


def identify_regions(point, regions=None):
    """Find, for each region type, the region whose polygon contains ``point``.

    Args:
        point: Geometry passed to each polygon's ``contains`` method.
        regions: Optional mapping ``{region_type: {region_id: polygon}}``.
            When omitted, the default lookups (currently only 'nhood') are
            loaded once and reused on later calls, so applying this function
            row by row does not re-read the GeoJSON for every point.

    Returns:
        pandas.Series indexed by region type, in the order of ``regions``.
        Each value is the id of the first polygon found to contain the point,
        or None when no polygon of that type contains it.
    """
    if regions is None:
        regions = _default_region_lookups()

    d_id = OrderedDict()

    # Iterate through all region types
    for region_type, region_dict in regions.items():

        # Default to None; overwritten if a containing polygon is found
        d_id[region_type] = None

        # Iterate through each region polygon
        for region_id, polygon in region_dict.items():
            if polygon.contains(point):
                d_id[region_type] = region_id
                # No need to test the remaining polygons of this type
                break

    return pd.Series(list(d_id.values()), index=list(d_id.keys()))
    

In [None]:
# Turn each row's x/y pair into a Point, then look up the regions containing
# it; identify_regions returns one Series per point.
temp = (
    df.apply(xy_to_point, axis=1)
      .apply(identify_regions)
)
temp.head(5)