In [4]:
import requests
from invisibleroads_macros.disk import make_folder
from os.path import expanduser, join

def download(target_path, source_url):
    """Download source_url and save the response body to target_path.

    Parameters
    ----------
    target_path : str
        Path of the file to create or overwrite.
    source_url : str
        URL to fetch with an HTTP GET request.

    Returns
    -------
    str
        target_path, so that the call can be used directly in an assignment.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx or 5xx status code.
    """
    response = requests.get(source_url)
    # Fail loudly instead of saving an HTTP error page as if it were data
    response.raise_for_status()
    # response.content is raw bytes, so the file must be opened in binary
    # mode; the with block closes the file before the path is returned
    with open(target_path, 'wb') as target_file:
        target_file.write(response.content)
    return target_path

# Keep every data file in one experiment folder under the home directory
# (make_folder presumably creates the folder if missing and returns its path)
target_folder = make_folder(expanduser('~/Experiments/spatiotemporal'))
geojson_path, shapefile_path = (
    join(target_folder, 'nyc-traffic-injuries.json'),
    join(target_folder, 'nyc-traffic-injuries.shp.zip'))

In [11]:
# Fetch the GeoJSON edition of the dataset, then echo where it was saved
geojson_path = download(
    target_path=join(target_folder, 'nyc-traffic-injuries.json'),
    source_url='http://www.nyc.gov/html/dot/downloads/misc/injury_all_monthly.json')
geojson_path

'/home/rhh/Experiments/spatiotemporal/nyc-traffic-injuries.json'

In [12]:
# Fetch the zipped shapefile edition of the dataset, then echo where it was saved
shapefile_path = download(
    target_path=join(target_folder, 'nyc-traffic-injuries.shp.zip'),
    source_url='http://www.nyc.gov/html/dot/downloads/misc/injury_all_monthly_shapefile.zip')
shapefile_path

'/home/rhh/Experiments/spatiotemporal/nyc-traffic-injuries.shp.zip'

In [5]:
import fiona
# Open the downloaded GeoJSON file as a Fiona collection
geojson_collection = fiona.open(geojson_path)
# Display the bounding box of all features (minx, miny, maxx, maxy)
geojson_collection.bounds

(-74.2539230306024, 40.49947769792743, -73.70059800086655, 40.91246913562538)

In [18]:
# Peek at the first feature to see its geometry type and property names
geojson_collection[0]

{'geometry': {'coordinates': (-73.7917447266822, 40.72578884918672),
  'type': 'Point'},
 'id': '0',
 'properties': OrderedDict([(u'MVOInjurie', 1),
              (u'MN', u'1'),
              (u'Injuries', 2),
              (u'BikeInjuri', 0),
              (u'YR', u'2009'),
              (u'PedInjurie', 1)]),
 'type': 'Feature'}

In [19]:
# Show only the attribute fields of the first feature
geojson_collection[0]['properties']

OrderedDict([(u'MVOInjurie', 1),
             (u'MN', u'1'),
             (u'Injuries', 2),
             (u'BikeInjuri', 0),
             (u'YR', u'2009'),
             (u'PedInjurie', 1)])

In [6]:
# Read the shapefile straight from the zip archive through the zip:// virtual
# filesystem, without extracting it to disk first
# NOTE(review): the vfs= keyword is the form this notebook's fiona accepts;
# confirm against the installed fiona version before changing it
shapefile_collection = fiona.open('/', vfs='zip://' + shapefile_path)
# Display the bounding box, to compare with that of the GeoJSON collection
shapefile_collection.bounds

(-74.2539230306024, 40.49947769792743, -73.70059800086655, 40.91246913562538)

In [29]:
# Coordinate reference system of the shapefile in well-known text form
shapefile_collection.crs_wkt

u'GEOGCS["GCS_WGS_1984",DATUM["WGS_1984",SPHEROID["WGS_84",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433],AUTHORITY["EPSG","4326"]]'

In [30]:
# Coordinate reference system of the shapefile as a mapping
shapefile_collection.crs

{'init': u'epsg:4326'}

In [32]:
# Coordinate reference system of the GeoJSON file, to compare with the shapefile
geojson_collection.crs

{'init': u'epsg:4326'}

In [33]:
# Well-known text of the GeoJSON coordinate reference system, to compare with
# the well-known text reported for the shapefile
geojson_collection.crs_wkt

u'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'

In [34]:
# First feature of the shapefile, to compare with the first GeoJSON feature
shapefile_collection[0]

{'geometry': {'coordinates': (-73.7917447266822, 40.72578884918672),
  'type': 'Point'},
 'id': '0',
 'properties': OrderedDict([(u'Injuries', 2),
              (u'PedInjurie', 1),
              (u'BikeInjuri', 0),
              (u'MVOInjurie', 1),
              (u'MN', u'1'),
              (u'YR', u'2009')]),
 'type': 'Feature'}

In [9]:
# Display the first GeoJSON feature (geometry, id and properties)
geojson_collection[0]

{'geometry': {'coordinates': (-73.7917447266822, 40.72578884918672),
  'type': 'Point'},
 'id': '0',
 'properties': OrderedDict([(u'MVOInjurie', 1),
              (u'MN', u'1'),
              (u'Injuries', 2),
              (u'BikeInjuri', 0),
              (u'YR', u'2009'),
              (u'PedInjurie', 1)]),
 'type': 'Feature'}

In [1]:
# Convert the shapefile features into a pandas DataFrame

In [11]:
from pandas import Period
# Check which frequency pandas infers from a bare year-month string
Period('2000-01')

Period('2000-01', 'M')

In [12]:
from pandas import DataFrame, Period

# Flatten every shapefile feature into one table row, indexed by feature id
rows, indices = [], []
for value_by_key in shapefile_collection:
    indices.append(value_by_key['id'])
    # Each feature is a point, so its coordinates unpack into exactly two values
    longitude, latitude = value_by_key['geometry']['coordinates']
    properties = value_by_key['properties']
    # YR and MN are stored in the table exactly as the source delivers them
    # (the feature preview shows them as strings such as u'2009' and u'1')
    year, month = properties['YR'], properties['MN']
    # Build the monthly period from integers with an explicit frequency,
    # instead of having pandas parse an unpadded string such as '2009-1'
    # and infer the frequency again for every row
    period = Period(year=int(year), month=int(month), freq='M')
    total_injury_count = properties['Injuries']
    pedestrian_injury_count = properties['PedInjurie']
    bike_injury_count = properties['BikeInjuri']
    motor_vehicle_occupant_injury_count = properties['MVOInjurie']
    # The order of the values must match the column names given below
    rows.append([
        longitude, latitude, period, year, month,
        total_injury_count, pedestrian_injury_count, bike_injury_count, motor_vehicle_occupant_injury_count,
    ])

nyc_traffic_injury_table = DataFrame(rows, index=indices, columns=[
    'Longitude', 'Latitude', 'Period', 'Year', 'Month',
    'Total Injury Count', 'Pedestrian Injury Count', 'Bike Injury Count', 'Motor Vehicle Occupant Injury Count',
])

In [13]:
# Preview the first three rows to check the columns and their values
nyc_traffic_injury_table.head(3)

Unnamed: 0,Longitude,Latitude,Period,Year,Month,Total Injury Count,Pedestrian Injury Count,Bike Injury Count,Motor Vehicle Occupant Injury Count
0,-73.791745,40.725789,2009-01,2009,1,2,1,0,1
1,-73.882429,40.844981,2009-01,2009,1,1,1,0,0
2,-73.979058,40.744444,2009-01,2009,1,2,2,0,0


In [14]:
# Number of rows in the table (one row was appended per shapefile feature)
len(nyc_traffic_injury_table)

199341

In [None]:
# Identify safest
# Identify most dangerous

In [None]:
# Find all within radius
# Plot histogram

In [None]:
# Find all where trend is increasing month to month
# Find all where trend is increasing year to year

In [None]:
# Cluster into areas
# Run trend again

In [None]:
# Map results to png

In [None]:
# Map results to leaflet

In [None]:
# Animate results (what would we animate?)