In [28]:
"""
import requests
from invisibleroads_macros.disk import make_folder
from os.path import expanduser, join

def download(target_path, source_url):
    # Fetch source_url and save the response body at target_path.
    # Returns target_path so the call can be used directly in an assignment.
    response = requests.get(source_url)
    # Fail loudly on an HTTP error instead of saving an error page as data
    response.raise_for_status()
    # response.content is bytes (one of the downloads is a zip archive), so the
    # file must be opened in binary mode; the with block guarantees it is closed
    with open(target_path, 'wb') as target_file:
        target_file.write(response.content)
    return target_path

target_folder = make_folder(expanduser('~/Experiments/spatiotemporal'))
geojson_path = join(target_folder, 'nyc-traffic-injuries.json')
shapefile_path = join(target_folder, 'nyc-traffic-injuries.shp.zip')
""";

In [29]:
"""
geojson_path = download(
    join(target_folder, 'nyc-traffic-injuries.json'), 
    'http://www.nyc.gov/html/dot/downloads/misc/injury_all_monthly.json')
shapefile_path = download(
    join(target_folder, 'nyc-traffic-injuries.shp.zip'),
    'http://www.nyc.gov/html/dot/downloads/misc/injury_all_monthly_shapefile.zip')
""";

In [30]:
"""
import fiona
geojson_collection = fiona.open(geojson_path)
print 'geojson_collection.bounds = %s' % repr(geojson_collection.bounds)
print 'geojson_collection.crs_wkt = %s' % geojson_collection.crs_wkt
print 'geojson_collection.crs = %s' % geojson_collection.crs
geojson_collection[0]
""";

In [31]:
"""
import fiona
shapefile_collection = fiona.open('/', vfs='zip://' + shapefile_path)
print 'shapefile_collection.bounds = %s' % repr(shapefile_collection.bounds)
print 'shapefile_collection.crs_wkt = %s' % shapefile_collection.crs_wkt
print 'shapefile_collection.crs = %s' % shapefile_collection.crs
shapefile_collection[0]
""";

In [32]:
"""
from pandas import DataFrame, Period
rows, indices = [], []
for value_by_key in shapefile_collection:
    indices.append(value_by_key['id'])
    longitude, latitude = map(float, value_by_key['geometry']['coordinates'])
    properties = value_by_key['properties']
    year, month = int(properties['YR']), int(properties['MN'])
    total_injury_count = int(properties['Injuries'])
    pedestrian_injury_count = int(properties['PedInjurie'])
    bike_injury_count = int(properties['BikeInjuri'])
    motor_vehicle_occupant_injury_count = int(properties['MVOInjurie'])
    rows.append([
        longitude, latitude, year, month,
        total_injury_count, pedestrian_injury_count, bike_injury_count, motor_vehicle_occupant_injury_count,
    ])
nyc_traffic_injuries_table = DataFrame(rows, index=indices, columns=[
    'Longitude', 'Latitude', 'Year', 'Month',
    'Total Injury Count', 'Pedestrian Injury Count', 'Bike Injury Count', 'Motor Vehicle Occupant Injury Count',
])
print(nyc_traffic_injuries_table.dtypes)
""";

In [33]:
"""
from os.path import expanduser
nyc_traffic_injuries_table.to_msgpack(expanduser('~/Experiments/spatiotemporal/nyc-traffic-injuries.msg-blosc'), compress='blosc')
nyc_traffic_injuries_table.to_msgpack(expanduser('~/Experiments/spatiotemporal/nyc-traffic-injuries.msg-zlib'), compress='zlib')
""";

In [34]:
from os.path import expanduser
from pandas import read_msgpack
# Load the injuries table from the blosc-compressed msgpack file.
# NOTE: read_msgpack was deprecated in pandas 0.25 and removed in 1.0, so this
# cell needs an older pandas.
nyc_traffic_injuries_table = read_msgpack(expanduser('~/Experiments/spatiotemporal/nyc-traffic-injuries.msg-blosc'))
# Parenthesised print matches the print(...) calls used elsewhere in this
# notebook and produces the same output on Python 2 and Python 3
print('nyc_traffic_injury_count = %s' % len(nyc_traffic_injuries_table))
nyc_traffic_injuries_table[:3]

nyc_traffic_injury_count = 199341


Unnamed: 0,Longitude,Latitude,Year,Month,Total Injury Count,Pedestrian Injury Count,Bike Injury Count,Motor Vehicle Occupant Injury Count
0,-73.791745,40.725789,2009,1,2,1,0,1
1,-73.882429,40.844981,2009,1,1,1,0,0
2,-73.979058,40.744444,2009,1,2,2,0,0


In [None]:
# Replace integer index with time series index
# This method seems exceedingly slow and memory hungry
# It might be faster to just filter by year and month manually
from pandas import Period

def add_time_period(row):
    """Attach a monthly pandas Period built from the row's Year and Month.

    row: mapping-like object (a DataFrame row when used with apply(axis=1))
        that exposes 'Year' and 'Month'.
    Returns the same row object, now carrying a 'Time Period' entry.
    """
    # apply(axis=1) delivers each row as a single-dtype Series; next to the
    # float coordinate columns the integer fields come through as floats
    # (e.g. 2015.0), so cast them back to int before building the Period.
    row['Time Period'] = Period(year=int(row['Year']), month=int(row['Month']), freq='M')
    return row

# Row-wise apply: add_time_period is called once per row of the table, which is
# the slow part noted at the top of this cell
nyc_traffic_injuries_by_month_table = nyc_traffic_injuries_table.apply(add_time_period, axis=1)
# Replace the index with the 'Time Period' column produced by add_time_period
nyc_traffic_injuries_by_month_table.index = nyc_traffic_injuries_by_month_table['Time Period']

In [None]:
nyc_traffic_injuries_table.to_pickle(expanduser('~/Experiments/spatiotemporal/nyc-traffic-injuries.pkl'))
nyc_traffic_injuries_by_month_table.to_pickle(expanduser('~/Experiments/spatiotemporal/nyc-traffic-injuries-by-month.pkl'))

In [None]:
timeit len(nyc_traffic_injuries_by_month_table['2015-01':'2015-03'])

In [None]:
print(len(nyc_traffic_injuries_by_month_table['2015-01':'2015-01']))
print(len(nyc_traffic_injuries_by_month_table['2015-02':'2015-02']))
print(len(nyc_traffic_injuries_by_month_table['2015-03':'2015-03']))

In [None]:
1992 + 1749 + 2072

In [None]:
# Filter by dates
start_year = 2015
start_month = 1
end_year = 2015
end_month = 3

In [None]:
timeit len(nyc_traffic_injuries_table[(
    start_year <= nyc_traffic_injuries_table.Year
) & (
    nyc_traffic_injuries_table.Year <= end_year
) & (
    start_month <= nyc_traffic_injuries_table.Month
) & (
    nyc_traffic_injuries_table.Month <= end_month
)])

In [None]:
timeit len(nyc_traffic_injuries_table[(start_year <= nyc_traffic_injuries_table.Year) & (nyc_traffic_injuries_table.Year <= end_year) & (start_month <= nyc_traffic_injuries_table.Month) & (nyc_traffic_injuries_table.Month <= end_month)])

In [None]:
nyc_traffic_injuries_table.sum()

In [None]:
# Peek at the first three (coordinate pair -> row labels) groups; list(...) keeps
# the slice working where dict.items() returns a view (Python 3) as well as
# where it returns a list (Python 2)
list(nyc_traffic_injuries_table.groupby(['Longitude', 'Latitude']).groups.items())[:3]

In [16]:
# Keep only the coordinates and the four injury counters, then group the rows
# by coordinate pair
location_columns = ['Longitude', 'Latitude']
injury_count_columns = [
    'Total Injury Count',
    'Pedestrian Injury Count',
    'Bike Injury Count',
    'Motor Vehicle Occupant Injury Count',
]
g = nyc_traffic_injuries_table[location_columns + injury_count_columns].groupby(location_columns)

In [22]:
import numpy as np
# Sum every injury counter per group and show the ten groups with the highest
# total. sort_values replaces DataFrame.sort (deprecated in pandas 0.17 and
# later removed) and matches the sort_values calls used further down.
g.agg(np.sum).sort_values('Total Injury Count', ascending=False)[:10]
# pandas 0.17.0, 0.17.1 fail

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Injury Count,Pedestrian Injury Count,Bike Injury Count,Motor Vehicle Occupant Injury Count
Longitude,Latitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-73.890579,40.658521,310,22,7,281
-74.020421,40.642057,232,14,4,214
-73.767353,40.65616,226,1,0,225
-73.877427,40.680014,219,30,3,186
-73.80314,40.716214,217,7,2,208
-73.890485,40.820035,206,52,6,148
-73.908088,40.676189,203,12,4,187
-73.912209,40.804615,202,19,3,180
-73.919951,40.644708,199,10,5,184
-73.896852,40.675736,196,21,4,171


In [None]:
# Identify most dangerous by location and month
nyc_traffic_injuries_table.sort_values('Total Injury Count', ascending=False)[:10]

In [None]:
# Identify most dangerous by location and year
import numpy as np
# Sum the injury counters per (coordinate pair, year) so each row is one
# location-year total, then list the ten largest by overall injury count.
# Month is left out of the selection so it is not summed along with the counters.
nyc_traffic_injuries_table.groupby(['Longitude', 'Latitude', 'Year'])[[
    'Total Injury Count',
    'Pedestrian Injury Count',
    'Bike Injury Count',
    'Motor Vehicle Occupant Injury Count',
]].sum().sort_values('Total Injury Count', ascending=False)[:10]

In [None]:
# Find all within radius
# Plot histogram

In [None]:
# Predict next month
# Predict next year

In [None]:
# Find all where trend is increasing month to month
# Find all where trend is increasing year to year

In [None]:
# Cluster into areas
# Run trend again

In [None]:
# Map results to png

In [None]:
# Map results to leaflet

In [None]:
# Animate results (what would we animate?)

In [None]:
# Given dates, return ranked table of intersections
# Okay

In [None]:
# Given dates, address, radius, return ranked table of intersections
# Illustrates kdtree

In [23]:
# Geocode address into latitude and longitude
from geopy.geocoders import GoogleV3

In [25]:
g = GoogleV3()

In [27]:
g.geocode('345 Chambers Street, NY, NY')

Location((40.7180139, -74.0138939, 0.0))

In [None]:
# Build kdtree
# I can either go direct from longitude latitude or I can convert into a different spatial reference

In [37]:
from pysal.cg import kdtree
kdtree.Arc_KDTree?

In [None]:
# Get all injuries within 100 meter radius
# Rank
# Generate histogram

In [None]:
# Given dates, addresses, return ranked table within bounding box
# Show histogram
# Filter by bounding box

In [None]:
# Given dates, addresses, mode of transport, return ranked routes

In [None]:
# Call directions api

In [None]:
# Given dates, target date, project trends for each location
# Illustrates basic trend forecasting

In [None]:
# Find most dangerous center
# Illustrates minimizing a loss function

In [None]:
# Find safest route without google directions
# Populate z = f(x, y) by adding a gaussian at each accident spot, with radius proportional to number of accidents
# optimize path using optimization function, through X waypoints, where we move each of the waypoints
# in other words, we optimize the location of each waypoint
# the loss function is minimizing z and total distance
# this sounds like an obstacle avoidance algorithm
# not sure if it's really appropriate here though

In [None]:
import pandas
# Show which pandas release the kernel is running; the parenthesised form
# matches the print(...) calls used elsewhere in this notebook and works on
# both Python 2 and Python 3
print(pandas.__version__)

In [None]:
from pandas import DataFrame
# Smallest possible groupby-sum check: two rows, grouped on column A
table = DataFrame({'A': [1, 3], 'B': [2, 4]}, columns=['A', 'B'])
table.groupby('A').sum()