In [81]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore') # Filter out warnings

import xml.etree.ElementTree as ET

In [82]:
# Load the accident records and keep only the two coordinate columns.
# Later cells address these columns by position (0 = LATITUDE, 1 = LONGITUDE),
# so the order of this selection matters.
nypd = pd.read_csv("nypd.csv")
nypd = nypd[['LATITUDE','LONGITUDE']]
# Only the last expression of a cell is displayed; the former mid-cell
# `nypd.head()` call was evaluated and thrown away, so it has been removed.
nypd.shape

(50, 2)

In [83]:
# Keep only rows in which no column equals 0, then drop rows with missing values.
nypd = nypd[(nypd != 0).all(axis=1)].dropna()

# Bounding box of the remaining accident coordinates.
latitude_low, latitude_high = nypd['LATITUDE'].min(), nypd['LATITUDE'].max()
longitude_low, longitude_high = nypd['LONGITUDE'].min(), nypd['LONGITUDE'].max()

(latitude_low, latitude_high), (longitude_low, longitude_high)

((40.570890000000006, 40.87595), (-74.1479, -73.73841))

In [84]:
# Bin each coordinate into 10 equal-width intervals and store the interval
# labels next to the raw values.
nypd = nypd.assign(
    latti_range=pd.cut(nypd['LATITUDE'], 10),
    longi_range=pd.cut(nypd['LONGITUDE'], 10),
)
nypd.head()

Unnamed: 0,LATITUDE,LONGITUDE,latti_range,longi_range
0,40.61704,-74.00366,"(40.601, 40.632]","(-74.025, -73.984]"
1,40.645027,-73.91998,"(40.632, 40.662]","(-73.943, -73.902]"
2,40.826412,-73.9406,"(40.815, 40.845]","(-73.943, -73.902]"
3,40.788914,-73.800415,"(40.784, 40.815]","(-73.82, -73.779]"
4,40.74916,-73.88991,"(40.723, 40.754]","(-73.902, -73.861]"


In [85]:
# Distinct latitude bins present in the data, shown in ascending order.
a = nypd.latti_range.unique()
sorted(a)

[Interval(40.571, 40.601, closed='right'),
 Interval(40.601, 40.632, closed='right'),
 Interval(40.632, 40.662, closed='right'),
 Interval(40.662, 40.693, closed='right'),
 Interval(40.693, 40.723, closed='right'),
 Interval(40.723, 40.754, closed='right'),
 Interval(40.754, 40.784, closed='right'),
 Interval(40.784, 40.815, closed='right'),
 Interval(40.815, 40.845, closed='right'),
 Interval(40.845, 40.876, closed='right')]

In [89]:
# Load the intersection coordinates. Later cells read this frame by position:
# column 0 is used as the latitude and column 1 as the longitude.
intersections = pd.read_csv('intersection.csv')


In [97]:
# latitude_low = 0
# latitude_high = 80
# longitude_low = -80
# longitude_high = 0
# test_accidents = [(70,-70),(71,-71),(69,-71),(40,-60),(20,-70),(60,-10),(40,-20),(41,-21),(40,-21),(39,-19)]
# test_inters = ['70,-70','20,-50','40,-20']

def find_index(low, high, num_bucket, n):
    """Return the index of the equal-width bucket that the value ``n`` falls into.

    The range ``[low, high]`` is split into ``num_bucket`` buckets of equal
    width, numbered ``0 .. num_bucket - 1``.

    Parameters
    ----------
    low, high : number
        Lower and upper end of the bucketed range.
    num_bucket : int
        Number of buckets the range is divided into.
    n : number
        Value to locate.

    Returns
    -------
    int
        Bucket index, always within ``0 .. num_bucket - 1``. Values below
        ``low`` map to the first bucket and values at or above ``high`` map
        to the last one.

    Raises
    ------
    ZeroDivisionError
        If ``num_bucket`` is 0.
    """
    bucket_size = (high - low) / num_bucket
    if bucket_size == 0:
        # Degenerate range (high == low): there is nothing to divide by, and
        # every value can only belong to the first bucket.
        return 0
    index = int((n - low) / bucket_size)
    # Clamp on both sides. Without the lower clamp a value below `low` yields
    # a negative index, which Python lists interpret as "count from the end".
    return max(0, min(index, num_bucket - 1))


def bucket_start(low, high, num_bucket, n):
    """Return the lower edge of bucket number ``n``.

    The range ``[low, high]`` is divided into ``num_bucket`` equal-width
    buckets; bucket ``n`` starts ``n`` bucket widths above ``low``.
    """
    width = (high - low) / num_bucket
    return low + n * width

def accidents_in_buckets(num_bucket):
    """Distribute every accident in ``nypd`` over a ``num_bucket`` x ``num_bucket`` grid.

    The first grid axis is the latitude bucket and the second axis is the
    bucket of the negated longitude, both computed with ``find_index`` over
    the global bounding box (``latitude_low`` .. ``latitude_high`` and
    ``longitude_low`` .. ``longitude_high``).

    Returns a nested list in which ``grid[x][y]`` holds the ``(lat, long)``
    tuples of the accidents that fall into that cell.
    """
    # One independent empty list per grid cell.
    grid = [[[] for _ in range(num_bucket)] for _ in range(num_bucket)]

    for row in range(len(nypd)):
        # Columns are addressed by position: 0 is latitude, 1 is longitude.
        lat, long = nypd.iloc[row, 0], nypd.iloc[row, 1]
        lat_idx = find_index(latitude_low, latitude_high, num_bucket, lat)
        long_idx = find_index(-longitude_high, -longitude_low, num_bucket, -long)
        grid[lat_idx][long_idx].append((lat, long))

    return grid

def accident_in_picture(accident, lat_low, lat_high, long_low, long_high):
    """Tell whether ``accident`` lies inside the given rectangle (edges included).

    ``accident`` is indexed as ``accident[0]`` (latitude) and ``accident[1]``
    (longitude); the remaining arguments are the rectangle's bounds.
    """
    return lat_low <= accident[0] <= lat_high and long_low <= accident[1] <= long_high
    

def get_intersection_accident_count(accidents, num_bucket, intersection_cords, picture_len):
    """Count the accidents inside a square "picture" centred on each intersection.

    For every row of the global ``intersections`` frame (column 0 read as
    latitude, column 1 as longitude) a square with side ``picture_len`` is
    centred on the intersection. Only the grid cells that the square overlaps
    are scanned, and each accident in them is tested with
    ``accident_in_picture``.

    Parameters
    ----------
    accidents : list of list of list
        Bucket grid indexed as ``accidents[lat_bucket][neg_long_bucket]``,
        each cell holding ``(lat, long)`` tuples (the layout built by
        ``accidents_in_buckets``).
    num_bucket : int
        Number of buckets per axis used to build ``accidents``.
    intersection_cords
        Not read by this function; the intersections are taken from the
        global ``intersections`` frame. Kept so existing calls keep working.
    picture_len : number
        Side length of the square, in the same units as the coordinates.

    Returns
    -------
    dict
        Maps ``(inter_lat, inter_long)`` to the number of accidents in that
        intersection's square. Intersections with identical coordinates
        share one key (the last one wins).

    Side effects
    ------------
    Prints the sum of all per-intersection counts.
    """
    result = {}
    total_count = 0
    half_len = picture_len / 2

    for row in range(len(intersections)):
        inter_lat = intersections.iloc[row, 0]
        inter_long = intersections.iloc[row, 1]

        picture_lat_low = inter_lat - half_len
        picture_lat_high = inter_lat + half_len
        picture_long_low = inter_long - half_len
        picture_long_high = inter_long + half_len

        # Bucket span overlapped by the picture. Longitudes are negated to
        # match the axis orientation used when the grid was filled. The
        # indices are clamped at 0 because a picture edge below the grid's
        # lower bound can yield a negative index, which would otherwise wrap
        # around to the end of the grid through negative list indexing.
        start_bucket_x = max(0, find_index(latitude_low, latitude_high, num_bucket, picture_lat_low))
        end_bucket_x = max(0, find_index(latitude_low, latitude_high, num_bucket, picture_lat_high)) + 1
        start_bucket_y = max(0, find_index(-longitude_high, -longitude_low, num_bucket, -picture_long_high))
        end_bucket_y = max(0, find_index(-longitude_high, -longitude_low, num_bucket, -picture_long_low)) + 1

        accident_count = 0
        # Distinct index names: the bucket loops must not reuse the row index.
        for bucket_x in range(start_bucket_x, end_bucket_x):
            for bucket_y in range(start_bucket_y, end_bucket_y):
                for accident in accidents[bucket_x][bucket_y]:
                    if accident_in_picture(accident, picture_lat_low,
                                           picture_lat_high, picture_long_low, picture_long_high):
                        accident_count += 1

        result[(inter_lat, inter_long)] = accident_count
        total_count += accident_count

    print(total_count)
    return result

# Side length of the square around each intersection, in coordinate degrees.
pic_length = 0.01
# Number of buckets per axis; 1 puts every accident into a single grid cell.
num_bucket = 1
accidents = accidents_in_buckets(num_bucket)
# The third argument is not read by get_intersection_accident_count (it
# iterates over the global `intersections` frame). The previous argument,
# `cord`, is not defined in any visible cell and raises NameError on a fresh
# kernel, so the intersections frame is passed instead.
print(get_intersection_accident_count(accidents, num_bucket, intersections, pic_length))


    

1527
{(40.690968100000006, -73.9892198): 0, (40.6917872, -73.9890093): 0, (40.80683270000001, -73.93421509999999): 0, (40.8061691, -73.9348588): 0, (40.8057523, -73.9347088): 0, (40.8417297, -73.93941690000001): 0, (40.8411266, -73.9398609): 0, (40.7194205, -73.9913214): 0, (40.719680600000004, -73.9921746): 0, (40.7215815, -73.9977854): 0, (40.7203456, -73.9941173): 0, (40.721450899999994, -73.99739790000001): 0, (40.7208958, -73.9957503): 0, (40.7211736, -73.9965747): 0, (40.7206316, -73.99496629999999): 0, (40.7024415, -74.0128371): 0, (40.7024037, -74.0137608): 0, (40.86553060000001, -73.92727140000001): 0, (40.861547200000004, -73.92473299999999): 0, (40.86163079999999, -73.9247908): 0, (40.863575, -73.926011): 0, (40.7622894, -73.9723599): 0, (40.762966999999996, -73.973968): 0, (40.7027326, -74.0122323): 0, (40.7518406, -73.99968740000001): 0, (40.7513227, -73.9991493): 0, (40.803385600000006, -73.9291157): 0, (40.8040325, -73.9278686): 0, (40.7270946, -73.9915448): 0, (40.72526

In [103]:
# Brute-force count: for every intersection, count the accidents that fall
# inside a square of side `pic_length` centred on it, then sum over all
# intersections.
pic_length = 0.02
half_len = pic_length / 2

# Extract the accident coordinates once (column 0 = latitude, column 1 =
# longitude) so each intersection is checked with one vectorised comparison
# instead of a Python loop over every accident.
acc_lats = nypd.iloc[:, 0].values
acc_longs = nypd.iloc[:, 1].values

counts = 0
for row in range(len(intersections)):
    inter_lat = intersections.iloc[row, 0]
    inter_long = intersections.iloc[row, 1]

    # Boolean mask of the accidents inside this intersection's square
    # (edges included).
    in_picture = (
        (acc_lats >= inter_lat - half_len)
        & (acc_lats <= inter_lat + half_len)
        & (acc_longs >= inter_long - half_len)
        & (acc_longs <= inter_long + half_len)
    )
    counts += int(np.count_nonzero(in_picture))
counts

5422