In [1]:
# Image is imported from the public IPython.display module; importing display
# classes from IPython.core.display is a deprecated path.
from IPython.display import Image
# The Image object is the cell's last expression, so the notebook renders it.
Image(filename='images/Xela-PazAmor.jpg')  # Xela

In [2]:
# Render the image file as this cell's output.
# NOTE(review): the trailing comment is presumably the place where the photo was taken -- confirm.
Image(filename='images/StickerLady-TimeTravel.jpg')  # Singapore (Secret!)

In [3]:
# Render the image file as this cell's output.
# NOTE(review): the trailing comment is presumably the place where the photo was taken -- confirm.
Image(filename='images/NeckFace-CreepingSleeping.jpg')  # Los Angeles

In [4]:
# Render the image file as this cell's output.
# NOTE(review): the trailing comment is presumably the place where the photo was taken -- confirm.
Image(filename='images/NewYork-OldTimers.jpg')  # New York

Rebecca is an anthropologist who wants to understand New York through its graffiti.  Help her find the subway entrances with the greatest number of graffiti sightings within a hundred-foot radius.

- [NYC Graffiti Locations](https://nycopendata.socrata.com/Other/Graffiti-Locations/2j99-6h29)
- [NYC Subway Entrances](https://nycopendata.socrata.com/Transportation/Subway-Entrances/drex-xx56)

In [5]:
from pandas import read_csv
# Load the two input tables from CSV files stored under datasets/.
# NOTE(review): presumably these are exports of the two NYC Open Data pages
# linked in the text above -- confirm provenance and download date.
graffiti = read_csv('datasets/NYC-GraffitiSightings.csv')
subway = read_csv('datasets/NYC-SubwayEntrances.csv')

In [6]:
# Peek at the first graffiti record to see which columns the table offers.
# .iloc selects by position; the .ix indexer used previously has been removed
# from current pandas releases.
graffiti.iloc[0]

In [7]:
# Keep only the sightings whose Status column equals 'Open'.
graffiti = graffiti.loc[graffiti['Status'] == 'Open']

In [8]:
# Reduce the graffiti table to its two coordinate columns.
graffitiXY = graffiti.loc[:, ['X Coordinate', 'Y Coordinate']]

In [9]:
# Shorten the coordinate column labels to X and Y.
graffitiXY = graffitiXY.rename(columns={
    'X Coordinate': 'X',
    'Y Coordinate': 'Y',
})

In [10]:
# Discard every row in which either coordinate is missing
# (axis=0 / how='any' are the dropna defaults, spelled out here).
graffitiXY = graffitiXY.dropna(axis=0, how='any')

In [11]:
# Peek at the first subway entrance record to see which columns the table offers.
# .iloc selects by position; the .ix indexer used previously has been removed
# from current pandas releases.
subway.iloc[0]

In [12]:
from pandas import Series
from geometryIO import get_transformPoint, proj4LL

# proj4 definition of the target coordinate system, assembled one parameter per
# line for readability. +to_meter=0.3048006096012192 is the length of a US
# survey foot in metres, so projected coordinates are expressed in feet.
# NOTE(review): the parameters look like a New York State Plane (NAD83) zone --
# confirm which zone the graffiti X/Y coordinates use.
proj4NY = ' '.join([
    '+proj=lcc',
    '+lat_1=41.03333333333333',
    '+lat_2=40.66666666666666',
    '+lat_0=40.16666666666666',
    '+lon_0=-74',
    '+x_0=300000.0000000001',
    '+y_0=0',
    '+ellps=GRS80',
    '+datum=NAD83',
    '+to_meter=0.3048006096012192',
    '+no_defs',
])
# Build the point converter that parse_point calls for every subway entrance.
# NOTE(review): presumably it maps (longitude, latitude) in proj4LL to (x, y)
# in proj4NY, judging by how parse_point calls it -- confirm against geometryIO.
transformPoint = get_transformPoint(proj4LL, proj4NY)

def parse_point(row):
    """Turn one subway entrance row into a Series with ID, X and Y.

    The row's 'Shape' value is read as two comma-separated numbers, latitude
    first and longitude second, optionally wrapped in parentheses. The pair is
    passed through transformPoint (longitude first) to obtain X and Y, and the
    row's 'OBJECTID' is carried along as ID.
    """
    coordinateText = row['Shape'].replace('(', '').replace(')', '')
    latitudeText, longitudeText = coordinateText.split(',')
    # transformPoint takes longitude before latitude
    x, y = transformPoint(float(longitudeText), float(latitudeText))
    return Series({'ID': row['OBJECTID'], 'X': x, 'Y': y})
# Run parse_point over every subway row (axis=1 hands each row to the function),
# giving a table with ID, X and Y columns.
subwayIDXY = subway.apply(parse_point, axis=1)
# Coordinates only, for building the spatial index.
subwayXY = subwayIDXY.loc[:, ['X', 'Y']]

# Count graffiti sightings within 100 feet of a subway entrance

In [13]:
from scipy.spatial import KDTree
# Pull the raw coordinate arrays out of both tables ...
graffitiXYValues = graffitiXY.values
subwayXYValues = subwayXY.values
# ... and index each point set with a k-d tree for radius queries.
graffitiTree = KDTree(graffitiXYValues)
subwayTree = KDTree(subwayXYValues)

In [14]:
try:
    from itertools import izip  # Python 2: lazy pairwise iterator
except ImportError:
    izip = zip  # Python 3 has no itertools.izip; built-in zip is already lazy
from pandas import DataFrame

# Search radius around each subway entrance, in the units of the X/Y columns.
# NOTE(review): proj4NY declares US survey feet; the graffiti X/Y coordinates
# are presumably in the same system -- confirm.
SEARCH_RADIUS = 100

# For each subway point, query_ball_tree yields the list of indices of the
# graffiti points lying within SEARCH_RADIUS of it; the lists come back in the
# same order as the rows of subwayIDXY, so they can be paired with the IDs.
graffitiXYIndexPacks = subwayTree.query_ball_tree(graffitiTree, r=SEARCH_RADIUS)
# One [ID, number of nearby graffiti sightings] record per subway entrance.
results = [
    [subwayID, len(graffitiXYIndices)]
    for subwayID, graffitiXYIndices in izip(subwayIDXY['ID'], graffitiXYIndexPacks)
]
subwayGraffiti = DataFrame(results, columns=['OBJECTID', 'COUNT'])

In [15]:
# Attach the subway entrance attributes to each count. merge() is called
# without `on`, so it joins on the column names the two frames share.
# NOTE(review): OBJECTID is presumably the only shared column -- confirm.
subwayGraffiti = subwayGraffiti.merge(subway)
# Show the five entrances with the highest counts. sort_values replaces
# DataFrame.sort, which current pandas releases no longer provide.
subwayGraffiti.sort_values('COUNT', ascending=False)[['LINE', 'NAME', 'COUNT']].head()