# Cleaning Greenburgh, NY Police Data Sets

In [1]:
import geocoder
import pandas as pd
from pyproj import Proj, transform

In [3]:
filename_accidents = '~/git/greenburgh-town-supervisor-2019/src/data/police/vehicle_accidents_2018_2019.csv'
filename_larceny = '~/git/greenburgh-town-supervisor-2019/src/data/police/vehicle_larceny_2018_2019.csv'
output_filename_larceny = '~/git/greenburgh-town-supervisor-2019/output/police/vehicle_larceny_2018_2019_cleaned.csv'
output_filename_accidents = '~/git/greenburgh-town-supervisor-2019/output/police/vehicle_accidents_2018_2019_cleaned.csv'

In [7]:
vehicle_accidents = pd.read_csv(filename_accidents)
vehicle_larceny = pd.read_csv(filename_larceny)

In [4]:
def convert_scpc_lat_long(x, y, inproj_epsg=2260, outproj_epsg=4326):
    """
    Converting SCPC to Latitude/Longitude
    
    Greenburgh, NY is EPSG 2260: (NY East: https://www.dot.ny.gov/divisions/engineering/design/design-services/land-survey/repository/Chapter%206%20NYSDOT%20Coordinate%20Systems%20and%20Datums.pdf)
    Lookup EPSG Code here: https://spatialreference.org/ref/epsg/2260/
    """
    inProj = Proj(init='epsg:{}'.format(inproj_epsg), preserve_units=True)
    outProj = Proj(init='epsg:{}'.format(outproj_epsg))
    longitude, latitude = transform(inProj, outProj, x, y)
    return (latitude, longitude)

In [5]:
def geocode(address):
    """
    Geocode using Open Street Maps taking in Address as an argument
    """
    print(address)
    g = geocoder.osm(address)
    if g.status == 'ERROR - No results found':
        return None
    else:
        return (g.json['lat'], g.json['lng'])

## Cleaning Vehicle Larceny Data

In [6]:
vehicle_larceny['spcs_x_converted'] = vehicle_larceny.spcs_x / 100.00
vehicle_larceny['spcs_y_converted'] = vehicle_larceny.spcs_y / 100.00

In [7]:
vehicle_larceny.head()

Unnamed: 0,case_number,report_date,offense,location,status,status_time,officer,spcs_x,spcs_y,spcs_x_converted,spcs_y_converted
0,2018000093,1/3/18 16:14,GRAND LARCENY-4TH,480 WHITE PLAINS RD,INVESTIGATION,1/3/18,"BURNETT, G. A.",67314962,81294812,673149.62,812948.12
1,2018000445,1/13/18 0:08,PETIT LARCENY,777 OLD SAW MILL RIVER,INVESTIGATION,1/13/18,"MARZELLA, A.",67965338,81811000,679653.38,818110.0
2,2018000545,1/16/18 14:39,PETIT LARCENY,610 WHITE PLAINS RD,INVESTIGATION,1/16/18,"NECZESNY, M. M.",67546606,81231181,675466.06,812311.81
3,2018000926,1/27/18 12:22,PETIT LARCENY,9 HOLLAND PL,INVESTIGATION,1/27/18,"GARDNER, K. T.",68660119,79619500,686601.19,796195.0
4,2018000936,1/27/18 14:36,PETIT LARCENY,30 HOLLAND PL,INVESTIGATION,1/27/18,"GARDNER, K. T.",68676681,79645225,686766.81,796452.25


In [8]:
vehicle_larceny['lat_long'] = vehicle_larceny.apply(lambda x: convert_scpc_lat_long(
    x.spcs_x_converted, x.spcs_y_converted), axis=1)
vehicle_larceny[['latitude', 'longitude']] = pd.DataFrame(
    vehicle_larceny['lat_long'].tolist(), index=vehicle_larceny.index) 

In [9]:
vehicle_larceny.loc[vehicle_larceny.offense.str.contains('Grand', case=False), 'offense_type'] = 'Grand Larceny'
vehicle_larceny.loc[vehicle_larceny.offense.str.contains('Petit', case=False), 'offense_type'] = 'Petit Larceny'
vehicle_larceny.rename(columns={'status_time': 'status_date'}, inplace=True)

In [10]:
# Pruning final dataset
vehicle_larceny_final = vehicle_larceny[[
    'case_number',
    'report_date',
    'offense_type',
    'location',
    'status',
    'status_date',
    'officer',
    'latitude',
    'longitude',
]]
vehicle_larceny_final.head()

Unnamed: 0,case_number,report_date,offense_type,location,status,status_date,officer,latitude,longitude
0,2018000093,1/3/18 16:14,Grand Larceny,480 WHITE PLAINS RD,INVESTIGATION,1/3/18,"BURNETT, G. A.",41.063325,-73.843501
1,2018000445,1/13/18 0:08,Petit Larceny,777 OLD SAW MILL RIVER,INVESTIGATION,1/13/18,"MARZELLA, A.",41.077356,-73.81977
2,2018000545,1/16/18 14:39,Petit Larceny,610 WHITE PLAINS RD,INVESTIGATION,1/16/18,"NECZESNY, M. M.",41.061531,-73.835118
3,2018000926,1/27/18 12:22,Petit Larceny,9 HOLLAND PL,INVESTIGATION,1/27/18,"GARDNER, K. T.",41.017055,-73.795212
4,2018000936,1/27/18 14:36,Petit Larceny,30 HOLLAND PL,INVESTIGATION,1/27/18,"GARDNER, K. T.",41.017758,-73.794604


In [11]:
vehicle_larceny_final.to_csv(output_filename_larceny, index=False)

## Cleaning Vehicle Accident Data

In [None]:
def process_vehicle_accident_data(vehicle_accidents_df, vehicle_accidents_geocoded_df=None):
    """
    Process Vehicle Accident Data
    
    If given a data frame that is already geocoded, check against it to make sure we're not duplicating 
    reports that geocode (since there is rate limiting)
    """
    if vehicle_accidents_geocoded_df:
        vehicle_accidents_df = vehicle_accidents.loc[~vehicle_accidents.report_number.isin(vehicle_accidents_cleaned.report_number)]
    vehicle_accidents_df.loc[pd.isnull(vehicle_accidents_df.intersection), 'address'] = vehicle_accidents_df.roadway + ', Greenburgh, NY'
    vehicle_accidents_df.loc[pd.notnull(vehicle_accidents_df.intersection), 'address'] = \
        vehicle_accidents_df.roadway + ' AND ' + vehicle_accidents_df.intersection + ', Greenburgh, NY'
    
    reports = []
    for index, row in vehicle_accidents.iterrows():
        report = row.to_frame().transpose()
        result = geocode(row['address'])
        if result:
            lat, long = result
            report['latitude'] = lat
            report['longitude'] = long
            reports.append(report)
            final_output = pd.concat(reports)
            final_output.to_csv(output_filename_accidents, index=False)
    
    return final_output
    

In [4]:
vehicle_accidents_cleaned = pd.read_csv(output_filename_accidents)

In [5]:
vehicle_accidents_cleaned.head()

Unnamed: 0,date,time,date_time,roadway,intersection,report_number,address,latitude,longitude
0,1/4/18,855,1/4/18 8:55,HILLCREST RD,,2018000119,"HILLCREST RD, Greenburgh, NY",41.021352,-73.802
1,1/4/18,2038,1/4/18 20:38,25 JEAN LN,,2018000146,"25 JEAN LN, Greenburgh, NY",41.028349,-73.825272
2,1/7/18,1432,1/7/18 14:32,280 SECOR RD,,2018000227,"280 SECOR RD, Greenburgh, NY",41.026762,-73.827426
3,1/10/18,804,1/10/18 8:04,UNDERHILL RD,,2018000321,"UNDERHILL RD, Greenburgh, NY",41.001765,-73.814421
4,1/10/18,1012,1/10/18 10:12,UNDERHILL RD,,2018000327,"UNDERHILL RD, Greenburgh, NY",41.001765,-73.814421


In [12]:
vehicle_accidents.head()

Unnamed: 0,date,time,date_time,roadway,intersection,report_number
0,1/4/18,855,1/4/18 8:55,HILLCREST RD,,2018000119
1,1/4/18,2038,1/4/18 20:38,25 JEAN LN,,2018000146
2,1/5/18,1711,1/5/18 17:11,PAYNE ST,N HIGH ST,2018000168
3,1/7/18,1432,1/7/18 14:32,280 SECOR RD,,2018000227
4,1/10/18,804,1/10/18 8:04,UNDERHILL RD,,2018000321


In [10]:
vehicle_accidents.loc[~vehicle_accidents.report_number.isin(vehicle_accidents_cleaned.report_number)]

Unnamed: 0,date,time,date_time,roadway,intersection,report_number
2,1/5/18,1711,1/5/18 17:11,PAYNE ST,N HIGH ST,2018000168
12,1/17/18,835,1/17/18 8:35,SAW MILL RIVER RD,410 SAW MILL RIVER,2018000570
16,1/24/18,815,1/24/18 8:15,TARRYTOWN RD,ROSEMONT BLVD,2018000819
17,1/24/18,1725,1/24/18 17:25,GRASSLANDS RD,CLEARBROOK RD,2018000838
21,1/30/18,1940,1/30/18 19:40,S CENTRAL AV,MT JOY AV,2018001062
23,2/2/18,2209,2/2/18 22:09,JACKSON AV,SPRAIN RD,2018001158
24,2/5/18,1200,2/5/18 12:00,OLD JACKSON AV,SPRAIN VALLEY RD,2018001228
25,2/5/18,1555,2/5/18 15:55,W HARTSDALE AV,SECOR RD,2018001235
30,2/17/18,1805,2/17/18 18:05,TARRYTOWN RD,MANHATTAN AV,2018001636
32,2/17/18,1915,2/17/18 19:15,ARDSLEY RD,EDGEMONT RD,2018001641


In [13]:
vehicle_accidents.loc[pd.isnull(vehicle_accidents.intersection), 'address'] = vehicle_accidents.roadway + ', Greenburgh, NY'
vehicle_accidents.loc[pd.notnull(vehicle_accidents.intersection), 'address'] = \
     vehicle_accidents.roadway + ' AND ' + vehicle_accidents.intersection + ', Greenburgh, NY'

In [14]:
vehicle_accidents.head()

Unnamed: 0,date,time,date_time,roadway,intersection,report_number,address
0,1/4/18,855,1/4/18 8:55,HILLCREST RD,,2018000119,"HILLCREST RD, Greenburgh, NY"
1,1/4/18,2038,1/4/18 20:38,25 JEAN LN,,2018000146,"25 JEAN LN, Greenburgh, NY"
2,1/5/18,1711,1/5/18 17:11,PAYNE ST,N HIGH ST,2018000168,"PAYNE ST AND N HIGH ST, Greenburgh, NY"
3,1/7/18,1432,1/7/18 14:32,280 SECOR RD,,2018000227,"280 SECOR RD, Greenburgh, NY"
4,1/10/18,804,1/10/18 8:04,UNDERHILL RD,,2018000321,"UNDERHILL RD, Greenburgh, NY"


In [15]:
reports = []
for index, row in vehicle_accidents.iterrows():
    report = row.to_frame().transpose()
    result = geocode(row['address'])
    if result:
        lat, long = result
        report['latitude'] = lat
        report['longitude'] = long
        reports.append(report)
        final_output = pd.concat(reports)
        final_output.to_csv(output_filename_accidents, index=False)
    

HILLCREST RD, Greenburgh, NY
25 JEAN LN, Greenburgh, NY
PAYNE ST AND N HIGH ST, Greenburgh, NY
280 SECOR RD, Greenburgh, NY
UNDERHILL RD, Greenburgh, NY
UNDERHILL RD, Greenburgh, NY
180 PINEWOOD RD, Greenburgh, NY
W HARTSDALE AV, Greenburgh, NY
385 N CENTRAL AV, Greenburgh, NY
SEARS AV, Greenburgh, NY
55 FIELDSTONE DR, Greenburgh, NY
JACKSON AV, Greenburgh, NY
SAW MILL RIVER RD AND 410 SAW MILL RIVER, Greenburgh, NY
SPRAIN RD, Greenburgh, NY
JACKSON AV, Greenburgh, NY
TARRYTOWN RD AND 188 TARRYTOWN RD, Greenburgh, NY
TARRYTOWN RD AND ROSEMONT BLVD, Greenburgh, NY
GRASSLANDS RD AND CLEARBROOK RD, Greenburgh, NY
38 TAYLOR RD, Greenburgh, NY
50 JACKSON AV, Greenburgh, NY
290 SPRAIN RD, Greenburgh, NY
S CENTRAL AV AND MT JOY AV, Greenburgh, NY
E HARTSDALE AV, Greenburgh, NY
JACKSON AV AND SPRAIN RD, Greenburgh, NY
OLD JACKSON AV AND SPRAIN VALLEY RD, Greenburgh, NY
W HARTSDALE AV AND SECOR RD, Greenburgh, NY
SPRAIN RD, Greenburgh, NY
SPRAIN RD, Greenburgh, NY
SALEM RD, Greenburgh, NY
25 OL

S CENTRAL AV AND ARDSLEY RD, Greenburgh, NY
SAW MILL RIVER RD AND PAYNE ST, Greenburgh, NY
W HARTSDALE AV AND MAPLEWOOD RD, Greenburgh, NY
SAW MILL RIVER RD AND PAYNE ST, Greenburgh, NY
WHITE PLAINS RD, Greenburgh, NY
S CENTRAL AV, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
ARDSLEY RD, Greenburgh, NY
S CENTRAL AV, Greenburgh, NY
JACKSON AV, Greenburgh, NY
SUNRISE LN, Greenburgh, NY
WHITE PLAINS RD AND DUNNINGS DR, Greenburgh, NY
300 S CENTRAL AV, Greenburgh, NY
W HARTSDALE AV AND DOBBS FERRY RD, Greenburgh, NY
480 WHITE PLAINS RD AND OLD WHITE PLAINS R, Greenburgh, NY
GRASSLANDS RD AND SAW MILL RD, Greenburgh, NY
100 MANHATTAN AV AND OAK ST, Greenburgh, NY
JACKSON AV, Greenburgh, NY
267 SAW MILL RIVER RD AND WAREHOUSE LN, Greenburgh, NY
114 S CENTRAL AV, Greenburgh, NY
381 KNOLLWOOD RD, Greenburgh, NY
DOBBS FERRY RD, Greenburgh, NY
TARRYTOWN AND 49 TARRYTOWN RD, Greenburgh, NY
HILLSDIE AV AND I 287, Greenburgh, NY
407 TARRYTOWN RD AND CROSSROADS SHOPP, Greenburgh, NY
GRASSLANDS RD AND

TARRYTOWN RD, Greenburgh, NY
SAW MILL RIVER RD, Greenburgh, NY
SAW MILL RIVER RD, Greenburgh, NY
JACKSON AVE, Greenburgh, NY
GRASSLANDS RD, Greenburgh, NY
44 ROSEMONT BLVD, Greenburgh, NY
OLD TARRYTOWN RD AND HILLSIDE AVE, Greenburgh, NY
SAW MILL RIVER RD, Greenburgh, NY
KNOLLWOOD RD AND GRASSLANDS RD, Greenburgh, NY
GRASSLANDS RD AND LEGION DR, Greenburgh, NY
PARKING LOT 540 SAW M AND SAW MILL RIVER RD, Greenburgh, NY
TARRYTOWN RD AND OLD KENSICO RD, Greenburgh, NY
OLD TARRYTOWN RD AND HILLSIDE AVE, Greenburgh, NY
TAXTER RD, Greenburgh, NY
KNOLLWOOD RD AND NYS I-287 EASTBOUN, Greenburgh, NY
DOBBS FERRY RD, Greenburgh, NY
GRASSLANDS RD AND VIRGINIA RD, Greenburgh, NY
DOBBS FERRY RD, Greenburgh, NY
NOTH RD, Greenburgh, NY
PAYNE ST AND CABOT AVE, Greenburgh, NY
TARRYTOWN RD AND FULTON ST, Greenburgh, NY
TARRYTOWN RD AND KNOLLWOOD RD, Greenburgh, NY
1 WAREHOUSE LN, Greenburgh, NY
E HARTSDALE AVE, Greenburgh, NY
SAW MILL RIVER ROAD AND HUNTER LANE, Greenburgh, NY
WHITE PLAINS RD AND DUNNIN

JACKSON AVE, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
TARRYTOWN RD AND HILLSIDE AVE, Greenburgh, NY
TARRYTOWN RD AND HILLSIDE AVE, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
S CENTRAL AV AND INVERNESS RD, Greenburgh, NY
WALBROOKE RD, Greenburgh, NY
JACKSON AVE, Greenburgh, NY
HILLTOP LN AND SURREY WAY, Greenburgh, NY
W HARTSDALE AVE, Greenburgh, NY
CROSS HILL RD, Greenburgh, NY
18 N CENTRAL AV, Greenburgh, NY
N CENTRAL AV AND ALEXANDER AV, Greenburgh, NY
N CENTRAL AV AND HARVARD DR, Greenburgh, NY
LYNWOOD RD, Greenburgh, NY
LAWTON AVE AND WILSON ST, Greenburgh, NY
SAW MILL RIVER RD AND SECOR RD, Greenburgh, NY
W.HARTSDALE AV AND N.WASHINGTON AV, Greenburgh, NY
111 WOOD AV, Greenburgh, NY
ARDSLEY RD AND OLD ARMY RD, Greenburgh, NY
JACKSON AVE, Greenburgh, NY
ARDSLEY RD AND S CENTRAL AV, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
S CENTRAL AVE, Greenburgh, NY
CENTRAL AVE AND MOUNT JOY AVE, Greenburgh, NY
111 WOOD AV, Greenburgh, NY
TARRYTOWN RD AND DOBBS

SAW MILL RIVER RD AND OLD SAW MILL RIVER, Greenburgh, NY
S CENTRAL AV AND E HARTSDALE AV, Greenburgh, NY
S CENTRAL AV, Greenburgh, NY
COUNTY CENTER RD, Greenburgh, NY
JOYCE RD AND DARWOOD PL, Greenburgh, NY
GRASSLANDS RD AND SAW MILL RIVER RD, Greenburgh, NY
SAW MILL RIVER RD AND PAYNE ST, Greenburgh, NY
SAW MILL RIVER RD AND HUNTER LN, Greenburgh, NY
368 S CENTRAL AV, Greenburgh, NY
W HARTSDALE AV AND KEATS AV, Greenburgh, NY
1 WAREHOUSE LN, Greenburgh, NY
SAW MILL RIVER RD AND PAYNE ST, Greenburgh, NY
OLD TARRYTOWN RD, Greenburgh, NY
TARRYTOWN RD AND MANHATTAN AV, Greenburgh, NY
S CENTRAL AV, Greenburgh, NY
ARDSLEY RD AND FORT HILL RD, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
DOBBS FERRY RD AND TERRACE ST, Greenburgh, NY
SAW MILL RIVER RD, Greenburgh, NY
ARDSLEY RD, Greenburgh, NY
299 N CENTRAL AV AND CHATTERTON PKWY, Greenburgh, NY
TARRYTOWN RD AND MANHATTAN AV, Greenburgh, NY
KNOLLWOOD RD AND I-287, Greenburgh, NY
KNOLLWOOD RD, Greenburgh, NY
DOBBS FERRY RD AND SPRAIN BROOK PKW,

SPRAIN RD, Greenburgh, NY
N WASHINGTON AV, Greenburgh, NY
FULTON ST, Greenburgh, NY
W HARTSDALE AV AND RIDGE RD, Greenburgh, NY
1-BLK SPRAIN RD, Greenburgh, NY
SPRAIN RD, Greenburgh, NY
EDGEWOOD RD AND SECOR RD, Greenburgh, NY
JACKSON AV AND SPRAIN BROOK PKW, Greenburgh, NY
OLD KENSICO RD, Greenburgh, NY
COLUMBIA AV, Greenburgh, NY
SPRAIN RD AND ARDSLEY RD, Greenburgh, NY
NEPPERHAN AV, Greenburgh, NY
1134 DOBBS FERRY RD AND FORREST BLVD, Greenburgh, NY
OLD TARRYTOWN RD AND MAPLE ST, Greenburgh, NY
UNDERHILL RD, Greenburgh, NY
HILLSIDE AV, Greenburgh, NY
353 COUNTY CENTER RD AND WINNETOU RD, Greenburgh, NY
JACKSON AVE., Greenburgh, NY
OLD COLONY RD, Greenburgh, NY
CHESTNUT ST, Greenburgh, NY
S CENTRAL AVE, Greenburgh, NY
STONELEIGH CLOSE, Greenburgh, NY
TERRACE ST, Greenburgh, NY
NORTH RD, Greenburgh, NY
LAWRENCE DRIVE AND VIRGINIA ROAD, Greenburgh, NY
BEECH ST, Greenburgh, NY
TAXTER RD, Greenburgh, NY
250 CLEARBROOK RD, Greenburgh, NY
320 SAW MILL RIVER RD, Greenburgh, NY
WAREHOUSE LAN

500 W HARTSDALE AVE, Greenburgh, NY
45 TARRYTOWN RD, Greenburgh, NY
W. HARTSDALE AVE AND S. WASHINGTON AVE, Greenburgh, NY
CENTRAL AVE AND S. HEALY AVE, Greenburgh, NY
ARDSLEY RD AND FORT HILL RD, Greenburgh, NY
FORT HILL RD, Greenburgh, NY
MOUNTAIN RD, Greenburgh, NY
SECOR ROAD, Greenburgh, NY
SAWMILL RVR RD, Greenburgh, NY
W HARTSDALE AV AND RIDGE RD, Greenburgh, NY
201 TARRYTOWN RD, Greenburgh, NY
JACKSON AVE, Greenburgh, NY
S. CENTRAL AVE AND E. HARTSDALE AVE, Greenburgh, NY
HILLSIDE AVE, Greenburgh, NY
TARRYTOWN RD., Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
W HARTSDALE AVE AND N WASHINGTON AVE, Greenburgh, NY
JACKSON AVE, Greenburgh, NY
VIRGINIA RD, Greenburgh, NY
WHITE PLAINS RD AND NYS I-287 ON RAMP, Greenburgh, NY
TARRYTOWN RD, Greenburgh, NY
SECOR ROAD, Greenburgh, NY
24 TARRYTOWN RD AND AQUEDUCT RD, Greenburgh, NY
JACKSON AVE AND SPRAIN PKWY S/B EN, Greenburgh, NY
WHITE PLAINS RD, Greenburgh, NY
JACKSON AVE, Greenburgh, NY
SAW MILL RVR RD, Greenburgh, NY
DOBBS FERRY RD AND

Status code 429 from https://nominatim.openstreetmap.org/search: ERROR - 429 Client Error: Too Many Requests for url: https://nominatim.openstreetmap.org/search?q=BENEDICT+AVE+AND+MAPLE+AVE%2C+Greenburgh%2C+NY&format=jsonv2&addressdetails=1&limit=1


TypeError: 'NoneType' object is not subscriptable