In [1]:
import numpy as np
import pandas as pd
import json
from numba import jit

In [2]:
%%time
arrest_data = pd.read_csv('data/arrest-data-from-2010-to-present.csv')
crime_data = pd.read_csv('data/crime-data-from-2010-to-present.csv')
zip_data = pd.read_csv('data/ziplatlon.csv', dtype={'ZIP': 'str'})

Wall time: 22.2 s


### Dealing with location

In [3]:
def convert_lat_lon(x):
    return (float(eval(x)['latitude']), float(eval(x)['longitude'])) 
convert_lat_lon = np.vectorize(convert_lat_lon)

In [4]:
arrest_data['Lat'], arrest_data['Lon'] = convert_lat_lon(arrest_data['Location'])

In [5]:
zip_data = zip_data[zip_data['ZIP'].apply(lambda x: x[:2]=='90')]

In [6]:
@jit(forceobj=True)
def calc_zip(lat, lon):
    res_zip = None
    res_dis = 999
    
    i = 0
    for i in range(len(zip_zip_list)):
        zzip = zip_zip_list[i]
        zlat = zip_lat_list[i]
        zlon = zip_lon_list[i]
        tdis = (zlat-lat)**2 + (zlon-lon)**2
        if tdis < res_dis:
            res_dis = tdis
            res_zip = zzip
    
    return res_zip

In [7]:
zip_lat_list = zip_data['LAT'].to_list()
zip_lon_list = zip_data['LNG'].to_list()
zip_zip_list = zip_data['ZIP'].to_list()

In [8]:
%%time
arrest_data.iloc[:500].apply( (lambda x: calc_zip( x[23], x[24] )), raw=True, axis=1)

Wall time: 1.57 s


0      90066
1      90272
2      90046
3      90077
4      90292
       ...  
495    90021
496    90290
497    90048
498    90011
499    90062
Length: 500, dtype: object

In [9]:
%%time
arrest_data['ZipCode'] = arrest_data.apply( (lambda x: calc_zip( x[23], x[24] )), raw=True, axis=1)

Wall time: 5min 44s


In [10]:
arrest_data['ZipCode'] = arrest_data['ZipCode'].astype('str')

In [11]:
arrest_data.to_csv('arrest_numba_zipcode.csv', index=False)

In [12]:
pd.read_csv('arrest_numba_zipcode.csv')

Unnamed: 0,Report ID,Arrest Date,Time,Area ID,Area Name,Reporting District,Age,Sex Code,Descent Code,Charge Group Code,...,Location,Zip Codes,Census Tracts,Precinct Boundaries,LA Specific Plans,Council Districts,Neighborhood Councils (Certified),Lat,Lon,ZipCode
0,5666847,2019-06-22T00:00:00.000,1630.0,14,Pacific,1457,44,M,W,24.0,...,"{'latitude': '33.992', 'human_address': '{""add...",24031.0,918.0,1137.0,10.0,10.0,85.0,33.9920,-118.4201,90066.0
1,5666688,2019-06-22T00:00:00.000,1010.0,10,West Valley,1061,8,M,O,,...,"{'latitude': '34.1687', 'human_address': '{""ad...",19339.0,321.0,1494.0,,4.0,10.0,34.1687,-118.5579,90272.0
2,5666570,2019-06-22T00:00:00.000,400.0,15,N Hollywood,1543,31,F,O,22.0,...,"{'latitude': '34.1649', 'human_address': '{""ad...",8890.0,205.0,1332.0,17.0,5.0,39.0,34.1649,-118.3965,90046.0
3,5666529,2019-06-22T00:00:00.000,302.0,17,Devonshire,1738,23,F,W,22.0,...,"{'latitude': '34.2692', 'human_address': '{""ad...",19329.0,69.0,388.0,,2.0,78.0,34.2692,-118.4789,90077.0
4,5666742,2019-06-22T00:00:00.000,1240.0,14,Pacific,1472,28,M,W,8.0,...,"{'latitude': '33.9609', 'human_address': '{""ad...",25075.0,937.0,241.0,10.0,10.0,16.0,33.9609,-118.4504,90292.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1276155,100504416,2010-01-01T00:00:00.000,1430.0,5,Harbor,521,17,M,H,24.0,...,"{'latitude': '33.7406', 'human_address': '{""ad...",3342.0,975.0,1205.0,,15.0,36.0,33.7406,-118.2923,90731.0
1276156,101104731,2010-01-01T00:00:00.000,2215.0,11,Northeast,1118,12,M,H,24.0,...,"{'latitude': '34.1101', 'human_address': '{""ad...",23673.0,370.0,477.0,28.0,11.0,93.0,34.1101,-118.1918,90042.0
1276157,101104211,2010-01-01T00:00:00.000,1310.0,11,Northeast,1128,52,M,H,18.0,...,"{'latitude': '34.1148', 'human_address': '{""ad...",23673.0,359.0,575.0,,9.0,93.0,34.1148,-118.1826,90042.0
1276158,2179817,2010-01-01T00:00:00.000,319.0,14,Pacific,1408,24,M,H,22.0,...,"{'latitude': '34.0301', 'human_address': '{""ad...",23451.0,872.0,1124.0,9.0,6.0,75.0,34.0301,-118.4029,90034.0


In [13]:
crime_data['Lat'], crime_data['Lon'] = convert_lat_lon(crime_data['Location '])

In [14]:
%%time
crime_data.iloc[:500].apply( (lambda x: calc_zip( x['Lat'], x['Lon'] )), axis=1)

Wall time: 150 ms


0      90005
1      90003
2      90042
3      90068
4      90077
       ...  
495    90005
496    90020
497    90057
498    90004
499    90020
Length: 500, dtype: object

In [15]:
%%time
crime_data['ZipCode'] = crime_data.apply( (lambda x: calc_zip( x['Lat'], x['Lon'] )), axis=1)

Wall time: 9min 23s


In [16]:
crime_data['ZipCode'] = crime_data['ZipCode'].astype('str')

In [17]:
crime_data.to_csv('crime_numba_zipcode.csv', index=False)

In [18]:
pd.read_csv('crime_numba_zipcode.csv')

Unnamed: 0,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,...,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location,Lat,Lon,ZipCode
0,102005556,2010-01-25T00:00:00,2010-01-22T00:00:00,2300,20,Olympic,2071,510,VEHICLE - STOLEN,,...,510.0,,,,VAN NESS,15TH,"{'latitude': '34.0454', 'needs_recoding': Fals...",34.0454,-118.3157,90005.0
1,101822289,2010-11-11T00:00:00,2010-11-10T00:00:00,1800,18,Southeast,1803,510,VEHICLE - STOLEN,,...,510.0,,,,88TH,WALL,"{'latitude': '33.9572', 'needs_recoding': Fals...",33.9572,-118.2717,90003.0
2,101105609,2010-01-28T00:00:00,2010-01-27T00:00:00,2230,11,Northeast,1125,510,VEHICLE - STOLEN,,...,510.0,,,,YORK,AVENUE 51,"{'latitude': '34.1211', 'needs_recoding': Fals...",34.1211,-118.2048,90042.0
3,101620051,2010-11-11T00:00:00,2010-11-07T00:00:00,1600,16,Foothill,1641,510,VEHICLE - STOLEN,,...,510.0,,,,EL DORADO,TRUESDALE,"{'latitude': '34.241', 'needs_recoding': False...",34.2410,-118.3987,90068.0
4,101910498,2010-04-07T00:00:00,2010-04-07T00:00:00,1600,19,Mission,1902,510,VEHICLE - STOLEN,,...,510.0,,,,GLENOAKS,DRELL,"{'latitude': '34.3147', 'needs_recoding': Fals...",34.3147,-118.4589,90077.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1993254,192111884,6242019-01-01T00:00:00,2019-06-21T00:00:00,1205,21,Topanga,2143,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),0325,...,442.0,,,,6600 FALLBROOK AV,,"{'latitude': '34.1883', 'needs_recoding': Fals...",34.1883,-118.6274,90290.0
1993255,199905748,1152019-01-01T00:00:00,2019-01-15T00:00:00,1940,18,Southeast,1829,910,KIDNAPPING,2000 1243 0416 0515 1402,...,910.0,,,,2000 E 101ST ST,,"{'latitude': '33.9449', 'needs_recoding': Fals...",33.9449,-118.2367,90002.0
1993256,199907720,2022019-01-01T00:00:00,2019-02-01T00:00:00,2126,11,Northeast,1126,946,OTHER MISCELLANEOUS CRIME,1407 1300 1309 0906,...,946.0,998.0,,,CORINGA,N AVENUE 53,"{'latitude': '34.1244', 'needs_recoding': Fals...",34.1244,-118.1985,90042.0
1993257,199916823,4262019-01-01T00:00:00,2019-02-05T00:00:00,1600,17,Devonshire,1794,668,"EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)",,...,668.0,,,,8400 RESEDA BL,,"{'latitude': '34.2227', 'needs_recoding': Fals...",34.2227,-118.5361,90272.0
