In [1]:
import ijson
import pandas as pd
import numpy as np
from pyproj import Proj, transform
import folium
from folium import plugins

filename = 'cadcalls.json'

# Open in binary mode: ijson expects a bytes stream, and a text-mode handle
# would decode the file with the platform's default encoding instead of the
# JSON encoding.
with open(filename, 'rb') as f:
    objects = ijson.items(f, "item")
    # Materialise every element of the top-level JSON array. Despite its name,
    # `columns` holds one dict per record; later cells index into it.
    columns = list(objects)

# Show the field names carried by the first record.
print(columns[0].keys())

dict_keys(['Record ID', 'Call Type', 'Description', 'Reporting Officer', 'Unit ID', 'Report Created', 'Location', 'Police District', 'Beat', 'Grid', 'X Coordinate', 'Y Coordinate', 'Day of Week', 'Occurence Date', 'Occurence Time', 'Received Date', 'Received Time', 'Dispatch Date', 'Dispatch Time', 'Enroute Date', 'Enroute Time', 'At Scene Date', 'At Scene Time', 'Clear Date', 'Clear Time'])


In [2]:
# Iterating a dict yields its keys, so this captures the first record's field
# names in their original order.
first_record = columns[0]
column_names = [field for field in first_record]
print(column_names)

['Record ID', 'Call Type', 'Description', 'Reporting Officer', 'Unit ID', 'Report Created', 'Location', 'Police District', 'Beat', 'Grid', 'X Coordinate', 'Y Coordinate', 'Day of Week', 'Occurence Date', 'Occurence Time', 'Received Date', 'Received Time', 'Dispatch Date', 'Dispatch Time', 'Enroute Date', 'Enroute Time', 'At Scene Date', 'At Scene Time', 'Clear Date', 'Clear Time']


In [3]:
# Stream the file again and flatten each record dict into a list of values
# ordered like `column_names`, so the rows line up with the DataFrame columns.
# Binary mode is used because ijson expects a bytes stream; text mode would
# decode with the platform's default encoding.
with open(filename, 'rb') as f:
    objects = ijson.items(f, 'item')
    data = [[row[item] for item in column_names] for row in objects]

# Spot-check the second extracted row.
print(data[1])

['6167911', '952PP', 'INCOMPLETE CALL FROM PAY PHONE', '', '', 'N', '1115 3RD ST', '3', '3M', '0742', '6703668', '1973944', 'Fri', '01/01/2016', '02:12:47', '01/01/2016', '02:12:47', '', '', '', '', '', '', '01/01/2016', '02:22:59']


In [4]:
# One row per record, one column per field name.
calls = pd.DataFrame(data=data, columns=column_names)

# Number of records for each call description, most frequent first.
description_counts = calls['Description'].value_counts()
description_counts

DISTURBANCE-CLARIFY                                  37286
TRAFFIC STOP                                         26872
SUSPICIOUS SUBJECT/CIRCUMSTANCE-IN PROGRESS          22151
SUBJECT STOP                                         19391
ALL UNITS BROADCAST                                  19266
INCOMPLETE CALL FOR POLICE                           16634
WELFARE CHECK                                        15487
SUSPICIOUS OCCUPIED VEHICLE                          15068
ALARM-SECURE NO EVID OF CRIME                         8175
ERRAND                                                7316
BUSINESS CHECK                                        6961
MISDEMEANOR ASSAULT-IN PROGRESS                       6844
ALARM-CANCEL                                          6020
REPORT NUMBER ASSIGNMENT                              5670
VEHICLE ACCIDENT-NO OR UNKNOWN INJURIES               5416
DISTURBANCE-NOISE                                     5176
DISTURBANCE-FAMILY                                    49

In [5]:
# Number of records for each weekday label, most frequent first.
day_of_week_counts = calls['Day of Week'].value_counts()
day_of_week_counts

Fri    51619
Thu    49484
Wed    48426
Tue    48107
Sat    47934
Mon    44964
Sun    41979
Name: Day of Week, dtype: int64

In [6]:
def parse_float(x, default=0):
    """Convert ``x`` to ``float``, returning ``default`` when it cannot be converted.

    Parameters
    ----------
    x : object
        Value to convert, e.g. a numeric string.
    default : object, optional
        Value returned when conversion fails. Defaults to ``0``.

    Returns
    -------
    float or object
        ``float(x)`` on success, otherwise ``default`` unchanged.
    """
    try:
        return float(x)
    # Only swallow the failures float() itself signals (bad text, wrong type,
    # out-of-range integer); anything else is a real bug and should surface.
    except (TypeError, ValueError, OverflowError):
        return default
# Replace both coordinate columns with their parse_float-converted values.
for coord_col in ('X Coordinate', 'Y Coordinate'):
    calls[coord_col] = calls[coord_col].apply(parse_float)

In [7]:
# Keep only the rows whose description is exactly the stolen-vehicle report.
is_stolen_report = calls['Description'] == 'STOLEN VEHICLE-REPORT'
stolen = calls[is_stolen_report]

print(stolen)

       Record ID Call Type            Description Reporting Officer Unit ID  \
103      5896362    503RPT  STOLEN VEHICLE-REPORT              0326   EXP60   
149      5895748    503RPT  STOLEN VEHICLE-REPORT                             
319      5896213    503RPT  STOLEN VEHICLE-REPORT              6201           
708      5895633    503RPT  STOLEN VEHICLE-REPORT                             
894      5924237    503RPT  STOLEN VEHICLE-REPORT              6252           
1639     5898084    503RPT  STOLEN VEHICLE-REPORT              0326   EXP60   
1824     5898357    503RPT  STOLEN VEHICLE-REPORT              0326   EXP60   
2198     5898642    503RPT  STOLEN VEHICLE-REPORT                             
2284     5906544    503RPT  STOLEN VEHICLE-REPORT              6256           
2435     5899077    503RPT  STOLEN VEHICLE-REPORT             R6262           
2623     5899962    503RPT  STOLEN VEHICLE-REPORT              8122           
2751     5899458    503RPT  STOLEN VEHICLE-REPORT   

In [8]:
# Source and target projections for converting the dataset's X/Y coordinates.
# NOTE(review): preserve_units=True presumably keeps EPSG:2226's native units
# instead of converting to metres; confirm against the pyproj docs.
inProj = Proj(init='epsg:2226', preserve_units=True)
outProj = Proj(init='epsg:4326')

# Sanity check on one hard-coded point; the printed pair is (longitude, latitude).
x1 = 6722570
y1 = 1992314
x2, y2 = transform(inProj, outProj, x1, y1)
print(x2, y2)

-121.43672656313065 38.63153998649104


In [10]:
# Upper bound on plotted rows; each row becomes its own folium.Marker.
MAX_MARKERS = 1000

calls_map = folium.Map(location=[38.58, -121.5], zoom_start=10)

# Use the top-level MarkerCluster when this folium version provides it,
# otherwise fall back to the one in folium.plugins (newer releases only
# expose it there).
marker_cluster_cls = getattr(folium, 'MarkerCluster', None) or plugins.MarkerCluster
marker_cluster = marker_cluster_cls().add_to(calls_map)

coords_list = []
for _, row in stolen.iloc[:MAX_MARKERS].iterrows():
    x1, y1 = row['X Coordinate'], row['Y Coordinate']
    # Unparseable coordinates were stored as 0 by parse_float; projecting them
    # would drop a marker at a bogus location and skew the heatmap built from
    # coords_list, so skip those rows (and any NaN values).
    if pd.isna(x1) or pd.isna(y1) or x1 == 0 or y1 == 0:
        continue
    # transform returns (longitude, latitude); folium expects (latitude, longitude).
    long, lat = transform(inProj, outProj, x1, y1)
    coords = (lat, long)
    coords_list.append(coords)
    folium.Marker([lat, long], popup=row["Description"]).add_to(marker_cluster)

calls_map.save('callsnew.html')
calls_map

In [11]:
# Base map for the heat layer.
calls_heatmap = folium.Map(
    location=[38.58, -121.5],
    tiles='Stamen Toner',
    zoom_start=12,
)

# HeatMap takes [lat, lon] lists; coords_list holds (lat, lon) tuples.
heat_points = [[lat, lon] for lat, lon in coords_list]
heat_layer = plugins.HeatMap(heat_points)
calls_heatmap.add_child(heat_layer)

calls_heatmap.save("heatmap.html")
calls_heatmap