In [1]:
import json
import urllib
import urllib.request

import geopandas
import geopy
import nbformat
import pandas as pd
import pip._vendor.requests
import plotly
import plotly.express as px
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
from shapely.geometry import Point

In [26]:
# JSON endpoint on data.cambridgema.gov; the `$limit=800` query parameter is
# part of the request URL (presumably a row cap -- confirm against the API docs).
url = "https://data.cambridgema.gov/resource/xuad-73uj.json?$limit=800"

# Use the response as a context manager so the underlying connection is closed
# as soon as the body has been read, even if read()/decode() raises.
with urllib.request.urlopen(url) as uh:
    data = uh.read().decode()

print('Retrieved', len(data), 'characters')

Retrieved 193427 characters


In [27]:
#parse the downloaded JSON text into Python objects
myData = json.loads(data)

#load myData into a DataFrame object
df = pd.DataFrame(myData)

#replace the trailing state code "MA" with "Massachusetts" in df['location'].
#The pattern is anchored to the end of the string so that only the final
#", MA" is rewritten; an unanchored ', MA' would also rewrite the start of any
#other comma-separated component that begins with "MA".
df['location'] = df['location'].str.replace(r', MA\s*$', ', Massachusetts', regex=True)

#show the first 5 rows of df
df.head()


Unnamed: 0,file_number,date_of_report,crime_date_time,crime,reporting_area,neighborhood,location
0,2023-02647,2023-03-31T22:52:00.000,03/30/2023 16:00 - 03/31/2023 22:52,Auto Theft,506,Cambridgeport,"Lopez St & Brookline St, Cambridge, Massachusetts"
1,2023-02643,2023-03-31T20:39:00.000,03/31/2023 20:36,Aggravated Assault,1101,North Cambridge,"MASSACHUSETTS AVE & SOMERVILLE AVE, Cambridge,..."
2,2023-02640,2023-03-31T18:14:00.000,03/29/2023 20:00 - 03/31/2023 18:00,Larceny from MV,508,Cambridgeport,"500 PUTNAM AVE, Cambridge, Massachusetts"
3,2023-02634,2023-03-31T13:27:00.000,03/26/2023 17:30,Street Robbery,1002,West Cambridge,"BRATTLE ST & Church St, Cambridge, Massachusetts"
4,2023-02630,2023-03-31T10:36:00.000,03/30/2023 18:00 - 23:00,Auto Theft,901,Peabody,"GARDEN ST & MASSACHUSETTS AVE, Cambridge, Mass..."


In [28]:
locator = Nominatim(user_agent="myGeocoder", timeout=10)

# Throttle calls to one per second (the public Nominatim service asks clients
# not to exceed that rate). With its default settings RateLimiter also retries
# failed calls and, if they keep failing, returns None instead of raising, so a
# single bad lookup no longer aborts the whole loop.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

# Geocode every distinct address only once; many reports share an address, so
# this avoids repeating identical requests.
latitude_by_address = {}
longitude_by_address = {}
for address in df['location'].dropna().unique():
    match = geocode(address)
    if match:
        latitude_by_address[address] = match.latitude
        longitude_by_address[address] = match.longitude

#create columns for latitude and longitude; addresses that could not be
#geocoded are absent from the lookup tables and therefore map to NaN
df['latitude'] = df['location'].map(latitude_by_address)
df['longitude'] = df['location'].map(longitude_by_address)

#drop rows with no latitude or longitude (copy so later cells can add columns
#to an independent frame)
df = df.dropna(subset=['latitude', 'longitude']).copy()
df.head()


Unnamed: 0,file_number,date_of_report,crime_date_time,crime,reporting_area,neighborhood,location,latitude,longitude
2,2023-02640,2023-03-31T18:14:00.000,03/29/2023 20:00 - 03/31/2023 18:00,Larceny from MV,508,Cambridgeport,"500 PUTNAM AVE, Cambridge, Massachusetts",42.358253,-71.109385
4,2023-02630,2023-03-31T10:36:00.000,03/30/2023 18:00 - 23:00,Auto Theft,901,Peabody,"GARDEN ST & MASSACHUSETTS AVE, Cambridge, Mass...",42.378372,-71.123878
5,2023-02614,2023-03-30T17:27:00.000,03/29/2023 14:56 - 15:40,Larceny from Residence,107,East Cambridge,"100 GORE ST, Cambridge, Massachusetts",42.371887,-71.08232
6,2023-02612,2023-03-30T17:20:00.000,03/30/2023 17:19,Commercial Robbery,1204,Highlands,"200 Alewife Brook Pky, Cambridge, Massachusetts",42.388369,-71.142954
7,2023-02618,2023-03-30T17:10:00.000,03/30/2023 13:29,Larceny from Residence,504,Cambridgeport,"0 PEARL ST, Cambridge, Massachusetts",42.364644,-71.102668


In [29]:
#count the number of rows that share each (latitude, longitude) pair;
#transform('count') returns one value per row, aligned with df's index
location_counts = df.groupby(['latitude', 'longitude'])['latitude'].transform('count')

#attach the counts via assign() rather than item assignment: df may be a
#filtered slice of an earlier frame, where `df['count'] = ...` can trigger
#pandas' SettingWithCopyWarning; assign() builds a new frame instead
df = df.assign(count=location_counts)

#show the first 25 rows of df
df.head(25)

Unnamed: 0,file_number,date_of_report,crime_date_time,crime,reporting_area,neighborhood,location,latitude,longitude,count
2,2023-02640,2023-03-31T18:14:00.000,03/29/2023 20:00 - 03/31/2023 18:00,Larceny from MV,508,Cambridgeport,"500 PUTNAM AVE, Cambridge, Massachusetts",42.358253,-71.109385,1
4,2023-02630,2023-03-31T10:36:00.000,03/30/2023 18:00 - 23:00,Auto Theft,901,Peabody,"GARDEN ST & MASSACHUSETTS AVE, Cambridge, Mass...",42.378372,-71.123878,2
5,2023-02614,2023-03-30T17:27:00.000,03/29/2023 14:56 - 15:40,Larceny from Residence,107,East Cambridge,"100 GORE ST, Cambridge, Massachusetts",42.371887,-71.08232,3
6,2023-02612,2023-03-30T17:20:00.000,03/30/2023 17:19,Commercial Robbery,1204,Highlands,"200 Alewife Brook Pky, Cambridge, Massachusetts",42.388369,-71.142954,13
7,2023-02618,2023-03-30T17:10:00.000,03/30/2023 13:29,Larceny from Residence,504,Cambridgeport,"0 PEARL ST, Cambridge, Massachusetts",42.364644,-71.102668,4
8,2023-02608,2023-03-30T15:26:00.000,03/30/2023 15:25 - 15:30,Shoplifting,411,Area 4,"600 MASSACHUSETTS AVE, Cambridge, Massachusetts",42.364916,-71.103308,74
9,2023-02603,2023-03-30T13:59:00.000,03/23/2023 14:02 - 03/30/2023 13:58,Flim Flam,104,East Cambridge,"300 WINDSOR ST, Cambridge, Massachusetts",42.368865,-71.094636,1
10,2023-02600,2023-03-30T12:51:00.000,03/30/2023 11:00 - 12:00,Weapon Violations,304,Inman/Harrington,"800 CAMBRIDGE ST, Cambridge, Massachusetts",42.37223,-71.089916,2
11,2023-02599,2023-03-30T12:17:00.000,03/30/2023 12:16,Shoplifting,501,Cambridgeport,"600 MASSACHUSETTS AVE, Cambridge, Massachusetts",42.364916,-71.103308,74
12,2023-02593,2023-03-30T07:52:00.000,03/29/2023 20:00 - 03/30/2023 07:00,Larceny of Bicycle,301,Inman/Harrington,"0 HUNTING ST, Cambridge, Massachusetts",42.373378,-71.090988,1


In [21]:
# Read the Mapbox access token from a local file that is kept out of the
# notebook. The context manager closes the file handle promptly, and strip()
# removes the trailing newline most editors append to the file.
with open(".mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

In [77]:
# One marker per row of df, drawn on OpenStreetMap tiles.
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    hover_name='location',
    mapbox_style='open-street-map',
    zoom=11,
)
fig.update_layout(
    title='Cambridge Crime Map',
    autosize=True,
    hovermode='closest',
    showlegend=False,
)

#update fig to color by df['count']
fig.update_traces(
    marker=dict(
        size=6,
        color=df['count'],
        # 'ylorrd_r' combined with reversescale=True is a double reversal,
        # i.e. the plain 'ylorrd' scale; spell that out directly.
        colorscale='ylorrd',
        opacity=.8,
        # Marker colorbars are hidden by default; without showscale the
        # colorbar title below would never be displayed.
        showscale=True,
        colorbar=dict(title='Number of Crimes'),
    ),
    selector=dict(mode='markers'),
)

#update fig to show the number of crimes in the hover text
fig.update_traces(
    hovertemplate=(
        '<b>%{hovertext}</b><br><br>'
        'Number of Crimes: %{marker.color}<br>'
        '<extra></extra>'
    )
)

fig.show()
