In [None]:
# `IPython.core.display` is a deprecated import path for these names (deprecated
# since IPython 7.14; newer releases may not export them there at all).
# `IPython.display` is the supported public location.
from IPython.display import display, HTML
# Renders a "Run all" button; submitting the form invokes the notebook front-end's
# JavaScript call `IPython.notebook.execute_cells_below()`.
HTML('''<script> </script> <form action="javascript:IPython.notebook.execute_cells_below()"><input type="submit" id="toggleButton" value="Run all"></form>''')


In [None]:
#Necessary to run on binder
!pip install pandas
!pip install matplotlib
!pip install plotly

In [None]:
import requests
import json
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import plotly.express as px
import ast
import plotly.graph_objects as go
from itertools import cycle
import plotly
import datetime 
from IPython.display import HTML

In [None]:
# Install cartopy (imported in a later cell) via conda; `--yes` skips the
# interactive confirmation prompt.
!conda install cartopy --yes

In [None]:
#!pip install cartopy

In [None]:
# !pip uninstall shapely -y

# !pip install shapely --no-binary shapely

In [None]:
import cartopy.crs as ccrs

In [None]:
#Filtering out Seattle crime data for the year 2022
# The `$where` clause keeps rows whose report_datetime lies between the two
# timestamps; `$limit` caps the result at 100000 rows.
# NOTE(review): the window starts at 12:00 on Jan 1 and ends at 14:00 on Dec 31, so
# reports from the first and last hours of 2022 are excluded — confirm this is intended.
url = "https://data.seattle.gov/resource/tazs-3rd5.json?$where=report_datetime between '2022-01-01T12:00:00' and '2022-12-31T14:00:00' &$limit=100000"

In [None]:
#JSON Response
# Fail fast on HTTP errors instead of feeding an error payload into the DataFrame,
# and bound the wait so a stalled connection cannot hang the notebook.
http_response = requests.get(url, timeout=120)
http_response.raise_for_status()
response = http_response.json()

# Preview only the first records; echoing the full payload (up to 100000 rows)
# bloats the saved notebook and hides the narrative.
response[:3]

In [None]:
# Build the working DataFrame from the list of JSON records.
df = pd.DataFrame(data=response)

# Number of records returned by the API
print(len(df.index))

# Preview of the first rows
df.head(n=5)

In [None]:
# Number of missing values in each column
df.isna().sum()

In [None]:
# Data type of each column as loaded
df.dtypes

## Data Cleaning

In [None]:
#All the columns are of type object. In order to plot the location, the lat and lon need to be converted to float type.
# pd.to_numeric parses the values directly (no str -> float double cast); a value
# that cannot be parsed becomes NaN instead of aborting the whole cell.
for coord in ('longitude', 'latitude'):
    df[coord] = pd.to_numeric(df[coord], errors='coerce').astype('float')

df.dtypes

### Displaying the number of crimes committed against a category.

In [None]:
# Count the reports filed under each "crime against" category.
category_key = 'crime_against_category'
df1 = df.groupby(category_key)[category_key].count().to_frame()
df_renamed = df1.rename(columns={category_key: 'Count'})
df_cat = df_renamed.reset_index()

# Presentation-friendly header for the category column
df_r = df_cat.rename(columns={category_key: 'Crime Category Against'})
df_r

In [None]:
# Interactive bar chart: one bar per category, coloured by category.
bar_title = dict(
    text='Crime counts against various categories',
    y=0.93,
    x=0.46,
    xanchor='center',
    yanchor='top',
)

fig = px.bar(
    df_r,
    x='Crime Category Against',
    y='Count',
    color='Crime Category Against',
    hover_data=["Count"],
    width=800,
    height=500,
)
fig.update_layout(title=bar_title)

#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

The bar graph above displays the number of crimes committed against each category. A bar graph was chosen because it separates the categories clearly and is easy to read. Although the graph is interactive and can be zoomed in to see the category "NOT_A_CRIME", the count for this category is so small compared to the other categories that it is almost impossible to identify with the naked eye, which is a small flaw. We will attempt to improve this in the future.

In [None]:
# Print every longitude lying west of -123.3.
west_outliers = [lon for lon in df['longitude'] if float(lon) < -123.3]
for lon in west_outliers:
    print (lon)


### Locating all the crimes committed in Seattle in 2022

In [None]:
# Bounding box passed to set_extent (x0, x1, y0, y1): the coordinates of Seattle.
seattle_extent = [-122.45167, -122.2244331, 47.49013, 47.754145]
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection=plate_carree)
ax.gridlines(draw_labels=True)
ax.set_extent(seattle_extent, crs=plate_carree)
# One small red dot per report
ax.scatter(df['longitude'], df['latitude'], s=0.2, color='r', transform=plate_carree)
ax.coastlines(resolution='10m')
plt.show()


This visualization is a zoomed-in map of Seattle showing the locations of all the crimes that were committed in 2022. It conveys very little information and is cluttered, which makes it hard to read. There is no way to tell which crime a point represents or exactly where it happened, so the map does not serve an analytical purpose. The intent was only to check that the points are plotted on the map properly.

### Locating all the areas reporting robbery

In [None]:
# Reports whose parent offense group is robbery
is_robbery = df['offense_parent_group'] == 'ROBBERY'
df_rob = df[is_robbery]

# Number of robbery reports per `mcpp` value
df_r1 = df_rob.groupby('mcpp')['mcpp'].count().to_frame()
df_r2 = df_r1.rename(columns={'mcpp': 'Count'})
df_r_final = df_r2.reset_index()

df_r_final

In [None]:
# Robbery locations, coloured by `mcpp`.
fig = px.scatter_geo(
    df_rob,
    lat='latitude',
    lon='longitude',
    color='mcpp',
    scope='north america',
    locationmode='ISO-3',
    title='Robberies in Seattle - 2022',
)

# Restrict the view to the Seattle bounding box and draw borders only.
geo_options = dict(
    lataxis_range=[47.49013, 47.754145],
    lonaxis_range=[-122.45167, -122.2244331],
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    visible=False,
    resolution=50,
    scope="north america",
    showcountries=True,
    countrycolor="Black",
    showsubunits=True,
    subunitcolor="Blue",
)
fig.update_geos(**geo_options)

#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

This visualization drills down on just the Robbery offense group in Seattle. It is an improvement over the previous visualization: points in the same area share a color, and hovering displays information such as the area where the robbery took place and its exact coordinates. The map is plotted using the North America scope. Initially it was plotted using the USA scope, but the same coordinate ranges did not work with that scope, and it was difficult to adjust them so that Seattle stayed visible when the code is run.

### Displaying the most common crimes using a dropdown

In [None]:
def _offense_rows(parent_group):
    """Return the rows of `df` whose offense_parent_group equals `parent_group`."""
    return df[df['offense_parent_group'] == parent_group]


# One frame per offense parent group
df_larc = _offense_rows('LARCENY-THEFT')
df_assault = _offense_rows('ASSAULT OFFENSES')
df_burg = _offense_rows('BURGLARY/BREAKING&ENTERING')
df_vand = _offense_rows('DESTRUCTION/DAMAGE/VANDALISM OF PROPERTY')
df_mvt = _offense_rows('MOTOR VEHICLE THEFT')
df_fraud = _offense_rows('FRAUD OFFENSES')
df_tresp = _offense_rows('TRESPASS OF REAL PROPERTY')
df_dui = _offense_rows('DRIVING UNDER THE INFLUENCE')
df_wep = _offense_rows('WEAPON LAW VIOLATIONS')

# Report count per offense parent group, most frequent first
group_key = 'offense_parent_group'
df_off = df.groupby(group_key)[group_key].count().to_frame()
df_r2 = df_off.rename(columns={group_key: 'Count'}).reset_index()

df_sort = df_r2.sort_values(by='Count', ascending=False)
df_sort

In [None]:

# Map of the most common offenses with a dropdown that switches between them.
# The figure holds one trace per (offense, area) pair, so each dropdown button needs
# a visibility mask with one entry per trace. The masks are therefore derived from
# the traces actually added instead of being written out by hand.

colors = cycle(plotly.colors.sequential.Inferno)

fig = go.Figure()

# (dropdown label, frame holding that offense's reports), in dropdown order.
offense_frames = [
    ("Robbery", df_rob),
    ("Larceny", df_larc),
    ("Assault", df_assault),
    ("Burglary", df_burg),
    ("Vandalism", df_vand),
    ("MVT", df_mvt),
]

# Only the hovered fields are attached to each trace, selected by column name, so
# the template indices below refer to this three-column selection and the exported
# HTML does not carry a copy of every column for every point.
hover_columns = ['mcpp', 'longitude', 'latitude']
hover_text = "<b>Area: %{customdata[0]} </b><br><br>Longitude: %{customdata[1]: .3f} </b><br><br>Latitude: %{customdata[2]: .3f}<extra></extra>"

# trace_offense[i] is the dropdown label that trace i belongs to.
trace_offense = []
for offense_label, offense_df in offense_frames:
    # One trace per area. sort=False keeps the areas in order of first appearance;
    # rows without an mcpp value form no group and therefore add no (empty) trace.
    for area, area_df in offense_df.groupby('mcpp', sort=False):
        fig.add_trace(
            go.Scattergeo(
                lat=area_df['latitude'],
                lon=area_df['longitude'],
                marker_color=next(colors),
                marker_size=5,
                customdata=area_df[hover_columns],
                name=area,
                hovertemplate=hover_text,
            )
        )
        trace_offense.append(offense_label)

fig.update_geos(
    lataxis_range=[47.49013, 47.754145],
    lonaxis_range=[-122.45167, -122.2244331],
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    visible=False,
    resolution=50,
    scope="north america",
    showcountries=True,
    countrycolor="Black",
    showsubunits=True,
    subunitcolor="Blue",
)

#Add dropdown
# Each button shows exactly the traces that belong to its offense.
dropdown_buttons = [
    dict(
        label=offense_label,
        method="update",
        args=[
            {"visible": [owner == offense_label for owner in trace_offense]},
            {"title": f"Offense: {offense_label} | Year: 2022"},
        ],
    )
    for offense_label, _ in offense_frames
]

fig.update_layout(
    updatemenus=[
        dict(
            buttons=dropdown_buttons,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top",
        )
    ]
)

#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())


To improve the earlier visualization and to include the other offenses within the same map, a dropdown is included; hovering over a point shows its area and coordinates. One problem is that the map initially shows the data for all of the offenses at once, and only after an option is chosen from the dropdown does it switch to a single category. We will try to improve this further. Another issue we encountered is that the sequential colors repeat once the color cycle is exhausted, so two or more areas end up with the same color. This defeats the purpose of color-coding the groups, and we need to figure out a way to make this better.

In [None]:
# Parse the report timestamp, then read the hour of day (0-23) straight from the
# datetime accessor instead of formatting it to text and casting back to int.
df['report_datetime'] = pd.to_datetime(df['report_datetime'])
df['hour'] = df['report_datetime'].dt.hour

def timeofday(x):
  """Map an hour of the day to a named part of the day.

  Parameters:
    x: hour of the day as a number (0-23).

  Returns:
    'Early Morning' for 3-6, 'Morning' for 7-10, 'Afternoon' for 11-16,
    'Evening' for 17-20 and 'Night' for 21-23 and 0-2. Returns None when x
    matches no range (e.g. NaN).
  """
  if 2 < x <= 6:
    return 'Early Morning'
  elif 6 < x <= 10:
    return 'Morning'
  elif 10 < x <= 16:
    return 'Afternoon'
  elif 16 < x <= 20:
    return 'Evening'
  # Night wraps around midnight, so the two bounds are joined with `or`;
  # `x > 20 and x <= 2` can never be true for any hour.
  elif x > 20 or x <= 2:
    return 'Night'

# Label every report with its part of the day, derived from the report hour.
df['time_of_day'] = df['hour'].map(timeofday)
df

In [None]:
# Non-null counts of every column per part of the day, ordered by report_number
# count from smallest to largest.
period_counts = df.groupby('time_of_day').count()
period_counts = period_counts.sort_values(by='report_number', ascending=True)
time_of_day = period_counts.reset_index()
time_of_day

In [None]:
# Pie chart of report counts per part of the day
pie_options = dict(
    values='report_number',
    names='time_of_day',
    title='Crime according to time of day',
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=800,
    height=500,
)
fig = px.pie(time_of_day, **pie_options)
#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

The pie chart shows the number of crimes in percentages according to the time of the day. When you hover over the chart, you can see the time of the day and the number of reports for that particular group hovered.

In [None]:
# Tree map of parent offense group -> offense; hovering a tile shows its total count.

# Non-null counts per (parent group, offense); the offense_id count serves as the
# tile size and is exposed under the name 'count'.
df1 = (
    df.groupby(['offense_parent_group', 'offense'])
    .count()
    .reset_index()
    .rename(columns={'offense_id': 'count'})
)

treemap_path = [px.Constant("All crimes"), 'offense_parent_group', 'offense']
fig = px.treemap(df1, path=treemap_path, values='count')
fig.update_traces(root_color="lightgrey")
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

Using the plotly library, we created a tree map that represents all the crimes grouped by offense parent group and offense. We used the count of offense IDs as the total count for each offense. All the crimes are first grouped by parent group, and within each parent group the individual offenses are shown. It was a bit challenging to understand which variables should be used to group and plot this visualization; in the end we chose the offense parent group as the parent in the path of our treemap and the offense as its child.

In [None]:
# Vectorized timestamp parsing; replaces a row-by-row `apply(pd.Timestamp)`.
df['date'] = pd.to_datetime(df['report_datetime'])

In [None]:
# Number of reports per offense parent group
fig = px.histogram(df, x='offense_parent_group')
#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

We tried to plot a graph of the crime counts for the year 2022. We do not find this visualization very useful and will try to improve on it by making a histogram that carries more information.

In [None]:
# Calendar date (YYYY-MM-DD text) of each report; used as the animation frame below.
report_ts = pd.to_datetime(df['report_datetime'])
df['report_datetime'] = report_ts
df['report_date'] = report_ts.dt.strftime('%Y-%m-%d').astype(str)
df.head()

In [None]:
# Histogram of reports per area, coloured by offense group, with one animation
# frame per report date.
animated_options = dict(
    x='mcpp',
    color='offense_parent_group',
    animation_frame='report_date',
    barmode='overlay',
    marginal='box',
    width=1200,
    height=1200,
)
fig = px.histogram(df, **animated_options)

#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

This is a slight improvement on the previous graph. We plotted a histogram with the areas on the x axis, where the bar height represents the count of each type of crime committed in that area. A slider represents the report date, so we can slide it to observe the crime counts for each day in each area for the different kinds of offenses. The only problem we see is that the frame itself moves along with the slider. We will try to find a way to keep the frame still and animate only the bars.

In [None]:
# Same dataset queried over 2008-2022; `$limit` caps the result at 1500000 rows.
# NOTE(review): the window starts at 12:00 on 2008-01-01 and ends at 14:00 on
# 2022-12-31, so the first and last hours of the range are excluded — confirm intent.
url2 = "https://data.seattle.gov/resource/tazs-3rd5.json?$where=report_datetime between '2008-01-01T12:00:00' and '2022-12-31T14:00:00' &$limit=1500000"

In [None]:
# Fail fast on HTTP errors and bound the wait for this much larger download.
http_response2 = requests.get(url2, timeout=600)
http_response2.raise_for_status()
response2 = http_response2.json()

# Preview only the first records; echoing the full multi-year payload as cell
# output would bloat the saved notebook.
response2[:3]

In [None]:

# DataFrame of the 2008-2022 records
df2 = pd.DataFrame(data=response2)

# Number of records returned by the API
print(len(df2.index))

df2

In [None]:
# Four-digit report year (as text) for the yearly histogram
report_ts2 = pd.to_datetime(df2["report_datetime"])
df2["report_datetime"] = report_ts2
df2['year'] = report_ts2.dt.strftime('%Y')
print(df2['year'])

In [None]:
# Reports per year, with each bar split by area (mcpp)
yearly_options = dict(x='year', color='mcpp', title='Number of Crimes from 2008 to 2022')
fig = px.histogram(data_frame=df2, **yearly_options)

#Run fig.show('notebook') if you download the notebook
#fig.show('notebook')

#Use this if running on binder
HTML(fig.to_html())

This is a histogram of the total number of crimes in each year from 2008 to the present. Initially we thought of plotting just the counts; we then made it more informative by also splitting each bar into the areas in which the crimes were committed. That way we can read off the number of crimes in each area for every year and make comparisons across the years. Every bar uses the same order of colors, meaning the same order of areas, so by comparing how the size of a given color changes, one can see whether crime in that area decreased in the following years.