In [None]:
import requests
import json
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import plotly.express as px
import ast
import plotly.graph_objects as go
from itertools import cycle
import plotly
import datetime 

## Data Gathering

In [None]:
# Seattle crime reports filed during calendar year 2022.
# The window is half-open (>= Jan 1 00:00, < Jan 1 of the next year) so that no part of
# the first or last day of 2022 is left out of the query.
url = "https://data.seattle.gov/resource/tazs-3rd5.json?$where=report_datetime >= '2022-01-01T00:00:00' and report_datetime < '2023-01-01T00:00:00'&$limit=150000"

In [None]:
# Download the filtered reports and decode the JSON payload.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error instead of decoding an error body as data.
http_response = requests.get(url, timeout=60)
http_response.raise_for_status()
response = http_response.json()

In [None]:
# Build a DataFrame from the decoded JSON records
df = pd.DataFrame(data=response)

# Print the number of rows that were retrieved
row_count = len(df.index)
print(row_count)

df.head()

In [None]:
# Number of missing values in each column
df.isnull().sum()

In [None]:
# Show the dtype of every column of the freshly loaded frame
df.dtypes

## Data Cleaning

In [None]:
# All the columns are of type object. To plot the locations, latitude and longitude must be floats.
# pd.to_numeric(errors='coerce') converts any entry that is not a number into NaN instead of
# raising, so a single non-numeric placeholder in the feed cannot abort the notebook.
for coord_col in ('longitude', 'latitude'):
    df[coord_col] = pd.to_numeric(df[coord_col], errors='coerce').astype('float64')

df.dtypes

## Displaying the number of crimes committed against a category.

In [None]:
# Count the reports in each "crime against" category.
category_col = 'crime_against_category'

df1 = df.groupby([category_col]).agg({category_col: 'count'})
df_renamed = df1.rename(columns={category_col: 'Count'})
df_cat = df_renamed.reset_index()

# Human-readable column name for the chart axis
df_r = df_cat.rename(columns={category_col: 'Crime Category Against'})
df_r

In [None]:
# Interactive bar chart: one bar per category, coloured by category
fig = px.bar(
    df_r,
    x='Crime Category Against',
    y='Count',
    color='Crime Category Against',
    hover_data=["Count"],
    width=800,
    height=500,
)

# Position the title above the plot
fig.update_layout(
    title=dict(
        text='Crime counts against various categories',
        x=0.46,
        y=0.93,
        xanchor='center',
        yanchor='top',
    )
)

fig.show('notebook')

The bar graph above displays the number of crimes committed against each category. A bar graph was chosen because it separates the categories clearly and is easy to read. The graph is interactive and can be zoomed in to reveal the "NOT_A_CRIME" category, but because this category has so few records compared to the others, it is almost impossible to identify with the naked eye, which is a small flaw of this visualization.

## Locating all the areas reporting robbery

In [None]:
# Keep only the reports whose parent offense group is robbery
is_robbery = df['offense_parent_group'] == 'ROBBERY'
df_rob = df[is_robbery]

# Count the robbery reports per area (`mcpp`)
area_col = 'mcpp'
df_r1 = df_rob.groupby([area_col]).agg({area_col: 'count'})
df_r2 = df_r1.rename(columns={area_col: 'Count'})
df_r_final = df_r2.reset_index()

df_r_final

In [None]:
# Plot every robbery report as a point, coloured by area
fig = px.scatter_geo(
    df_rob,
    lat='latitude',
    lon='longitude',
    color='mcpp',
    scope='north america',
    locationmode='ISO-3',
    title='Robberies in Seattle - 2022',
)

# Restrict the map window to the latitude/longitude ranges below and draw borders
fig.update_geos(
    scope="north america",
    resolution=50,
    visible=False,
    lataxis_range=[47.49013, 47.754145],
    lonaxis_range=[-122.45167, -122.2244331],
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    showcountries=True,
    countrycolor="Black",
    showsubunits=True,
    subunitcolor="Blue",
)

fig.show('notebook')

This visualization drills down into just the Robbery offense group in Seattle. It improves on the previous visualization because each area is drawn in a single color and hovering over a point shows information such as the area where the robbery took place and its exact coordinates. The map is plotted using the North America scope. It was initially plotted using the USA scope, but the same coordinate ranges did not work with that scope, and it proved difficult to adjust them so that Seattle stays visible when the code is run.

## Displaying the most common crimes using a dropdown

In [None]:
# Split the reports into one frame per offense parent group.

def _offense_subset(parent_group):
    """Return the rows of `df` whose offense_parent_group equals `parent_group`."""
    return df[df['offense_parent_group'] == parent_group]


df_larc = _offense_subset('LARCENY-THEFT')
df_assault = _offense_subset('ASSAULT OFFENSES')
df_burg = _offense_subset('BURGLARY/BREAKING&ENTERING')
df_vand = _offense_subset('DESTRUCTION/DAMAGE/VANDALISM OF PROPERTY')
df_mvt = _offense_subset('MOTOR VEHICLE THEFT')
df_fraud = _offense_subset('FRAUD OFFENSES')
df_tresp = _offense_subset('TRESPASS OF REAL PROPERTY')
df_dui = _offense_subset('DRIVING UNDER THE INFLUENCE')
df_wep = _offense_subset('WEAPON LAW VIOLATIONS')

# Number of reports per offense parent group, most frequent first
df_off = df.groupby(['offense_parent_group']).agg({'offense_parent_group': 'count'})
df_r2 = df_off.rename(columns={'offense_parent_group': 'Count'}).reset_index()

df_sort = df_r2.sort_values(by='Count', ascending=False)
df_sort

In [None]:
# Number of distinct values in the area column
df['mcpp'].unique().size


In [None]:
# Map every area (`mcpp` value) to its own hex colour.

from random import randint, sample

# Take the areas from the data rather than assuming there are exactly 61 of them;
# unique() is evaluated once instead of once per loop iteration.
area = list(df['mcpp'].unique())

# Sampling without replacement guarantees that no two areas share a colour.
colors = ['#%06X' % rgb for rgb in sample(range(0x1000000), len(area))]

d = dict(zip(area, colors))


In [None]:
# Scatter map with one trace per (offense, area) pair and a dropdown that switches offenses.

# (dropdown label, name shown in the figure title, rows belonging to that offense)
offense_layers = [
    ("Robbery", "Robbery", df_rob),
    ("Larceny", "Larceny", df_larc),
    ("Assault", "Assault", df_assault),
    ("Burglary", "Burglary", df_burg),
    ("Vandalism", "Vandalism", df_vand),
    ("MVT", "MVT", df_mvt),
    ("Fraud", "Fraud", df_fraud),
    ("Trespassing", "Trespassing", df_tresp),
    ("DUI", "DUI", df_dui),
    ("WLV", "Weapon Law Violation", df_wep),
]

# Hover data is selected by column name so it does not depend on the column order of `df`.
hover_columns = ['mcpp', 'longitude', 'latitude']
hover_template = (
    "<b>Area: %{customdata[0]}</b><br><br>"
    "Longitude: %{customdata[1]:.3f}<br><br>"
    "Latitude: %{customdata[2]:.3f}<extra></extra>"
)
title_format = "Offense: {} | Year: 2022"

fig = go.Figure()

# Dropdown label that owns each trace, in the order the traces are added (None = always shown).
trace_owner = []
default_label, default_title_name, _ = offense_layers[0]

for label, _, offense_df in offense_layers:
    # groupby skips rows without an area; sort=False keeps first-appearance order.
    for area_name, area_rows in offense_df.groupby('mcpp', sort=False):
        fig.add_trace(go.Scattergeo(
            lat=area_rows['latitude'],
            lon=area_rows['longitude'],
            customdata=area_rows[hover_columns].to_numpy(),
            # Fall back to neutral grey if the colour mapping has no entry for this area.
            marker_color=d.get(area_name, '#7F7F7F'),
            marker_size=3,
            name=area_name,
            hovertemplate=hover_template,
            # Start with only the first offense visible, matching the active dropdown entry.
            visible=(label == default_label),
        ))
        trace_owner.append(label)

# State name labels; they stay visible whichever offense is selected.
fig.add_scattergeo(
    locations=["ID", "OR", "WA"],    # codes for states
    locationmode='USA-states',
    text=["Idaho", "Oregon", "Washington"],
    mode='text',
    showlegend=False,
    hoverinfo='skip',
)
trace_owner.append(None)

fig.update_geos(
    scope="north america",
    resolution=50,
    visible=False,
    lataxis_range=[47.49013, 47.754145],
    lonaxis_range=[-122.45167, -122.2244331],
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    showcountries=True,
    countrycolor="Black",
    showsubunits=True,
    subunitcolor="Blue",
)


def _visibility_mask(selected_label):
    """Return one boolean per trace: True for the selected offense and for always-on traces."""
    return [owner is None or owner == selected_label for owner in trace_owner]


# Add dropdown: each button's mask has exactly one entry per trace in the figure.
dropdown_buttons = [
    dict(
        label=label,
        method="update",
        args=[
            {"visible": _visibility_mask(label)},
            {"title.text": title_format.format(title_name)},
        ],
    )
    for label, title_name, _ in offense_layers
]

fig.update_layout(
    title_text=title_format.format(default_title_name),
    updatemenus=[
        dict(
            buttons=dropdown_buttons,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top",
        )
    ],
)

fig.show('notebook')


To improve the earlier visualization and to include all the other offenses within the same map, a dropdown is added; each map shows the coordinates and the areas. The previous version had two flaws: 1. The areas were grouped by color, but the colors were repeated, and a few of the areas were represented by two or more different colors. 2. The states were not labelled. We have added labels for Washington and the neighbouring states.


## Displaying crime percentages according to time of day

In [None]:
# Parse the report timestamps and extract the hour of day (0-23) straight from the
# datetime accessor rather than formatting to text and parsing it back to an integer.
df['report_datetime'] = pd.to_datetime(df['report_datetime'])
df['hour'] = df['report_datetime'].dt.hour

def timeofday(x):
    """Map an hour of the day to a named period.

    Parameters
    ----------
    x : number
        Hour of the day, expected in the range 0-23.

    Returns
    -------
    str or None
        'Early Morning' for hours 3-6, 'Morning' for 7-10, 'Afternoon' for 11-16,
        'Evening' for 17-20 and 'Night' for 21-23 and 0-2. None for anything
        outside 0-23 (including NaN).
    """
    if 2 < x <= 6:
        return 'Early Morning'
    if 6 < x <= 10:
        return 'Morning'
    if 10 < x <= 16:
        return 'Afternoon'
    if 16 < x <= 20:
        return 'Evening'
    # Night wraps around midnight, so it is the union of two ranges; a single
    # "x > 20 and x <= 2" test can never be true.
    if 20 < x <= 23 or 0 <= x <= 2:
        return 'Night'
    return None

# Label every report with the period of the day in which it was filed
df['time_of_day'] = df['hour'].map(timeofday)
df

In [None]:
# Count the non-null entries per column for each time-of-day label, smallest report count first
counts_by_period = df.groupby(['time_of_day']).count()
counts_by_period = counts_by_period.sort_values(by=['report_number'], ascending=True)

time_of_day = pd.DataFrame(counts_by_period.reset_index())
time_of_day

In [None]:
# Pie chart of the report counts per time-of-day label
fig = px.pie(
    time_of_day,
    names='time_of_day',
    values='report_number',
    title='Crime according to time of day',
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=800,
    height=500,
)
fig.show('notebook')

The pie chart shows the share of crimes, as percentages, for each time of day. When you hover over a slice, you can see the time of day and the number of reports in that group.

## Tree Map based on the Parent Offense Group and Offence

In [None]:
# Tree map of parent offense group -> offense; hovering over a tile shows its total count.

# Per-(group, offense) counts; the offense_id count is exposed as the `count` column.
df1 = (
    df.groupby(['offense_parent_group', 'offense'])
    .count()
    .reset_index()
    .rename(columns={'offense_id': 'count'})
)

treemap_path = [px.Constant("All crimes"), 'offense_parent_group', 'offense']
fig = px.treemap(df1, path=treemap_path, values='count')
fig.update_traces(root_color="lightgrey")
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
fig.show('notebook')

Using the plotly library, we created a tree map that represents all the crimes grouped by offense parent group and offense. We used the count of offense IDs as the total for each offense. All the crimes are first grouped by parent group, and within each parent group the individual offenses are shown.

In [None]:
# Add the calendar date of each report as a 'YYYY-MM-DD' string
report_ts = pd.to_datetime(df['report_datetime'])
df['report_datetime'] = report_ts
df['report_date'] = report_ts.dt.strftime('%Y-%m-%d').astype(str)
df.head()

## Displaying the area-wise count of occurrence of major offense types

In [None]:
# Animated histogram: reports per area, coloured by offense group, one frame per report date.

# Plotly Express orders animation frames by first appearance in the data, so sort by date
# (mergesort is stable and keeps the within-day order) to make the slider chronological.
df_by_date = df.sort_values('report_date', kind='mergesort')

# Upper x-axis bound taken from the data instead of a hard-coded number of areas.
n_areas = df['mcpp'].unique().size

fig = px.histogram(
    data_frame=df_by_date,
    x='mcpp',
    color='offense_parent_group',
    animation_frame='report_date',
    title='Area-wise count of occurrence of major offense',
    barmode='overlay',
    marginal='box',
    height=1000,
    width=1000,
    range_x=[0, n_areas],
)
fig.update_xaxes(tickangle=30, tickfont_size=8)
fig.update_layout(xaxis_title="Area", xaxis_title_standoff=10)
fig.show('notebook')

The visualization depicts the area-wise count of occurrences of the major offense types. The date slider at the bottom allows the user to select the date for which they want to see the records. For a better understanding of the distribution of crimes in the histogram, there is a box plot on top that summarizes the distribution of offenses across the areas with quartile and median values. Both visuals are interactive, so the user can hover over an area to view its details.

## Summarising the data from 2008

In [None]:
# Same endpoint without a date filter, with a row limit of 2,000,000.
# NOTE(review): nothing here bounds the years returned — confirm the result matches the
# "2008 to 2022" range used in the chart title further down.
endpoint2 = "https://data.seattle.gov/resource/tazs-3rd5.json"
url2 = endpoint2 + "?$limit=2000000"

In [None]:
# Download the unfiltered dataset and decode the JSON payload.
# timeout=(connect, read) keeps a stalled connection from hanging the notebook, and
# raise_for_status() surfaces an HTTP error instead of decoding an error body as data.
http_response2 = requests.get(url2, timeout=(10, 300))
http_response2.raise_for_status()
response2 = http_response2.json()

In [None]:

# Build a DataFrame from the decoded JSON records
df2 = pd.DataFrame(data=response2)

# Print the number of rows that were retrieved
row_count2 = len(df2.index)
print(row_count2)

df2

In [None]:
# Parse the report timestamps and keep the four-digit year as a string column
report_ts2 = pd.to_datetime(df2["report_datetime"])
df2["report_datetime"] = report_ts2
df2['year'] = report_ts2.dt.strftime('%Y')
print(df2['year'])

In [None]:
#Sorting the values based on year as when plotting a histogram, this would ensure the dates start from 2008 and not randomly.
df3 = df2.sort_values(by='year', ascending = True)
df3

In [None]:
#The color mccp divides the histogram such that we get the area wise count.
fig = px.histogram(df3, x='year', color = 'mcpp', title = 'Number of Crimes from 2008 to 2022')
fig.show('notebook')

The histogram shows the count of crimes in each area for every year from 2008 to 2022, in ascending order of year. When you hover over a bar segment, you can see the area (distinguished by color), the year, and the number of crimes that took place.

**References:** 
- https://plotly.com/python/map-configuration/
- https://www.tutorialspoint.com/plotly/plotly_adding_buttons_dropdown.htm
- https://stackoverflow.com/questions/69235532/add-dropdown-button-to-plotly-express-choropleth-map
- https://stackoverflow.com/questions/62115372/python-function-for-customdata-hover-in-plotly-lib
- https://plotly.com/python/colorscales/