# Download and analyze Seattle crime data

Download and extract data to use in an app to display incidents.

In [1]:
import requests
import pandas as pd
import os

In [3]:
# Local path where the downloaded CSV is cached between runs.
seattle_crime_data_file = "data/seattle_crime_data.csv"
# CSV export endpoint of the dataset on data.seattle.gov.
seattle_crime_data_uri = "https://data.seattle.gov/api/views/tazs-3rd5/rows.csv?accessType=DOWNLOAD"

In [20]:
def download_uri_to_file(uri, filename, timeout=60):
    """Stream the resource at ``uri`` into the local file ``filename``.

    The parent directory of ``filename`` is created when it does not exist.
    Data is first written to ``<filename>.part`` and moved into place only
    after the whole body has been received, so an interrupted download never
    leaves a truncated file at ``filename``.

    Parameters
    ----------
    uri : str
        Address to fetch with an HTTP GET request.
    filename : str
        Destination path; may be a bare file name without a directory part.
    timeout : float or tuple, optional
        Passed to ``requests.get``; seconds to wait for the server before
        giving up. Defaults to 60.

    Raises
    ------
    Exception
        If the server answers with a non-success status code.
    """
    partial_path = f"{filename}.part"
    # The context manager releases the streamed connection even on errors.
    with requests.get(uri, stream=True, timeout=timeout) as r:
        if not r.ok:
            raise Exception(f"HTTP Error ({r.status_code}) downloading {uri}.")
        # Derive the directory from the argument, not from a notebook global;
        # skip makedirs for bare file names, where dirname is "".
        target_dir = os.path.dirname(filename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        try:
            with open(partial_path, "wb") as fh:
                for chunk in r.iter_content(chunk_size=1024*16):
                    fh.write(chunk)
        except BaseException:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
    # Only a completed download becomes visible under the final name.
    os.replace(partial_path, filename)

In [23]:
# Fetch the CSV only when there is no local copy yet, so re-running the
# notebook does not download the dataset again.
already_downloaded = os.path.isfile(seattle_crime_data_file)
if not already_downloaded:
    download_uri_to_file(seattle_crime_data_uri, seattle_crime_data_file)

In [22]:
# Load the cached CSV into a DataFrame.
crime_df = pd.read_csv(filepath_or_buffer=seattle_crime_data_file)

In [25]:
# Show the first row as a Series to see the column names with sample values.
crime_df.iloc[0]

Report Number                          2020-044620
Offense ID                             12605873663
Offense Start DateTime      02/05/2020 10:10:00 AM
Offense End DateTime                           NaN
Report DateTime             02/05/2020 11:24:31 AM
Group A B                                        A
Crime Against Category                     SOCIETY
Offense Parent Group        DRUG/NARCOTIC OFFENSES
Offense                   Drug/Narcotic Violations
Offense Code                                   35A
Precinct                                         W
Sector                                           Q
Beat                                            Q1
MCPP                                      MAGNOLIA
100 Block Address         32XX BLOCK OF 23RD AVE W
Longitude                              -122.385974
Latitude                                 47.649387
Name: 0, dtype: object

In [26]:
# Count the rows for each distinct value of "Crime Against Category".
crime_df["Crime Against Category"].value_counts()

PROPERTY       747141
PERSON         152174
SOCIETY        104308
NOT_A_CRIME        31
Name: Crime Against Category, dtype: int64

In [28]:
# Number of rows per (category, offense) pair; reset_index() turns the
# grouped sizes back into a DataFrame whose count column is labelled 0.
crime_df.groupby(["Crime Against Category", "Offense"]).size().reset_index()

Unnamed: 0,Crime Against Category,Offense,0
0,NOT_A_CRIME,Justifiable Homicide,31
1,PERSON,Aggravated Assault,33390
2,PERSON,Fondling,2984
3,PERSON,"Human Trafficking, Commercial Sex Acts",50
4,PERSON,"Human Trafficking, Involuntary Servitude",2
5,PERSON,Incest,43
6,PERSON,Intimidation,33705
7,PERSON,Kidnapping/Abduction,1198
8,PERSON,Murder & Nonnegligent Manslaughter,418
9,PERSON,Negligent Manslaughter,17


In [45]:
# Select crimes against persons and keep only the columns the app needs.
# The coordinate checks must read from crime_df: violent_data does not exist
# until this statement finishes, so referencing it here fails on a fresh
# kernel (and silently uses a stale, differently indexed frame on re-run).
# Rows with a 0.0 longitude/latitude are dropped -- presumably 0.0 marks a
# missing location; TODO confirm against the dataset documentation.
violent_data = (crime_df
    [
        (crime_df['Crime Against Category'] == 'PERSON') &
        (crime_df['Longitude'] != 0.0) &
        (crime_df['Latitude'] != 0.0)
    ]
    [['Offense Start DateTime', 'Offense', 'Longitude', 'Latitude']]
).rename(columns={'Offense Start DateTime': 'OffenseDateTime'})

In [46]:
# Convert the raw timestamp column into pandas datetimes so it can be
# compared and aggregated chronologically.
parsed_offense_times = pd.to_datetime(violent_data['OffenseDateTime'])
violent_data['OffenseDateTime'] = parsed_offense_times

In [49]:
# Most recent offense start time in the filtered data.
violent_data.OffenseDateTime.max()

Timestamp('2022-09-19 20:44:00')

In [50]:
# Earliest offense start time in the filtered data.
violent_data.OffenseDateTime.min()

Timestamp('1984-05-22 00:00:00')

In [58]:
# Keep only offenses from the last six months of the data, where a month is
# approximated as 365/12 days and the window ends at the newest offense.
six_month_window = pd.Timedelta(days=365/12 * 6)
window_start = violent_data.OffenseDateTime.max() - six_month_window
is_recent = violent_data.OffenseDateTime >= window_start
violent_data_l6 = violent_data[is_recent]

In [59]:
# Display the six-month subset (the rendered table is truncated by pandas).
violent_data_l6

Unnamed: 0,OffenseDateTime,Offense,Longitude,Latitude
955828,2022-03-31 15:04:00,Intimidation,-122.302890,47.719615
955830,2022-03-27 18:00:00,Intimidation,-122.316292,47.635932
955832,2022-03-31 00:00:00,Simple Assault,-122.289938,47.570675
955834,2022-03-31 17:50:00,Simple Assault,-122.374196,47.564076
955836,2022-03-31 14:49:00,Aggravated Assault,-122.331578,47.599201
...,...,...,...,...
1003507,2022-03-23 20:58:00,Aggravated Assault,-122.344706,47.705799
1003508,2022-03-23 21:54:00,Aggravated Assault,-122.384743,47.670548
1003509,2022-03-23 19:00:00,Simple Assault,-122.326347,47.609820
1003511,2022-03-22 09:06:00,Intimidation,-122.294113,47.662550


In [65]:
# Write the six-month subset as a JSON list of row objects, with datetimes
# serialised as ISO 8601 strings.
violent_data_l6.to_json(
    path_or_buf='data/violent_data_seattle_6months.json',
    orient='records',
    date_format='iso',
)