In [1]:
import pandas as pd
import requests
import json
from pprint import pprint
import time
from sklearn.preprocessing import LabelEncoder

# Data Import
Calling API and Importing JSON File

In [2]:
# ArcGIS FeatureServer query: every record (where=1=1), all fields, JSON output.
url = "https://services5.arcgis.com/54falWtcpty3V47Z/arcgis/rest/services/general_offenses_year3/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&resultType=standard&f=json"
# A timeout keeps the cell from hanging forever if the service is unreachable.
response = requests.get(url, timeout=60)
# Stop here on an HTTP error instead of trying to parse an error page below.
response.raise_for_status()
# NOTE: the name `json` is kept because later cells read `json['features']`,
# but it shadows the imported `json` module. Parsing with `response.json()`
# (rather than `json.loads`) keeps this cell re-runnable after that shadowing.
json = response.json()

# Reformatting Data
Pulling values from each key and listing them in a separate variable

In [3]:
# Attribute dictionaries of every feature returned by the query, in order.
feature_attributes = [feature['attributes'] for feature in json['features']]

# One list per attribute; position n in every list belongs to feature n.
# `.get()` yields None when a feature lacks an attribute, so all lists keep
# the same length and stay row-aligned (appending only when the key was
# present would silently shift later rows and break DataFrame construction).
Beat = [attributes.get('Beat') for attributes in feature_attributes]
Description = [attributes.get('Description') for attributes in feature_attributes]
Grid = [attributes.get('Grid') for attributes in feature_attributes]
# The service spells this key in upper case.
ObjectID = [attributes.get('OBJECTID') for attributes in feature_attributes]
Occurence_Date = [attributes.get('Occurence_Date') for attributes in feature_attributes]
Offense_Category = [attributes.get('Offense_Category') for attributes in feature_attributes]
Offense_Code = [attributes.get('Offense_Code') for attributes in feature_attributes]
Offense_Ext = [attributes.get('Offense_Ext') for attributes in feature_attributes]
Police_District = [attributes.get('Police_District') for attributes in feature_attributes]
Record_ID = [attributes.get('Record_ID') for attributes in feature_attributes]

# Creating New Dictionary
Creating a dictionary with our new lists

In [4]:
# Map each output column name to the list holding that column's values.
crime_dict = dict(
    Beat=Beat,
    Description=Description,
    Grid=Grid,
    ObjectID=ObjectID,
    Occurence_Date=Occurence_Date,
    Offense_Category=Offense_Category,
    Offense_Code=Offense_Code,
    Offense_Ext=Offense_Ext,
    Police_District=Police_District,
    Record_ID=Record_ID,
)

# Transforming Dictionary to DataFrame

In [5]:
# Build the frame from the column lists, then use ObjectID as the row index.
crime_df = pd.DataFrame(data=crime_dict)
crime_df = crime_df.set_index('ObjectID')

In [6]:
# Offense categories kept for the analysis.
important_crimes = [
    'AUTO THEFT',
    'BURGLARY',
    'GRAND THEFT',
    'LARCENY',
    'PRIVACY',
    'ROBBERY',
    'STOLEN PROP',
    'THEFT',
    'TREPASS',
    'TRESPASS',
]

# Keep rows whose beat is not 'UI' and whose category is in the list above,
# drop rows with any missing value, and renumber the rows from zero.
beat_is_not_ui = crime_df['Beat'] != 'UI'
category_is_important = crime_df['Offense_Category'].isin(important_crimes)
crime_df = (
    crime_df
    .loc[beat_is_not_ui & category_is_important]
    .dropna()
    .reset_index(drop=True)
)
crime_df.count()

Beat                5835
Description         5835
Grid                5835
Occurence_Date      5835
Offense_Category    5835
Offense_Code        5835
Offense_Ext         5835
Police_District     5835
Record_ID           5835
dtype: int64

In [7]:
# Reviewing the data type of each column before any conversion
crime_df.dtypes

Beat                object
Description         object
Grid                object
Occurence_Date       int64
Offense_Category    object
Offense_Code        object
Offense_Ext         object
Police_District     object
Record_ID           object
dtype: object

# Converting Data Types

In [8]:
# Target dtype for each column that needs converting. Occurence_Date becomes a
# string because the date-conversion cell calls int() on each value.
column_dtypes = {
    'Grid': 'int',
    'Occurence_Date': 'str',
    'Offense_Code': 'int',
    'Police_District': 'int',
    'Record_ID': 'int',
}
crime_df = crime_df.astype(column_dtypes)

In [9]:
# Confirming the column data types after the conversions
crime_df.dtypes

Beat                object
Description         object
Grid                 int64
Occurence_Date      object
Offense_Category    object
Offense_Code         int64
Offense_Ext         object
Police_District      int64
Record_ID            int64
dtype: object

In [10]:
# Converting Unix epoch timestamps to readable dates and times.
# NOTE: this cell overwrites crime_df['Occurence_Date'] with formatted text,
# so it can only be run once per freshly built crime_df.
Occurence_Date = []
Day_of_the_Week = []
Month = []
Year = []
Hour = []

for raw_timestamp in crime_df['Occurence_Date']:
    # The values are treated as epoch milliseconds (the previous code kept the
    # first 10 digits of a 13-digit value). Integer division by 1000 gives the
    # same seconds value and stays correct for timestamps with fewer digits.
    # localtime() uses the timezone of the machine running the notebook.
    local_time = time.localtime(int(raw_timestamp) // 1000)

    Occurence_Date.append(time.strftime('%a %m-%d-%Y, %I:%M:%S %p', local_time))
    # %u is the ISO weekday number: 1 = Monday ... 7 = Sunday.
    Day_of_the_Week.append(time.strftime('%u', local_time))
    # %m is the month number (this column previously received the day of the
    # month by mistake).
    Month.append(time.strftime('%m', local_time))
    Year.append(time.strftime('%Y', local_time))
    Hour.append(time.strftime('%H', local_time))

crime_df['Occurence_Date'] = Occurence_Date
crime_df['Day_of_the_Week'] = Day_of_the_Week
crime_df['Month'] = Month
crime_df['Year'] = Year
crime_df['Hour'] = Hour

In [11]:
# Encoding beats to numerical values: learn the distinct beat labels first,
# then map every row's beat to its integer code.
beat_labels = crime_df['Beat']
label_encoder = LabelEncoder().fit(beat_labels)
encoded_beat = label_encoder.transform(beat_labels)

In [12]:
# Attach the integer beat codes as a new column next to the original labels.
crime_df = crime_df.assign(Encoded_Beat=encoded_beat)

# Placing Data Frame into a CSV

In [14]:
# Write the cleaned frame to disk; the positional row index is not saved.
output_csv = 'sac_crime.csv'
crime_df.to_csv(output_csv, index=False)