In [None]:
# Import dependencies
import os
import pandas as pd
import numpy as np
from sodapy import Socrata

# Import App Token
from config import SODAPY_APPTOKEN

# Use Socrata API to import the dataset
------------
API endpoint: https://information.stpaul.gov/resource/gppb-g9cg.json

In [None]:
# Set up variables for api endpoint and dataset identifier
query_url = 'information.stpaul.gov'
data_id = 'gppb-g9cg'

# Number of rows requested per API call while paging through the dataset
PAGE_SIZE = 50000

# Authenticate the request
client = Socrata(query_url, SODAPY_APPTOKEN)

# Fetch the dataset page by page instead of with one fixed row cap: a single
# request with limit=300000 silently drops rows once the dataset grows past
# that size. Each page comes back from sodapy as a Python list of
# dictionaries. Ordering by the system ':id' field keeps the paging stable,
# so rows are neither skipped nor repeated between requests.
# NOTE: row order therefore follows ':id'; do not rely on positional order
# (first/last row) to mean newest/oldest.
results = []
offset = 0
while True:
    page = client.get(data_id, limit=PAGE_SIZE, offset=offset, order=':id')
    results.extend(page)
    # A short (or empty) page means the end of the dataset was reached
    if len(page) < PAGE_SIZE:
        break
    offset += PAGE_SIZE

# Convert to pandas DataFrame
crime_df = pd.DataFrame.from_records(results)
crime_df.head()


# Exploratory Analysis

#### TIME

In [None]:
# Goal: find the oldest and most recent dates.
# The 'date' column is loaded as object dtype, so parse it into datetime first.
crime_df['date'] = crime_df['date'].pipe(pd.to_datetime)

# Show the first rows to check the converted column
crime_df.head()

In [None]:
# Show the last rows of the frame.
# Notice difference in date and time columns over the years
crime_df.tail()

In [None]:
# Dataset start and end dates
# Use the max/min of the 'date' column instead of reading the first/last row
# by position, so the answer does not depend on row order or column position.
recent_date = crime_df['date'].max()
print(f"The most recent date recorded in the dataset: {recent_date}")
first_date = crime_df['date'].min()
print(f"The earliest date recorded in the dataset: {first_date}")

#### TYPES OF CALLS

In [None]:
# List the distinct Call Disposition codes present in the data
pd.unique(crime_df['call_disposition'])

In [None]:
# Combine 'RR' and 'R' because they both stand for Report Written from what I can tell from the official dataset.
# The replacement is limited to the 'call_disposition' column: a frame-wide
# replace would also rewrite any other column holding the exact value 'R'.
crime_df = crime_df.assign(
    call_disposition=crime_df['call_disposition'].replace({'R': 'RR'})
)

# Check unique codes again
crime_df['call_disposition'].unique()

In [None]:
# Print the value counts of 'incident_type' and 'incident' one after the other
# so similar values in the two columns can be compared
count_sections = (
    ("INCIDENT TYPE VALUE COUNTS:", 'incident_type'),
    ("INCIDENT VALUE COUNTS:", 'incident'),
)
for position, (heading, column) in enumerate(count_sections):
    # Divider line between the sections (not before the first one)
    if position:
        print("----------")
    print(heading)
    print(crime_df[column].value_counts())

In [None]:
# Merge the misspelled 'Simple Asasult Dom.' entries into 'Simple Assault Dom.'
misspelled_label = "Simple Asasult Dom."
corrected_label = "Simple Assault Dom."
crime_df = crime_df.replace(misspelled_label, corrected_label)

# Re-run the value counts to confirm the two labels were merged
crime_df['incident'].value_counts()

In [None]:
# Show crime_df, as left by the preceding cells, as this cell's output
crime_df