# **Predictive Crime Mapping with a spatio-temporal kernel density estimation approach**

## **Case study: Dallas City**

*Institute of Mathematical and Computational Engineering*

*School of Engineering*

*Pontifical Catholic University of Chile*

**Authors:** Christian Carstens, Mauro Mendoza, Francisco Tobar

**Date:** 2019-04-11

`Python version: 3.7`

### **DB Request**

In [1]:
import pandas as pd
from sodapy import Socrata

import credentials as cre

In [146]:
# SoQL statement sent to the Socrata endpoint: incident number, location and
# date/time of every record in which none of the three fields is null.
# The row limit is kept small here; the original note beside it reads
# "530000 max. 11/04".
query = """
select
    incidentnum,
    geocoded_column,
    date1,
    time1
where
    geocoded_column is not null
    and date1 is not null
    and time1 is not null
limit
    5
"""

# Credentials and dataset identifiers come from the local `credentials` module.
with Socrata(
    cre.socrata_domain,
    cre.API_KEY_S,
    username=cre.USERNAME_S,
    password=cre.PASSWORD_S,
) as client:
    results = client.get(
        cre.socrata_dataset_identifier,
        query=query,
        content_type='json',
    )

# One DataFrame row per returned record.
df = pd.DataFrame.from_records(results)

### **DB Cleaning & Formatting**

#### Cleaning

In [143]:
from datetime import date, time, datetime
from time import strftime

In [147]:
def _day_first(iso_stamp):
    """Return the date part of 'YYYY-MM-DDT...' as 'D/M/YYYY' without zero padding."""
    date_part = iso_stamp.split('T')[0]
    return '/'.join(str(int(piece)) for piece in reversed(date_part.split('-')))


def _twelve_hour(clock):
    """Convert a 24-hour 'HH:MM' string into 12-hour 'HH:MM AM/PM' form."""
    return datetime.strptime(clock, "%H:%M").strftime("%I:%M %p")


# Single text column combining the reformatted date and time.
day_first = df['date1'].apply(_day_first)
twelve_hour = df['time1'].apply(_twelve_hour)
df['datetime'] = day_first + ' ' + twelve_hour

# 'incidentnum': keep only the text before the first '-'
# (presumably this drops a trailing '-20XX' year suffix).
df['incidentnum'] = df['incidentnum'].apply(lambda raw: raw.split('-')[0])

# 'geocoded_column' holds a dict whose 'coordinates' entry is read below as
# [longitude, latitude]; split it into two separate columns.
points = df['geocoded_column'].apply(lambda geo: geo['coordinates'])
df['longitude'] = points.apply(lambda pair: pair[0])
df['latitude'] = points.apply(lambda pair: pair[1])

# The source columns are no longer needed once the derived ones exist.
df.drop(columns=['geocoded_column', 'date1', 'time1'], inplace=True)

In [105]:
# Preview the first five rows after cleaning.
df.head(n=5)

Unnamed: 0,incidentnum,datetime,longitude,latitude
0,71975,4/4/2018 10:42 AM,-96.700292,32.893226
1,82945,19/4/2018 06:00 PM,-96.691377,32.883235
2,130869,16/6/2018 06:00 PM,-96.693665,32.894433
3,18171,24/1/2018 09:00 PM,-96.686146,32.878845
4,201197,9/9/2018 11:54 PM,-96.683125,32.883679


#### Formatting

In [148]:
# Rename the identifier column.
df.rename(columns={'incidentnum': 'incident_id'}, inplace=True)

# Fix the column order, sort by incident id and discard the old index.
column_order = ['incident_id', 'datetime', 'longitude', 'latitude']
df = (
    df[column_order]
    .sort_values(by=['incident_id'])
    .reset_index(drop=True)
)

# Number the rows starting at 1 instead of 0.
df.index += 1

In [73]:
# In case we want to export the dataset to .csv
# df.to_csv('dallas_db.csv', index=False)

### Dallas Map

We load the components needed to display the map.

In [49]:
from arcgis import GIS, geocoding, features, geometry
from arcgis.gis import GIS
from arcgis.features import GeoAccessor, GeoSeriesAccessor

from credentials import arcgis_domain, USERNAME_A, PASSWORD_A

In [137]:
# Open the ArcGIS session with the credentials from the local `credentials` module.
gis = GIS(url=arcgis_domain, username=USERNAME_A, password=PASSWORD_A)

In [149]:
# Preview the first five rows after renaming, sorting and re-indexing.
df.head(n=5)

Unnamed: 0,incident_id,datetime,longitude,latitude
1,41722,26/2/2018 04:00 AM,-96.700339,32.884225
2,169301,1/8/2018 06:44 PM,-96.807846,32.79209
3,218546,23/9/2017 10:00 PM,-96.698923,32.894431
4,253127,5/11/2017 01:55 PM,-96.700296,32.886513
5,261985,16/11/2017 11:11 PM,-96.700293,32.892595


In [155]:
# Map widget for the 'Dallas' location at zoom level 11, in 2D mode,
# drawn on the "dark-gray-vector" basemap.
map_1 = gis.map('Dallas', zoomlevel=11, mode="2D")
map_1.basemap = "dark-gray-vector"

# NOTE(review): `sdf` is not referenced again in the visible code, and `df`
# holds only incident_id/datetime/longitude/latitude at this point —
# confirm whether this accessor lookup is still needed.
sdf = df.spatial
# Import the DataFrame through the GIS content manager under the given title;
# the returned object is added to the map as a layer below.
incidents = gis.content.import_data(df, title="Dallas Incidents")

map_1.add_layer(incidents)

# Last expression of the cell, so the notebook displays the widget.
map_1

MapView(layout=Layout(height='400px', width='100%'), zoom=11.0)

Additional information about the city of Dallas.

In [40]:
# Geocode the city name and keep the location of the first result.
matches = geocoding.geocode(address="Dallas, Texas")
gcode = matches[0]["location"]

# The location's "x" entry is used as longitude and "y" as latitude.
citylong, citylat = gcode["x"], gcode["y"]

f"Dallas -> longitude: {citylong}, latitude: {citylat}"

'Dallas -> longitude: -96.79539999999997, latitude: 32.77815000000004'