In [1]:
from io import BytesIO

import numpy as np
import pandas as pd
import requests
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [2]:
# Grab police call log from HTML table
url = 'https://itmdapps.milwaukee.gov/MPDCallData/index.jsp?district=All'
# Bound the request so a stalled server cannot hang the notebook, and fail
# loudly on an HTTP error instead of parsing an error page as call data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content
# Hand pandas a file-like object (literal HTML input to read_html is
# deprecated) and keep the last table parsed from the page.
df = pd.read_html(BytesIO(html), index_col=0, header=None)[-1]
# Remove multi-index
df.columns = df.columns.droplevel(0)
# Remove duplicate entries (reassigned rather than mutated in place so the
# cell gives the same result when re-run)
df = df.drop_duplicates()

In [29]:
# Define functions for geolocation
# Geocoder client (Nominatim), identified to the service by a custom user agent
GEOCODER_USER_AGENT = "MU-DS-Capstone"
geolocator = Nominatim(user_agent=GEOCODER_USER_AGENT)
def fix_address(address):
    """Normalize a call-log location into a query string for the geocoder.

    The raw value looks like ``"3355 S 27TH ST,MKE"`` or, for intersections,
    ``"W FOND DU LAC AV / W MEINECKE AV,MKE"``.

    Parameters
    ----------
    address : object
        Raw location value. Non-string values are converted with ``str``.

    Returns
    -------
    str or float
        The cleaned address followed by ``" MILWAUKEE"``, or ``np.nan`` when
        the input is missing (``None``, NaN, ``pd.NA``) so that callers can
        skip geocoding instead of looking up the text ``"nan MILWAUKEE"``.
    """
    # Missing values must be detected BEFORE str(): once stringified, NaN
    # becomes the text "nan" and can no longer be recognised as missing.
    if pd.api.types.is_scalar(address) and pd.isna(address):
        return np.nan
    address = str(address)
    # Remove city
    address = address.split(",", 1)[0]
    # If intersection, just use first street. TODO: Improve this
    address = address.split("/", 1)[0]
    # Change block address to just be that address number
    address = address.replace("-BLK", "")
    # Cutting at "/" leaves a trailing space ("W SHERMAN BL "), which would
    # defeat the suffix check below and leave a double space in the result.
    address = address.strip()
    # Catching the BLVD error, needs to say BLVD not BL
    if address.endswith("BL"):
        address += "VD"
    # Catching MLK DR error, use old name
    address = address.replace("MARTIN L KING JR DR", "OLD WORLD THIRD ST")
    # Mc Kinley Error
    address = address.replace("MC KINLEY AV", "MCKINLEY AV")
    # Bluemound road error
    address = address.replace("BLUE MOUND RD", "BLUEMOUND RD")
    return address + " MILWAUKEE"

def get_gps(geocoded, index):
    """Extract one coordinate from a geocoding result.

    Parameters
    ----------
    geocoded : object
        A geocoding result whose element ``[1]`` is indexable by ``index``,
        or a missing-value placeholder (``None``, NaN, ``pd.NA``) when the
        lookup produced nothing.
    index : int
        Position inside ``geocoded[1]`` to return; the notebook uses 0 for
        the Latitude column and 1 for the Longitude column.

    Returns
    -------
    The selected coordinate, or ``np.nan`` when ``geocoded`` is missing.
    """
    # pandas may hand back NaN/pd.NA instead of None for failed lookups;
    # subscripting those would raise TypeError, so treat them all as missing.
    is_missing = (
        geocoded is None
        or geocoded is pd.NA
        or (isinstance(geocoded, float) and np.isnan(geocoded))
    )
    if is_missing:
        return np.nan
    return geocoded[1][index]

In [30]:
# Add Latitude and Longitude columns to dataframe
# Throttle requests to one per second and retry transient failures; lookups
# that still fail come back as None instead of aborting the whole run.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Rows without a location are masked out so they are never sent to the
# geocoder as text.
has_location = df.Location.notna()
fixed_addresses = df.Location.transform(fix_address).where(has_location)

# Geocode each distinct address once; repeated locations reuse the result.
geocoded_lookup = {
    address: geocode(address) for address in fixed_addresses.dropna().unique()
}
# dict.get yields None for masked (missing) addresses, which get_gps maps to NaN.
geocoded_addresses = fixed_addresses.map(geocoded_lookup.get)

df["Latitude"] = geocoded_addresses.apply(get_gps, index=0)
df["Longitude"] = geocoded_addresses.apply(get_gps, index=1)

In [33]:
# Display the call log, now including the Latitude and Longitude columns
df

Call Number,Date/Time,Location,Police District,Nature of Call,Status,Latitude,Longitude
230260763,01/26/2023 12:59:43 PM,"3355 S 27TH ST,MKE",6,SPECIAL ASSIGN,Service in Progress,42.983637,-87.950567
230260762,01/26/2023 12:58:59 PM,"2920 N VEL R PHILLIPS AV,MKE",5,RETURN STATION,Assignment Completed,43.071644,-87.915175
230260760,01/26/2023 12:57:54 PM,"605 E LYON ST,MKE",1,SPECIAL ASSIGN,Service in Progress,43.048754,-87.904192
230260757,01/26/2023 12:56:10 PM,"W FOND DU LAC AV / W MEINECKE AV,MKE",3,ACC PI,Service in Progress,43.106328,-87.996879
230260754,01/26/2023 12:53:50 PM,"2620 W WISCONSIN AV,MKE",3,ASSIGNMENT,Assignment Completed,43.039187,-87.947133
...,...,...,...,...,...,...,...
230260761,01/26/2023 01:02:58 PM,"W KEEFE AV / W ATKINSON AV,MKE",5,SUBJ WITH GUN,Unable to Locate Complainant,43.081658,-87.919062
230260768,01/26/2023 01:01:04 PM,"606 W CENTER ST,MKE",5,TRAFFIC STOP,City Citation(s) Issued,43.067736,-87.918765
230260767,01/26/2023 01:01:02 PM,"901 N 9TH ST,MKE",1,COURT DUTY,Service in Progress,43.041574,-87.923826
230260764,01/26/2023 01:00:47 PM,"9529 W MORGAN AV,MKE",6,TRAFFIC STOP,City Citation(s) Issued,42.980622,-88.032288


In [176]:
# Write the call log (including the added coordinate columns) to disk as CSV
OUTPUT_CSV = "clean_data.csv"
df.to_csv(OUTPUT_CSV)