In [None]:
import requests
import pandas as pd
import numpy as np
import time
import oracledb
import getpass
from geopy.geocoders import Nominatim

# Connect to database
# Both secrets are prompted for interactively so they never live in the file.
password = getpass.getpass("Enter DB Password")
wallet_password = getpass.getpass("Enter Wallet Password")

# The client configuration directory and the wallet location are the same path.
config_path = "./config"
connect_options = {
    "user": "admin",
    "password": password,
    "dsn": "ltl3y0m4d7of29l1_high",
    "config_dir": config_path,
    "wallet_location": config_path,
    "wallet_password": wallet_password,
}
connection = oracledb.connect(**connect_options)

print("Successfully connected to Oracle Database")
# Single cursor shared by every statement executed later in the script.
cursor = connection.cursor()

# Define functions for geolocation
# Nominatim client; the user agent identifies this application to the service.
geolocator = Nominatim(user_agent="MU-DS-Capstone")
def fix_address(address):
    """Normalize a raw call-log location into a geocoder query string.

    The city suffix (everything after the first comma) is dropped, an
    intersection ("A / B") is reduced to its first street, block markers
    are removed, and a handful of known street-name spellings are rewritten
    before " MILWAUKEE" is appended.

    Args:
        address: Raw location value from the call log. Non-string values are
            converted with ``str``.

    Returns:
        The cleaned address followed by " MILWAUKEE", or ``np.nan`` when the
        input is missing (``None``, NaN, ``pd.NA``).
    """
    # Missing values have to be detected BEFORE the str() conversion:
    # afterwards they are the literal text "nan" and can no longer be told
    # apart from a real address.
    if (
        not isinstance(address, str)
        and pd.api.types.is_scalar(address)
        and pd.isna(address)
    ):
        return np.nan

    # Strip surrounding whitespace so the suffix checks below see the real
    # last token of the address.
    address = str(address).strip()
    # Remove city
    if "," in address:
        address = address.partition(",")[0].strip()
    # If intersection, just use first street. TODO: Improve this
    if "/" in address:
        address = address.partition("/")[0].strip()
    # Change block address to just be that address number
    address = address.replace("-BLK", "")
    # Catching the BLVD error, needs to say BLVD not BL
    if address.endswith("BL"):
        address += "VD"
    # Catching MLK DR error, use old name
    address = address.replace("MARTIN L KING JR DR", "OLD WORLD THIRD ST")
    # Layton Error
    address = address.replace("S LAYTON ST", "S LAYTON BLVD")
    # Leon Error
    address = address.replace("LEON TR", "LEON TERRACE")
    # Mc Kinley Error
    address = address.replace("MC KINLEY", "MCKINLEY")
    # W Fond Du Lac Error
    if "FOND DU LAC" in address and "AV" not in address:
        address = address.replace("FOND DU LAC", "FOND DU LAC AV")
    # Bluemound road error
    address = address.replace("BLUE MOUND RD", "BLUEMOUND RD")
    return address + " MILWAUKEE"

def get_gps(geocoded, index):
    """Extract one coordinate from a geocoding result, rejecting outliers.

    Args:
        geocoded: Geocoder result whose item ``[1]`` is a
            ``(latitude, longitude)`` pair, or ``None``/NaN when the lookup
            produced nothing.
        index: 0 to select the latitude, 1 to select the longitude.

    Returns:
        The requested coordinate, or ``np.nan`` when there is no result, the
        index is not 0 or 1, or the coordinate lies more than one degree away
        from the reference point (43.0389, -87.9065).
    """
    # Reference latitude/longitude used for the sanity check below.
    reference = (43.0389, -87.9065)

    if geocoded is None or (isinstance(geocoded, float) and np.isnan(geocoded)):
        return np.nan
    if index not in (0, 1):
        print("Error in get_gps():", geocoded, index)
        return np.nan

    gps = geocoded[1][index]
    # A hit more than one degree from the reference point is treated as a
    # wrong match rather than stored.
    if abs(gps - reference[index]) > 1:
        return np.nan
    return gps

# Polling configuration
POLL_INTERVAL_SECONDS = 30 * 60   # Sleep for 30 minutes between cycles
REQUEST_TIMEOUT_SECONDS = 60      # Never hang forever on the call-log server
GEOCODE_DELAY_SECONDS = 1         # Nominatim usage policy: max 1 request/second

url = 'https://itmdapps.milwaukee.gov/MPDCallData/index.jsp?district=All'
headers = {'User-Agent': 'Marquette Data Science'}


def _geocode_safely(address):
    """Geocode one address, returning None instead of raising on failure.

    Non-string inputs (e.g. NaN for a missing location) are not sent to the
    geocoder at all. Every real lookup is followed by a short pause so the
    request rate stays within the service's limit.
    """
    if not isinstance(address, str):
        return None
    try:
        return geolocator.geocode(address)
    except Exception as e:
        # A timeout or service error on one address must not stop the
        # long-running poller; the row is stored without coordinates.
        print("Error geocoding address:", address, e)
        return None
    finally:
        time.sleep(GEOCODE_DELAY_SECONDS)


# Run indefinitely
while True:
    # Grab police call log from HTML table
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        df = pd.read_html(response.content, index_col=0, header=None)[-1]
    except (requests.RequestException, ValueError) as e:
        # Network failure, HTTP error status, or no table in the page:
        # skip this cycle and try again after the usual interval.
        print("Error fetching call log:", e)
        time.sleep(POLL_INTERVAL_SECONDS)
        continue

    # Remove multi-index
    df.columns = df.columns.droplevel(0)
    # Remove duplicate entries
    df.drop_duplicates(inplace=True)

    # Add Latitude and Longitude columns to dataframe
    fixed_addresses = df.Location.transform(fix_address)
    # Each distinct address is looked up once per cycle.
    geocoded_by_address = {
        address: _geocode_safely(address) for address in fixed_addresses.unique()
    }
    geocoded_addresses = fixed_addresses.apply(
        lambda address: geocoded_by_address.get(address)
    )
    df["Latitude"] = geocoded_addresses.apply(get_gps, index=0)
    df["Longitude"] = geocoded_addresses.apply(get_gps, index=1)

    # SEND DATA TO SQL DATABASE
    for callnumber, (date, location, district, nature, status, latitude, longitude) in df.iterrows():
        # Values are passed as bind variables, never spliced into the SQL
        # text, so quotes in scraped fields cannot break or alter the
        # statement. Text fields are sent as strings (as the quoted literals
        # were before); missing coordinates are sent as NULL.
        row = [
            str(callnumber),
            str(date),
            str(location),
            str(district),
            str(nature),
            str(status),
            None if pd.isna(latitude) else float(latitude),
            None if pd.isna(longitude) else float(longitude),
        ]
        try:
            # Replace any existing row for this call with the latest data.
            cursor.execute("DELETE FROM CALLS WHERE CALL_NUMBER = :1", row[:1])
            cursor.execute(
                "INSERT INTO CALLS VALUES (:1, :2, :3, :4, :5, :6, :7, :8)",
                row,
            )
        except oracledb.Error as e:
            print("Error updating Database:", e)
            print(callnumber, date, location, district, nature, status, latitude, longitude)

    connection.commit()

    time.sleep(POLL_INTERVAL_SECONDS)