Backend

In [1]:
import numpy as np
import pandas as pd
from sklearn.cluster import *
import datetime as dt
from sklearn import preprocessing as pp
import plotly.express as px
from scipy.spatial import Delaunay
import warnings


# Silence all warnings for the rest of the session. Note that this also hides
# deprecation notices coming from pandas / scikit-learn / plotly.
warnings.filterwarnings("ignore")
df = pd.read_csv('https://raw.githubusercontent.com/hauntedcupoftea/nikolaj/main/datasets/crimedata2016.csv')

# The last 11 characters of 'Date' are parsed as a 12-hour clock time
# ("HH:MM:SS AM/PM"). The whole column is parsed in one vectorized call rather
# than a Python-level loop. Because the format carries no date, every value is
# anchored on 1900-01-01 -- the same default date datetime.strptime uses, so
# the column can be compared against strptime results.
timelist = pd.to_datetime(df['Date'].str[-11:], format='%I:%M:%S %p')
df['Time'] = timelist

lp = pp.LabelEncoder()
op = pp.OrdinalEncoder()  # not used by the code visible in this notebook

# Working frame: the raw frame (including the new 'Time' column) minus the
# columns that are not used downstream.
cleandf = df.drop(columns=['Date', 'X Coordinate', 'Y Coordinate', 'Beat', 'Year', 'FBI Code'])
basicCrime = list(set(cleandf['Primary Type']))

# Manual encoder for 'Primary Type': each crime type is mapped to a
# hand-assigned integer between 0 and 6.
# NOTE(review): the values look like a severity score (0 = non-criminal,
# 6 = most serious) -- confirm with the author.
primList = {'NON - CRIMINAL': 0, 'NON-CRIMINAL (SUBJECT SPECIFIED)': 0, 'NON-CRIMINAL': 0,
            'INTIMIDATION': 1, 'OBSCENITY': 1, 'OTHER OFFENSE': 1, 'PUBLIC INDECENCY': 1,
            'LIQUOR LAW VIOLATION': 2, 'PUBLIC PEACE VIOLATION': 2, 'CONCEALED CARRY LICENSE VIOLATION': 2,
            'PROSTITUTION': 3, 'GAMBLING': 3, 'INTERFERENCE WITH PUBLIC OFFICER': 3, 'STALKING': 3,
            'ARSON': 6, 'BURGLARY': 5, 'BATTERY': 2, 'ROBBERY': 5, 'SEX OFFENSE': 5, 'ASSAULT': 3,
            'THEFT': 4, 'DECEPTIVE PRACTICE': 5, 'CRIMINAL TRESPASS': 4, 'CRIMINAL DAMAGE': 4, 'WEAPONS VIOLATION' : 5,
            'MOTOR VEHICLE THEFT': 5, 'OFFENSE INVOLVING CHILDREN': 5, 'KIDNAPPING': 5, 'NARCOTICS': 5,
            'OTHER NARCOTIC VIOLATION' : 4,'HUMAN TRAFFICKING' : 6,'CRIM SEXUAL ASSAULT' : 6, 'HOMICIDE' : 6}

# A plain lookup is used on purpose: a primary type missing from primList
# raises KeyError instead of silently producing a missing value.
encodePrim = [primList[crime_type] for crime_type in cleandf['Primary Type']]

# The same LabelEncoder instance is refitted for each column, so after these
# two lines `lp` only remembers the 'Location Description' classes.
cleandf['desc'] = lp.fit_transform(cleandf['Description'])
cleandf['locdesc'] = lp.fit_transform(cleandf['Location Description'])
cleandf['type'] = encodePrim

def alpha_shape(points, alpha, only_outer=True):
    """Compute the edges of the alpha shape of a set of 2-D points.

    The points are Delaunay-triangulated and every triangle whose
    circumradius is smaller than ``alpha`` contributes its three edges.

    Parameters
    ----------
    points : array-like with a ``shape`` attribute, indexed as ``points[i][0]``
        and ``points[i][1]``; only the first two coordinates are used for the
        edge lengths.
    alpha : number
        Circumradius threshold; triangles with circumradius >= ``alpha`` are
        ignored.
    only_outer : bool, default True
        If True, an edge shared by two accepted triangles is removed, leaving
        only the boundary edges. If False, shared edges are kept (once).

    Returns
    -------
    set of (i, j) tuples
        Directed edges expressed as indices into ``points``.

    Raises
    ------
    AssertionError
        If fewer than four points are supplied.
    """
    assert points.shape[0] > 3, "Need at least four points"

    def add_edge(edges, i, j):
        # Add the directed edge (i, j) unless the edge is already known.
        if (i, j) in edges or (j, i) in edges:
            assert (j, i) in edges, "Can't go twice over same directed edge right?"
            if only_outer:
                # Seen from both sides => interior edge, not part of the outline.
                edges.remove((j, i))
            return
        edges.add((i, j))

    tri = Delaunay(points)
    edges = set()
    # `simplices` is the supported name for the triangle index array; the old
    # `vertices` alias was deprecated and has been removed from newer SciPy.
    for ia, ib, ic in tri.simplices:
        pa = points[ia]
        pb = points[ib]
        pc = points[ic]
        # Side lengths of the triangle.
        a = np.sqrt((pa[0] - pb[0]) ** 2 + (pa[1] - pb[1]) ** 2)
        b = np.sqrt((pb[0] - pc[0]) ** 2 + (pb[1] - pc[1]) ** 2)
        c = np.sqrt((pc[0] - pa[0]) ** 2 + (pc[1] - pa[1]) ** 2)
        # Heron's formula for the area, then R = abc / (4 * area).
        s = (a + b + c) / 2.0
        area = np.sqrt(s * (s - a) * (s - b) * (s - c))
        if not area > 0:
            # Degenerate (collinear) triangle: the circumradius is unbounded or
            # undefined, so it can never be below alpha. Skipping it avoids a
            # division by zero without changing the result.
            continue
        circum_r = a * b * c / (4.0 * area)
        if circum_r < alpha:
            add_edge(edges, ia, ib)
            add_edge(edges, ib, ic)
            add_edge(edges, ic, ia)
    return edges

def genHeatMap(df):
    """Build a 650x650 plotly density-mapbox figure from ``df``.

    Points are positioned by the 'Latitude' and 'Longitude' columns and
    weighted by the 'type' column.
    """
    plot_options = dict(
        lat='Latitude',
        lon='Longitude',
        z='type',
        mapbox_style="stamen-terrain",
        radius=1,
        width=650,
        height=650,
    )
    return px.density_mapbox(df, **plot_options)

def timeFilter(start: str, end: str) -> pd.DataFrame:
    """Return the rows of ``cleandf`` whose 'Time' lies in ``[start, end)``.

    Parameters
    ----------
    start, end : str
        Clock times formatted as '%H:%M:%S' (24-hour).

    Returns
    -------
    pd.DataFrame
        An independent copy of the matching rows, so callers can add columns
        without writing into a slice of ``cleandf``.

    Notes
    -----
    When ``start`` is not earlier than ``end`` the window wraps around
    midnight (rows at or after ``start`` OR before ``end``); with
    ``start == end`` that selects every row.

    Raises
    ------
    ValueError
        If either string does not match '%H:%M:%S'.
    """
    start = dt.datetime.strptime(start, '%H:%M:%S')
    end = dt.datetime.strptime(end, '%H:%M:%S')
    # Build the mask from the frame being filtered rather than from the
    # separate global `df`, so the result does not depend on index alignment.
    times = cleandf['Time']
    if start < end:
        mask = (times >= start) & (times < end)
    else:
        mask = (times >= start) | (times < end)
    return cleandf.loc[mask].copy()

def cluster(nCluster: int, df: pd.DataFrame, random_state=None):
    """Assign each row of ``df`` to one of ``nCluster`` K-Means clusters.

    Clustering uses the 'Latitude' and 'Longitude' columns only.

    Parameters
    ----------
    nCluster : int
        Number of clusters to form.
    df : pd.DataFrame
        Must contain 'Latitude' and 'Longitude' columns.
    random_state : optional
        Forwarded to ``KMeans``; pass an int to make the labelling
        reproducible. The default (None) keeps the unseeded behaviour.

    Returns
    -------
    The label array produced by ``KMeans.fit_predict``, one entry per row.
    """
    model = KMeans(n_clusters=nCluster, random_state=random_state)
    # Select the two columns by name: a label slice 'Latitude':'Longitude'
    # would also pull in any column that happens to sit between them.
    return model.fit_predict(df[['Latitude', 'Longitude']])

def analyze(nCluster: int, start: str, end: str):
    """Cluster the crimes in a time window and outline each cluster.

    Steps: filter rows with ``timeFilter``, label them with ``cluster``,
    build a heat map with ``genHeatMap``, then for every cluster fit a second
    K-Means with ``int(len(cluster) ** 0.25)`` centres and compute the alpha
    shape (alpha=1) of those centres.

    Parameters
    ----------
    nCluster : int
        Number of top-level clusters.
    start, end : str
        Time window, formatted as expected by ``timeFilter``.

    Returns
    -------
    list
        ``[heat_map_figure, Hcenters, Pedges]`` where ``Hcenters[k]`` is the
        array of (longitude, latitude) sub-centres and ``Pedges[k]`` the set
        of alpha-shape edges for the k-th cluster that could be outlined.
        Clusters that cannot be outlined are left out of both lists.
    """
    # Work on an independent copy so adding a column never writes into a
    # slice of the shared frame.
    tdf = timeFilter(start, end).copy()
    results = cluster(nCluster, tdf)
    tdf['cluster'] = results
    Hcenters = []
    Pedges = []
    hm = genHeatMap(tdf)
    # Iterate over the labels that actually occur instead of assuming they
    # are exactly 0..k-1.
    for label in np.unique(results):
        fildf = tdf[tdf['cluster'] == label]
        n_centers = int(np.power(len(fildf), 0.25))
        if n_centers <= 3:
            # alpha_shape needs at least four points, so this cluster would
            # be skipped anyway; avoid fitting a model for nothing.
            continue
        nmod = KMeans(n_clusters=n_centers)
        nmod.fit(fildf[['Longitude', 'Latitude']].to_numpy())
        centers = nmod.cluster_centers_
        try:
            edges = alpha_shape(centers, alpha=1, only_outer=True)
        except Exception:
            # Best effort: a cluster whose centres cannot be triangulated is
            # skipped. Unlike a bare `except`, this lets KeyboardInterrupt
            # and SystemExit through.
            continue
        Hcenters.append(centers)
        Pedges.append(edges)
    return [hm, Hcenters, Pedges]

In [2]:
from google.colab import widgets
import ipywidgets as widgets
from ipywidgets import Dropdown
from IPython.display import display, clear_output, Image

In [3]:
# Quarter-hour choices covering the whole day ('00:00' ... '23:45').
# Generated rather than typed by hand: the hand-written lists were missing
# 11:45-12:30 and 19:15-20:00.
TIME_OPTIONS = ['{:02d}:{:02d}'.format(hour, minute)
                for hour in range(24)
                for minute in range(0, 60, 15)]

# Get Started
started = widgets.Button(
    description='Get Started',
    disabled=False,
    layout=widgets.Layout(width='50%', height='50px'),
)

# City
city = widgets.Dropdown(
    options=['Chicago'],
    value='Chicago',
    description='Select a City:',
    disabled=False,
)

# Start Time
start = widgets.Dropdown(
    options=TIME_OPTIONS,
    value='00:00',
    description='Start Time:',
    disabled=False,
)

# End Time
end = widgets.Dropdown(
    options=TIME_OPTIONS,
    value='00:00',
    description='End Time:',
    disabled=False,
)

# Integer Input
police_num = widgets.IntText(
    value=7,
    description='Police Officers to be Stationed:',
    disabled=False
)

# Submit
submit = widgets.Button(
    description='Submit',
    disabled=False,
    layout=widgets.Layout(width='50%', height='50px'),
)

In [4]:
# Click handler for the "Get Started" button.
def on_button_clicked(b):
    """Clear the cell output, then show the input form controls in order."""
    clear_output()

    form_controls = (city, police_num, start, end, submit)
    for control in form_controls:
        display(control)

In [5]:
# Click handler for the "Submit" button.
def on_submit_clicked(b):
    """Copy the current widget values into the globals City, PolNum, Start, End.

    The selected 'HH:MM' times get ':00' appended before being stored.
    """
    global City, PolNum, Start, End
    City, PolNum = city.value, police_num.value
    Start = "{}:00".format(start.value)
    End = "{}:00".format(end.value)

In [6]:
# Wire the click handlers to their buttons, then show the entry button.
for button, handler in ((started, on_button_clicked), (submit, on_submit_clicked)):
    button.on_click(handler)
display(started)

Dropdown(description='Select a City:', options=('Chicago',), value='Chicago')

IntText(value=7, description='Police Officers to be Stationed:')

Dropdown(description='Start Time:', options=('00:00', '00:15', '00:30', '00:45', '01:00', '01:15', '01:30', '0…

Dropdown(description='End Time:', options=('00:00', '00:15', '00:30', '00:45', '01:00', '01:15', '01:30', '01:…

Button(description='Submit', layout=Layout(height='50px', width='50%'), style=ButtonStyle())

In [8]:
# PolNum / Start / End are only assigned when the Submit button is clicked.
# Fall back to the current widget values so this cell also works on a fresh
# "Restart & Run All" where nobody has clicked yet.
try:
    PolNum, Start, End
except NameError:
    PolNum = police_num.value
    Start = str(start.value) + ":00"
    End = str(end.value) + ":00"

tresult = analyze(PolNum, Start, End)
# First element is the heat-map figure; leaving it last renders it.
tresult[0]