In [2]:
import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np

data = pd.read_csv('../../2024-DTU-Social-Data-Analysis-And-Visualization/files/Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240210.csv')
data['Datetime'] = pd.to_datetime(data['Date'] + ' ' + data['Time'])
data = data.loc[data['Datetime'] < '2018-01-01 00:00:00']
data['Hour'] = data['Datetime'].apply(lambda x: x.hour)

In [3]:
markers = np.array([
    [37.80029, -122.41018, 'North Beach'],
    [37.79417, -122.40694, 'Chinatown'],
    [37.77722, -122.41111, 'South of Market'],
    [37.78806, -122.4075, 'Union Square'],
    [37.7825, -122.4108, 'Theatre District'],
    [37.76083, -122.435, 'Castro District'],
    [37.803, -122.436, 'Marina District'],
    [37.791190, -122.420828, 'Polk Street'],
    [37.76, -122.42, 'Mission'],
])

# polk_street = [(37.806217, -122.423863), (37.776601, -122.417903)]

In [4]:
k = 3
indices_of_data_within_radius = {}
# polk_street_indices = []
for marker in markers: 
    within_radius = (data.Y - float(marker[0]))**2 + (data.X - float(marker[1]))**2 < (0.001 * k)**2
    indices = list(within_radius.loc[within_radius].index)
    # if marker[2] == 'Polk Street':
        # polk_street_indices += indices
    # else:
    indices_of_data_within_radius[marker[2]] = indices

# indices_of_data_within_radius['Polk Street'] = list(set(polk_street_indices))

In [5]:
# Extract indices from the dictionary
indices = [idx for indices in indices_of_data_within_radius.values() for idx in indices]

# Filter data based on indices
filtered_data = data[data.index.isin(indices)].copy()

# Assign a new column based on dictionary keys
for location, indices in indices_of_data_within_radius.items():
    filtered_data.loc[filtered_data.index.isin(indices), 'Party location'] = location

  filtered_data.loc[filtered_data.index.isin(indices), 'Party location'] = location


In [6]:
data_party = filtered_data.loc[filtered_data['Category'].isin(['DRUG/NARCOTIC', 'LIQUOR LAWS', 'VANDALISM', 'PROSTITUTION', 'DRUNKENNESS', 'DRIVING UNDER THE INFLUENCE', 'LOITERING', 'GAMBLING'])]

df = data_party.groupby(['Date', 'Party location'])['PdId'].count()

In [8]:
from bokeh.io import output_notebook, output_file
import numpy as np
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, RangeTool, Legend
from bokeh.plotting import figure, show
from bokeh.palettes import brewer

output_file("../plots/bokeh_plot.html")

# Sample data (replace this with your actual data)
df = data_party.groupby(['Date', 'Party location']).size().unstack(fill_value=0).reset_index()
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date')
df.set_index('Date', inplace=True)
df_monthly = df.resample('M').sum().reset_index()

dates = np.array(df_monthly['Date'], dtype=np.datetime64)
source = ColumnDataSource(df_monthly)

p = figure(height=400, width=700, tools="xpan", toolbar_location='right',
           x_axis_type="datetime", x_axis_location="below",
           background_fill_color="#efefef", x_range=(dates[118], dates[166]))

categories = df_monthly.columns[1:]
names = [str(cat) for cat in categories]

p.varea_stack(stackers=categories, x='Date', color=brewer['Spectral'][len(categories)], source=source, legend_label=names, muted_alpha=0.2)

p.legend.click_policy = "mute"
p.legend.title = 'Location'

p.yaxis.axis_label = 'Number of crime occurences'
p.xaxis.axis_label = 'Date'
p.axis.axis_label_text_font_style = 'normal'

select = figure(title="Drag the middle and edges of the selection box to change the range above",
                height=130, width=700, y_range=p.y_range,
                x_axis_type="datetime", y_axis_type=None,
                tools="", toolbar_location=None, background_fill_color="#efefef")

range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.varea_stack(stackers=categories, x='Date', color=brewer['Spectral'][len(categories)], source=source)
select.add_tools(range_tool)

output_notebook()
show(column(p, select))