In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import ColumnDataSource, figure, output_file, save
from bokeh.io import show
from bokeh.models import FactorRange, Legend
from bokeh.palettes import Category10
import folium 
from folium import plugins
from collections import OrderedDict, defaultdict
import warnings
warnings.filterwarnings('ignore')

In [120]:
data = pd.read_csv(r"C:\Users\inest\OneDrive - Danmarks Tekniske Universitet\Semester IV\Social Data Analysis and Visualization\merged_data.csv")

In [121]:
data.head()

Unnamed: 0,Category,PdDistrict,Longitude,Latitude,TimeOfDayHour,TimeOfDayMinute,DayOfWeek,DayOfMonth,Month,Year
0,ROBBERY,INGLESIDE,-122.420084,37.708311,17,50,Monday,22,November,2004
1,VEHICLE THEFT,PARK,-120.5,90.0,20,0,Tuesday,18,October,2005
2,VEHICLE THEFT,SOUTHERN,-120.5,90.0,2,0,Sunday,15,February,2004
3,ASSAULT,SOUTHERN,-122.410541,37.770913,17,0,Sunday,21,November,2010
4,ASSAULT,TARAVAL,-122.470366,37.745158,15,50,Tuesday,2,April,2013


In [136]:
focus_crimes = ['ROBBERY']

In [147]:
data = data[data['Category'].isin(focus_crimes)]
data = data[data['Year'] >= 2014 & (data['Year'] <= 2024)]
hourly_data = data.groupby(['TimeOfDayHour', 'Category']).size().unstack()

In [148]:
#normalize the data by hour and category
hourly_data = hourly_data.div(hourly_data.sum(axis=1), axis=0)
hourly_data.head()

Category,ROBBERY
TimeOfDayHour,Unnamed: 1_level_1
0,1.0
1,1.0
2,1.0
3,1.0
4,1.0


In [149]:
# Define a color palette
colors = ['#1f77b4', '#ff7f0e']
hour = [str(i) for i in range(24)]
src = ColumnDataSource(hourly_data)

p = figure(x_range=FactorRange(factors=hour), title="Normalized number of incidents by hour")
bar = {}  # to store vbars

# Create a vbar for each focus crime with a specific color
for indx, crime in enumerate(focus_crimes):
    bar[crime] = p.vbar(x='TimeOfDayHour', top=crime, source=src, color=colors[indx],
                        legend_label=crime, muted_alpha=0.1, muted=True)

In [150]:
# Remove the default legend
p.legend.visible = False

# Add custom legend
legend = Legend(items=[(x, [bar[x]]) for x in focus_crimes], location=(0, -30), click_policy='mute')
p.add_layout(legend, 'right')  # assigns the click policy (you can try to use ''hide')
show(p)

In [151]:
output_file("bokeh_plot.html")
save(p)

'c:\\Users\\inest\\OneDrive - Danmarks Tekniske Universitet\\Semester IV\\Social Data Analysis and Visualization\\Assignment 2\\inestacanho.github.io\\bokeh_plot.html'

san francisco landmark data

In [152]:
landmarks = pd.read_csv("_data\Landmarks_Listed_in_Article_10_of_the_San_Francisco_Planning_Code_20250325.csv", header=0, sep=',')

landmarks.head()

Unnamed: 0,the_geom,apn,name,Address,LandmarkNo,YearDesignated,DesignationDocument,Photo,PIMLink
0,MULTIPOLYGON (((-122.43047437009602 37.8028801...,0471003,Ladies' Protection and Relief Society,3400 Laguna Street,320,2024,https://files.sfplanning.org/documents/preserv...,https://sfplanninggis.org/Preservation/Landmar...,https://sfplanninggis.org/pim?search=0471/003
1,MULTIPOLYGON (((-122.43325629424248 37.7269420...,6797046,Excelsior Branch Library,4400 MISSION ST,0,0,,,
2,MULTIPOLYGON (((-122.43170756725033 37.7638346...,3564095,Eureka Valley/Harvey Milk Memorial Branch Library,1 JOSE SARRIA CT,0,0,,,
3,MULTIPOLYGON (((-122.43037325392243 37.7745087...,0828022A,Sacred Heart Parish Complex,735 FELL ST,316,2024,https://files.sfplanning.org/documents/preserv...,https://sfplanninggis.org/Preservation/Landmar...,https://sfplanninggis.org/pim?search=0828/022A
4,MULTIPOLYGON (((-122.4145552700096 37.78402509...,,Black Cat Cafe,317 LEAVENWORTH ST,0,0,,,


In [153]:
import geopandas as gpd

landmarks = gpd.GeoDataFrame(landmarks, geometry=gpd.GeoSeries.from_wkt(landmarks['the_geom']))

landmarks['latitude'] = landmarks.geometry.centroid.y
landmarks['longitude'] = landmarks.geometry.centroid.x

#remove null values
landmarks = landmarks.dropna(subset=['latitude', 'longitude'])

heat map of intensity of crime during time

In [154]:
heat_df = data[['Latitude', 'Longitude', 'Year']].dropna(axis=0, subset=['Latitude','Longitude', 'Year'])

heat_data = defaultdict(list)
for r in heat_df.itertuples():
    heat_data[r.Year].append([r.Latitude, r.Longitude])

heat_data_ordered = OrderedDict(sorted(heat_data.items(), key=lambda t: t[0]))

In [155]:
# Followed the solution from the thread - https://stackoverflow.com/questions/68606541/dataformat-for-folium-heatmapwithtime?rq=3
# folium.plugins.HeatMapWithTime did not plot otherwise

def generatebasemap(default_location=[47.670,-122.394], default_zoom_start=7):
    basemap = folium.Map(location=default_location, zoom_start=default_zoom_start)
    return basemap

In [156]:
basemap = generatebasemap()

hm_with_time = folium.Map(location=[37.7749, -122.4194], zoom_start=13)

hm = plugins.HeatMapWithTime(data=list(heat_data_ordered.values()),
                     index=list(heat_data_ordered.keys()), 
                     radius=10,
                     auto_play=True,
                     max_opacity=0.8)

hm.add_to(hm_with_time)

for _, row in landmarks.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=6,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.7,
        popup=row.get('name', 'Address') 
    ).add_to(hm_with_time)

hm_with_time.save("_assets/heat_map.html")
hm_with_time