**User Stories**

- View neighbourhood crime levels on a map: As a newcomer unfamiliar with Vancouver neighbourhoods, I want to view an interactive map that displays crime levels by neighbourhood so that I can visually compare areas and better understand where higher or lower crime concentrations are located.
- Understand crime patterns by time of day: As a parent with young children, I want to see when crimes most frequently occur (morning, afternoon, evening, night) so that I can assess whether incidents tend to happen during times when my children may be home.
- View crime rates adjusted for neighbourhood population size: As someone comparing neighbourhoods of different sizes, I want to see crime rates normalized by population so that I can make fair comparisons between larger and smaller areas.


##### **TASK**
**Demonstrate that your data can actually support your user stories.**

Select one of your User Stories/JTBD from Section 3. Create a Jupyter notebook in the notebooks/ folder (e.g., notebooks/eda_analysis.ipynb). 

- Create 1-2 static visualizations or summary tables that directly address the user's task. 

- In your proposal document (this section), briefly explain what the visualization shows and how comparing these values specifically supports the user's decision-making. (Include the relevant plots or a link to the notebook in this section).

In [1]:
import os

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from IPython.display import display

In [2]:
# Show the kernel's working directory so the relative data path used below can be checked.
# os.getcwd() is plain Python: unlike the bare `pwd` automagic it keeps working in a
# multi-line cell and when a variable named `pwd` exists.
os.getcwd()

'/Users/braimah/Documents/MDS_UBS/Block5/532/DSCI-532_2026_7_vancouver-crimewatch/notebooks'

In [3]:
# Load the 2025 all-neighbourhoods crime extract; the path is relative to the notebooks/ folder.
data = pd.read_csv(
    filepath_or_buffer="../data/raw/crimedata_csv_AllNeighbourhoods_2025.csv",
)

In [4]:
# Preview the first five records to check the column layout.
data.head(n=5)

Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
0,Break and Enter Commercial,2025,11,25,0,0,10XX PACIFIC ST,West End,490246.7971,5458367.0
1,Break and Enter Commercial,2025,11,29,18,53,10XX PACIFIC ST,West End,490246.7971,5458367.0
2,Break and Enter Commercial,2025,1,2,0,0,10XX PACIFIC ST,West End,490258.8902,5458368.0
3,Break and Enter Commercial,2025,6,10,19,27,10XX PACIFIC ST,West End,490298.1327,5458317.0
4,Break and Enter Commercial,2025,1,13,3,43,10XX ROBSON ST,West End,491036.3006,5458994.0


In [5]:
# Count missing values in every column.
data.isna().sum()

TYPE             0
YEAR             0
MONTH            0
DAY              0
HOUR             0
MINUTE           0
HUNDRED_BLOCK    0
NEIGHBOURHOOD    8
X                0
Y                0
dtype: int64

In [6]:
# List the distinct hour values present in the data.
data["HOUR"].unique()

array([ 0, 18, 19,  3, 13, 14, 23,  2,  8, 10,  6,  5, 21,  1,  4, 17,  9,
       15, 16, 22, 11, 12,  7, 20])

In [7]:
# List the distinct crime categories present in the data.
data["TYPE"].unique()

<StringArray>
[                            'Break and Enter Commercial',
                      'Break and Enter Residential/Other',
                                               'Mischief',
                                               'Homicide',
                               'Offence Against a Person',
                                            'Other Theft',
                                     'Theft from Vehicle',
                                       'Theft of Bicycle',
                                       'Theft of Vehicle',
 'Vehicle Collision or Pedestrian Struck (with Fatality)',
   'Vehicle Collision or Pedestrian Struck (with Injury)']
Length: 11, dtype: str

### **PLOT 1**
**Can we help John find which types of crime occur most often in a specific neighbourhood?**

In [8]:
# Count incidents per crime type across the whole dataset, most frequent first.
crime_type_counts = (
    data.groupby("TYPE")
    .size()
    .reset_index(name="counts")
    .sort_values(by="counts", ascending=False)
)

crime_type_counts

Unnamed: 0,TYPE,counts
5,Other Theft,13785
6,Theft from Vehicle,5580
3,Mischief,5222
4,Offence Against a Person,3132
0,Break and Enter Commercial,1276
10,Vehicle Collision or Pedestrian Struck (with I...,1171
1,Break and Enter Residential/Other,886
7,Theft of Bicycle,788
8,Theft of Vehicle,534
9,Vehicle Collision or Pedestrian Struck (with F...,23


In [9]:
# Horizontal bar chart of incident counts, one bar per crime type.
fig = px.bar(
    data_frame=crime_type_counts,
    x="counts",
    y="TYPE",
    color="counts",
    color_continuous_scale="Burg",
    title="Distribution of Crime Types in Vancouver",
)

# Order the bars by total count and hide the colour bar.
fig.update_layout(
    height=700,
    font={"size": 10},
    yaxis=dict(categoryorder="total ascending"),
    xaxis_title="Number of Recorded Incidences",
    yaxis_title="",
    coloraxis_showscale=False,
)

fig.show()

In [10]:
# Neighbourhood selector shared by the interactive charts.

# Choices: "All" first, then every non-missing neighbourhood name in sorted order.
neighbourhood = ["All", *sorted(data["NEIGHBOURHOOD"].dropna().unique())]

dropdown = widgets.Dropdown(
    description="Neighborhood:",
    options=neighbourhood,
    value="All",
)

# Callback for the neighbourhood dropdown: redraws the crime-type bar chart.
def plots_by_neighbourhood(neighbourhood):
    """Show a horizontal bar chart of incident counts per crime type.

    Parameters
    ----------
    neighbourhood : str
        A value of the ``NEIGHBOURHOOD`` column, or ``"All"`` to use every
        row of ``data`` without filtering.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    if neighbourhood == "All":
        updated_data = data
        area_label = "Vancouver"
    else:
        updated_data = data[data['NEIGHBOURHOOD'] == neighbourhood]
        area_label = neighbourhood

    type_counts = (
        updated_data.groupby("TYPE")
        .size()
        .reset_index(name="counts")
        .sort_values(by="counts", ascending=False)
    )

    fig = px.bar(
        type_counts,
        y="TYPE",
        x="counts",
        # Name the selected area so the title always matches the filtered data.
        title=f"Distribution of Crime Types in {area_label}",
        color="counts",
        color_continuous_scale='Burg',
    )

    fig.update_layout(
        height=600,
        font=dict(size=10),
        yaxis={'categoryorder': 'total ascending'},
        xaxis_title="Number of Recorded Incidences",
        yaxis_title="",
        coloraxis_showscale=False,
    )

    fig.show()
    

# interact() displays the widget itself; the trailing semicolon keeps the returned
# function from being echoed as a cell output.
widgets.interact(plots_by_neighbourhood, neighbourhood=dropdown);

interactive(children=(Dropdown(description='Neighborhood:', options=('All', 'Arbutus Ridge', 'Central Business…

<function __main__.plots_by_neighbourhood(neighbourhood)>

In [11]:
# List the distinct neighbourhood labels, including the missing-value marker.
data["NEIGHBOURHOOD"].unique()

<StringArray>
[                 'West End',                    'Sunset',
                'Strathcona',                   'Marpole',
                  'Oakridge',                  'Fairview',
 'Central Business District',            'Mount Pleasant',
                 'Kitsilano',           'West Point Grey',
  'Kensington-Cedar Cottage',        'Grandview-Woodland',
          'Hastings-Sunrise',             'Arbutus Ridge',
                'Kerrisdale',       'Renfrew-Collingwood',
                 'Killarney',                'Riley Park',
              'Stanley Park',       'Victoria-Fraserview',
               'Shaughnessy',         'Dunbar-Southlands',
              'South Cambie',                  'Musqueam',
                         nan]
Length: 25, dtype: str

### PLOT 2
**Can we help John understand how crime varies by time of day within a specific neighbourhood?**

In [12]:
# Preview the records again before deriving the time-of-day buckets from HOUR.

data.head(n=5)

Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
0,Break and Enter Commercial,2025,11,25,0,0,10XX PACIFIC ST,West End,490246.7971,5458367.0
1,Break and Enter Commercial,2025,11,29,18,53,10XX PACIFIC ST,West End,490246.7971,5458367.0
2,Break and Enter Commercial,2025,1,2,0,0,10XX PACIFIC ST,West End,490258.8902,5458368.0
3,Break and Enter Commercial,2025,6,10,19,27,10XX PACIFIC ST,West End,490298.1327,5458317.0
4,Break and Enter Commercial,2025,1,13,3,43,10XX ROBSON ST,West End,491036.3006,5458994.0


In [13]:
# List the distinct hour values that the time-of-day buckets must cover.
data["HOUR"].unique()

array([ 0, 18, 19,  3, 13, 14, 23,  2,  8, 10,  6,  5, 21,  1,  4, 17,  9,
       15, 16, 22, 11, 12,  7, 20])

In [14]:
"""
Morning: 4-11
Afternoon: 12-17
Evening: 18-23 and 0-3
"""

# Bucket an hour of the day into one of three labels: Morning, Afternoon or Evening.
def time_of_day(hour):
    """Return the time-of-day label for ``hour``.

    Hours 4 to 11 inclusive map to "Morning" and hours 12 to 17 inclusive map
    to "Afternoon". Every other value, including 18 to 23 and 0 to 3, maps
    to "Evening".
    """
    if 12 <= hour <= 17:
        return "Afternoon"
    return "Morning" if 4 <= hour <= 11 else "Evening"

# Label every record with its time-of-day bucket, derived element-wise from HOUR.
data["TIME_OF_DAY"] = data["HOUR"].map(time_of_day)

In [15]:
# Count incidents per time-of-day bucket, largest first.
(
    data.groupby("TIME_OF_DAY")
    .size()
    .reset_index(name="counts")
    .sort_values(by="counts", ascending=False)
)
        

Unnamed: 0,TIME_OF_DAY,counts
1,Evening,15395
0,Afternoon,10390
2,Morning,6626


In [16]:
# Callback for the neighbourhood dropdown: redraws the time-of-day donut chart.

def plots_by_time(neighbourhood):
    """Show a donut chart of incident counts per time-of-day bucket.

    Parameters
    ----------
    neighbourhood : str
        A value of the ``NEIGHBOURHOOD`` column, or ``"All"`` to use every
        row of ``data`` without filtering.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    if neighbourhood == "All":
        updated_data = data
        area_label = "Vancouver"
    else:
        updated_data = data[data['NEIGHBOURHOOD'] == neighbourhood]
        area_label = neighbourhood

    time_occurrence = (
        updated_data.groupby("TIME_OF_DAY")
        .size()
        .reset_index(name="counts")
        .sort_values(by="counts", ascending=False)
    )

    # Pin one colour to each bucket label. Colouring by the "counts" column made
    # the colours depend on the order and value of the counts, so the same bucket
    # changed colour from one neighbourhood to the next.
    palette = px.colors.sequential.Burgyl_r
    bucket_colors = {
        "Evening": palette[0],
        "Afternoon": palette[2],
        "Morning": palette[4],
    }

    fig = px.pie(
        time_occurrence,
        names="TIME_OF_DAY",
        values="counts",
        # Name the selected area so the title always matches the filtered data.
        title=f"Distribution of Crime Occurrences Considering the Time of the Day in {area_label}",
        color="TIME_OF_DAY",
        color_discrete_map=bucket_colors,
        hole=0.4,
        labels={"TIME_OF_DAY": "Time of Day", "counts": "Number of Crimes"},
    )

    fig.update_layout(
        height=600,
        font=dict(size=10),
    )

    fig.show()
    

# interact() displays the widget itself; the trailing semicolon keeps the returned
# function from being echoed as a cell output.
widgets.interact(plots_by_time, neighbourhood=dropdown);

interactive(children=(Dropdown(description='Neighborhood:', options=('All', 'Arbutus Ridge', 'Central Business…

<function __main__.plots_by_time(neighbourhood)>