# A clean version of the first notebook I made

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Load the raw stop records from the CSV file into a DataFrame.
df = pd.read_csv('../data/raw_data.csv')

# Columns used by the rest of the notebook: date, time, location, lat, lng,
# subject_race, type, arrest_made, citation_issued, outcome, reason_for_stop.
columns_of_interest = [
    'date', 'time', 'location', 'lat', 'lng', 'subject_race', 'type',
    'arrest_made', 'citation_issued', 'outcome', 'reason_for_stop',
]

# .copy() makes df_extracted an independent frame rather than a slice of df,
# so adding columns to it later does not raise SettingWithCopyWarning.
df_extracted = df[columns_of_interest].copy()
df_extracted.head()

In [None]:
# Scatter every stop on an OpenStreetMap base layer, centred on the
# coordinates given below, with small (size 4) markers so dense areas
# stay readable.
fig = px.scatter_mapbox(
    df_extracted,
    lat="lat",
    lon="lng",
    # Extra fields shown in the hover tooltip of each point.
    hover_data={
        "location": True,
        "subject_race": True,
        "reason_for_stop": True,
        "arrest_made": True,
    },
    title="Police Stops in San Jose",
    mapbox_style="open-street-map",
    center={"lat": 37.3382, "lon": -121.8863},
    zoom=11,
    width=1600,
    height=1200,
).update_traces(marker={"size": 4})

# Render the map.
fig.show()

In [None]:
import hdbscan

# Work on an independent copy: plain assignment would only alias
# df_extracted, so adding the 'cluster' column below would silently modify
# that frame too.
clustering = df_extracted.copy()

# (lat, lng) pairs as a NumPy array; converted to radians before fitting
# because the clusterer is configured with the haversine metric.
coords = clustering[['lat', 'lng']].to_numpy()
clusterer = hdbscan.HDBSCAN(min_cluster_size=2, metric='haversine')
clusterer.fit(np.radians(coords))

# Store the fitted label of each row next to its coordinates.
clustering['cluster'] = clusterer.labels_

clustering

In [None]:
# Same stop map as before, but coloured by the 'cluster' column and with the
# stop reason as the hover title. Marker size and base-map style are applied
# through chained figure updates.
fig = (
    px.scatter_mapbox(
        clustering,
        lat="lat",
        lon="lng",
        color="cluster",
        hover_name="reason_for_stop",
        title="Police Stops in San Jose",
        center={"lat": 37.3382, "lon": -121.8863},
        zoom=11,
        width=1600,
        height=1200,
    )
    .update_traces(marker={"size": 5})
    .update_layout(mapbox_style="open-street-map")
)
fig.show()

In [None]:
import json

# GeoJSON with the San Jose council districts, read from a local file.
# Remote URL kept for reference:
# geojson_url = "https://gisdata-csj.opendata.arcgis.com/api/download/v1/items/001373893c8347d4b36cf15a6103f78c/geojson?layers=120"
#
# The encoding is given explicitly so that decoding does not depend on the
# platform's default text encoding.
with open('Council_District.geojson', encoding='utf-8') as f:
    districts_geojson = json.load(f)

# One row per GeoJSON feature, carrying the district identifier and its
# population as found in the feature's 'properties' mapping.
district_properties = [
    feature['properties'] for feature in districts_geojson['features']
]
districts_df = pd.DataFrame({
    "district_id": [props['DISTRICT'] for props in district_properties],
    "population": [props['POPULATION'] for props in district_properties],
})

# Choropleth of the districts, shaded by population.
fig = px.choropleth_mapbox(
    districts_df,
    geojson=districts_geojson,
    locations="district_id",  # Values matched against the GeoJSON 'DISTRICT' property
    featureidkey="properties.DISTRICT",  # Path of that property inside each feature
    color="population",
    hover_name="district_id",
    title="San Jose Council Districts",
    mapbox_style="carto-positron",
    center={"lat": 37.3382, "lon": -121.8863},
    zoom=10,
    width=800,
    height=600
)

# Remove the side and bottom margins; keep room at the top for the title.
fig.update_layout(margin={"r":0,"t":50,"l":0,"b":0})
fig.show()

In [None]:
# Parse the clock time of each stop and derive its hour of day.
# assign() builds a new frame and rebinds df_extracted, instead of writing
# columns into what may be a slice of another DataFrame.
df_extracted = df_extracted.assign(
    time=pd.to_datetime(df_extracted['time'], format='%H:%M:%S'),
    hour=lambda frame: frame['time'].dt.hour,
)

# Stops per hour. Rows without a parsed time are dropped, and the result is
# reindexed to all 24 hours so an hour with no stops is plotted as 0 rather
# than being skipped and interpolated over by the line.
hourly_stops = (
    df_extracted['hour']
    .dropna()
    .astype(int)
    .value_counts()
    .reindex(range(24), fill_value=0)
)

# Single line, so no palette is passed: seaborn ignores `palette` (and warns)
# when no `hue` variable is given.
hourly_fig, hourly_ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x=hourly_stops.index, y=hourly_stops.values, ax=hourly_ax)
hourly_ax.set(
    xlabel='Hour of Day',
    ylabel='Number of Stops',
    title='Number of Police Stops by Hour of Day',
)
hourly_ax.set_xticks(range(24))
plt.show()