In [1]:
# Load Supabase creds from .env and initialize the client used below.
import os
import pandas as pd
from supabase import create_client
from dotenv import load_dotenv
import plotly.express as px

# Read the .env file so the credentials below are available via the environment.
load_dotenv()

# Connection settings; each one is None when its variable is not set.
SUPABASE_URL, SUPABASE_KEY = (
    os.environ.get(setting) for setting in ("SUPABASE_URL", "SUPABASE_KEY")
)

# Shared client used by the query cells that follow.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)


### Fetch incidents from Supabase
Using paginated requests (1k rows per page) to safely bring the entire table into memory.

In [None]:
# Page through the incident table in chunks of up to `page_size` rows to avoid
# timeouts and memory spikes.
page_size = 1000
# Accumulate rows across pages until Supabase returns an empty page.
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("incident")
        .select("*")
        .order("Incident_ID")   # REQUIRED for stable pagination
        .range(start, start + page_size - 1)  # range bounds are inclusive
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by page_size:
    # if the server caps a response below page_size, stepping a full page would
    # silently skip the rows in between.
    start += len(data)

incident_df = pd.DataFrame(all_rows)

# Fail with a clear message instead of a KeyError on the column lookup below.
if incident_df.empty:
    raise ValueError("No rows were returned from the 'incident' table.")

# Rows explicitly flagged as not gang related (rows with a missing flag are excluded).
incident_no_gangs_df = incident_df[incident_df["Gang_Related"].eq(False)]

# Quick data validation on total row count.
len(incident_df)

3136

In [3]:
# Basic data validation: preview the first five rows to verify schema and key fields.
incident_df.head(n=5)

Unnamed: 0,Incident_ID,Month,Day,Year,Date,School,Victims_Killed,Victims_Wounded,Number_Victims,Shooter_Killed,...,Preplanned,SRO_School,Security_Screening,Screening_Outcome,Shots_Fired,School_Lockdown,LAT,LNG,Campus_Type,Zipcode
0,19660311NCIRC,3,11,1966,1966-03-11T00:00:00+00:00,Irwing Avenue Junior High School,0,1,1,0,...,No,,,,7,,35.237069,-80.850227,,28202
1,19660314TXCAW,3,14,1966,1966-03-14T00:00:00+00:00,Carver High School,1,0,1,0,...,No,Yes,Armed Guards,Outside/Off-Property,3,,31.57954,-97.130303,,76704
2,19660324CACAM,3,24,1966,1966-03-24T00:00:00+00:00,Camino Pablo Elementary School,3,0,3,1,...,No,,,Outside/Off-Property,99,,37.821031,-122.121426,,94575
3,19660328CAJOL,3,28,1966,1966-03-28T00:00:00+00:00,Jordan High School,0,1,1,0,...,No,Yes,Armed Guards,Outside/Off-Property,99,,33.944239,-118.23056,,90002
4,19660427NYBAB,4,27,1966,1966-04-27T00:00:00+00:00,Bay Shore High School,1,0,1,0,...,No,,,,1,,40.731985,-73.254938,,11706


In [4]:
# Yearly incident totals, used for high-level trend analyses.

# Make sure the grouping key is an integer year (updates incident_df in place).
incident_df["Year"] = incident_df["Year"].astype(int)

# Count rows per year, then turn the result into a two-column frame.
incidents_over_time = (
    incident_df
    .groupby("Year")
    .size()
    .rename("incident_count")
    .reset_index()
)

incidents_over_time.head()


Unnamed: 0,Year,incident_count
0,1966,9
1,1967,7
2,1968,10
3,1969,5
4,1970,20


In [5]:
# Keep rows in chronological order, with a fresh 0..n-1 index, so downstream plots animate correctly.
incidents_over_time = incidents_over_time.sort_values(by="Year", ignore_index=True)


## City-level patterns

In [7]:
# One row per city: total incidents plus a single representative coordinate.
#
# Grouping on the raw LAT/LNG values as well would split a city into one row
# per distinct coordinate pair and drop every row whose coordinates are
# missing, so incident_count would no longer be a per-city total. Instead,
# group by City/State only and place the city at the mean of its incident
# coordinates.
incidents_by_city = (
    incident_df
    .assign(
        # Coerce to numeric so the mean is well defined even if values arrive as text.
        LAT=lambda df: pd.to_numeric(df["LAT"], errors="coerce"),
        LNG=lambda df: pd.to_numeric(df["LNG"], errors="coerce"),
    )
    .groupby(["City", "State"], as_index=False)
    .agg(
        LAT=("LAT", "mean"),
        LNG=("LNG", "mean"),
        # "size" counts every row in the group, including rows without coordinates.
        incident_count=("LAT", "size"),
    )
    # A city with no usable coordinates cannot be placed on the map.
    .dropna(subset=["LAT", "LNG"])
    .reset_index(drop=True)
)


In [9]:
# Static bubble map of the US: one marker per row of incidents_by_city,
# sized by that row's incident count.
fig = px.scatter_geo(
    data_frame=incidents_by_city,
    title="School Shooting Incidents by City",
    scope="usa",
    lat="LAT",
    lon="LNG",
    size="incident_count",
)

fig.show()


In [12]:
# Incident counts per city and year, with one fixed coordinate per city.
#
# Grouping on the raw LAT/LNG values would split a city into one row per
# distinct coordinate pair, so incident_count would not be a per-city yearly
# total. Each row's coordinates are therefore first replaced by its city's mean
# coordinate (computed over all years, so the marker does not move between
# years), and only then are rows counted.
incidents_city_year = (
    incident_df
    .assign(
        # Coerce to numeric so the mean is well defined even if values arrive as text.
        LAT=lambda df: pd.to_numeric(df["LAT"], errors="coerce"),
        LNG=lambda df: pd.to_numeric(df["LNG"], errors="coerce"),
    )
    .assign(
        LAT=lambda df: df.groupby(["City", "State"])["LAT"].transform("mean"),
        LNG=lambda df: df.groupby(["City", "State"])["LNG"].transform("mean"),
    )
    # LAT/LNG are now constant within a city, so they no longer split groups;
    # they stay in the key list only to carry the columns into the result.
    .groupby(["City", "State", "LAT", "LNG", "Year"])
    .size()
    .reset_index(name="incident_count")
)


In [14]:
# Cast Year to int and order the rows chronologically for plotting over time.
incidents_city_year = (
    incidents_city_year
    .astype({"Year": int})
    .sort_values(by="Year")
)


In [15]:
# Largest single city/year incident count, kept as a shared upper bound for plot scaling.
zmax = incidents_city_year.loc[:, "incident_count"].max()


In [16]:
# Distinct years in ascending order, for use in animation controls.
year_order = sorted(pd.unique(incidents_city_year["Year"]))


In [18]:
# Animated bubble map of incidents per city, one frame per year, restricted to
# a recent window of years. `px` comes from the import cell at the top of the
# notebook, so it is not re-imported here.

# Inclusive year window; the plot title is built from the same values so the
# two cannot drift apart.
first_year, last_year = 2020, 2025

# Series.between is inclusive on both ends by default.
plot_df = incidents_city_year[
    incidents_city_year["Year"].between(first_year, last_year)
]

# Recomputed for the filtered window only.
year_order = sorted(plot_df["Year"].unique())
# NOTE(review): zmax is not passed to the figure below; kept in case later cells read it.
zmax = plot_df["incident_count"].max()

fig = px.scatter_geo(
    plot_df,
    lat="LAT",
    lon="LNG",
    size="incident_count",
    animation_frame="Year",
    category_orders={"Year": year_order},
    scope="usa",
    size_max=40,
    # The en dash is written as an escape so a file re-encoding cannot mangle it.
    title=f"School Shooting Incidents by City ({first_year}\u2013{last_year})",
)


fig.update_layout(
    transition={"duration": 400}
)

fig.show()


In [24]:
# Ten cities with the most incidents across the full dataset.
top_cities = (
    incident_df
    .groupby(["City", "State"])
    .size()
    .rename("incident_count")
    .reset_index()
    .sort_values(by="incident_count", ascending=False)
    .iloc[:10]
    .reset_index(drop=True)
)

# Shift the index so it reads as a 1-based rank.
top_cities.index = top_cities.index + 1
top_cities


Unnamed: 0,City,State,incident_count
1,Chicago,IL,93
2,Detroit,MI,57
3,Washington,DC,54
4,Los Angeles,CA,52
5,Philadelphia,PA,50
6,Baltimore,MD,49
7,Memphis,TN,47
8,Houston,TX,45
9,Dallas,TX,35
10,Jacksonville,FL,29


In [25]:
# Same ranking as the previous cell, restricted to incidents not flagged as
# gang related. This rebinds `top_cities`, replacing the all-incident ranking.
# NOTE(review): the recorded output for this cell shows 10 rows although 20 are
# requested here; re-run the cell to refresh it.
top_cities = (
    incident_no_gangs_df
    .groupby(["City", "State"])
    .size()
    .rename("incident_count")
    .reset_index()
    .sort_values(by="incident_count", ascending=False, ignore_index=True)
    .head(20)
)

# Shift the index so it reads as a 1-based rank.
top_cities.index = top_cities.index + 1
top_cities


Unnamed: 0,City,State,incident_count
1,Chicago,IL,37
2,Memphis,TN,31
3,Detroit,MI,30
4,Houston,TX,29
5,Baltimore,MD,29
6,Dallas,TX,19
7,Jacksonville,FL,18
8,Los Angeles,CA,18
9,St. Louis,MO,18
10,Washington,DC,16
