In [1]:
# Cell 2 — imports and setup
import os
import pandas as pd
from supabase import create_client
from dotenv import load_dotenv
import plotly.express as px

# Load variables from a local .env file (if one exists) into the process
# environment before reading the Supabase settings.
load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# os.getenv returns None for unset variables; fail fast with a message that
# names what is missing instead of handing None to create_client.
_missing_settings = [
    name
    for name, value in (("SUPABASE_URL", SUPABASE_URL), ("SUPABASE_KEY", SUPABASE_KEY))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Set them in the environment or in a .env file."
    )

# Shared client used by every query in this notebook.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)


In [2]:
import pandas as pd

# Download the whole "incident" table in fixed-size windows, stopping at the
# first request that comes back empty.
page_size = 1000
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("incident")
        .select("*")
        .order("Incident_ID")   # REQUIRED for stable pagination
        .range(start, start + page_size - 1)
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by
    # page_size: if the server returns fewer rows than requested (for example
    # because of a server-side row cap), stepping a full page would silently
    # skip the rows in between.
    start += len(data)

# Build the frame once from the accumulated records.
incident_df = pd.DataFrame(all_rows)

len(incident_df)


3136

In [3]:
# Cell 4 — basic sanity checks
# Show the first rows of the downloaded table so the columns and sample
# values can be inspected by eye.
incident_df.head()


Unnamed: 0,Incident_ID,Month,Day,Year,Date,School,Victims_Killed,Victims_Wounded,Number_Victims,Shooter_Killed,...,Preplanned,SRO_School,Security_Screening,Screening_Outcome,Shots_Fired,School_Lockdown,LAT,LNG,Campus_Type,Zipcode
0,19660311NCIRC,3,11,1966,1966-03-11T00:00:00+00:00,Irwing Avenue Junior High School,0,1,1,0,...,No,,,,7,,35.237069,-80.850227,,28202
1,19660314TXCAW,3,14,1966,1966-03-14T00:00:00+00:00,Carver High School,1,0,1,0,...,No,Yes,Armed Guards,Outside/Off-Property,3,,31.57954,-97.130303,,76704
2,19660324CACAM,3,24,1966,1966-03-24T00:00:00+00:00,Camino Pablo Elementary School,3,0,3,1,...,No,,,Outside/Off-Property,99,,37.821031,-122.121426,,94575
3,19660328CAJOL,3,28,1966,1966-03-28T00:00:00+00:00,Jordan High School,0,1,1,0,...,No,Yes,Armed Guards,Outside/Off-Property,99,,33.944239,-118.23056,,90002
4,19660427NYBAB,4,27,1966,1966-04-27T00:00:00+00:00,Bay Shore High School,1,0,1,0,...,No,,,,1,,40.731985,-73.254938,,11706


In [4]:
import pandas as pd

# Make sure the year column is integer-typed before it is used as a group key.
incident_df["Year"] = incident_df["Year"].astype(int)

# Count the rows that fall in each year, then turn the result into a
# two-column frame: Year, incident_count.
yearly_sizes = incident_df.groupby("Year").size()
incidents_over_time = yearly_sizes.reset_index(name="incident_count")

incidents_over_time.head(50)


Unnamed: 0,Year,incident_count
0,1966,9
1,1967,7
2,1968,10
3,1969,5
4,1970,20
5,1971,21
6,1972,18
7,1973,19
8,1974,16
9,1975,15


In [5]:
# Number of rows currently held in the incident table.
len(incident_df)


3136

In [6]:
# Put the yearly totals in chronological order and replace the index with a
# fresh 0..n-1 range (the old index is discarded, not kept as a column).
incidents_over_time = incidents_over_time.sort_values("Year").reset_index(drop=True)


In [7]:
# Every year between the earliest and latest incident should have a row in
# the yearly summary; whatever is displayed below is the set of gaps.
first_year = incident_df["Year"].min()
last_year = incident_df["Year"].max()

expected_years = set(range(first_year, last_year + 1))
actual_years = set(incidents_over_time["Year"])

expected_years.difference(actual_years)


set()

In [12]:
import plotly.express as px

# Animated view of the yearly totals.
#
# Using the x column itself as `animation_frame` puts exactly one row in each
# frame, so every frame shows a single isolated marker and no line is drawn;
# frames for years outside the fixed x-axis window show nothing at all.
# Instead, each frame receives every row up to and including its own year
# (restricted to the plotted window), so the line grows as the slider moves.
ANIMATION_FIRST_YEAR, ANIMATION_LAST_YEAR = 2017, 2025

animation_window = (
    incidents_over_time[
        incidents_over_time["Year"].between(ANIMATION_FIRST_YEAR, ANIMATION_LAST_YEAR)
    ]
    .sort_values("Year")
)

# One partial copy of the series per frame, tagged with that frame's year.
# NOTE(review): assumes at least one year falls inside the window; pd.concat
# raises on an empty list otherwise.
animation_frames = pd.concat(
    [
        animation_window[animation_window["Year"] <= frame_year].assign(frame_year=frame_year)
        for frame_year in animation_window["Year"]
    ],
    ignore_index=True,
)

fig = px.line(
    animation_frames,
    x="Year",
    y="incident_count",
    animation_frame="frame_year",
    labels={"frame_year": "Year"},  # keep the slider caption reading "Year=..."
    markers=True,
    title="School Shooting Incidents Over Time"
)

fig.update_layout(
    xaxis_title="Year",
    yaxis_title="Number of Incidents",
    transition={"duration": 300},
    # Fixed axes so the view does not rescale between frames.
    xaxis=dict(range=[ANIMATION_FIRST_YEAR, ANIMATION_LAST_YEAR]),
    yaxis=dict(range=[0, 450])
)

fig.show()


In [17]:
# Static yearly trend, with the view limited to 1979–2020.
fig = px.line(
    data_frame=incidents_over_time,
    x="Year",
    y="incident_count",
    title="Number of School Shooting Incidents by Year",
    markers=True,
)

# Axis captions and visible ranges, grouped per axis.
fig.update_layout(
    xaxis={"title": {"text": "Year"}, "range": [1979, 2020]},
    yaxis={"title": {"text": "Number of Incidents"}, "range": [0, 150]},
)

fig.show()



In [20]:
# Static yearly trend, with the view limited to 2016–2019.
fig = px.line(
    data_frame=incidents_over_time,
    x="Year",
    y="incident_count",
    title="Number of School Shooting Incidents by Year",
    markers=True,
)

# Axis captions and visible ranges, grouped per axis.
fig.update_layout(
    xaxis={"title": {"text": "Year"}, "range": [2016, 2019]},
    yaxis={"title": {"text": "Number of Incidents"}, "range": [0, 200]},
)

fig.show()

In [19]:
# Static yearly trend, with the view limited to 2020–2025.
fig = px.line(
    data_frame=incidents_over_time,
    x="Year",
    y="incident_count",
    title="Number of School Shooting Incidents by Year",
    markers=True,
)

# Axis captions and visible ranges, grouped per axis.
fig.update_layout(
    xaxis={"title": {"text": "Year"}, "range": [2020, 2025]},
    yaxis={"title": {"text": "Number of Incidents"}, "range": [0, 375]},
)

fig.show()



In [22]:
# Assemble a proper datetime column from the separate Year / Month / Day
# columns. pandas expects the parts to be named year / month / day.
date_parts = incident_df[["Year", "Month", "Day"]].rename(
    columns={"Year": "year", "Month": "month", "Day": "day"}
)

# errors="coerce": combinations that do not form a valid date become NaT
# instead of raising.
incident_df["Incident_Date"] = pd.to_datetime(date_parts, errors="coerce")


In [23]:
# Reduce each incident date to its calendar month and store it as text.
# NOTE(review): rows whose date could not be parsed presumably end up as the
# literal string "NaT" here — confirm before relying on this column.
incident_month = incident_df["Incident_Date"].dt.to_period("M")
incident_df["YearMonth"] = incident_month.astype(str)


In [24]:
# Number of incidents per calendar month, as a two-column frame:
# YearMonth, incident_count.
monthly_sizes = incident_df.groupby("YearMonth").size()
incidents_monthly = monthly_sizes.reset_index(name="incident_count")


In [27]:
# Restrict the monthly counts to 2020-01 .. 2022-12.
#
# The grouped counts only contain months in which at least one incident was
# recorded, so a plain filter would leave quiet months out entirely and the
# line chart would jump over them. Reindexing against the complete list of
# months keeps every month in the window and reports 0 for the missing ones.
# Re-running this cell gives the same result.
MONTHLY_WINDOW_START, MONTHLY_WINDOW_END = "2020-01", "2022-12"

# "YYYY-MM" labels for every month in the window, in chronological order.
window_months = pd.period_range(
    MONTHLY_WINDOW_START, MONTHLY_WINDOW_END, freq="M"
).astype(str)

incidents_monthly = (
    incidents_monthly
    .set_index("YearMonth")["incident_count"]
    .reindex(window_months, fill_value=0)
    .rename_axis("YearMonth")
    .reset_index()
)


In [31]:
# Month-by-month trend for the selected window.
fig = px.line(
    data_frame=incidents_monthly,
    x="YearMonth",
    y="incident_count",
    title="Number of School Shooting Incidents by Month",
    markers=True,
)

# Axis captions plus a fixed vertical range, grouped per axis.
fig.update_layout(
    xaxis={"title": {"text": "Year–Month"}},
    yaxis={"title": {"text": "Number of Incidents"}, "range": [0, 60]},
)

fig.show()


In [39]:
# Total number of incidents per state, as a two-column frame:
# State, incident_count.
state_sizes = incident_df.groupby("State").size()
incidents_by_state = state_sizes.reset_index(name="incident_count")


In [40]:
import plotly.express as px

# US map shaded by each state's total incident count.
# NOTE(review): locationmode "USA-states" presumably needs two-letter state
# codes in the State column — confirm against the data.
state_map_options = {
    "locations": "State",
    "locationmode": "USA-states",
    "color": "incident_count",
    "scope": "usa",
    "color_continuous_scale": "Reds",
    "title": "School Shooting Incidents by State",
}

fig = px.choropleth(incidents_by_state, **state_map_options)

fig.show()


In [None]:
# Incident counts for every (State, Year) pair present in the data, as a
# three-column frame: State, Year, incident_count.
state_year_sizes = incident_df.groupby(["State", "Year"]).size()
incidents_state_year = state_year_sizes.reset_index(name="incident_count")


In [48]:
# Force Year to an integer dtype and order the rows chronologically.
incidents_state_year = (
    incidents_state_year
    .assign(Year=incidents_state_year["Year"].astype(int))
    .sort_values("Year")
)


In [46]:
# Largest single state-year incident count across the whole table.
# NOTE(review): zmax is assigned again in the plotting cell further down
# before it is used, so this value may be a leftover — confirm.
zmax = incidents_state_year["incident_count"].max()


In [49]:
# Distinct years in ascending order.
# NOTE(review): year_order is assigned again in the plotting cell further
# down before it is used, so this value may be a leftover — confirm.
year_order = sorted(incidents_state_year["Year"].unique())


In [53]:
import plotly.express as px

# Per-year state map for 2020–2025, animated over the years.
in_recent_window = incidents_state_year["Year"].between(2020, 2025)
plot_df = incidents_state_year[in_recent_window]

# Frame order for the animation, and the top of the shared colour scale.
year_order = sorted(plot_df["Year"].unique())
zmax = plot_df["incident_count"].max()

fig = px.choropleth(
    data_frame=plot_df,
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color="incident_count",
    color_continuous_scale="Reds",
    # One fixed colour range so shades are comparable between frames.
    range_color=[0, zmax],
    animation_frame="Year",
    category_orders={"Year": year_order},
    title="School Shooting Incidents by State (Gradient Over Time)",
)

fig.update_layout(
    transition=dict(duration=400),
    coloraxis=dict(colorbar=dict(title="Incidents")),
)

fig.show()


In [54]:
# Incident counts per (State, Year), used as the basis for running totals.
#
# A plain group-by only yields the (State, Year) pairs that have at least one
# incident. Any per-state running total built on that would have no row for a
# state in the years it recorded nothing, so the state would drop out of
# those animation frames. The counts are therefore expanded to the complete
# State x Year grid, with 0 for the pairs that have no incidents.
incident_year = incident_df["Year"].astype(int)
state_year_sizes = incident_df.groupby(["State", incident_year]).size()

if not state_year_sizes.empty:
    # Every state seen in the data crossed with every year from the first to
    # the last incident year.
    full_state_year_grid = pd.MultiIndex.from_product(
        [
            state_year_sizes.index.get_level_values("State").unique(),
            range(int(incident_year.min()), int(incident_year.max()) + 1),
        ],
        names=["State", "Year"],
    )
    state_year_sizes = state_year_sizes.reindex(full_state_year_grid, fill_value=0)

incidents_state_year = state_year_sizes.reset_index(name="incident_count")

incidents_state_year["Year"] = incidents_state_year["Year"].astype(int)
# Chronological order within each state, so a per-state cumulative sum
# accumulates forward in time.
incidents_state_year = incidents_state_year.sort_values(["State", "Year"])


In [55]:
# Running total of incidents within each state, following the current row
# order of the table.
counts_by_state = incidents_state_year.groupby("State")["incident_count"]
incidents_state_year["cumulative_incidents"] = counts_by_state.cumsum()


In [56]:
# Keep only the rows for 1979 through 2025 (both ends included).
in_historical_window = incidents_state_year["Year"].between(1979, 2025)
plot_df = incidents_state_year[in_historical_window]


In [57]:
# Animation frame order: the distinct years of the plotted rows, ascending.
years_in_plot = plot_df["Year"].unique()
year_order = sorted(years_in_plot)

# Upper bound for the colour scale: the largest running total on the map.
zmax = plot_df["cumulative_incidents"].max()


In [58]:
import plotly.express as px

# Animated US map of each state's running incident total, one frame per year.
fig = px.choropleth(
    data_frame=plot_df,
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color="cumulative_incidents",
    color_continuous_scale="Reds",
    # One fixed colour range so shades are comparable between frames.
    range_color=[0, zmax],
    animation_frame="Year",
    category_orders={"Year": year_order},
    title="Cumulative School Shooting Incidents by State (Historical View)",
)

fig.update_layout(
    transition=dict(duration=400),
    coloraxis=dict(colorbar=dict(title="Cumulative Incidents")),
)

fig.show()
