In [1]:
# Cell 2 — imports and setup
import os
import pandas as pd
from supabase import create_client
from dotenv import load_dotenv
from pathlib import Path
import plotly.express as px

# Resolve project root (one level above /notebooks).
# NOTE(review): this relies on the kernel's working directory being the
# notebooks folder — confirm when running from elsewhere.
project_root = Path.cwd().parent
env_path = project_root / ".env"

# override=True lets values from the .env file replace variables that are
# already set in the process environment.
load_dotenv(dotenv_path=env_path, override=True)

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Fail fast with a readable message: os.getenv returns None for unset
# variables, which would otherwise surface later as an opaque client error
# or a TypeError when slicing the key below.
missing_vars = [
    var_name
    for var_name, var_value in (
        ("SUPABASE_URL", SUPABASE_URL),
        ("SUPABASE_KEY", SUPABASE_KEY),
    )
    if not var_value
]
if missing_vars:
    raise RuntimeError(
        f"Missing required setting(s): {', '.join(missing_vars)}. "
        f"Define them in the environment or in {env_path}."
    )

supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

# Optional sanity check. Only the first five characters are shown so the
# full key never ends up in the saved notebook output.
print("Loaded key starts with:", SUPABASE_KEY[:5])


Loaded key starts with: eyJhb


## INCIDENTS

In [6]:
# Page through the incident table in 1000-row chunks to avoid timeouts and memory spikes.
page_size = 1000
# Accumulate rows across pages until Supabase returns an empty page.
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("incident")
        .select("*")
        .order("Incident_ID")   # fixed sort order so page boundaries stay consistent between requests
        .range(start, start + page_size - 1)   # both bounds are inclusive
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by
    # page_size: if the API caps a response below page_size, stepping a full
    # page would silently skip the rows in between.
    start += len(data)

incident_df = pd.DataFrame(all_rows)

# Quick data validation on total row count.
print(len(incident_df))
print(incident_df.head(2))

3136
     Incident_ID  Month  Day  Year                       Date  \
0  19660311NCIRC      3   11  1966  1966-03-11T00:00:00+00:00   
1  19660314TXCAW      3   14  1966  1966-03-14T00:00:00+00:00   

                             School Victims_Killed Victims_Wounded  \
0  Irwing Avenue Junior High School              0               1   
1                Carver High School              1               0   

  Number_Victims Shooter_Killed  ... Preplanned SRO_School Security_Screening  \
0              1              0  ...         No                                 
1              1              0  ...         No        Yes       Armed Guards   

      Screening_Outcome Shots_Fired School_Lockdown        LAT         LNG  \
0                                 7                  35.237069  -80.850227   
1  Outside/Off-Property           3                   31.57954  -97.130303   

  Campus_Type Zipcode  
0               28202  
1               76704  

[2 rows x 50 columns]


## SHOOTER

In [9]:
# Page through the shooter table in 1000-row chunks to avoid timeouts and memory spikes.
page_size = 1000
# Accumulate rows across pages until Supabase returns an empty page.
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("shooter")
        .select("*")
        # A fixed sort order is needed so page boundaries stay consistent
        # between requests.
        # NOTE(review): Incident_ID does not look unique in this table
        # (several rows can share one value), so ordering on it alone may not
        # fully pin down row order across pages — consider adding a unique
        # tie-breaker column. TODO confirm against the table schema.
        .order("Incident_ID")
        .range(start, start + page_size - 1)   # both bounds are inclusive
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by
    # page_size: if the API caps a response below page_size, stepping a full
    # page would silently skip the rows in between.
    start += len(data)

shooter_df = pd.DataFrame(all_rows)

# Quick data validation on total row count.
print(len(shooter_df))
print(shooter_df.head(2))

3542
  Incident_ID Age Gender Race School_Affiliation Shooter_Outcome Shooter_Died  \
0                                                                               
1                                                                               

  Injury  
0         
1         


## VICTIM

In [11]:
# Page through the victim table in 1000-row chunks to avoid timeouts and memory spikes.
page_size = 1000
# Accumulate rows across pages until Supabase returns an empty page.
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("victim")
        .select("*")
        # A fixed sort order is needed so page boundaries stay consistent
        # between requests.
        # NOTE(review): presumably one incident can have several victim rows,
        # so Incident_ID alone may not fully pin down row order across pages —
        # consider adding a unique tie-breaker column. TODO confirm against
        # the table schema.
        .order("Incident_ID")
        .range(start, start + page_size - 1)   # both bounds are inclusive
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by
    # page_size: if the API caps a response below page_size, stepping a full
    # page would silently skip the rows in between.
    start += len(data)

victim_df = pd.DataFrame(all_rows)

# Quick data validation on total row count.
print(len(victim_df))
print(victim_df.head(2))

8370
     Incident_ID   Injury Gender School_Affiliation Age   Race
0  19660311NCIRC  Wounded   Male            Student  13    NaN
1  19660314TXCAW    Fatal   Male        No Relation  24  Black


## WEAPON

In [16]:
# Page through the weapon table in 1000-row chunks to avoid timeouts and memory spikes.
page_size = 1000
# Accumulate rows across pages until Supabase returns an empty page.
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("weapon")
        .select("*")
        # A fixed sort order is needed so page boundaries stay consistent
        # between requests.
        # NOTE(review): presumably one incident can have several weapon rows,
        # so Incident_ID alone may not fully pin down row order across pages —
        # consider adding a unique tie-breaker column. TODO confirm against
        # the table schema.
        .order("Incident_ID")
        .range(start, start + page_size - 1)   # both bounds are inclusive
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by
    # page_size: if the API caps a response below page_size, stepping a full
    # page would silently skip the rows in between.
    start += len(data)

weapon_df = pd.DataFrame(all_rows)

# Quick data validation on total row count.
print(len(weapon_df))
print(weapon_df.head(2))

3168
     Incident_ID Weapon_Type  Weapon_Caliber Weapon_Details
0  19660311NCIRC     Handgun     .22 caliber            NaN
1  19660314TXCAW     Handgun  Service Weapon            NaN


## 1987-1995 School Enrollment

In [18]:
# Page through the "1987-1995 School Enrollment" table in 1000-row chunks to
# avoid timeouts and memory spikes.
page_size = 1000
# Accumulate rows across pages until Supabase returns an empty page.
all_rows = []
start = 0

while True:
    response = (
        supabase
        .table("1987-1995 School Enrollment")
        .select("*")
        # A fixed sort order is needed so page boundaries stay consistent
        # between requests.
        # NOTE(review): assumes School_ID is unique per row — TODO confirm.
        .order("School_ID")
        .range(start, start + page_size - 1)   # both bounds are inclusive
        .execute()
    )

    data = response.data
    if not data:
        break

    all_rows.extend(data)
    # Advance by the number of rows actually received rather than by
    # page_size: if the API caps a response below page_size, stepping a full
    # page would silently skip the rows in between.
    start += len(data)

enrollment_1987_95_df = pd.DataFrame(all_rows)

# Quick data validation on total row count.
print(len(enrollment_1987_95_df))
print(enrollment_1987_95_df.head(2))

98034
                        School Name    State    School_ID Enrollment 1995  \
0  SEQUOYAH SCH - CHALKVILLE CAMPUS  Alabama  10000200277             123   
1       CHALKVILLE CAMPUS -SEQUOYAH  Alabama  10000201704               †   

  Enrollment 1994 Enrollment 1993 Enrollment 1992 Enrollment 1991  \
0               †               †               †               †   
1               †               †               †             104   

  Enrollment 1990 Enrollment 1989 Enrollment 1988 Enrollment 1987  
0               †               †               †               †  
1             115             121             119             129  
