# Matching Shootings and Alerts

This notebook documents the process we used to determine whether or not each shooting had a corresponding ShotSpotter alert.

## Setup

In [1]:
import datetime as dt


import geopandas as gpd
import pandas as pd
import shapely
from shotspotter import settings

### Shooting Incident Data
We combine the homicide/nonfatal shooting victimization dataset with the reckless firearm discharge dataset to create our sample of shootings that _should_ have been detected by ShotSpotter.

We also convert the lat/long data into GeoPandas coordinates in two coordinate systems (lat/long and meters).

In [2]:
shooting_incidents = gpd.GeoDataFrame(
    pd.concat(
        [
            pd.read_csv(settings.DATA_DIR_PROCESSED / "shooting_victimizations_2023_2024.csv", parse_dates=["date_time"]),
            pd.read_csv(settings.DATA_DIR_PROCESSED / "reckless_firearm_discharges_2023_2024.csv", parse_dates=["date_time"]),
        ]
    )
    .assign(
        location=lambda df: gpd.points_from_xy(df["longitude"], df["latitude"], crs="EPSG:4326"),
        location_in_meters=lambda df: gpd.points_from_xy(df["longitude"], df["latitude"], crs="EPSG:4326").to_crs("EPSG:3857")
    )
    .set_index("id"),
    geometry="location",
)
shooting_incidents.head()

Unnamed: 0_level_0,case_number,date_time,latitude,longitude,type,place_description,police_district,location,location_in_meters
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SHOOT-JG134757-#1,JG134757,2023-01-30 13:13:00,41.749632,-87.664005,BATTERY,STREET,6,POINT (-87.66401 41.74963),POINT (-9758712.436 5123548.949)
SHOOT-JG446904-#2,JG446904,2023-10-01 22:44:00,41.827692,-87.680404,BATTERY,STREET,9,POINT (-87.6804 41.82769),POINT (-9760537.923 5135203.373)
HOM-JH175123-#1,JH175123,2024-03-04 13:19:00,41.844202,-87.705945,HOMICIDE,STREET,10,POINT (-87.70594 41.8442),POINT (-9763381.098 5137670.102)
HOM-JH175867-#1,JH175867,2024-03-04 23:50:00,41.887062,-87.755605,HOMICIDE,STREET,15,POINT (-87.75561 41.88706),POINT (-9768909.301 5144076.812)
HOM-JG484795-#1,JG484795,2023-10-30 17:04:00,41.73654,-87.57713,HOMICIDE,PARKING LOT,4,POINT (-87.57713 41.73654),POINT (-9749041.517 5121595.823)


We define a circular search area with a radius of 0.5 miles (or 804.7 meters) for each shooting in which to look for matching ShotSpotter alerts. 

In [3]:
shooting_incidents["search_area"] = (
    shooting_incidents["location_in_meters"]
    .buffer(settings.SEARCH_RADIUS_IN_METERS)
    .to_crs("EPSG:4326")
)

We also define a one-hour time window to restrict our search for matching ShotSpotter alerts.

In [4]:
shooting_incidents["time_window"] = shooting_incidents["date_time"].map(
    lambda x: pd.Interval(x - settings.TIME_WINDOW, x + settings.TIME_WINDOW)
)

### ShotSpotter Data
As with the shooting data, we convert the lat/long data into GeoPandas coordinates in two coordinate systems (lat/long and meters).

In [5]:
shotspotter_alerts_portal = gpd.GeoDataFrame(
    pd.read_csv(settings.DATA_DIR_PROCESSED / "shotspotter_alerts_2023_2024.csv", parse_dates=["date_time"], index_col="id")
    .assign(
        location=lambda df: gpd.points_from_xy(df["longitude"], df["latitude"], crs="EPSG:4326"),
        location_in_meters=lambda df: gpd.points_from_xy(df["longitude"], df["latitude"], crs="EPSG:4326").to_crs("EPSG:3857")
    ),
    geometry="location",
)
shotspotter_alerts_portal.head()

Unnamed: 0_level_0,date_time,latitude,longitude,type,location,location_in_meters
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SST-79100107733,2023-12-31 21:16:10,41.857749,-87.740013,MULTIPLE GUNSHOTS,POINT (-87.74001 41.85775),POINT (-9767173.531 5139694.621)
SST-173731,2023-03-03 06:39:09,41.808957,-87.683256,SINGLE GUNSHOT,POINT (-87.68326 41.80896),POINT (-9760855.412 5132404.97)
SST-193170,2023-05-29 21:15:32,41.79536,-87.648472,SINGLE GUNSHOT,POINT (-87.64847 41.79536),POINT (-9756983.219 5130374.544)
SST-265498,2023-07-10 04:30:20,41.797296,-87.676743,MULTIPLE GUNSHOTS,POINT (-87.67674 41.7973),POINT (-9760130.388 5130663.571)
SST-281932,2023-02-18 18:21:53,41.724744,-87.605491,SINGLE GUNSHOT,POINT (-87.60549 41.72474),POINT (-9752198.668 5119836.241)


We also group the alert data by date to make searching more efficient (omitting this step means that we have to search through $m \times n$ rows, where $m$ is the number of alerts and $n$ is the number of shootings).

In [6]:
shotspotter_alerts_portal_by_date = (
    shotspotter_alerts_portal
    .assign(date=lambda df: df["date_time"].dt.date)
    .groupby("date")
)

## Matching Shootings and Alerts

First, we define some helper functions to facilitate matching alerts and shootings:

In [7]:
def find_matching_alerts(
    alerts_by_date: pd.core.groupby.DataFrameGroupBy, 
    incident: pd.Series,
    min_date: dt.datetime = dt.datetime(2023, 1, 1),
    max_date: dt.datetime = dt.datetime(2024, 1, 1),
) -> pd.DataFrame:
    """Return a dataframe of all ShotSpotter alerts within the time window and search area.
    
    Parameters
    ----------
    alerts_by_date : pd.DataFrameGroupBy
        A dataframe grouped by date containing ShotSpotter alerts. Should contain a `date_time`
        column of type `dt.datetime` or `pd.Timestamp` and a `location` column of type `shapely.Point`.
    incident : pd.Series
        A row representing a single shooting incident. Should contain a `time_window` column of
        type `pd.Interval`, a `date_time` column of type `dt.datetime` and a `search_area`
        column of type `shapely.Polygon`.

    Returns
    -------
    pd.DataFrame
        A dataframe with all matching alerts, preserving columns from `alerts_df`.
    """
    
    matching_on_day = (
        alerts_by_date
        .get_group(
            dt.date(
                incident["date_time"].year,
                incident["date_time"].month,
                incident["date_time"].day,
            )
        )
        .loc[lambda df: df["date_time"].map(lambda x: x in incident["time_window"])]
        .loc[lambda df: df["location"].within(incident["search_area"])]
        .assign(detected_shot=incident.name)
    )
    if (
        (incident["date_time"].hour < 1)
        and (incident["date_time"] >= min_date + dt.timedelta(hours=2))
    ):
        return pd.concat(
            [
                matching_on_day,
                alerts_by_date
                .get_group(
                    dt.date(
                        incident["date_time"].year,
                        incident["date_time"].month,
                        incident["date_time"].day,
                    )
                    - dt.timedelta(days=1)
                )
                .loc[lambda df: df["date_time"].map(lambda x: x in incident["time_window"])]
                .loc[lambda df: df["location"].within(incident["search_area"])]
                .assign(detected_shot=incident.name),
            ]
        )
    if (
        (incident["date_time"].hour >= 23)
        and (incident["date_time"] <= max_date - dt.timedelta(hours=2))
    ):
        return pd.concat(
            [
                matching_on_day,
                alerts_by_date
                .get_group(
                    dt.date(
                        incident["date_time"].year,
                        incident["date_time"].month,
                        incident["date_time"].day,
                    )
                    + dt.timedelta(days=1)
                )
                .loc[lambda df: df["date_time"].map(lambda x: x in incident["time_window"])]
                .loc[lambda df: df["location"].within(incident["search_area"])]
                .assign(detected_shot=incident.name),
            ]
        )
    return matching_on_day
                

def get_matching_alert(matching_alerts_df: pd.DataFrame, incident: pd.Series) -> pd.Series:
    """Find the alert where the detected shot matches the incident id and calculate the distance in meters.

    Parameters
    ----------
    matching_alerts_df : pd.DataFrame
        A dataframe of alert data. Must have `detected_shot` and `location_in_meters` columns.
    incident : pd.Series
        The incident data. Must have `id` and `location_in_meters` indices.

    Returns
    -------
    pd.Series
        The matching alert. If no match, just return the incident ID.
    """
    try:
        return (
            matching_alerts_df
            .loc[matching_alerts_df["detected_shot"] == incident["id"]]
            .reset_index()
            .assign(distance_to_alert_in_meters=lambda df: df["location_in_meters"].distance(incident["location_in_meters"]))
            .loc[
                lambda df: df["location_in_meters"]
                .distance(incident["location_in_meters"]).idxmin()
            ]
        )
    except ValueError:
        return pd.Series({"detected_shot": incident["id"]})

Then, we find matching alerts for each shooting:

In [8]:
matching_alerts_portal = pd.concat(
    shooting_incidents
    .apply(
        lambda incident: find_matching_alerts(
            shotspotter_alerts_portal_by_date, 
            incident,
        ), 
        axis=1,
    )
    .to_list()
)
matching_alerts_portal.head()

Unnamed: 0_level_0,date_time,latitude,longitude,type,location,location_in_meters,date,detected_shot
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SST-387399,2023-01-30 13:12:52,41.750466,-87.663813,MULTIPLE GUNSHOTS,POINT (-87.66381 41.75047),POINT (-9758690.997 5123673.501),2023-01-30,SHOOT-JG134757-#1
SST-35000300487,2023-10-01 22:44:19,41.828074,-87.681359,MULTIPLE GUNSHOTS,POINT (-87.68136 41.82807),POINT (-9760644.215 5135260.45),2023-10-01,SHOOT-JG446904-#2
SST-79100114753,2024-03-04 13:18:36,41.844374,-87.705963,MULTIPLE GUNSHOTS,POINT (-87.70596 41.84437),POINT (-9763383.103 5137695.883),2024-03-04,HOM-JH175123-#1
SST-281526,2023-02-11 02:31:03,41.73994,-87.623903,MULTIPLE GUNSHOTS,POINT (-87.6239 41.73994),POINT (-9754248.307 5122102.918),2023-02-11,HOM-JG148375-#1
SST-439705,2023-02-19 15:43:09,41.750848,-87.564949,MULTIPLE GUNSHOTS,POINT (-87.56495 41.75085),POINT (-9747685.538 5123730.398),2023-02-19,HOM-JG158505-#1


Then, we merge the alert data into the shooting dataset:

In [9]:
matched_alerts_portal = (
    shooting_incidents
    .reset_index()
    .apply(lambda x: get_matching_alert(matching_alerts_portal, x), axis=1)
    .set_index("detected_shot")
    .rename(columns={"id": "id_alert"})
)
matched_alerts_portal.head()

Unnamed: 0_level_0,date,date_time,distance_to_alert_in_meters,id_alert,latitude,location,location_in_meters,longitude,type
detected_shot,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SHOOT-JG134757-#1,2023-01-30,2023-01-30 13:12:52,126.383402,SST-387399,41.750466,POINT (-87.66381 41.75047),POINT (-9758690.99744008 5123673.500582644),-87.663813,MULTIPLE GUNSHOTS
SHOOT-JG446904-#2,2023-10-01,2023-10-01 22:44:19,120.64696,SST-35000300487,41.828074,POINT (-87.68136 41.82807),POINT (-9760644.214568872 5135260.450351795),-87.681359,MULTIPLE GUNSHOTS
HOM-JH175123-#1,2024-03-04,2024-03-04 13:18:36,25.859151,SST-79100114753,41.844374,POINT (-87.70596 41.84437),POINT (-9763383.103290344 5137695.883365209),-87.705963,MULTIPLE GUNSHOTS
HOM-JH175867-#1,,NaT,,,,,,,
HOM-JG484795-#1,,NaT,,,,,,,


In case it isn't clear why we did these two steps: The first step adds a `detected_shot` column to the alerts dataset, while the second set effectively reindexes this data using `detected_shot` as the index so that we can merge it with the shooting dataset.

Now we can actually merge the two datasets, calculating a `detected` column to determine whether each shooting has a corresponding alert.

In [10]:
shooting_incidents_matched_portal = (
    shooting_incidents.join(matched_alerts_portal, rsuffix="_alert")
    .assign(detected=lambda df: ~df["id_alert"].isna())
)
shooting_incidents_matched_portal.head()

Unnamed: 0_level_0,case_number,date_time,latitude,longitude,type,place_description,police_district,location,location_in_meters,search_area,...,date,date_time_alert,distance_to_alert_in_meters,id_alert,latitude_alert,location_alert,location_in_meters_alert,longitude_alert,type_alert,detected
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SHOOT-JG134757-#1,JG134757,2023-01-30 13:13:00,41.749632,-87.664005,BATTERY,STREET,6,POINT (-87.66401 41.74963),POINT (-9758712.436 5123548.949),"POLYGON ((-87.65678 41.74963, -87.65681 41.749...",...,2023-01-30,2023-01-30 13:12:52,126.383402,SST-387399,41.750466,POINT (-87.66381 41.75047),POINT (-9758690.99744008 5123673.500582644),-87.663813,MULTIPLE GUNSHOTS,True
SHOOT-JG446904-#2,JG446904,2023-10-01 22:44:00,41.827692,-87.680404,BATTERY,STREET,9,POINT (-87.6804 41.82769),POINT (-9760537.923 5135203.373),"POLYGON ((-87.67318 41.82769, -87.67321 41.827...",...,2023-10-01,2023-10-01 22:44:19,120.64696,SST-35000300487,41.828074,POINT (-87.68136 41.82807),POINT (-9760644.214568872 5135260.450351795),-87.681359,MULTIPLE GUNSHOTS,True
HOM-JH175123-#1,JH175123,2024-03-04 13:19:00,41.844202,-87.705945,HOMICIDE,STREET,10,POINT (-87.70594 41.8442),POINT (-9763381.098 5137670.102),"POLYGON ((-87.69872 41.8442, -87.69875 41.8436...",...,2024-03-04,2024-03-04 13:18:36,25.859151,SST-79100114753,41.844374,POINT (-87.70596 41.84437),POINT (-9763383.103290344 5137695.883365209),-87.705963,MULTIPLE GUNSHOTS,True
HOM-JH175867-#1,JH175867,2024-03-04 23:50:00,41.887062,-87.755605,HOMICIDE,STREET,15,POINT (-87.75561 41.88706),POINT (-9768909.301 5144076.812),"POLYGON ((-87.74838 41.88706, -87.74841 41.886...",...,,NaT,,,,,,,,False
HOM-JG484795-#1,JG484795,2023-10-30 17:04:00,41.73654,-87.57713,HOMICIDE,PARKING LOT,4,POINT (-87.57713 41.73654),POINT (-9749041.517 5121595.823),"POLYGON ((-87.5699 41.73654, -87.56994 41.7360...",...,,NaT,,,,,,,,False


## Saving

In [11]:
shooting_incidents_matched_portal.to_csv(
    settings.DATA_DIR_PUBLIC / "matched_shootings_alerts_2023_2024.csv"
)