# Generate insights from data 

## map 1 - 2025 stop & search and lfr deployments 
- % of lfr deployments in LSOAs in the top 10% of s&s incidence 
- % of lfr deployments in LSOAs in the top 20% of s&s incidence 

## map 2 - s&s change over time 
- % of lfr deployments in LSOAs where stop and search is rising 
- % of lfr deployments in LSOAs where stop and search remains the same 
- % of lfr deployments in LSOAs where stop and search is falling 

## map 3 - IMD deciles and lfr 
- % of lfr deployments in highest decile 
- % of lfr deployments in top 3 deciles 

In [41]:
from pathlib import Path 
import geopandas as gpd 

# Location of the processed GeoPackage that is read below.
data_path = Path('../data/processed/imd_dep_london.gpkg')

# Read the file with geopandas; the result is the starting table for the notebook.
complete_gdf = gpd.read_file(data_path)

In [42]:
# Show which columns the loaded dataset provides.
complete_gdf.columns

Index(['LSOA11CD', 'LSOA11NM', 'MSOA11CD', 'MSOA11NM', 'LAD11CD', 'LAD11NM',
       'RGN11CD', 'RGN11NM', 'USUALRES', 'HHOLDRES', 'COMESTRES', 'POPDEN',
       'HHOLDS', 'AVHHOLDSZ', 'stop_search_count_2025', 'stop_search_c_2023',
       'abs_change', 'Index of Multiple Deprivation (IMD) Decile', 'geometry'],
      dtype='object')

In [43]:
# Slim dataset: keep only the columns listed here and drop everything else
# that came with the full layer.
stats_columns = [
    'LSOA11CD',
    'stop_search_count_2025',
    'stop_search_c_2023',
    'abs_change',
    'Index of Multiple Deprivation (IMD) Decile',
    'geometry',
]

stats_gdf = complete_gdf[stats_columns]

# Show the columns that remain after slimming.
stats_gdf.columns

Index(['LSOA11CD', 'stop_search_count_2025', 'stop_search_c_2023',
       'abs_change', 'Index of Multiple Deprivation (IMD) Decile', 'geometry'],
      dtype='object')

In [44]:
# Load the LFR deployment layer from its processed GeoPackage.
# (This cell only reads the file; no grouping by LSOA happens here.)

lfr_path = Path('../data/processed/lfr_deployments.gpkg')

lfr_gdf = gpd.read_file(lfr_path)

# Show which columns the LFR layer provides.
lfr_gdf.columns

Index(['Deployment Location', 'Date', 'Duration', 'LFR Use Case',
       'Watchlist Size', 'Min Threshold Setting', 'Total Alerts',
       'True Alerts Confirmed', 'True Alerts Unconfirmed',
       'False Alerts Confirmed', 'False Alerts Unconfirmed',
       'False Alert Rate', 'Outcome - Arrest', 'Outcome - Other', 'No action',
       'Faces seen (estimate)', 'latitude', 'longitude', 'geometry'],
      dtype='object')

In [45]:
# Spatial join: give every LFR deployment the LSOA11CD of the LSOA polygon it
# lies within. Because this is a left join, deployments that fall in no
# polygon are kept, just without an LSOA code.
# NOTE(review): assumes both layers use the same CRS -- confirm.

lsoa_polygons = stats_gdf[['LSOA11CD', 'geometry']]

lfr_joined = gpd.sjoin(lfr_gdf, lsoa_polygons, how='left', predicate='within')


In [46]:
# Count the LFR deployments that landed in each LSOA and lay the result out
# as a two-column table: LSOA11CD and lfr_count.

deployments_per_lsoa = lfr_joined.groupby("LSOA11CD").size()

lfr_counts = deployments_per_lsoa.rename("lfr_count").reset_index()

In [47]:
# Create the single unified dataset: bring the per-LSOA deployment counts
# onto the LSOA table. The left merge keeps every LSOA; those with no entry
# in lfr_counts get a missing lfr_count, which is replaced by 0.

stats_gdf = (
    stats_gdf
    .merge(lfr_counts, on='LSOA11CD', how='left')
    .assign(lfr_count=lambda merged: merged['lfr_count'].fillna(0))
)

In [48]:
# Show the columns of the merged dataset.
stats_gdf.columns

Index(['LSOA11CD', 'stop_search_count_2025', 'stop_search_c_2023',
       'abs_change', 'Index of Multiple Deprivation (IMD) Decile', 'geometry',
       'lfr_count'],
      dtype='object')

In [54]:
# Map 1 data.
# Share of LFR deployments located in LSOAs whose 2025 stop & search count is
# at or above the 0.9 quantile of that column (the "top 10%" of LSOAs).

stop_search_2025 = stats_gdf['stop_search_count_2025']
top_10_stop_search = stop_search_2025.quantile(0.9)

# Boolean mask of the LSOAs that reach the threshold.
in_top_10 = stop_search_2025.ge(top_10_stop_search)
high_stop_search_lsoas = stats_gdf[in_top_10]

# Deployments inside those LSOAs, as a percentage of all counted deployments.
lfr_in_top_10 = high_stop_search_lsoas['lfr_count'].sum()
all_lfr = stats_gdf['lfr_count'].sum()
pct_in_top_10 = lfr_in_top_10 / all_lfr * 100

print(f"The top 10% of LSOAs experienced above {round(top_10_stop_search, 2)} stop & search incidents")

print(f"{round(pct_in_top_10, 2)}% of lfr deployments are in LSOAs that experience the top 10% of Jan-Nov 2025 Stop & Search incidents in London")

The top 10% of LSOAs experienced above 53.6 stop & search incidents
74.35% of lfr deployments are in LSOAs that experience the top 10% of Jan-Nov 2025 Stop & Search incidents in London


In [50]:
# Map 1 data.
# Share of LFR deployments located in LSOAs whose 2025 stop & search count is
# at or above the 0.8 quantile of that column (the "top 20%" of LSOAs).

top_20_stop_search = stats_gdf['stop_search_count_2025'].quantile(0.8)

# LSOAs that reach the threshold.
high_stop_search_lsoas = stats_gdf[stats_gdf['stop_search_count_2025'] >= top_20_stop_search]

lfr_in_top_20 = high_stop_search_lsoas['lfr_count'].sum()

# Deployments inside those LSOAs, as a percentage of all counted deployments.
pct_in_top_20 = lfr_in_top_20 / stats_gdf['lfr_count'].sum() * 100

# The message names the top 20% so that it matches the 0.8 quantile used above.
print(f"{round(pct_in_top_20, 2)}% of lfr deployments are in LSOAs that experience the top 20% of Jan-Nov 2025 Stop & Search incidents in London")

84.35% of lfr deployments are in LSOAs that experience the top 10% of Jan-Nov 2025 Stop & Search incidents in London


In [51]:
# Map 2 - s&s change over time
# - % of lfr deployments in LSOAs where stop and search is rising
# - % of lfr deployments in LSOAs where stop and search remains the same
# - % of lfr deployments in LSOAs where stop and search is falling

# Split the LSOAs by the sign of abs_change. A row whose abs_change is missing
# satisfies none of the three comparisons, so it ends up in no group.
abs_change_col = stats_gdf['abs_change']

rising_search = stats_gdf[abs_change_col.gt(0)]
no_change = stats_gdf[abs_change_col.eq(0)]
falling_search = stats_gdf[abs_change_col.lt(0)]

# Denominator shared by the three percentages.
total_lfr = stats_gdf['lfr_count'].sum()

# Deployments per group, then each group's share of the total.
lfr_rising = rising_search['lfr_count'].sum()
lfr_no_change = no_change['lfr_count'].sum()
lfr_falling = falling_search['lfr_count'].sum()

pct_in_rising = lfr_rising / total_lfr * 100
pct_no_change = lfr_no_change / total_lfr * 100
pct_falling = lfr_falling / total_lfr * 100

print(f"{round(pct_in_rising, 2)}% of LFRs occur in LSOAs where stop and search is rising between 2023 and 2025")
print(f"{round(pct_no_change, 2)}% of LFRs occur in LSOAs where stop and search is remaining consistent between 2023 and 2025")
print(f"{round(pct_falling, 2)}% of LFRs occur in LSOAs where stop and search is falling between 2023 and 2025")

54.78% of LFRs occur in LSOAs where stop and search is rising between 2023 and 2025
1.3% of LFRs occur in LSOAs where stop and search is remaining consistent between 2023 and 2025
43.91% of LFRs occur in LSOAs where stop and search is falling between 2023 and 2025


In [56]:
# Map 3 - IMD deciles and lfr
# - % of lfr deployments in highest decile
# - % of lfr deployments in top 3 deciles
#
# NOTE(review): "highest" is selected here as decile == 1 and "top 3" as
# deciles below 4 -- presumably decile 1 is the most deprived; confirm.
# total_lfr is taken from the map 2 cell, so that cell has to run first.

imd_decile = stats_gdf['Index of Multiple Deprivation (IMD) Decile']

highest_decile = stats_gdf[imd_decile.eq(1)]
three_highest_deciles = stats_gdf[imd_decile.lt(4)]

# Number of LSOAs in each selection.
hi_dec_sum = len(highest_decile)
hi_three_dec = len(three_highest_deciles)

# Deployments in each selection and their share of all counted deployments.
lfr_highest = highest_decile['lfr_count'].sum()
imd_lfr_pct = lfr_highest / total_lfr * 100

lfr_three_highest = three_highest_deciles['lfr_count'].sum()
imd_three_lfr_pct = lfr_three_highest / total_lfr * 100

print(f"{round(imd_lfr_pct, 2)}% of LFRs occur in LSOAs in the highest IMD decile")
print(f"{round(imd_three_lfr_pct, 2)}% of LFRs occur in LSOAs in the top 30% of IMD deciles")


5.65% of LFRs occur in LSOAs in the highest IMD decile
63.04% of LFRs occur in LSOAs in the top 30% of IMD deciles


In [57]:
# Save results to csv

from pathlib import Path

import pandas as pd

# Every statistic sits right next to its label, so the labels and the values
# cannot drift out of step the way two parallel lists can. Each label is the
# name of the variable it reports.
summary_stats = {
    "top_10_stop_search": top_10_stop_search,
    "lfr_in_top_10": lfr_in_top_10,
    "pct_in_top_10": pct_in_top_10,
    "top_20_stop_search": top_20_stop_search,
    "lfr_in_top_20": lfr_in_top_20,
    "pct_in_top_20": pct_in_top_20,
    "lfr_rising": lfr_rising,
    "lfr_no_change": lfr_no_change,
    "lfr_falling": lfr_falling,
    "total_lfr": total_lfr,
    "pct_in_rising": pct_in_rising,
    "pct_no_change": pct_no_change,
    "pct_falling": pct_falling,
    "hi_dec_sum": hi_dec_sum,
    "hi_three_dec": hi_three_dec,
    "lfr_highest": lfr_highest,
    "lfr_three_highest": lfr_three_highest,
    "imd_lfr_pct": imd_lfr_pct,
    "imd_three_lfr_pct": imd_three_lfr_pct,
}

# One row per statistic, in the order listed above.
summary_df = pd.DataFrame({
    "stat": list(summary_stats.keys()),
    "value": list(summary_stats.values()),
})

summary_df["value"] = summary_df["value"].round(2)

# to_csv does not create missing folders, so make sure the target folder
# exists before writing.
summary_path = Path("../outputs/tables/summary_stats.csv")
summary_path.parent.mkdir(parents=True, exist_ok=True)

summary_df.to_csv(summary_path, index=False)

# Last expression of the cell, so the notebook renders the table.
summary_df
