# Join 2023 stop and search data to LSOA boundaries
- Follow the same process as 03_LSOA for the historic data
- Load the 2025_LSOA counts and merge

In [1]:
from pathlib import Path
import geopandas as gpd


# Path of the LSOA (2011) shapefile inside the boundaries archive, and the
# archive itself (relative to the notebook's working directory).
shp_inside_zip = "statistical-gis-boundaries-london/ESRI/LSOA_2011_London_gen_MHW.shp"
zip_path = Path("../data/raw/statistical-gis-boundaries-london.zip")

# Read the shapefile straight out of the archive through a zip:// URI
# ("<archive>!<member>"), so nothing has to be extracted first.
lsoa = gpd.read_file(
    f"zip://{zip_path}!{shp_inside_zip}"
)

# Check: first rows of the attribute table, then the coordinate reference system
print(lsoa.head(), lsoa.crs, sep="\n")


    LSOA11CD                   LSOA11NM   MSOA11CD                  MSOA11NM  \
0  E01000001        City of London 001A  E02000001        City of London 001   
1  E01000002        City of London 001B  E02000001        City of London 001   
2  E01000003        City of London 001C  E02000001        City of London 001   
3  E01000005        City of London 001E  E02000001        City of London 001   
4  E01000006  Barking and Dagenham 016A  E02000017  Barking and Dagenham 016   

     LAD11CD               LAD11NM    RGN11CD RGN11NM  USUALRES  HHOLDRES  \
0  E09000001        City of London  E12000007  London      1465      1465   
1  E09000001        City of London  E12000007  London      1436      1436   
2  E09000001        City of London  E12000007  London      1346      1250   
3  E09000001        City of London  E12000007  London       985       985   
4  E09000002  Barking and Dagenham  E12000007  London      1703      1699   

   COMESTRES  POPDEN  HHOLDS  AVHHOLDSZ  \
0          0 

In [2]:
# Reproject the LSOA polygons to EPSG:4326 so they can be compared with
# latitude/longitude coordinates.
lsoa = lsoa.to_crs(epsg=4326)

In [3]:
# Load the 2023 stop and search CSV from the processed-data folder

import pandas as pd

processed_dir = Path("../data/processed")
stop_search_file = processed_dir.joinpath("2023_stop_search_aggregated.csv")

stop_and_search_2023_data = pd.read_csv(stop_search_file)

# Check: show the first five rows
print(stop_and_search_2023_data.head(5))


                        Type                       Date  \
0              Person search  2023-01-01T00:00:00+00:00   
1              Person search  2023-01-01T00:01:00+00:00   
2              Person search  2023-01-01T00:01:00+00:00   
3  Person and Vehicle search  2023-01-01T00:02:00+00:00   
4  Person and Vehicle search  2023-01-01T00:02:00+00:00   

   Part of a policing operation  Policing operation   Latitude  Longitude  \
0                         False                 NaN  51.459946  -0.093261   
1                         False                 NaN  51.528765  -0.216088   
2                         False                 NaN  51.510468  -0.130769   
3                         False                 NaN  51.502641  -0.000790   
4                         False                 NaN  51.460105   0.113566   

  Gender Age range                             Self-defined ethnicity  \
0   Male     18-24                    Other ethnic group - Not stated   
1   Male     18-24  Black/African/Ca

In [4]:
# Convert the DataFrame to a GeoDataFrame

import geopandas as gpd
from shapely.geometry import Point

# Build the point geometries from the Longitude/Latitude columns in a single
# vectorised call (x = longitude, y = latitude) instead of constructing one
# shapely Point per row in a Python loop.
# The CRS is EPSG:4326, the same CRS the LSOA polygons were reprojected to.
gdf = gpd.GeoDataFrame(
    stop_and_search_2023_data,
    geometry=gpd.points_from_xy(
        stop_and_search_2023_data["Longitude"],
        stop_and_search_2023_data["Latitude"],
    ),
    crs="EPSG:4326",
)


In [5]:
# Spatial join: attach the attributes of the LSOA polygon each point lies
# within. how="left" keeps every row of `gdf`, including points that fall
# inside no polygon.
joined = gdf.sjoin(lsoa, how="left", predicate="within")

In [6]:
# Check: the LSOA code assigned to the first five rows

print(joined.loc[:, ["LSOA11CD"]].head(5))

    LSOA11CD
0  E01004070
1  E01000586
2  E01004734
3  E01001667
4  E01000388


In [7]:
# Count the joined rows per LSOA code.
# Step 1: number of rows in each LSOA11CD group (a Series indexed by code).
lsoa_counts = joined.groupby("LSOA11CD").size()
# Step 2: turn it into a two-column table: LSOA11CD, stop_search_count.
lsoa_counts = lsoa_counts.reset_index(name="stop_search_count")


In [10]:
# Add the counts to the polygons.
# The left merge keeps every LSOA polygon; an LSOA with no matching row in
# `lsoa_counts` comes back with a missing count, which is set to 0 here.
# Only `stop_search_count` is filled: a blanket `.fillna(0)` would also
# overwrite missing values in every other column of the boundary table.
lsoa_map_2023 = lsoa.merge(
    lsoa_counts,
    on="LSOA11CD",
    how="left"
).fillna({"stop_search_count": 0})
