# LSOA aggregation
- extract the LSOA boundaries shapefile
- aggregate with 2025 stop and search data to create a choropleth


In [None]:
from pathlib import Path
import geopandas as gpd


# Locate the boundaries archive and the LSOA shapefile stored inside it.
zip_path = Path("../data/raw/statistical-gis-boundaries-london.zip")
shp_inside_zip = "statistical-gis-boundaries-london/ESRI/LSOA_2011_London_gen_MHW.shp"

# Read the shapefile straight out of the archive via a
# "zip://<archive>!<member>" path, without unpacking it to disk first.
lsoa_uri = "zip://" + str(zip_path) + "!" + shp_inside_zip
lsoa = gpd.read_file(lsoa_uri)

# Check: preview the first rows, then show the CRS the layer was loaded with.
print(lsoa.head(), lsoa.crs, sep="\n")


    LSOA11CD                   LSOA11NM   MSOA11CD                  MSOA11NM  \
0  E01000001        City of London 001A  E02000001        City of London 001   
1  E01000002        City of London 001B  E02000001        City of London 001   
2  E01000003        City of London 001C  E02000001        City of London 001   
3  E01000005        City of London 001E  E02000001        City of London 001   
4  E01000006  Barking and Dagenham 016A  E02000017  Barking and Dagenham 016   

     LAD11CD               LAD11NM    RGN11CD RGN11NM  USUALRES  HHOLDRES  \
0  E09000001        City of London  E12000007  London      1465      1465   
1  E09000001        City of London  E12000007  London      1436      1436   
2  E09000001        City of London  E12000007  London      1346      1250   
3  E09000001        City of London  E12000007  London       985       985   
4  E09000002  Barking and Dagenham  E12000007  London      1703      1699   

   COMESTRES  POPDEN  HHOLDS  AVHHOLDSZ  \
0          0 

In [4]:
# Reproject the LSOA boundaries to EPSG:4326 so they can be used together
# with latitude/longitude coordinates.
lsoa = lsoa.to_crs(epsg=4326)

In [None]:
#import stop and search data 

import pandas as pd

# Path to the processed stop-and-search CSV.
processed_dir = Path("../data/processed")
stop_search_file = processed_dir.joinpath("stop_search_aggregated.csv")

# Load the CSV into a DataFrame and preview the first rows.
stop_and_search_2025_data = pd.read_csv(stop_search_file)
print(stop_and_search_2025_data.head())


                        Type                       Date  \
0  Person and Vehicle search  2025-01-01T00:00:00+00:00   
1              Person search  2025-01-01T00:04:00+00:00   
2              Person search  2025-01-01T00:10:00+00:00   
3              Person search  2025-01-01T00:10:00+00:00   
4  Person and Vehicle search  2025-01-01T00:10:00+00:00   

   Part of a policing operation  Policing operation   Latitude  Longitude  \
0                         False                 NaN  51.572648   0.086515   
1                         False                 NaN  51.495775  -0.142679   
2                         False                 NaN  51.454034  -0.161689   
3                         False                 NaN  51.454034  -0.161689   
4                         False                 NaN  51.372247   0.039315   

  Gender Age range                             Self-defined ethnicity  \
0    NaN     18-24                 White - Any other White background   
1   Male     18-24                  

In [None]:
# Convert the stop and search DataFrame to a GeoDataFrame

import geopandas as gpd
from shapely.geometry import Point

# Build the point geometries with GeoPandas' vectorised helper instead of
# creating one shapely Point per row in a Python loop.
# x comes from Longitude and y from Latitude; the points are declared as
# EPSG:4326.
gdf = gpd.GeoDataFrame(
    stop_and_search_2025_data,
    geometry=gpd.points_from_xy(
        stop_and_search_2025_data["Longitude"],
        stop_and_search_2025_data["Latitude"],
    ),
    crs="EPSG:4326",
)


In [None]:
# Attach LSOA attributes to each stop-and-search point: every point is matched
# to the LSOA polygon it lies within. The left join keeps all points, so a
# point that matches no polygon is retained with empty LSOA columns.
joined = gdf.sjoin(lsoa, how="left", predicate="within")



                        Type                       Date  \
0  Person and Vehicle search  2025-01-01T00:00:00+00:00   
1              Person search  2025-01-01T00:04:00+00:00   
2              Person search  2025-01-01T00:10:00+00:00   
3              Person search  2025-01-01T00:10:00+00:00   
4  Person and Vehicle search  2025-01-01T00:10:00+00:00   

   Part of a policing operation  Policing operation   Latitude  Longitude  \
0                         False                 NaN  51.572648   0.086515   
1                         False                 NaN  51.495775  -0.142679   
2                         False                 NaN  51.454034  -0.161689   
3                         False                 NaN  51.454034  -0.161689   
4                         False                 NaN  51.372247   0.039315   

  Gender Age range                             Self-defined ethnicity  \
0    NaN     18-24                 White - Any other White background   
1   Male     18-24                  

In [None]:
# Check: preview the LSOA code attached to the first few joined records.
lsoa_code_preview = joined[["LSOA11CD"]].head()
print(lsoa_code_preview)

    LSOA11CD
0  E01003755
1  E01004750
2  E01004478
3  E01004478
4  E01000665


In [None]:
# Count the joined records per LSOA code.
# Rows without an LSOA code are excluded by groupby's default handling of
# missing keys.
records_per_lsoa = joined.groupby("LSOA11CD").size()

# Turn the counts Series into a two-column frame: LSOA11CD, stop_search_count.
lsoa_counts = records_per_lsoa.rename("stop_search_count").reset_index()


In [None]:
# Add the counts to the polygons.
#
# A left merge keeps every LSOA polygon, so areas with no matching count come
# back with a missing stop_search_count. Only that column is zero-filled: a
# blanket .fillna(0) on the merged frame would also overwrite any missing
# value in every other column with 0. The count is cast back to an integer
# because the missing values introduced by the merge turn it into a float.
lsoa_map = lsoa.merge(
    lsoa_counts,
    on="LSOA11CD",
    how="left",
).assign(
    stop_search_count=lambda merged: merged["stop_search_count"].fillna(0).astype(int)
)


In [14]:
# Export the LSOA layer as a GeoPackage

from pathlib import Path

# Destination file for the exported layer.
output_dir = Path("../data/processed")
lsoa_output = output_dir.joinpath("lsoa_stop_search.gpkg")

# Write lsoa_map to a GeoPackage, in a layer named "lsoa_stop_search".
lsoa_map.to_file(lsoa_output, driver="GPKG", layer="lsoa_stop_search")
