In [1]:
# Extract LSOA and MSOA boundary files for the London Borough of LBTH.
# Based on the 2011 census boundaries for now, which is the most recent OA 
# boundary revision available. (This will change with the 2021 Census.)
#
# We also export two lookup tables:
# - a list of OAs (OA, MSOA, LSOA) & administrative regions (LAD) for LBTH
# - a mapping of OAs to Wards for LBTH

In [2]:
import os

import pandas as pd
import geopandas as gpd

Configuration
==


In [3]:
# Used as root folder.
project_dir = '..'

data_dir = f"{project_dir}/data"

# Used as output folder for all boundary files and centroid files.
oa_dir = f"{data_dir}/external/ons/boundaries"

# Used as output folder for all lookup tables.
lookup_dir = f"{data_dir}/external/ons/lookups"

In [4]:
# Clean. !! Use with caution !!
# !rm -rf "{oa_dir}"

In [3]:
!mkdir "{oa_dir}"
!mkdir "{lookup_dir}"

A subdirectory or file ../data/external/ons/boundaries already exists.
A subdirectory or file ../data/external/ons/lookups already exists.


Downloads
==

Boundary downloads
--


please see https://data-hamlets.github.io/open-data-tower-hamlets/datasets/ons-2021-census-boundaries/

Centroid downloads
--

these are used to map census areas to ward

In [None]:
# OA population-weighted centroids, 2011 census boundaries
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::output-areas-december-2021-population-weighted-centroids
!curl "https://opendata.arcgis.com/api/v3/datasets/a7528e17f2b0471b879a82dcfde4f164_0/downloads/data?format=shp&spatialRefId=27700&where=1%3D1" --output "{oa_dir}/oa21_centroids.zip"

In [10]:
# LSOA population-weighted centroids, 2011 census boundaries
# Source: 
# ?
# !curl "https://opendata.arcgis.com/api/v3/datasets/b7c49538f0464f748dd7137247bbc41c_0/downloads/data?format=shp&spatialRefId=27700" --output "{oa_dir}/lsoa21_centroids-shp.zip"

Lookups
--

In [12]:
!rm "{lookup_dir}/"*

In [None]:
# 2011 Admin lookup, from Dec 2020
# Source:
# https://geoportal.statistics.gov.uk/datasets/ons::output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2020-lookup-in-england-and-wales/explore
!curl "https://opendata.arcgis.com/api/v3/datasets/65664b00231444edb3f6f83c9d40591f_0/downloads/data?format=csv&spatialRefId=4326&where=1%3D1" --output "{lookup_dir}/oa11_lsoa11_msoa11_lad20_rgn20.csv"

In [20]:
# 2021 Admin lookup
# Source:
# https://geoportal.statistics.gov.uk/datasets/output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2021-lookup-in-england-and-wales-v2-1/about
# !curl "https://www.arcgis.com/sharing/rest/content/items/792f7ab3a99d403ca02cc9ca1cf8af02/data" --output "{lookup_dir}/oa21_lsoa21_msoa21_lad21.csv"

df = pd.read_csv("https://www.arcgis.com/sharing/rest/content/items/792f7ab3a99d403ca02cc9ca1cf8af02/data", encoding = 'latin')
df.query('lad22nm == "Tower Hamlets"').to_csv(f'{lookup_dir}/lbth_oa21_lsoa21_msoa21_lad21.csv', index=False)

In [None]:
# https://www.arcgis.com/sharing/rest/content/items/5fc4ff82228846c2b893e3beba0e3751/data

# Best-Fit Lookup
# A best-fit lookup file between Lower layer Super Output Areas (LSOA) as at December 2011 and LSOAs as at December 2021 in England and Wales.
# The lookup contains all the 2011 LSOAs (34,753) and these are point-in-polygon to the 2021 LSOA full extent boundaries (which contains 34,628 records, so 1,044 LSOAs are missing from the 2021 LSOAs)

!curl "https://www.arcgis.com/sharing/rest/content/items/5fc4ff82228846c2b893e3beba0e3751/data" --output "{lookup_dir}/LSOA2011-to-LSOA-2021-to-Local-Authority-District-2022.csv"

Directory content
--

In [33]:
!ls -lh "{oa_dir}"

total 124M
-rw-r--r-- 1 Joseph.Leach 1049089  32K Oct 28 08:42 lsoa21_full_clipped.zip
-rw-r--r-- 1 Joseph.Leach 1049089 118M Oct 28 08:43 msoa21_full_clipped.zip
-rw-r--r-- 1 Joseph.Leach 1049089   44 Oct 28 08:43 oa21_centroids.zip
-rw-r--r-- 1 Joseph.Leach 1049089 6.5M Oct 28 08:30 oa21_full_clipped.zip


In [None]:
!ls -lh "{lookup_dir}" 