In [1]:
# Extract LSOA and MSOA boundary files for the London Borough of LBTH.
# Based on the 2011 census boundaries for now, which is the most recent OA 
# boundary revision available. (This will change with the 2021 Census.)
#
# We also export two lookup tables:
# - a list of OAs (OA, MSOA, LSOA) & administrative regions (LAD) for LBTH
# - a mapping of OAs to Wards for LBTH

In [31]:
import os

import pandas as pd
import geopandas as gpd

Configuration
==


In [32]:
# Used as root folder.
project_dir = '..'

data_dir = f"{project_dir}/data"

# Used as output folder for all boundary files and centroid files.
oa_dir = f"{data_dir}/external/ons/boundaries"

# Used as output folder for all lookup tables.
lookup_dir = f"{data_dir}/external/ons/lookups"

In [4]:
# Clean. !! Use with caution !!
# !rm -rf "{oa_dir}"

In [5]:
!mkdir "{oa_dir}"
!mkdir "{lookup_dir}"

Downloads
==

Boundary downloads
--


In [6]:
# OA21, full, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::output-areas-december-2021-boundaries-full-clipped-ew-bfc-2/explore
# !curl "https://opendata.arcgis.com/api/v3/datasets/5670c14a21224d8187357a095121ca39_0/downloads/data?format=shp&spatialRefId=27700&where=1%3D1" --output "{oa_dir}/oa21_full_clipped.zip"
!curl "https://opendata.arcgis.com/api/v3/datasets/5670c14a21224d8187357a095121ca39_0/downloads/data?format=fgdb&spatialRefId=27700&where=1%3D1" --output "{oa_dir}/oa21_full_clipped.zip"


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:02 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:03 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:04 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:05 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:06 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:06 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:08 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:09 --:--:--     0
  0     0    0     0    0     0      0      0 --:--

In [7]:
# LSOA21, full, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::lower-layer-super-output-areas-december-2021-boundaries-full-clipped-ew-bfc/explore
!curl "https://opendata.arcgis.com/api/v3/datasets/8fb6ac064a704cee9a999ee08414d61e_0/downloads/data?format=shp&spatialRefId=27700&where=1%3D1" --output "{oa_dir}/lsoa21_full_clipped.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:24 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:25 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:26 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:26 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:27 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:28 --:--:--     0
100 15717    0 15717    0     0    528      0 --:--:--  0:00:29 --:--:--  3505
100 32093    0 32093    0     0   1041      0 --:--

In [8]:
# MSOA21, full, clipped
# Source:
# https://geoportal.statistics.gov.uk/maps/middle-layer-super-output-areas-december-2021-boundaries-full-clipped-ew-bfc
!curl "https://opendata.arcgis.com/api/v3/datasets/ec053fbfee484bdcacc3ff8f328f605e_0/downloads/data?format=shp&spatialRefId=27700&where=1%3D1" --output "{oa_dir}/msoa21_full_clipped.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:02 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:03 --:--:--     0
100  933k    0  933k    0     0   216k      0 --:--:--  0:00:04 --:--:--  216k
100 3120k    0 3120k    0     0   589k      0 --:--:--  0:00:05 --:--:--  623k
100 5035k    0 5035k    0     0   800k      0 --:--:--  0:00:06 --:--:-- 1019k
100 6906k    0 6906k    0     0   947k      0 --:--:--  0:00:07 --:--:-- 1401k
100 9057k    0 9057k    0     0  1092k      0 --:--:--  0:00:08 --:--:-- 1837k
100 10.7M    0 10.7M    0     0  1188k      0 --:--

Centroid downloads
--

In [9]:
# OA population-weighted centroids, 2011 census boundaries
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::output-areas-december-2021-population-weighted-centroids
!curl "https://opendata.arcgis.com/api/v3/datasets/a7528e17f2b0471b879a82dcfde4f164_0/downloads/data?format=shp&spatialRefId=27700&where=1%3D1" --output "{oa_dir}/oa21_centroids.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100    44  100    44    0     0     68      0 --:--:-- --:--:-- --:--:--    68


In [10]:
# LSOA population-weighted centroids, 2011 census boundaries
# Source: 
# ?
# !curl "https://opendata.arcgis.com/api/v3/datasets/b7c49538f0464f748dd7137247bbc41c_0/downloads/data?format=shp&spatialRefId=27700" --output "{oa_dir}/lsoa21_centroids-shp.zip"

In [11]:
# MSOA population-weighted centroids, 2011 census boundaries
# Source: 
# ??
# !curl "https://opendata.arcgis.com/api/v3/datasets/b0a6d8a3dc5d4718b3fd62c548d60f81_0/downloads/data?format=shp&spatialRefId=27700" --output "{oa_dir}/msoa21_centroids-shp.zip"

Lookups
--

In [12]:
#!rm "{lookup_dir}/"*

In [13]:
!curl "https://opendata.arcgis.com/api/v3/datasets/6408273b5aff4e01ab540a1b1b95b7a7_0/downloads/data?format=csv&spatialRefId=4326&where=%28LAD20NM+IN+%28%27Tower+Hamlets%27%29%29" --output "{lookup_dir}/Lower_Layer_Super_Output_Area_(2011)_to_Ward_(2020)_to_LAD_(2020)_Lookup_in_England_and_Wales_V2.csv"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 11891    0 11891    0     0  18496      0 --:--:-- --:--:-- --:--:-- 18550


In [14]:
# 2011 Admin lookup, from Dec 2020
# Source:
# https://geoportal.statistics.gov.uk/datasets/ons::output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2011-lookup-in-england-and-wales/explore
# !curl "https://opendata.arcgis.com/api/v3/datasets/6ecda95a83304543bc8feedbd1a58303_0/downloads/data?format=csv&spatialRefId=4326&where=1%3D1" --output "{lookup_dir}/oa11_lsoa11_msoa11_lad20_rgn20.csv"
# https://geoportal.statistics.gov.uk/datasets/ons::output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2020-lookup-in-england-and-wales/explore
!curl "https://opendata.arcgis.com/api/v3/datasets/65664b00231444edb3f6f83c9d40591f_0/downloads/data?format=csv&spatialRefId=4326&where=1%3D1" --output "{lookup_dir}/oa11_lsoa11_msoa11_lad20_rgn20.csv"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:02 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:03 --:--:--     0
100 1095k    0 1095k    0     0   280k      0 --:--:--  0:00:03 --:--:--  280k
100 2379k    0 2379k    0     0   461k      0 --:--:--  0:00:05 --:--:--  461k
100 3024k    0 3024k    0     0   512k      0 --:--:--  0:00:05 --:--:--  646k
100 5199k    0 5199k    0     0   753k      0 --:--:--  0:00:06 --:--:-- 1113k
100 6804k    0 6804k    0     0   861k      0 --:--:--  0:00:07 --:--:-- 1447k
100 8514k    0 8514k    0     0   956k      0 --:--:--  0:00:08 --:--:-- 1484k
100 10.2M    0 10.2M    0     0  1062k      0 --:--

In [15]:
# OA-Ward lookup, from Dec 2020
# Source:
# https://geoportal.statistics.gov.uk/datasets/output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2021-lookup-in-england-and-wales-v2/about
!curl "https://www.arcgis.com/sharing/rest/content/items/9f0bc2c6fbc9427ba11db01759e5f6d8/data" --output "{lookup_dir}/oa21_lsoa21_msoa21_lad21_rgn21.csv"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:02 --:--:--     0
  0 15.7M    0     0    0     0      0      0 --:--:--  0:00:03 --:--:--     0
 10 15.7M   10 1727k    0     0   411k      0  0:00:39  0:00:04  0:00:35  411k
 21 15.7M   21 3527k    0     0   678k      0  0:00:23  0:00:05  0:00:18  715k
 33 15.7M   33 5389k    0     0   869k      0  0:00:18  0:00:06  0:00:12 1130k
 42 15.7M   42 6786k    0     0   943k      0  0:00:17  0:00:07  0:00:10 1428k
 54 15.7M   54 8822k    0     0  1076k      0  0:00:14  0:00:08  0:00:06 1800k
 66 15.7M   66 10.4M    0     0  1168k      0  0:00

In [16]:
# https://www.arcgis.com/sharing/rest/content/items/5fc4ff82228846c2b893e3beba0e3751/data

# Best-Fit Lookup
# A best-fit lookup file between Lower layer Super Output Areas (LSOA) as at December 2011 and LSOAs as at December 2021 in England and Wales.
# The lookup contains all the 2011 LSOAs (34,753) and these are point-in-polygon to the 2021 LSOA full extent boundaries (which contains 34,628 records, so 1,044 LSOAs are missing from the 2021 LSOAs)

!curl "https://www.arcgis.com/sharing/rest/content/items/5fc4ff82228846c2b893e3beba0e3751/data" --output "{lookup_dir}/LSOA2011-to-LSOA-2021-to-Local-Authority-District-2022.csv"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   546  100   546    0     0   2113      0 --:--:-- --:--:-- --:--:--  2124
100   546  100   546    0     0   2111      0 --:--:-- --:--:-- --:--:--  2116


In [17]:
# OA-Ward lookup, from Dec 2020
# ODD... DOESN'T WORK
# Source:
# https://geoportal.statistics.gov.uk/documents/output-area-to-ward-to-local-authority-district-december-2021-lookup-in-england-and-wales/about
# !curl "https://www.arcgis.com/sharing/rest/content/items/dd6264586c5049089ca53d67b9af14af/data" --output "{lookup_dir}/oa21_ward22.xlsx"

Directory content
--

In [33]:
!ls -lh "{oa_dir}"

total 124M
-rw-r--r-- 1 Joseph.Leach 1049089  32K Oct 28 08:42 lsoa21_full_clipped.zip
-rw-r--r-- 1 Joseph.Leach 1049089 118M Oct 28 08:43 msoa21_full_clipped.zip
-rw-r--r-- 1 Joseph.Leach 1049089   44 Oct 28 08:43 oa21_centroids.zip
-rw-r--r-- 1 Joseph.Leach 1049089 6.5M Oct 28 08:30 oa21_full_clipped.zip


In [34]:
!ls -lh "{lookup_dir}" 

total 36M
-rw-r--r-- 1 Joseph.Leach 1049089 546 Oct 28 08:44 LSOA2011-to-LSOA-2021-to-Local-Authority-District-2022.csv
-rw-r--r-- 1 Joseph.Leach 1049089 12K Oct 28 08:43 Lower_Layer_Super_Output_Area_(2011)_to_Ward_(2020)_to_LAD_(2020)_Lookup_in_England_and_Wales_V2.csv
-rw-r--r-- 1 Joseph.Leach 1049089 80K Oct 28 08:44 lbth_oa11_lsoa11_msoa11_lad11_rgn11.csv
-rw-r--r-- 1 Joseph.Leach 1049089 83K Oct 28 08:44 lbth_oa21_lsoa21_msoa21_lad21_rgn21.csv
-rw-r--r-- 1 Joseph.Leach 1049089 20M Oct 28 08:43 oa11_lsoa11_msoa11_lad20_rgn20.csv
-rw-r--r-- 1 Joseph.Leach 1049089 16M Oct 28 08:44 oa21_lsoa21_msoa21_lad21_rgn21.csv


Load & extract LBTH lookups
==
First we process plain tabular data.

lbth oa-lsoa-msoa-lad-rgn
--
This lookup also allows us to identify all OAs that relate to the LBTH Local Authority. We do this first so we can then filter any national data sets below.

In [20]:
# Load the lookup of all OAs, MSOAs, LSOAs, LADs (2011)
oa_lookup_11 = pd.read_csv(f"{lookup_dir}/oa11_lsoa11_msoa11_lad20_rgn20.csv")
oa_lookup_11.head()

Unnamed: 0,FID,OA11CD,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM,LAD20CD,LAD20NM,RGN20CD,RGN20NM
0,1,E00060369,E01011969,Hartlepool 001B,E02002483,Hartlepool 001,E06000001,Hartlepool,E12000001,North East
1,2,E00060256,E01011949,Hartlepool 009A,E02002491,Hartlepool 009,E06000001,Hartlepool,E12000001,North East
2,3,E00060371,E01011969,Hartlepool 001B,E02002483,Hartlepool 001,E06000001,Hartlepool,E12000001,North East
3,4,E00060362,E01011970,Hartlepool 001C,E02002483,Hartlepool 001,E06000001,Hartlepool,E12000001,North East
4,5,E00060257,E01011949,Hartlepool 009A,E02002491,Hartlepool 009,E06000001,Hartlepool,E12000001,North East


In [35]:
# Load the lookup of all OAs, MSOAs, LSOAs, LADs (2021)
oa_lookup_21 = pd.read_csv(f"{lookup_dir}/oa21_lsoa21_msoa21_lad21_rgn21.csv", encoding = 'latin')
oa_lookup_21.head()

  oa_lookup_21 = pd.read_csv(f"{lookup_dir}/oa21_lsoa21_msoa21_lad21_rgn21.csv", encoding = 'latin')


Unnamed: 0,oa21cd,lsoa21cd,lsoa21nm,msoa21cd,msoa21nm,lad22cd,lad22nm,lad22nmw
0,E00000001,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,
1,E00000003,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,
2,E00000005,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,
3,E00000007,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,
4,E00000010,E01000003,City of London 001C,E02000001,City of London 001,E09000001,City of London,


In [36]:
# Extract all records relating to the LBTH Local Authority District (LAD), 
# according to the December 2020 LAD definitions.
# We filter by LAD name: LAD20NM = LBTH
# Could alternatively filter by LAD code, LAD20CD = E09000023
lbth_oa_recs_11 = oa_lookup_11[oa_lookup_11.LAD20NM=='Tower Hamlets'].drop(labels='FID', axis=1)
print(len(lbth_oa_recs_11))
lbth_oa_recs_11.head()

748


Unnamed: 0,OA11CD,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM,LAD20CD,LAD20NM,RGN20CD,RGN20NM
168000,E00021172,E01004201,Tower Hamlets 002D,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,E12000007,London
168002,E00021307,E01004232,Tower Hamlets 003C,E02000866,Tower Hamlets 003,E09000030,Tower Hamlets,E12000007,London
168004,E00167246,E01004222,Tower Hamlets 001A,E02000864,Tower Hamlets 001,E09000030,Tower Hamlets,E12000007,London
168005,E00021173,E01004201,Tower Hamlets 002D,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,E12000007,London
168006,E00021175,E01004201,Tower Hamlets 002D,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,E12000007,London


In [37]:
# Extract all records relating to the LBTH Local Authority District (LAD), 
# according to the 2021 LAD definitions.
# We filter by LAD name: LAD20NM = LBTH
# Could alternatively filter by LAD code, LAD20CD = E09000023
lbth_oa_recs_21 = oa_lookup_21[oa_lookup_21.lad22nm=='Tower Hamlets']
print(len(lbth_oa_recs_21))
lbth_oa_recs_21.head()

913


Unnamed: 0,oa21cd,lsoa21cd,lsoa21nm,msoa21cd,msoa21nm,lad22cd,lad22nm,lad22nmw
19878,E00021141,E01004198,Tower Hamlets 002B,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,
19879,E00021143,E01004201,Tower Hamlets 002D,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,
19880,E00021145,E01035661,Tower Hamlets 002F,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,
19881,E00021146,E01004200,Tower Hamlets 005A,E02000868,Tower Hamlets 005,E09000030,Tower Hamlets,
19882,E00021147,E01004201,Tower Hamlets 002D,E02000865,Tower Hamlets 002,E09000030,Tower Hamlets,


In [24]:
# Save as LBTH-specific lookup table for future use
lbth_oa_recs_11.to_csv(f"{lookup_dir}/lbth_oa11_lsoa11_msoa11_lad11_rgn11.csv", index=False)
lbth_oa_recs_21.to_csv(f"{lookup_dir}/lbth_oa21_lsoa21_msoa21_lad21_rgn21.csv", index=False)

LBTH OA-Ward lookup
--

In [38]:
# Load the lookup of OAs to Wards, LADs
# Output Area to Ward to Local Authority District (December 2021) Lookup in England and Wales
# oa11_ward11_lad11 = pd.read_csv(f"{lookup_dir}/oa11_ward11_lad11.csv")
# oa11_ward11_lad11.columns

In [39]:
# # Filter
# lbth_oa_ward_lad_11 = oa11_ward11_lad11[oa11_ward11_lad11.OA11CD.isin(lbth_oa_recs_11.oa21cd.unique())]
# print(len(lbth_oa_ward_lad_11))
# lbth_oa_ward_lad_11.head()

Load & extract LBTH geometries
==
This is spatial geometry data.

Load & extract OA, LSOA, MSOA boundaries
--

In [63]:
oa21_full = gpd.read_file(f"{oa_dir}/oa21_full_clipped.zip")
oa21_full.columns

DriverError: '/vsizip/../data/external/ons/boundaries/oa21_full_clipped.zip' does not exist in the file system, and is not recognized as a supported dataset name.

In [61]:
lbth_oa21_full = oa21_full[oa21_full.OA21CD.isin(lbth_oa_recs_202012.oa21cd.unique())]
lbth_oa21_full.head()

NameError: name 'oa21_full' is not defined

In [60]:
lsoa11_full = gpd.read_file(f"{oa_dir}/lsoa11_full_clipped.zip")
lsoa11_full.columns

DriverError: '/vsizip/../data/external/ons/boundaries/lsoa11_full_clipped.zip' does not exist in the file system, and is not recognized as a supported dataset name.

In [None]:
lbth_lsoa11_full = lsoa11_full[lsoa11_full.LSOA11CD.isin(lbth_oa_recs_11.LSOA11CD.unique())]
print(len(lbth_lsoa11_full))
lbth_lsoa11_full.head()

144


Unnamed: 0,OBJECTID,LSOA11CD,LSOA11NM,BNG_E,BNG_N,LONG_,LAT,Shape_Leng,Shape__Are,Shape__Len,geometry
4359,4360,E01004197,Tower Hamlets 002A,534924,183331,-0.05615,51.5328,2634.515663,169926.971878,2634.515663,"POLYGON ((535187.590 183308.381, 535243.037 18..."
4360,4361,E01004198,Tower Hamlets 002B,535012,183130,-0.05496,51.5309,1835.149164,94968.784569,1835.149164,"POLYGON ((535056.623 183297.891, 535057.573 18..."
4361,4362,E01004199,Tower Hamlets 002C,534754,183031,-0.05871,51.5301,1710.969886,68981.051849,1710.969886,"POLYGON ((534827.670 183222.599, 534828.862 18..."
4362,4363,E01004200,Tower Hamlets 005A,534519,183093,-0.06207,51.5307,1249.726403,57353.775116,1249.726403,"POLYGON ((534671.000 183256.000, 534673.363 18..."
4363,4364,E01004201,Tower Hamlets 002D,535390,183327,-0.04943,51.5326,1932.510937,129813.369217,1932.510937,"POLYGON ((535721.387 183262.442, 535728.297 18..."


In [None]:
lsoa21_full = gpd.read_file(f"{oa_dir}/lsoa21_full_clipped.zip")
lsoa21_full.columns

Index(['OBJECTID', 'LSOA21CD', 'LSOA21NM', 'SHAPE_Leng', 'SHAPE_Area',
       'geometry'],
      dtype='object')

In [None]:
lbth_lsoa21_full = lsoa21_full[lsoa21_full.LSOA21CD.isin(lbth_oa_recs_21.lsoa21cd.unique())]
print(len(lbth_lsoa21_full))
lbth_lsoa21_full.head()

169


Unnamed: 0,OBJECTID,LSOA21CD,LSOA21NM,SHAPE_Leng,SHAPE_Area,geometry
3995,3996,E01004198,Tower Hamlets 002B,1835.149164,94968.784186,"POLYGON ((535056.623 183297.891, 535057.573 18..."
3996,3997,E01004199,Tower Hamlets 002C,1710.969886,68981.051864,"POLYGON ((534827.670 183222.599, 534828.862 18..."
3997,3998,E01004200,Tower Hamlets 005A,1249.726403,57353.772884,"POLYGON ((534671.000 183256.000, 534673.363 18..."
3998,3999,E01004201,Tower Hamlets 002D,1932.510937,129813.369214,"POLYGON ((535721.387 183262.442, 535728.297 18..."
3999,4000,E01004202,Tower Hamlets 005B,1731.238324,112184.242096,"POLYGON ((534717.138 183472.093, 534720.000 18..."


In [None]:
msoa21_full = gpd.read_file(f"{oa_dir}/msoa21_full_clipped.zip")
msoa21_full.columns

In [None]:
lbth_msoa21_full = msoa21_full[msoa21_full.MSOA21CD.isin(lbth_oa_recs_21.msoa21cd.unique())]
print(len(lbth_msoa21_full))
lbth_msoa21_full.head()

LBTH centroids
--

In [45]:
oa21_centroids = gpd.read_file(f"{data_dir}/ons/lookups/oa21_centroids.zip")
oa21_centroids.columns

DriverError: '/vsizip/../data/ons/lookups/oa21_centroids.zip' does not exist in the file system, and is not recognized as a supported dataset name.

In [43]:
lbth_oa21_centroids = oa21_centroids[oa21_centroids.OA21CD.isin(lbth_oa_recs_202012.oa21cd.unique())]
print(len(lbth_oa21_centroids))
lbth_oa21_centroids.head()

NameError: name 'oa21_centroids' is not defined

In [None]:
# NO CENTROIDS YET

# lsoa21_centroids = gpd.read_file(f"{oa_dir}/lsoa21_centroids-shp.zip").\
#                       rename(columns={
#                       # Convert column names to uppercase to stick to general convention:
#                           'lsoa21cd': 'LSoa21CD', 
#                           'lsoa21nm': 'LSoa21NM', })
# lsoa21_centroids.columns

In [None]:
# lbth_lsoa21_centroids = lsoa21_centroids[lsoa21_centroids.LSoa21CD.isin(lbth_oa_recs_202012.LSoa21CD.unique())]
# print(len(lbth_lsoa21_centroids))
# lbth_lsoa21_centroids.head()

In [None]:
# NO CENTROIDS YET
# msoa21_centroids = gpd.read_file(f"{oa_dir}/msoa21_centroids-shp.zip").\
#                       rename(columns={
#                       # Convert column names to uppercase to stick to general convention:
#                           'msoa21cd': 'MSoa21CD', 
#                           'msoa21nm': 'MSoa21NM', })
# msoa21_centroids.columns

In [None]:
# lbth_msoa21_centroids = msoa21_centroids[msoa21_centroids.MSoa21CD.isin(lbth_oa_recs_202012.MSoa21CD.unique())]
# print(len(lbth_msoa21_centroids))
# lbth_msoa21_centroids.head()

Export all geometry files in multiple formats
--

In [None]:
# The full list of supported file formats:
# import fiona
# fiona.supported_drivers  

In [55]:
# In total we are exporting 18 variants: three generalisation levels, each in
# three file formats, for both LSOA and MSOA
for gd, name in zip(
    [
        lbth_oa21_full,
        lbth_lsoa11_full,
        lbth_lsoa21_full,
        lbth_msoa21_full,
        lbth_oa21_centroids,
    ],
    [
        "lbth_oa21_full",
        "lbth_lsoa11_full",
        "lbth_lsoa21_full",
        "lbth_msoa21_full",
        "lbth_oa21_centroids",
    ],
):  # , 'lbth_lsoa21_centroids', 'lbth_msoa21_centroids']):
    print(f"{name}...")

    gd = gd.to_crs({"init": "epsg:4326"})
    gd.to_file(filename=f"{oa_dir}/{name}.geojson", driver="GeoJSON")
    #   gd.to_file(filename=f"{oa_dir}/{name}.gpkg", driver='GPKG')

    os.makedirs(f"{oa_dir}/{name}_shp/", exist_ok=True)
    gd.to_file(filename=f"{oa_dir}/{name}_shp/{name}.shp", driver="ESRI Shapefile")


NameError: name 'lbth_oa21_full' is not defined

In [None]:
# Compress the shapefile folders, for easier download
!zip -r "{oa_dir}/lbth_oa21_full_shp.zip" "{oa_dir}/lbth_oa21_full_shp"
!zip -r "{oa_dir}/lbth_oa21_20m_shp.zip" "{oa_dir}/lbth_oa21_20m_shp"

!zip -r "{oa_dir}/lbth_lsoa21_full_shp.zip" "{oa_dir}/lbth_lsoa21_full_shp"
!zip -r "{oa_dir}/lbth_lsoa21_20m_shp.zip" "{oa_dir}/lbth_lsoa21_20m_shp"
!zip -r "{oa_dir}/lbth_lsoa21_200m_shp.zip" "{oa_dir}/lbth_lsoa21_200m_shp"

!zip -r "{oa_dir}/lbth_msoa21_full_shp.zip" "{oa_dir}/lbth_msoa21_full_shp"
!zip -r "{oa_dir}/lbth_msoa21_20m_shp.zip" "{oa_dir}/lbth_msoa21_20m_shp"
!zip -r "{oa_dir}/lbth_msoa21_200m_shp.zip" "{oa_dir}/lbth_msoa21_200m_shp"

!zip -r "{oa_dir}/lbth_oa21_centroids_shp.zip" "{oa_dir}/lbth_oa21_centroids_shp"
!zip -r "{oa_dir}/lbth_lsoa21_centroids_shp.zip" "{oa_dir}/lbth_lsoa21_centroids_shp"
!zip -r "{oa_dir}/lbth_msoa21_centroids_shp.zip" "{oa_dir}/lbth_msoa21_centroids_shp"


'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or batch file.
'zip' is not recognized as an internal or external command,
operable program or 

Directory content
--

In [None]:
!ls -lh '{oa_dir}'

total 978M
-rw-r--r-- 1 Joseph.Leach 1049089 1.5M Aug 23 17:16 lbth_lsoa11_full.geojson
-rw-r--r-- 1 Joseph.Leach 1049089 776K Aug 23 17:16 lbth_lsoa11_full.gpkg
drwxr-xr-x 1 Joseph.Leach 1049089    0 Aug 23 17:16 lbth_lsoa11_full_shp
-rw-r--r-- 1 Joseph.Leach 1049089 1.7M Aug 19 08:54 lbth_lsoa21_full.geojson
-rw-r--r-- 1 Joseph.Leach 1049089 824K Aug 19 08:18 lbth_lsoa21_full.gpkg
drwxr-xr-x 1 Joseph.Leach 1049089    0 Aug 19 09:26 lbth_lsoa21_full_shp
-rw-r--r-- 1 Joseph.Leach 1049089 797K Aug 19 08:54 lbth_msoa21_full.geojson
-rw-r--r-- 1 Joseph.Leach 1049089 416K Aug 19 08:18 lbth_msoa21_full.gpkg
drwxr-xr-x 1 Joseph.Leach 1049089    0 Aug 19 09:26 lbth_msoa21_full_shp
-rw-r--r-- 1 Joseph.Leach 1049089 152K Aug 19 08:54 lbth_oa21_centroids.geojson
-rw-r--r-- 1 Joseph.Leach 1049089 192K Aug 19 08:18 lbth_oa21_centroids.gpkg
drwxr-xr-x 1 Joseph.Leach 1049089    0 Aug 19 09:26 lbth_oa21_centroids_shp
-rw-r--r-- 1 Joseph.Leach 1049089 3.3M Aug 19 08:54 lbth_oa21_full.geojson
-rw-r--r-

In [None]:
!ls -lh '{lookup_dir}'

total 38M
-rw-r--r-- 1 Joseph.Leach 1049089 2.6M Aug 23 15:11 LSOA2011-to-LSOA-2021-to-Local-Authority-District-2022.csv
-rw-r--r-- 1 Joseph.Leach 1049089  80K Aug 23 19:09 lbth_oa11_lsoa11_msoa11_lad11_rgn11.csv
-rw-r--r-- 1 Joseph.Leach 1049089  83K Aug 23 19:09 lbth_oa21_lsoa21_msoa21_lad21_rgn21.csv
-rw-r--r-- 1 Joseph.Leach 1049089  20M Aug 23 16:49 oa11_lsoa11_msoa11_lad20_rgn20.csv
-rw-r--r-- 1 Joseph.Leach 1049089    0 Aug 23 20:10 oa11_ward20_lad20_202012.csv
-rw-r--r-- 1 Joseph.Leach 1049089  16M Aug 23 16:36 oa21_lsoa21_msoa21_lad21_rgn21.csv
-rw-r--r-- 1 Joseph.Leach 1049089    0 Aug 23 20:17 oa21_ward22.xlsx
