In [None]:
# Extract OA, LSOA and MSOA boundary files and population-weighted centroids
# for the London Borough of Lewisham.
# Based on the 2011 census boundaries for now, which is the most recent OA 
# boundary revision available. (This will change with the 2021 Census.)
#
# We also export two lookup tables:
# - a list of OAs (OA, MSOA, LSOA) & administrative regions (LAD) for Lewisham
# - a mapping of OAs to Wards for Lewisham
#
# The lookups are revised more frequently than the boundaries; we use the most
# recent available version, from December 2020.

In [3]:
# Dependencies
!pip install --upgrade geopandas

Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[?25l[K     |▎                               | 10 kB 27.2 MB/s eta 0:00:01[K     |▋                               | 20 kB 34.6 MB/s eta 0:00:01[K     |█                               | 30 kB 39.9 MB/s eta 0:00:01[K     |█▎                              | 40 kB 39.8 MB/s eta 0:00:01[K     |█▋                              | 51 kB 18.7 MB/s eta 0:00:01[K     |██                              | 61 kB 19.8 MB/s eta 0:00:01[K     |██▎                             | 71 kB 13.1 MB/s eta 0:00:01[K     |██▌                             | 81 kB 14.5 MB/s eta 0:00:01[K     |██▉                             | 92 kB 13.4 MB/s eta 0:00:01[K     |███▏                            | 102 kB 14.5 MB/s eta 0:00:01[K     |███▌                            | 112 kB 14.5 MB/s eta 0:00:01[K     |███▉                            | 122 kB 14.5 MB/s eta 0:00:01[K     |████▏                           | 133 kB 14.5 M

In [4]:
import os

import pandas as pd
import geopandas as gpd

from google.colab import drive
import google.colab.files as files

Configuration
==


GDrive mount
--

In [5]:
# Mount Google Drive at /content/gdrive; the project folders configured in
# this notebook are located under this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [19]:
# Root folder of the project on the mounted Google Drive.
project_dir = '/content/gdrive/MyDrive/WardProfiles'

# Output folders, both located below the project root:
# - oa_dir receives all boundary files and centroid files
# - lookup_dir receives all lookup tables
oa_dir = project_dir + "/boundaries/oa"
lookup_dir = project_dir + "/lookups/oa"

In [7]:
# Clean. !! Use with caution !!
# !rm -rf "{oa_dir}"

In [20]:
!mkdir -p "{oa_dir}"
!mkdir -p "{lookup_dir}"

Downloads
==

Boundary downloads
--


In [None]:
# OA, full, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::output-areas-december-2011-boundaries-ew-bfc/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/09b58d063d4e421a9cad16ba5419a6bd_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/oa11_full_clipped.zip"

--2021-10-29 08:27:25--  https://opendata.arcgis.com/api/v3/datasets/09b58d063d4e421a9cad16ba5419a6bd_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 54.209.123.207, 34.197.204.215
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/oa11_full_clipped.zip’

/content/gdrive/MyD     [ <=>                ] 396.02M  32.8MB/s    in 14s     

2021-10-29 08:27:40 (29.1 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/oa11_full_clipped.zip’ saved [415259381]



In [None]:
# OA, 20M, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::output-areas-december-2011-boundaries-ew-bgc-1/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/a76b2f87057b43d989d8f01733104d62_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/oa11_20m_clipped.zip"

--2021-10-29 08:27:44--  https://opendata.arcgis.com/api/v3/datasets/a76b2f87057b43d989d8f01733104d62_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 34.197.204.215, 3.219.212.222, 54.209.123.207
Connecting to opendata.arcgis.com (opendata.arcgis.com)|34.197.204.215|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/oa11_20m_clipped.zip’

/content/gdrive/MyD     [        <=>         ]  32.99M  19.5MB/s    in 1.7s    

2021-10-29 08:27:46 (19.5 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/oa11_20m_clipped.zip’ saved [34592453]



In [None]:
# LSOA, full, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::lower-layer-super-output-areas-december-2011-boundaries-full-clipped-bfc-ew-v3/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/1f23484eafea45f98485ef816e4fee2d_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/lsoa11_full_clipped.zip"

--2021-10-29 08:27:51--  https://opendata.arcgis.com/api/v3/datasets/1f23484eafea45f98485ef816e4fee2d_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 34.197.204.215, 3.219.212.222, 54.209.123.207
Connecting to opendata.arcgis.com (opendata.arcgis.com)|34.197.204.215|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_full_clipped.zip’

/content/gdrive/MyD     [    <=>             ] 211.77M  31.0MB/s    in 7.6s    

2021-10-29 08:27:59 (27.9 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_full_clipped.zip’ saved [222054017]



In [None]:
# LSOA, 20m, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::lower-layer-super-output-areas-december-2011-boundaries-generalised-clipped-bgc-ew-v3/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/8bbadffa6ddc493a94078c195a1e293b_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/lsoa11_20m_clipped.zip"

--2021-10-29 08:28:06--  https://opendata.arcgis.com/api/v3/datasets/8bbadffa6ddc493a94078c195a1e293b_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 54.209.123.207, 34.197.204.215
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_20m_clipped.zip’

/content/gdrive/MyD     [     <=>            ]  15.87M  15.8MB/s    in 1.0s    

2021-10-29 08:28:07 (15.8 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_20m_clipped.zip’ saved [16643682]



In [None]:
# LSOA, 200m, clipped
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::lower-layer-super-output-areas-december-2011-boundaries-super-generalised-clipped-bsc-ew-v3/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/e9d10c36ebed4ff3865c4389c2c98827_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/lsoa11_200m_clipped.zip"

--2021-10-29 08:28:11--  https://opendata.arcgis.com/api/v3/datasets/e9d10c36ebed4ff3865c4389c2c98827_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 54.209.123.207, 34.197.204.215
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_200m_clipped.zip’

/content/gdrive/MyD     [   <=>              ]   5.18M  9.49MB/s    in 0.5s    

2021-10-29 08:28:13 (9.49 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_200m_clipped.zip’ saved [5431862]



In [None]:
# MSOA, full, clipped
# Source:
# https://geoportal.statistics.gov.uk/datasets/ons::middle-layer-super-output-areas-december-2011-boundaries-full-clipped-bfc-ew-v3/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/1382f390c22f4bed89ce11f2a9207ff0_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/msoa11_full_clipped.zip"

--2021-10-29 08:28:16--  https://opendata.arcgis.com/api/v3/datasets/1382f390c22f4bed89ce11f2a9207ff0_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 54.209.123.207, 34.197.204.215
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_full_clipped.zip’

/content/gdrive/MyD     [            <=>     ] 112.42M  28.2MB/s    in 4.4s    

2021-10-29 08:28:21 (25.5 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_full_clipped.zip’ saved [117876289]



In [None]:
# MSOA, 20m, clipped
# Source:
# https://geoportal.statistics.gov.uk/datasets/ons::middle-layer-super-output-areas-december-2011-boundaries-generalised-clipped-bgc-ew-v3/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/abfccdf1071c43dd981a49eb7da13d2b_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/msoa11_20m_clipped.zip"

--2021-10-29 08:28:26--  https://opendata.arcgis.com/api/v3/datasets/abfccdf1071c43dd981a49eb7da13d2b_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 54.209.123.207, 34.197.204.215, 3.219.212.222
Connecting to opendata.arcgis.com (opendata.arcgis.com)|54.209.123.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_20m_clipped.zip’

/content/gdrive/MyD     [    <=>             ]   7.04M  11.0MB/s    in 0.6s    

2021-10-29 08:28:28 (11.0 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_20m_clipped.zip’ saved [7381641]



In [None]:
# MSOA, 200m, clipped
# Source:
# https://geoportal.statistics.gov.uk/datasets/ons::middle-layer-super-output-areas-december-2011-boundaries-super-generalised-clipped-bsc-ew-v3/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/80223f1d571c405fb2fdf719c7e6da13_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/msoa11_200m_clipped.zip"

--2021-10-29 08:28:32--  https://opendata.arcgis.com/api/v3/datasets/80223f1d571c405fb2fdf719c7e6da13_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 34.197.204.215, 3.219.212.222, 54.209.123.207
Connecting to opendata.arcgis.com (opendata.arcgis.com)|34.197.204.215|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_200m_clipped.zip’

/content/gdrive/MyD     [  <=>               ]   1.61M  4.31MB/s    in 0.4s    

2021-10-29 08:28:33 (4.31 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_200m_clipped.zip’ saved [1693373]



Centroid downloads
--

In [None]:
# OA population-weighted centroids, 2011 census boundaries
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::output-areas-december-2011-population-weighted-centroids-1/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/b0c86eaafc5a4f339eb36785628da904_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/oa11_centroids-shp.zip"

--2021-11-01 13:08:47--  https://opendata.arcgis.com/api/v3/datasets/b0c86eaafc5a4f339eb36785628da904_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 34.197.204.215, 54.209.123.207
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/oa11_centroids-shp.zip’

/content/gdrive/MyD     [ <=>                ]   3.43M  --.-KB/s    in 0.07s   

2021-11-01 13:08:47 (51.5 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/oa11_centroids-shp.zip’ saved [3600660]



In [None]:
# LSOA population-weighted centroids, 2011 census boundaries
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::lower-layer-super-output-areas-december-2011-population-weighted-centroids/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/b7c49538f0464f748dd7137247bbc41c_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/lsoa11_centroids-shp.zip"

--2021-11-01 13:09:01--  https://opendata.arcgis.com/api/v3/datasets/b7c49538f0464f748dd7137247bbc41c_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 34.197.204.215, 3.219.212.222, 54.209.123.207
Connecting to opendata.arcgis.com (opendata.arcgis.com)|34.197.204.215|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_centroids-shp.zip’

/content/gdrive/MyD     [ <=>                ] 853.52K  --.-KB/s    in 0.03s   

2021-11-01 13:09:01 (29.5 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/lsoa11_centroids-shp.zip’ saved [874009]



In [None]:
# MSOA population-weighted centroids, 2011 census boundaries
# Source: 
# https://geoportal.statistics.gov.uk/datasets/ons::middle-layer-super-output-areas-december-2011-population-weighted-centroids/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/b0a6d8a3dc5d4718b3fd62c548d60f81_0/downloads/data?format=shp&spatialRefId=27700' \
  -O "{oa_dir}/msoa11_centroids-shp.zip"

--2021-11-01 13:09:14--  https://opendata.arcgis.com/api/v3/datasets/b0a6d8a3dc5d4718b3fd62c548d60f81_0/downloads/data?format=shp&spatialRefId=27700
Resolving opendata.arcgis.com (opendata.arcgis.com)... 34.197.204.215, 54.209.123.207, 3.219.212.222
Connecting to opendata.arcgis.com (opendata.arcgis.com)|34.197.204.215|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_centroids-shp.zip’

/content/gdrive/MyD     [ <=>                ] 182.36K  --.-KB/s    in 0.007s  

2021-11-01 13:09:17 (27.0 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/boundaries/oa/msoa11_centroids-shp.zip’ saved [186734]



Lookups
--

In [None]:
#!rm "{lookup_dir}/"*

In [21]:
# Admin lookup, from Dec 2020
# Source:
# https://geoportal.statistics.gov.uk/datasets/output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-december-2020-lookup-in-england-and-wales/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/65664b00231444edb3f6f83c9d40591f_0/downloads/data?format=csv&spatialRefId=4326' \
  -O "{lookup_dir}/oa11_lsoa11_msoa11_lad20_rgn20_202012.csv"

--2021-11-02 14:04:02--  https://opendata.arcgis.com/api/v3/datasets/65664b00231444edb3f6f83c9d40591f_0/downloads/data?format=csv&spatialRefId=4326
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 54.209.123.207, 34.197.204.215
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/lookups/oa/oa11_lsoa11_msoa11_lad20_rgn20_202012.csv’

/content/gdrive/MyD     [       <=>          ]  19.40M  14.1MB/s    in 1.4s    

2021-11-02 14:04:04 (14.1 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/lookups/oa/oa11_lsoa11_msoa11_lad20_rgn20_202012.csv’ saved [20346556]



In [22]:
# OA-Ward lookup, from Dec 2020
# Source:
# https://geoportal.statistics.gov.uk/datasets/ons::output-area-to-ward-to-local-authority-district-december-2020-lookup-in-england-and-wales-v2/about
!wget 'https://opendata.arcgis.com/api/v3/datasets/c6a68d369782417da8d5ee5593e7cf00_0/downloads/data?format=csv&spatialRefId=4326' \
  -O "{lookup_dir}/oa11_ward20_lad20_202012.csv"

--2021-11-02 14:04:04--  https://opendata.arcgis.com/api/v3/datasets/c6a68d369782417da8d5ee5593e7cf00_0/downloads/data?format=csv&spatialRefId=4326
Resolving opendata.arcgis.com (opendata.arcgis.com)... 3.219.212.222, 54.209.123.207, 34.197.204.215
Connecting to opendata.arcgis.com (opendata.arcgis.com)|3.219.212.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/lookups/oa/oa11_ward20_lad20_202012.csv’

/content/gdrive/MyD     [     <=>            ]  10.83M  11.4MB/s    in 0.9s    

2021-11-02 14:04:06 (11.4 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/lookups/oa/oa11_ward20_lad20_202012.csv’ saved [11352351]



Directory content
--

In [None]:
# List the downloaded boundary and centroid archives with human-readable sizes.
!ls -lh "{oa_dir}"

total 811M
-rw------- 1 root root 5.2M Feb 16  2021 lsoa11_200m_clipped.zip
-rw------- 1 root root  16M Feb 16  2021 lsoa11_20m_clipped.zip
-rw------- 1 root root 854K Nov  1 13:00 lsoa11_centroids-shp.zip
-rw------- 1 root root 212M Jan 26  2021 lsoa11_full_clipped.zip
-rw------- 1 root root 1.7M Feb 16  2021 msoa11_200m_clipped.zip
-rw------- 1 root root 7.1M Feb 16  2021 msoa11_20m_clipped.zip
-rw------- 1 root root 183K Oct 31 21:09 msoa11_centroids-shp.zip
-rw------- 1 root root 113M Jan 26  2021 msoa11_full_clipped.zip
-rw------- 1 root root  33M Jul 24  2020 oa11_20m_clipped.zip
-rw------- 1 root root 3.5M Jul 27  2020 oa11_centroids-shp.zip
-rw------- 1 root root 397M Jul 23  2020 oa11_full_clipped.zip


In [11]:
# List the downloaded lookup tables with human-readable sizes.
!ls -lh "{lookup_dir}" 

total 31M
-rw------- 1 root root 20M Jan  6  2021 oa11_lsoa11_msoa11_lad20_rgn20_202012.csv
-rw------- 1 root root 11M Jan 27  2021 oa11_ward20_lad20_202012.csv


Load & extract Lewisham lookups
==
First we process plain tabular data.

Lewisham OA-LSOA-MSOA-LAD
--
This lookup also allows us to identify all OAs that relate to the Lewisham Local Authority. We do this first so we can then filter any national data sets below.

In [23]:
# Read the national lookup that links every OA to its LSOA, MSOA, LAD and
# region, then show which columns it provides.
oa_lookup_path = f"{lookup_dir}/oa11_lsoa11_msoa11_lad20_rgn20_202012.csv"
oa_lookup_202012 = pd.read_csv(oa_lookup_path)
oa_lookup_202012.columns

Index(['FID', 'OA11CD', 'LSOA11CD', 'LSOA11NM', 'MSOA11CD', 'MSOA11NM',
       'LAD20CD', 'LAD20NM', 'RGN20CD', 'RGN20NM'],
      dtype='object')

In [24]:
# Keep only the rows that belong to the Lewisham Local Authority District
# (LAD), as defined in December 2020.
# The rows are selected by LAD name (LAD20NM == 'Lewisham'); selecting by
# LAD code (LAD20CD == 'E09000023') would be an alternative.
is_lewisham = oa_lookup_202012["LAD20NM"] == 'Lewisham'
lbl_oa_recs_202012 = oa_lookup_202012.loc[is_lewisham]

# Report the number of Lewisham OAs, then preview the first rows.
print(len(lbl_oa_recs_202012))
lbl_oa_recs_202012.head()

887


Unnamed: 0,FID,OA11CD,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM,LAD20CD,LAD20NM,RGN20CD,RGN20NM
162800,162801,E00016277,E01003220,Lewisham 022C,E02000674,Lewisham 022,E09000023,Lewisham,E12000007,London
162801,162802,E00016278,E01003220,Lewisham 022C,E02000674,Lewisham 022,E09000023,Lewisham,E12000007,London
162802,162803,E00016285,E01003220,Lewisham 022C,E02000674,Lewisham 022,E09000023,Lewisham,E12000007,London
162803,162804,E00016257,E01003221,Lewisham 025C,E02000677,Lewisham 025,E09000023,Lewisham,E12000007,London
162804,162805,E00016263,E01003221,Lewisham 025C,E02000677,Lewisham 025,E09000023,Lewisham,E12000007,London


In [25]:
# Store the Lewisham-only rows as their own lookup table for later reuse.
# The DataFrame index is not written to the file.
lbl_oa_lookup_path = f"{lookup_dir}/lbl_oa11_lsoa11_msoa11_lad20_rgn20_202012.csv"
lbl_oa_recs_202012.to_csv(lbl_oa_lookup_path, index=False)

Lewisham OA-Ward20 lookup
--

In [26]:
# Read the national lookup that links every OA to its Ward and LAD, then show
# which columns it provides.
oa_ward_lookup_path = f"{lookup_dir}/oa11_ward20_lad20_202012.csv"
oa_ward_lad_202012 = pd.read_csv(oa_ward_lookup_path)
oa_ward_lad_202012.columns

Index(['FID', 'OA11CD', 'WD20CD', 'WD20NM', 'LAD20CD', 'LAD20NM'], dtype='object')

In [27]:
# Keep only the Ward lookup rows whose OA code appears in the Lewisham OA
# lookup extracted from the administrative lookup.
lewisham_oa_codes = lbl_oa_recs_202012["OA11CD"].unique()
in_lewisham = oa_ward_lad_202012["OA11CD"].isin(lewisham_oa_codes)
lbl_oa_ward_lad_202012 = oa_ward_lad_202012.loc[in_lewisham]

# Report the number of matched rows, then preview the first rows.
print(len(lbl_oa_ward_lad_202012))
lbl_oa_ward_lad_202012.head()

887


Unnamed: 0,FID,OA11CD,WD20CD,WD20NM,LAD20CD,LAD20NM
162823,162824,E00016365,E05000442,Downham,E09000023,Lewisham
162827,162828,E00016362,E05000442,Downham,E09000023,Lewisham
162828,162829,E00016279,E05000440,Catford South,E09000023,Lewisham
162829,162830,E00016132,E05000437,Bellingham,E09000023,Lewisham
162831,162832,E00016217,E05000439,Brockley,E09000023,Lewisham


In [28]:
# Store the Lewisham-only OA-to-Ward rows as their own lookup table for later
# reuse. The DataFrame index is not written to the file.
lbl_oa_ward_lookup_path = f"{lookup_dir}/lbl_oa11_wd20_lad20_202012.csv"
lbl_oa_ward_lad_202012.to_csv(lbl_oa_ward_lookup_path, index=False)

Load & extract Lewisham geometries
==
This is spatial geometry data.

Load & extract OA, LSOA, MSOA boundaries
--

In [None]:
# Read the national OA boundary archives (full resolution and 20m generalised)
# straight from the downloaded zip files, then show the available columns.
oa11_full, oa11_20m = (
    gpd.read_file(f"{oa_dir}/oa11_{resolution}_clipped.zip")
    for resolution in ("full", "20m")
)
oa11_full.columns

Index(['OBJECTID', 'OA11CD', 'LAD11CD', 'LAD16CD', 'LAD16NM', 'Shape__Are',
       'Shape__Len', 'geometry'],
      dtype='object')

In [None]:
# Restrict both OA boundary layers to the OA codes listed in the Lewisham
# lookup.
lbl_oa_codes = lbl_oa_recs_202012["OA11CD"].unique()
lbl_oa11_full = oa11_full.loc[oa11_full["OA11CD"].isin(lbl_oa_codes)]
lbl_oa11_20m = oa11_20m.loc[oa11_20m["OA11CD"].isin(lbl_oa_codes)]

# Both layers should report the same number of areas.
print(len(lbl_oa11_full), len(lbl_oa11_20m))
lbl_oa11_full.head()

887 887


Unnamed: 0,OBJECTID,OA11CD,LAD11CD,LAD16CD,LAD16NM,Shape__Are,Shape__Len,geometry
15701,15702,E00016112,E09000023,E09000023,Lewisham,19940.745827,769.477441,"POLYGON ((536952.066 172736.652, 536935.236 17..."
15702,15703,E00016113,E09000023,E09000023,Lewisham,117095.035057,2260.067739,"POLYGON ((537785.541 172278.947, 537786.089 17..."
15703,15704,E00016114,E09000023,E09000023,Lewisham,31080.46653,1366.427748,"POLYGON ((537272.951 172219.834, 537272.662 17..."
15704,15705,E00016115,E09000023,E09000023,Lewisham,38562.49559,1206.484082,"POLYGON ((537510.074 171857.820, 537497.451 17..."
15705,15706,E00016116,E09000023,E09000023,Lewisham,46969.382187,1097.474643,"POLYGON ((536720.000 171753.000, 536722.914 17..."


In [None]:
# Read the national LSOA boundary archives (full resolution, 20m generalised
# and 200m super-generalised), then show the available columns.
lsoa11_full, lsoa11_20m, lsoa11_200m = (
    gpd.read_file(f"{oa_dir}/lsoa11_{resolution}_clipped.zip")
    for resolution in ("full", "20m", "200m")
)
lsoa11_full.columns

Index(['OBJECTID', 'LSOA11CD', 'LSOA11NM', 'BNG_E', 'BNG_N', 'LONG_', 'LAT',
       'Shape_Leng', 'Shape__Are', 'Shape__Len', 'geometry'],
      dtype='object')

In [None]:
# Restrict all three LSOA boundary layers to the LSOA codes listed in the
# Lewisham lookup.
lbl_lsoa_codes = lbl_oa_recs_202012["LSOA11CD"].unique()
lbl_lsoa11_full = lsoa11_full.loc[lsoa11_full["LSOA11CD"].isin(lbl_lsoa_codes)]
lbl_lsoa11_20m = lsoa11_20m.loc[lsoa11_20m["LSOA11CD"].isin(lbl_lsoa_codes)]
lbl_lsoa11_200m = lsoa11_200m.loc[lsoa11_200m["LSOA11CD"].isin(lbl_lsoa_codes)]

# All layers should report the same number of areas.
print(len(lbl_lsoa11_full), len(lbl_lsoa11_20m), len(lbl_lsoa11_200m))
lbl_lsoa11_full.head()

169 169 169


Unnamed: 0,OBJECTID,LSOA11CD,LSOA11NM,BNG_E,BNG_N,LONG_,LAT,Shape_Leng,Shape__Are,Shape__Len,geometry
3121,3122,E01003189,Lewisham 034A,537581,171824,-0.02234,51.4287,2513.10991,190899.2,2513.10991,"POLYGON ((537614.892 172060.955, 537617.004 17..."
3122,3123,E01003190,Lewisham 034B,536718,171642,-0.03482,51.4273,3050.92318,326070.9,3050.92318,"POLYGON ((536761.779 172021.110, 536744.821 17..."
3123,3124,E01003191,Lewisham 034C,537247,171537,-0.02726,51.4262,3050.308546,197789.8,3050.308546,"POLYGON ((536982.429 171941.765, 536982.759 17..."
3124,3125,E01003192,Lewisham 034D,537962,170910,-0.01722,51.4204,5831.12675,1016245.0,5831.12675,"POLYGON ((537523.532 171647.620, 537526.639 17..."
3125,3126,E01003193,Lewisham 029A,536719,172067,-0.03464,51.4311,3716.871838,348374.9,3716.871838,"POLYGON ((536641.390 172315.146, 536645.470 17..."


In [None]:
# Read the national MSOA boundary archives (full resolution, 20m generalised
# and 200m super-generalised), then show the available columns.
msoa11_full, msoa11_20m, msoa11_200m = (
    gpd.read_file(f"{oa_dir}/msoa11_{resolution}_clipped.zip")
    for resolution in ("full", "20m", "200m")
)
msoa11_full.columns

Index(['OBJECTID', 'MSOA11CD', 'MSOA11NM', 'BNG_E', 'BNG_N', 'LONG_', 'LAT',
       'Shape_Leng', 'Shape__Are', 'Shape__Len', 'geometry'],
      dtype='object')

In [None]:
# Restrict all three MSOA boundary layers to the MSOA codes listed in the
# Lewisham lookup.
lbl_msoa_codes = lbl_oa_recs_202012["MSOA11CD"].unique()
lbl_msoa11_full = msoa11_full.loc[msoa11_full["MSOA11CD"].isin(lbl_msoa_codes)]
lbl_msoa11_20m = msoa11_20m.loc[msoa11_20m["MSOA11CD"].isin(lbl_msoa_codes)]
lbl_msoa11_200m = msoa11_200m.loc[msoa11_200m["MSOA11CD"].isin(lbl_msoa_codes)]

# All layers should report the same number of areas.
print(len(lbl_msoa11_full), len(lbl_msoa11_20m), len(lbl_msoa11_200m))
lbl_msoa11_full.head()

36 36 36


Unnamed: 0,OBJECTID,MSOA11CD,MSOA11NM,BNG_E,BNG_N,LONG_,LAT,Shape_Leng,Shape__Are,Shape__Len,geometry
631,632,E02000653,Lewisham 001,536028,178459,-0.04212,51.4887,5520.883634,676116.97496,5520.883634,"POLYGON ((536691.863 178955.310, 536695.099 17..."
632,633,E02000654,Lewisham 002,536580,177914,-0.03438,51.4837,5436.688817,945665.298073,5436.688817,"POLYGON ((536952.600 178430.804, 536997.602 17..."
633,634,E02000655,Lewisham 003,535814,177346,-0.04563,51.4788,5686.253163,978425.032066,5686.253163,"POLYGON ((535498.938 178349.531, 535523.031 17..."
634,635,E02000657,Lewisham 005,536845,176528,-0.03111,51.4712,5198.767531,873659.28112,5198.767531,"POLYGON ((537424.375 176856.141, 537436.000 17..."
635,636,E02000658,Lewisham 006,535842,176571,-0.04552,51.4718,4374.331332,773959.497925,4374.331332,"POLYGON ((535897.606 176847.053, 535897.938 17..."


Lewisham boundary extracts
---


Lewisham centroids
--

In [None]:
# Read the national OA population-weighted centroids from the downloaded
# archive, then show the available columns.
oa11_centroids_path = f"{oa_dir}/oa11_centroids-shp.zip"
oa11_centroids = gpd.read_file(oa11_centroids_path)
oa11_centroids.columns

Index(['OBJECTID', 'OA11CD', 'geometry'], dtype='object')

In [None]:
# Restrict the OA centroids to the OA codes listed in the Lewisham lookup.
lbl_oa_centroid_codes = lbl_oa_recs_202012["OA11CD"].unique()
is_lbl_oa_centroid = oa11_centroids["OA11CD"].isin(lbl_oa_centroid_codes)
lbl_oa11_centroids = oa11_centroids.loc[is_lbl_oa_centroid]

# Report the number of centroids kept, then preview the first rows.
print(len(lbl_oa11_centroids))
lbl_oa11_centroids.head()

887


Unnamed: 0,OBJECTID,OA11CD,geometry
162887,162888,E00016403,POINT (536465.000 178485.000)
162888,162889,E00174027,POINT (537509.000 176789.000)
162889,162890,E00016442,POINT (536646.000 178495.000)
162890,162891,E00016787,POINT (537574.000 173825.000)
162891,162892,E00173707,POINT (535457.000 172878.000)


In [None]:
# Read the national LSOA population-weighted centroids. The code and name
# columns are renamed to upper case to follow the convention of the other
# layers; only these two columns are renamed, 'objectid' stays as it is.
lsoa11_centroids = (
    gpd.read_file(f"{oa_dir}/lsoa11_centroids-shp.zip")
    .rename(columns={'lsoa11cd': 'LSOA11CD', 'lsoa11nm': 'LSOA11NM'})
)
lsoa11_centroids.columns

Index(['objectid', 'LSOA11CD', 'LSOA11NM', 'geometry'], dtype='object')

In [None]:
# Restrict the LSOA centroids to the LSOA codes listed in the Lewisham lookup.
lbl_lsoa_centroid_codes = lbl_oa_recs_202012["LSOA11CD"].unique()
is_lbl_lsoa_centroid = lsoa11_centroids["LSOA11CD"].isin(lbl_lsoa_centroid_codes)
lbl_lsoa11_centroids = lsoa11_centroids.loc[is_lbl_lsoa_centroid]

# Report the number of centroids kept, then preview the first rows.
print(len(lbl_lsoa11_centroids))
lbl_lsoa11_centroids.head()

169


Unnamed: 0,objectid,LSOA11CD,LSOA11NM,geometry
19792,19793,E01003316,Lewisham 024E,POINT (536053.320 173096.003)
19794,19795,E01003317,Lewisham 020A,POINT (537099.318 173710.878)
19796,19797,E01003314,Lewisham 024C,POINT (536396.182 172933.107)
19799,19800,E01003312,Lewisham 027D,POINT (536130.911 172179.098)
19801,19802,E01003313,Lewisham 027E,POINT (535679.914 172118.159)


In [None]:
# Read the national MSOA population-weighted centroids. The code and name
# columns are renamed to upper case to follow the convention of the other
# layers; only these two columns are renamed, 'objectid' stays as it is.
msoa11_centroids = (
    gpd.read_file(f"{oa_dir}/msoa11_centroids-shp.zip")
    .rename(columns={'msoa11cd': 'MSOA11CD', 'msoa11nm': 'MSOA11NM'})
)
msoa11_centroids.columns

Index(['objectid', 'MSOA11CD', 'MSOA11NM', 'geometry'], dtype='object')

In [None]:
# Restrict the MSOA centroids to the MSOA codes listed in the Lewisham lookup.
lbl_msoa_centroid_codes = lbl_oa_recs_202012["MSOA11CD"].unique()
is_lbl_msoa_centroid = msoa11_centroids["MSOA11CD"].isin(lbl_msoa_centroid_codes)
lbl_msoa11_centroids = msoa11_centroids.loc[is_lbl_msoa_centroid]

# Report the number of centroids kept, then preview the first rows.
print(len(lbl_msoa11_centroids))
lbl_msoa11_centroids.head()

36


Unnamed: 0,objectid,MSOA11CD,MSOA11NM,geometry
4232,4233,E02006784,Lewisham 038,POINT (538982.517 171259.399)
4233,4234,E02006783,Lewisham 037,POINT (539914.040 171813.650)
4237,4238,E02000666,Lewisham 014,POINT (537191.945 174850.539)
4238,4239,E02000667,Lewisham 015,POINT (536592.495 174536.778)
4239,4240,E02000664,Lewisham 012,POINT (538348.533 175132.387)


Export all geometry files in multiple formats
--

In [None]:
# The full list of supported file formats:
# import fiona
# fiona.supported_drivers  

In [None]:
# Export every Lewisham extract in three file formats: GeoJSON, GeoPackage
# and ESRI Shapefile. There are 11 layers (2 OA, 3 LSOA and 3 MSOA boundary
# layers plus 3 centroid layers), so 33 exports in total.
#
# The layers are keyed by their output file stem, so a frame cannot end up
# paired with the wrong name when a layer is added or removed.
#
# NOTE(review): the source data was downloaded with spatialRefId=27700 and is
# written out as loaded; many GeoJSON consumers expect WGS84 coordinates --
# confirm this is intended.
lbl_layers = {
    'lbl_oa11_full': lbl_oa11_full,
    'lbl_oa11_20m': lbl_oa11_20m,
    'lbl_lsoa11_full': lbl_lsoa11_full,
    'lbl_lsoa11_20m': lbl_lsoa11_20m,
    'lbl_lsoa11_200m': lbl_lsoa11_200m,
    'lbl_msoa11_full': lbl_msoa11_full,
    'lbl_msoa11_20m': lbl_msoa11_20m,
    'lbl_msoa11_200m': lbl_msoa11_200m,
    'lbl_oa11_centroids': lbl_oa11_centroids,
    'lbl_lsoa11_centroids': lbl_lsoa11_centroids,
    'lbl_msoa11_centroids': lbl_msoa11_centroids,
}

for name, gd in lbl_layers.items():
  print(f"{name}...")

  gd.to_file(filename=f"{oa_dir}/{name}.geojson", driver='GeoJSON')
  gd.to_file(filename=f"{oa_dir}/{name}.gpkg", driver='GPKG')

  # This does not work, contrary to the documentation -- 
  # it creates a directory of that name, rather than a .zip file
  #gd.to_file(filename=f"{oa_dir}/{name}.zip", driver='ESRI Shapefile')
  
  # Instead we write the shapefile to a subdirectory, so we can compress it later
  os.makedirs(f"{oa_dir}/{name}_shp/",exist_ok=True)
  gd.to_file(filename=f"{oa_dir}/{name}_shp/{name}.shp", driver='ESRI Shapefile')

lbl_oa11_full...
lbl_oa11_20m...
lbl_lsoa11_full...
lbl_lsoa11_20m...
lbl_lsoa11_200m...
lbl_msoa11_full...
lbl_msoa11_20m...
lbl_msoa11_200m...
lbl_oa11_centroids...
lbl_lsoa11_centroids...
lbl_msoa11_centroids...


In [None]:
# Compress the shapefile folders, for easier download
!zip -r "{oa_dir}/lbl_oa11_full_shp.zip" "{oa_dir}/lbl_oa11_full_shp"
!zip -r "{oa_dir}/lbl_oa11_20m_shp.zip" "{oa_dir}/lbl_oa11_20m_shp"

!zip -r "{oa_dir}/lbl_lsoa11_full_shp.zip" "{oa_dir}/lbl_lsoa11_full_shp"
!zip -r "{oa_dir}/lbl_lsoa11_20m_shp.zip" "{oa_dir}/lbl_lsoa11_20m_shp"
!zip -r "{oa_dir}/lbl_lsoa11_200m_shp.zip" "{oa_dir}/lbl_lsoa11_200m_shp"

!zip -r "{oa_dir}/lbl_msoa11_full_shp.zip" "{oa_dir}/lbl_msoa11_full_shp"
!zip -r "{oa_dir}/lbl_msoa11_20m_shp.zip" "{oa_dir}/lbl_msoa11_20m_shp"
!zip -r "{oa_dir}/lbl_msoa11_200m_shp.zip" "{oa_dir}/lbl_msoa11_200m_shp"

!zip -r "{oa_dir}/lbl_oa11_centroids_shp.zip" "{oa_dir}/lbl_oa11_centroids_shp"
!zip -r "{oa_dir}/lbl_lsoa11_centroids_shp.zip" "{oa_dir}/lbl_lsoa11_centroids_shp"
!zip -r "{oa_dir}/lbl_msoa11_centroids_shp.zip" "{oa_dir}/lbl_msoa11_centroids_shp"


updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_full_shp/ (stored 0%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_full_shp/lbl_oa11_full.shp (deflated 59%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_full_shp/lbl_oa11_full.shx (deflated 36%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_full_shp/lbl_oa11_full.dbf (deflated 91%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_full_shp/lbl_oa11_full.cpg (stored 0%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_full_shp/lbl_oa11_full.prj (deflated 34%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_20m_shp/ (stored 0%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_20m_shp/lbl_oa11_20m.shp (deflated 67%)
updating: content/gdrive/MyDrive/WardProfiles/boundaries/oa/lbl_oa11_20m_shp/lbl_oa11_20m.shx (deflated 55%)
updating: content/gdrive/MyDrive/WardProfiles/bou

Directory content
--

In [None]:
# List the boundary folder again: it now holds the downloads together with the
# exported Lewisham extracts.
!ls -lh '{oa_dir}'

total 815M
-rw------- 1 root root 112K Nov  1 13:41 lbl_lsoa11_200m.geojson
-rw------- 1 root root 152K Nov  1 13:41 lbl_lsoa11_200m.gpkg
drwx------ 2 root root 4.0K Nov  1 13:41 lbl_lsoa11_200m_shp
-rw------- 1 root root  90K Nov  1 13:44 lbl_lsoa11_200m_shp.zip
-rw------- 1 root root 267K Nov  1 13:41 lbl_lsoa11_20m.geojson
-rw------- 1 root root 216K Nov  1 13:41 lbl_lsoa11_20m.gpkg
drwx------ 2 root root 4.0K Nov  1 13:41 lbl_lsoa11_20m_shp
-rw------- 1 root root 192K Nov  1 13:44 lbl_lsoa11_20m_shp.zip
-rw------- 1 root root  36K Nov  1 13:41 lbl_lsoa11_centroids.geojson
-rw------- 1 root root 120K Nov  1 13:41 lbl_lsoa11_centroids.gpkg
drwx------ 2 root root 4.0K Nov  1 13:41 lbl_lsoa11_centroids_shp
-rw------- 1 root root 6.8K Nov  1 13:44 lbl_lsoa11_centroids_shp.zip
-rw------- 1 root root 3.5M Nov  1 13:41 lbl_lsoa11_full.geojson
-rw------- 1 root root 1.5M Nov  1 13:41 lbl_lsoa11_full.gpkg
drwx------ 2 root root 4.0K Nov  1 13:41 lbl_lsoa11_full_shp
-rw------- 1 root root 2.3

In [29]:
# List the lookup folder again: it now holds the downloads together with the
# Lewisham-specific lookup tables.
!ls -lh '{lookup_dir}'

total 31M
-rw------- 1 root root 87K Nov  2 14:04 lbl_oa11_lsoa11_msoa11_lad20_rgn20_202012.csv
-rw------- 1 root root 50K Nov  2 14:04 lbl_oa11_wd20_lad20_202012.csv
-rw------- 1 root root 20M Jan  6  2021 oa11_lsoa11_msoa11_lad20_rgn20_202012.csv
-rw------- 1 root root 11M Jan 27  2021 oa11_ward20_lad20_202012.csv
