In [None]:
# ONS Census 2011 ethnic group statistics (Nomis table QS201EW) for Lewisham.
# Fetches the Nomis data releases and prepares them for use in the Lewisham ward profiles.
#
# This requires the OA->LSOA lookup table produced by "lbl_boundaries_oa.ipynb"
# as well as the OA->WD22 lookup table produced by "lbl_boundaries_2022_wards.ipynb"

In [2]:
import pandas as pd

from google.colab import drive
import google.colab.files as files

import IPython

Tools
==
Helpers to reformat & aggregate the Nomis source data.

In [3]:
# Extract the subset we need.
def filter_nomis_data(d, date_values, geography_values):
  """Keep only the rows of `d` that match both a wanted date and a wanted geography.

  d:                 Nomis table with `date` and `geography` columns.
  date_values:       accepted values for the `date` column.
  geography_values:  accepted values for the `geography` column.

  Returns the matching rows with their original index and columns.
  """
  in_period = d.date.isin(date_values)
  in_area = d.geography.isin(geography_values)
  return d.loc[in_period & in_area]

# Reformat Nomis data into a lookup table format.
def format_nomis_data(d, geo_colname='OA11CD'):
  """Turn a Nomis table into a lookup table keyed by geography code.

  The 'geography code' column is relabelled to `geo_colname` so the index
  column is clearly named, and the superfluous 'date' and 'geography'
  columns are removed. `d` itself is not modified.
  """
  relabelled = d.rename(columns={'geography code': geo_colname})
  return relabelled.drop(columns=['date', 'geography'])

# Convenience function: filter, then reformat.
def nomis_to_oa(d, years, oa_list):
  """Filter a Nomis table to the given years and OAs, then format it as an OA lookup table.

  Equivalent to calling filter_nomis_data() followed by format_nomis_data()
  with its default geography column name.
  """
  selected = filter_nomis_data(d, years, oa_list)
  return format_nomis_data(selected)

In [4]:
# Aggregate OA-level data to LSOAs, Wards, etc. (Calculates the sum.)
def aggregate_oa_groups(oa_data, oa_group_table, oa_col, group_col):
  """Sum OA-level counts up to a larger geography.

  oa_data:         OA-level table; one column is `oa_col`, the numeric
                   columns are the values to add up.
  oa_group_table:  lookup table with the columns `oa_col` and `group_col`.
  oa_col:          name of the OA code column shared by both tables.
  group_col:       name of the column holding the target geography code.

  Returns a DataFrame indexed by `group_col` with one summed column per
  numeric column of `oa_data`.
  """
  # how='right' keeps every row of oa_data. Rows whose OA is missing from the
  # lookup end up with a missing group value, and groupby leaves those out.
  joined = pd.merge(oa_group_table, oa_data, on=oa_col, how='right')

  # numeric_only=True: text columns (e.g. Nomis' 'Rural Urban' label) must not
  # be aggregated. Older pandas dropped them silently; since pandas 2.0 a plain
  # sum() would concatenate the strings of each group instead.
  return (joined
          .drop(columns=[oa_col])
          .groupby(group_col)
          .sum(numeric_only=True))

Data
==
GDrive mount
--

In [5]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
# All project paths used in this notebook are located under this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [6]:
# Root folder of the project on the mounted Google Drive.
project_dir = '/content/gdrive/MyDrive/WardProfiles'

# Folders holding the OA lookup tables (inputs to this notebook).
oa_lookups_dir = project_dir + '/lookups/oa'
ward_lookups_dir = project_dir + '/lookups/2022_wards'

# Folder for downloads and exports.
output_dir = project_dir + '/ethnicity'

In [7]:
# Create the export folder (and any missing parent folders).
# -p means this does not fail when the folder already exists.
!mkdir -p '{output_dir}'

Downloads
--

In [8]:
# Ethnic group data (Census 2011 table QS201EW), fetched as a Nomis bulk CSV.
# Source:
# https://www.nomisweb.co.uk/census/2011/qs201ew
# NOTE(review): going by the output filename, the geography selector presumably
# covers all 2011 OAs in London -- confirm against the Nomis API documentation.
# wget -O overwrites the target file, so every run of this cell replaces the
# previous download.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_522_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299' \
  -O "{output_dir}/census11_ethnicity_oa11_london.csv"

--2021-12-08 11:55:06--  https://www.nomisweb.co.uk/api/v01/dataset/nm_522_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.3.145
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.3.145|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/ethnicity/census11_ethnicity_oa11_london.csv’

/content/gdrive/MyD     [           <=>      ]   2.41M   636KB/s    in 14s     

2021-12-08 11:55:24 (179 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/ethnicity/census11_ethnicity_oa11_london.csv’ saved [2523694]



In [9]:
# Reference regions: the same Nomis dataset (nm_522_1) for larger comparison areas.
# NOTE(review): the three geography selectors presumably stand for England,
# London and Lewisham (those are the rows seen in the processed output) --
# confirm against the Nomis API documentation.
# wget -O overwrites the target file on every run.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_522_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699,2013265927TYPE480,1946157254TYPE464' \
  -O "{output_dir}/census11_ethnicity_references.csv"

--2021-12-08 11:55:31--  https://www.nomisweb.co.uk/api/v01/dataset/nm_522_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699,2013265927TYPE480,1946157254TYPE464
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.3.145
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.3.145|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/ethnicity/census11_ethnicity_references.csv’

/content/gdrive/MyD     [ <=>                ]   2.12K  --.-KB/s    in 0.08s   

2021-12-08 11:55:32 (26.4 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/ethnicity/census11_ethnicity_references.csv’ saved [2175]



Lookups
--
Used to match OAs to their LSOAs and to their (proposed) 2022 electoral wards.

In [10]:
# All OAs and their LSOAs in Lewisham.
# Only the two code columns of the lookup file are kept.
oa_lsoa_join = pd.read_csv(
    f"{oa_lookups_dir}/lbl_oa11_lsoa11_msoa11_lad20_rgn20_202012.csv"
)[['OA11CD', 'LSOA11CD']]
oa_lsoa_join.head()

Unnamed: 0,OA11CD,LSOA11CD
0,E00016277,E01003220
1,E00016278,E01003220
2,E00016285,E01003220
3,E00016257,E01003221
4,E00016263,E01003221


In [11]:
# All OAs and their 2022 wards in Lewisham.
# Only the two code columns of the lookup file are kept.
oa_wd22_join = pd.read_csv(
    f"{ward_lookups_dir}/lbl_oa11_wd22_proposed.csv"
)[['OA11CD', 'WD22CD_proposed']]
oa_wd22_join.head()

Unnamed: 0,OA11CD,WD22CD_proposed
0,E00016403,E05013721
1,E00016442,E05013721
2,E00016407,E05013721
3,E00016404,E05013721
4,E00016402,E05013721


Process & export
==
Lewisham
--

In [12]:
# For filtering of source data
lbl_oa_list = oa_lsoa_join.OA11CD.unique() # All OAs in Lewisham
years = [2011] # Just a precaution, in case any of the downloads include multiple periods

for datname in ['ethnicity']:
  IPython.display.display(f"=== {datname} ===")
  # Load the Nomis data
  d = pd.read_csv(f"{output_dir}/census11_{datname}_oa11_london.csv")

  # Simplify the column names: remove the redundant title prefix and the
  # qualifier suffix, e.g.
  #   'Ethnic Group: White: Irish; measures: Value' -> 'White: Irish'
  # regex=True is passed explicitly: since pandas 2.0, str.replace() treats the
  # pattern as a literal string by default, which would silently leave the
  # column names unchanged and break the later column lookups.
  d.columns = d.columns.str.replace(
      r'^.*?: (.*?); measures: Value',
      r'\1',
      regex=True)

  # Transform to OA index data (Lewisham OAs only) and export
  lbl_oa = nomis_to_oa(d, years, lbl_oa_list)
  lbl_oa.to_csv(f"{output_dir}/lbl_{datname}_oa11.csv", index=False)

  # Aggregate to LSOA level; the LSOA code is the index, so it is exported too
  lbl_lsoa = aggregate_oa_groups(lbl_oa, oa_lsoa_join, oa_col='OA11CD', group_col='LSOA11CD')
  lbl_lsoa.to_csv(f"{output_dir}/lbl_{datname}_lsoa11.csv", index=True)
  IPython.display.display(lbl_lsoa.head())

  # Aggregate to WD22 level; the ward code is the index, so it is exported too
  lbl_wd22 = aggregate_oa_groups(lbl_oa, oa_wd22_join, oa_col='OA11CD', group_col='WD22CD_proposed')
  lbl_wd22.to_csv(f"{output_dir}/lbl_{datname}_wd22.csv", index=True)
  IPython.display.display(lbl_wd22.head())

'=== ethnicity ==='

Unnamed: 0_level_0,All categories: Ethnic group,White,White: English/Welsh/Scottish/Northern Irish/British,White: Irish,White: Gypsy or Irish Traveller,White: Other White,Mixed,Mixed/multiple ethnic group: White and Black Caribbean,Mixed/multiple ethnic group: White and Black African,Mixed/multiple ethnic group: White and Asian,Mixed/multiple ethnic group: Other Mixed,Asian,Asian/Asian British: Indian,Asian/Asian British: Pakistani,Asian/Asian British: Bangladeshi,Asian/Asian British: Chinese,Asian/Asian British: Other Asian,Black,Black/African/Caribbean/Black British: African,Black/African/Caribbean/Black British: Caribbean,Black/African/Caribbean/Black British: Other Black,Other,Other ethnic group: Arab,Other ethnic group: Any other ethnic group
LSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
E01003189,1670,877,738,29,2,108,102,48,12,14,28,120,19,5,8,9,79,518,251,187,80,53,16,37
E01003190,1878,712,544,20,1,147,214,109,60,5,40,145,33,4,1,16,91,742,348,267,127,65,25,40
E01003191,1494,743,601,27,1,114,140,65,15,7,53,63,7,0,1,17,38,518,196,205,117,30,6,24
E01003192,1639,817,652,25,7,133,101,62,16,6,17,129,12,11,4,27,75,578,256,225,97,14,4,10
E01003193,1568,890,743,32,6,109,97,52,14,7,24,72,17,3,6,17,29,494,173,243,78,15,2,13


Unnamed: 0_level_0,All categories: Ethnic group,White,White: English/Welsh/Scottish/Northern Irish/British,White: Irish,White: Gypsy or Irish Traveller,White: Other White,Mixed,Mixed/multiple ethnic group: White and Black Caribbean,Mixed/multiple ethnic group: White and Black African,Mixed/multiple ethnic group: White and Asian,Mixed/multiple ethnic group: Other Mixed,Asian,Asian/Asian British: Indian,Asian/Asian British: Pakistani,Asian/Asian British: Bangladeshi,Asian/Asian British: Chinese,Asian/Asian British: Other Asian,Black,Black/African/Caribbean/Black British: African,Black/African/Caribbean/Black British: Caribbean,Black/African/Caribbean/Black British: Other Black,Other,Other ethnic group: Arab,Other ethnic group: Any other ethnic group
WD22CD_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
E05013714,10627,5141,4294,144,17,686,829,418,153,62,196,778,106,41,37,96,498,3592,1543,1409,640,287,66,221
E05013715,15544,10507,8396,369,4,1738,941,409,121,153,258,1241,306,66,66,389,414,2519,1104,939,476,336,67,269
E05013716,16587,9612,7006,435,23,2148,1234,467,203,249,315,1459,377,72,96,437,477,3842,1641,1589,612,440,129,311
E05013717,16097,7420,5783,287,10,1340,1239,519,206,165,349,1827,278,163,96,190,1100,5244,1580,2856,808,367,49,318
E05013718,14937,9026,7025,367,1,1633,1172,461,177,228,306,1094,258,143,51,213,429,3309,1068,1637,604,336,93,243


Reference geographies
--

In [13]:
# For filtering of source data: names of the reference geographies to keep.
# Names that do not occur in the download are simply not matched.
reference_names = ['United Kingdom', 'Great Britain', 'England and Wales',
                   'England', 'London', 'Lewisham']
years = [2011] # Just a precaution, in case any of the downloads include multiple periods

for datname in ['ethnicity']:
  IPython.display.display(f"=== {datname} ===")
  # Load the Nomis data
  d = pd.read_csv(f"{output_dir}/census11_{datname}_references.csv")

  # Simplify the column names: remove the redundant title prefix and the
  # qualifier suffix, e.g.
  #   'Ethnic Group: White: Irish; measures: Value' -> 'White: Irish'
  # regex=True is passed explicitly: since pandas 2.0, str.replace() treats the
  # pattern as a literal string by default, which would silently leave the
  # column names unchanged and break the later column lookups.
  d.columns = d.columns.str.replace(
      r'^.*?: (.*?); measures: Value',
      r'\1',
      regex=True)

  # Filter & transform to index data: keep name and code as 'Name' / 'Code'
  ref = filter_nomis_data(d, years, reference_names).drop(columns=['date'])
  ref = ref.rename(columns={'geography': 'Name', 'geography code': 'Code'})
  ref.to_csv(f"{output_dir}/references_{datname}.csv", index=False)
  IPython.display.display(ref.head())

'=== ethnicity ==='

Unnamed: 0,Name,Code,Rural Urban,All categories: Ethnic group,White,White: English/Welsh/Scottish/Northern Irish/British,White: Irish,White: Gypsy or Irish Traveller,White: Other White,Mixed,Mixed/multiple ethnic group: White and Black Caribbean,Mixed/multiple ethnic group: White and Black African,Mixed/multiple ethnic group: White and Asian,Mixed/multiple ethnic group: Other Mixed,Asian,Asian/Asian British: Indian,Asian/Asian British: Pakistani,Asian/Asian British: Bangladeshi,Asian/Asian British: Chinese,Asian/Asian British: Other Asian,Black,Black/African/Caribbean/Black British: African,Black/African/Caribbean/Black British: Caribbean,Black/African/Caribbean/Black British: Other Black,Other,Other ethnic group: Arab,Other ethnic group: Any other ethnic group
0,England,E92000001,Total,53012456,45281142,42279236,517001,54895,2430010,1192879,415616,161550,332708,283005,4143403,1395702,1112282,436514,379503,819402,1846614,977741,591016,277857,548418,220985,327433
1,London,E12000007,Total,8173941,4887435,3669284,175974,8196,1033981,405279,119425,65479,101500,118875,1511546,542857,223797,222127,124250,398515,1088640,573931,344597,170112,281041,106020,175021
2,Lewisham,E09000023,Total,275885,147686,114446,5206,208,27826,20472,8539,3559,3045,5329,25534,4600,1596,1388,6164,11786,74942,32025,30854,12063,7251,1456,5795


Derivatives
==

Ethnicity
--

In [14]:
# Inspect the raw Nomis column names of the London OA download
# (these are the names before the prefix/suffix simplification).
ethnicity = pd.read_csv(f"{output_dir}/census11_ethnicity_oa11_london.csv")
ethnicity.columns

Index(['date', 'geography', 'geography code', 'Rural Urban',
       'Ethnic Group: All categories: Ethnic group; measures: Value',
       'Ethnic Group: White; measures: Value',
       'Ethnic Group: White: English/Welsh/Scottish/Northern Irish/British; measures: Value',
       'Ethnic Group: White: Irish; measures: Value',
       'Ethnic Group: White: Gypsy or Irish Traveller; measures: Value',
       'Ethnic Group: White: Other White; measures: Value',
       'Ethnic Group: Mixed; measures: Value',
       'Ethnic Group: Mixed/multiple ethnic group: White and Black Caribbean; measures: Value',
       'Ethnic Group: Mixed/multiple ethnic group: White and Black African; measures: Value',
       'Ethnic Group: Mixed/multiple ethnic group: White and Asian; measures: Value',
       'Ethnic Group: Mixed/multiple ethnic group: Other Mixed; measures: Value',
       'Ethnic Group: Asian; measures: Value',
       'Ethnic Group: Asian/Asian British: Indian; measures: Value',
       'Ethnic Group

In [17]:
# Segmentation of categories.
# Maps each output column label to the (simplified) Nomis source column it is
# derived from. The order of this mapping is the column order of the result.
ETHNICITY_GROUP_COLUMNS = {
  'White English/Welsh/Scottish/Northern Irish/British': 'White: English/Welsh/Scottish/Northern Irish/British',
  'White Irish': 'White: Irish',
  'White Gypsy or Irish Traveller': 'White: Gypsy or Irish Traveller',
  'White Other': 'White: Other White',

  'White & Black Caribbean': 'Mixed/multiple ethnic group: White and Black Caribbean',
  'White & Black African': 'Mixed/multiple ethnic group: White and Black African',
  'White & Asian': 'Mixed/multiple ethnic group: White and Asian',
  'Other Mixed': 'Mixed/multiple ethnic group: Other Mixed',

  'Indian': 'Asian/Asian British: Indian',
  'Pakistani': 'Asian/Asian British: Pakistani',
  'Bangladeshi': 'Asian/Asian British: Bangladeshi',
  'Chinese': 'Asian/Asian British: Chinese',
  'Other Asian': 'Asian/Asian British: Other Asian',

  'Black African': 'Black/African/Caribbean/Black British: African',
  'Black Caribbean': 'Black/African/Caribbean/Black British: Caribbean',
  'Other Black': 'Black/African/Caribbean/Black British: Other Black',

  'Arab': 'Other ethnic group: Arab',
  'Other Ethnic Group': 'Other ethnic group: Any other ethnic group',
}

def ethnicity_groups(ethnicity, fixed_columns, decimals=None):
  """Convert detailed ethnic group counts into percentages of the total.

  ethnicity:      table with the simplified Nomis column names, including
                  'All categories: Ethnic group' (used as the denominator).
  fixed_columns:  list of identifier columns to copy over unchanged
                  (e.g. ['OA11CD'] or ['Name', 'Code']).
  decimals:       optional number of decimal places to round the result to;
                  None (the default) returns unrounded values.

  Returns a new DataFrame holding `fixed_columns` followed by one percentage
  column per entry of ETHNICITY_GROUP_COLUMNS. Rows whose total is 0 are not
  treated specially.

  Raises KeyError if any required source column is missing.
  """
  total_col = 'All categories: Ethnic group'

  # Check everything up front so the error names all missing columns at once.
  required = [total_col] + list(ETHNICITY_GROUP_COLUMNS.values())
  missing = [c for c in required if c not in ethnicity.columns]
  if missing:
    raise KeyError(f"Missing ethnicity columns: {missing}")

  d = ethnicity[fixed_columns].copy()
  total = ethnicity[total_col]
  for label, source in ETHNICITY_GROUP_COLUMNS.items():
    d[label] = ethnicity[source] * 100 / total

  return d if decimals is None else d.round(decimals)

In [18]:
# Lewisham: derive the percentage tables for the OA and the 2022 ward exports.
# Each pair is (file name suffix of the export, name of its geography code column).
for geog, geog_colname in [('oa11', 'OA11CD'), ('wd22', 'WD22CD_proposed')]:
  IPython.display.display(f"=== {geog} ===")

  ethnicity = pd.read_csv(f"{output_dir}/lbl_ethnicity_{geog}.csv")
  d = ethnicity_groups(ethnicity, fixed_columns=[geog_colname])
  d.to_csv(f"{output_dir}/lbl_ethnicity_groups_{geog}.csv", index=False)
  IPython.display.display(d.head())

'=== oa11 ==='

Unnamed: 0,OA11CD,White English/Welsh/Scottish/Northern Irish/British,White Irish,White Gypsy or Irish Traveller,White Other,White & Black Caribbean,White & Black African,White & Asian,Other Mixed,Indian,Pakistani,Bangladeshi,Chinese,Other Asian,Black African,Black Caribbean,Other Black,Arab,Other Ethnic Group
0,E00016403,13.934426,0.546448,0.0,10.10929,1.36612,3.551913,0.819672,2.185792,1.36612,0.0,0.819672,4.098361,6.284153,39.344262,9.016393,4.098361,0.819672,1.639344
1,E00016437,18.587361,1.486989,0.0,8.736059,2.230483,2.230483,0.929368,1.672862,2.230483,0.743494,0.0,5.762082,6.319703,29.925651,10.223048,6.505576,1.115242,1.301115
2,E00016439,21.563342,2.425876,1.078167,12.398922,0.808625,1.886792,0.0,1.617251,0.539084,0.0,0.0,1.617251,4.043127,29.649596,9.16442,11.051213,0.0,2.156334
3,E00016442,13.276836,1.412429,0.0,8.19209,2.542373,2.542373,0.564972,3.389831,0.847458,0.564972,1.977401,1.977401,7.909605,40.112994,5.649718,6.214689,0.847458,1.977401
4,E00016399,28.32618,1.502146,0.0,5.364807,2.360515,3.648069,0.214592,1.93133,1.716738,2.145923,0.0,1.93133,6.008584,28.540773,7.725322,6.652361,0.429185,1.502146


'=== wd22 ==='

Unnamed: 0,WD22CD_proposed,White English/Welsh/Scottish/Northern Irish/British,White Irish,White Gypsy or Irish Traveller,White Other,White & Black Caribbean,White & Black African,White & Asian,Other Mixed,Indian,Pakistani,Bangladeshi,Chinese,Other Asian,Black African,Black Caribbean,Other Black,Arab,Other Ethnic Group
0,E05013714,40.406512,1.355039,0.15997,6.455255,3.933377,1.439729,0.58342,1.844359,0.997459,0.38581,0.34817,0.903359,4.686177,14.51962,13.258681,6.022396,0.62106,2.079609
1,E05013715,54.014411,2.373906,0.025733,11.181163,2.63124,0.778435,0.984303,1.659804,1.968605,0.424601,0.424601,2.502573,2.663407,7.102419,6.040916,3.062275,0.431034,1.730571
2,E05013716,42.237897,2.622536,0.138663,12.949901,2.815458,1.22385,1.501176,1.899078,2.272864,0.434075,0.578767,2.634593,2.875746,9.89329,9.579791,3.689636,0.777717,1.874962
3,E05013717,35.925949,1.782941,0.062123,8.324533,3.224203,1.279742,1.025036,2.168106,1.72703,1.012611,0.596384,1.180344,6.833571,9.815494,17.742436,5.019569,0.304405,1.975523
4,E05013718,47.030863,2.456986,0.006695,10.932584,3.086296,1.184977,1.526411,2.048604,1.727254,0.957354,0.341434,1.425989,2.872063,7.15003,10.959363,4.04365,0.622615,1.626833


Reference geographies ethnicity
--

In [20]:
# Reference geographies: same percentage breakdown, keyed by name and code.
ethnicity_ref = pd.read_csv(f"{output_dir}/references_ethnicity.csv")
d = ethnicity_groups(ethnicity_ref, fixed_columns=['Name', 'Code'])
d.to_csv(f"{output_dir}/references_ethnicity_groups.csv", index=False)
IPython.display.display(d.head())



Unnamed: 0,Name,Code,White English/Welsh/Scottish/Northern Irish/British,White Irish,White Gypsy or Irish Traveller,White Other,White & Black Caribbean,White & Black African,White & Asian,Other Mixed,Indian,Pakistani,Bangladeshi,Chinese,Other Asian,Black African,Black Caribbean,Other Black,Arab,Other Ethnic Group
0,England,E92000001,79.7534,0.975244,0.103551,4.583847,0.783997,0.30474,0.627603,0.533846,2.632781,2.098152,0.823418,0.715875,1.545678,1.844361,1.114863,0.524135,0.416855,0.617653
1,London,E12000007,44.890023,2.152866,0.10027,12.649724,1.461046,0.80107,1.241751,1.454317,6.641313,2.737933,2.717502,1.520075,4.875433,7.021472,4.2158,2.08115,1.297049,2.141207
2,Lewisham,E09000023,41.483227,1.887018,0.075394,10.086087,3.09513,1.29003,1.103721,1.931602,1.667361,0.578502,0.503108,2.234264,4.27207,11.608098,11.183645,4.372474,0.527756,2.100513
