In [None]:
# ONS (Census 2011) statistics for Lewisham.
# Fetches Nomis data releases and prepares them for use in Lewisham ward profiles.
#
# This requires the OA->LSOA lookup table produced by "lbl_boundaries_oa.ipynb"
# as well as the OA->WD22 lookup table produced by "lbl_boundaries_2022_wards.ipynb"

In [1]:
import pandas as pd

from google.colab import drive
import google.colab.files as files

import IPython

Tools
==
Helpers to reformat & aggregate the Nomis source data.

In [2]:
def filter_nomis_data(d, date_values, geography_values):
  """Extract the subset of Nomis rows we need.

  A row is kept when its `date` value is one of `date_values` AND its
  `geography` value is one of `geography_values`. Note that the match is
  made against the `geography` column, not against `geography code`.

  Returns the matching rows of `d`; all columns are kept.
  """
  wanted_period = d['date'].isin(date_values)
  wanted_place = d['geography'].isin(geography_values)
  return d.loc[wanted_period & wanted_place]

def format_nomis_data(d, geo_colname):
  """Reformat Nomis data into a lookup table format.

  The `geography code` column is relabelled as `geo_colname` so the index
  (geography) column is clearly named, and the superfluous `date` and
  `geography` columns are removed. The input frame is left untouched.
  """
  relabelled = d.rename(columns={'geography code': geo_colname})
  return relabelled.drop(columns=['date', 'geography'])

def nomis_to_oa(d, years, oa_list):
  """Convenience wrapper: filter Nomis data, then format it as an OA table.

  Rows are first restricted to the given `years` and to the geographies in
  `oa_list` (see `filter_nomis_data`), then reformatted with the geography
  code column labelled 'OA11CD' (see `format_nomis_data`).
  """
  subset = filter_nomis_data(d, years, oa_list)
  return format_nomis_data(subset, geo_colname='OA11CD')

In [3]:
def aggregate_oa_groups(oa_data, oa_group_table, oa_col, group_col):
  """Aggregate OA-level data to LSOAs, wards, etc. by summing each group.

  Parameters:
    oa_data: one row per OA, holding the `oa_col` key plus the value columns.
    oa_group_table: lookup with (at least) `oa_col` and `group_col`.
    oa_col: name of the OA key column shared by both tables.
    group_col: name of the column identifying the target group.

  Returns a frame indexed by `group_col` with the per-group sum of every
  numeric column. Non-numeric columns (e.g. a textual 'Rural Urban'
  qualifier) are left out rather than concatenated.
  """
  # Right join: every row of `oa_data` is kept. Rows whose OA is missing from
  # the lookup get no group value and are therefore ignored by the groupby.
  joined = pd.merge(oa_group_table, oa_data, on=oa_col, how='right')
  # numeric_only=True is spelled out because newer pandas versions no longer
  # skip text columns by default when summing groups.
  return joined.drop(columns=[oa_col]).groupby(group_col).sum(numeric_only=True)

Data
==
GDrive mount
--

In [4]:
# Mount Google Drive so that the project files are reachable under /content/gdrive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
# Used as root folder: the project directory on the mounted Google Drive.
project_dir = '/content/gdrive/MyDrive/WardProfiles'
# Folders holding the lookup tables read in the "Lookups" section:
# the OA->LSOA lookup and the OA->2022 ward lookup respectively.
oa_lookups_dir = f"{project_dir}/lookups/oa"
ward_lookups_dir = f"{project_dir}/lookups/2022_wards"

# For exports. The raw Nomis downloads are saved in this folder as well.
output_dir = f"{project_dir}/employment"

In [None]:
# Make sure the export folder exists (-p: no error if it is already there).
!mkdir -p '{output_dir}'

Downloads
--

In [None]:
# Occupation, OAs in London 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/ks608uk
#
# Fetches the Nomis bulk CSV for dataset nm_1518_1 and saves it in the export
# folder (wget -O replaces any previous copy of the file).
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_1518_1.bulk.csv?time=latest&measures=20100&geography=2013265927TYPE299' \
  -O "{output_dir}/census11_occupation_oa11_london.csv"

--2021-11-02 13:38:06--  https://www.nomisweb.co.uk/api/v01/dataset/nm_1518_1.bulk.csv?time=latest&measures=20100&geography=2013265927TYPE299
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1488142 (1.4M) [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_occupation_oa11_london.csv’


2021-11-02 13:38:20 (614 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_occupation_oa11_london.csv’ saved [1488142/1488142]



In [None]:
# Occupation (Minor Groups), OAs in London 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/qs606ew
#
# Fetches the Nomis bulk CSV for dataset nm_561_1 (rural_urban=total) and
# saves it in the export folder (wget -O replaces any previous copy).
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_561_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299' \
  -O "{output_dir}/census11_occupation_minor_groups_oa11_london.csv"

--2021-11-02 13:38:24--  https://www.nomisweb.co.uk/api/v01/dataset/nm_561_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7661832 (7.3M) [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_occupation_minor_groups_oa11_london.csv’


2021-11-02 13:39:03 (651 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_occupation_minor_groups_oa11_london.csv’ saved [7661832/7661832]



In [None]:
# Hours Worked, OAs in London 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/QS604EW
#
# Fetches the Nomis bulk CSV for dataset nm_559_1 (rural_urban=total) and
# saves it in the export folder (wget -O replaces any previous copy).
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_559_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299' \
  -O "{output_dir}/census11_hours_worked_oa11_london.csv"

--2021-11-02 13:39:05--  https://www.nomisweb.co.uk/api/v01/dataset/nm_559_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1543556 (1.5M) [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_hours_worked_oa11_london.csv’


2021-11-02 13:39:19 (635 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_hours_worked_oa11_london.csv’ saved [1543556/1543556]



In [None]:
# Economic Activity, OAs in London 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/QS601EW
#
# Fetches the Nomis bulk CSV for dataset nm_556_1 (rural_urban=total) and
# saves it in the export folder (wget -O replaces any previous copy).
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_556_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299' \
  -O "{output_dir}/census11_economic_activity_oa11_london.csv"

--2021-11-02 13:39:20--  https://www.nomisweb.co.uk/api/v01/dataset/nm_556_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2094451 (2.0M) [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_economic_activity_oa11_london.csv’


2021-11-02 13:39:36 (630 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_economic_activity_oa11_london.csv’ saved [2094451/2094451]



In [None]:
# For certain variables: get reference data for other geographies.
#
# Nomis geography codes are derived from their GEOGRAPHY and GEOGRAPHY_TYPECODE 
# fields, both numeric. For our purposes they are as follows:
# England: 2092957699TYPE499
# London: 2013265927TYPE480
# Lewisham: 1946157254TYPE464

# And optionally:
# United Kingdom: 2092957697TYPE499 -- this also returns nation-level records
# Great Britain: 2092957698TYPE499
# England and Wales: 2092957703TYPE499

# NOTE that not all variables are available at all levels of aggregation.

In [None]:
# Occupation, misc reference regions, 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/ks608uk
#
# Requested geographies: United Kingdom (2092957697TYPE499, which also returns
# nation-level records), London and Lewisham. Unlike the other reference
# downloads, this one asks for the United Kingdom rather than England.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_1518_1.bulk.csv?time=latest&measures=20100&geography=2092957697TYPE499,2013265927TYPE480,1946157254TYPE464' \
  -O "{output_dir}/census11_occupation_references.csv"

--2021-11-16 12:27:04--  https://www.nomisweb.co.uk/api/v01/dataset/nm_1518_1.bulk.csv?time=latest&measures=20100&geography=2092957697TYPE499,2013265927TYPE480,1946157254TYPE464
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1683 (1.6K) [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_occupation_references.csv’


2021-11-16 12:27:05 (185 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_occupation_references.csv’ saved [1683/1683]



In [None]:
# Hours Worked, misc reference regions, 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/QS604EW
#
# Requested geographies: England (2092957699TYPE499), London and Lewisham.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_559_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699TYPE499,2013265927TYPE480,1946157254TYPE464' \
  -O "{output_dir}/census11_hours_worked_references.csv"

--2021-11-16 12:25:47--  https://www.nomisweb.co.uk/api/v01/dataset/nm_559_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699TYPE499,2013265927TYPE480,1946157254TYPE464
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 746 [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_hours_worked_references.csv’


2021-11-16 12:25:49 (89.0 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_hours_worked_references.csv’ saved [746/746]



In [None]:
# Economic Activity, misc reference regions, 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/QS601EW
#
# Requested geographies: England (2092957699TYPE499), London and Lewisham.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_556_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699TYPE499,2013265927TYPE480,1946157254TYPE464' \
  -O "{output_dir}/census11_economic_activity_references.csv"

--2021-11-16 12:26:57--  https://www.nomisweb.co.uk/api/v01/dataset/nm_556_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699TYPE499,2013265927TYPE480,1946157254TYPE464
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.253.212
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1820 (1.8K) [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_economic_activity_references.csv’


2021-11-16 12:26:59 (205 MB/s) - ‘/content/gdrive/MyDrive/WardProfiles/employment/census11_economic_activity_references.csv’ saved [1820/1820]



Lookups
--
Used to match up OAs with their LSOAs and with the proposed 2022 electoral wards.

In [6]:
# All OAs and their LSOAs in Lewisham.
# Only the two key columns of the lookup file are kept.
oa_lsoa_join = pd.read_csv(
    f"{oa_lookups_dir}/lbl_oa11_lsoa11_msoa11_lad20_rgn20_202012.csv"
)[['OA11CD', 'LSOA11CD']]
oa_lsoa_join.head()

Unnamed: 0,OA11CD,LSOA11CD
0,E00016277,E01003220
1,E00016278,E01003220
2,E00016285,E01003220
3,E00016257,E01003221
4,E00016263,E01003221


In [7]:
# All OAs and their 2022 wards in Lewisham.
# Only the two key columns of the lookup file are kept.
oa_wd22_join = pd.read_csv(
    f"{ward_lookups_dir}/lbl_oa11_wd22_proposed.csv"
)[['OA11CD', 'WD22CD_proposed']]
oa_wd22_join.head()

Unnamed: 0,OA11CD,WD22CD_proposed
0,E00016403,E05013721
1,E00016442,E05013721
2,E00016407,E05013721
3,E00016404,E05013721
4,E00016402,E05013721


Process & export
==
Lewisham
--

In [8]:
# For filtering of source data
lbl_oa_list = oa_lsoa_join.OA11CD.unique() # All OAs in Lewisham
years = [2011] # Just a precaution, in case any of the downloads include multiple periods

# For every downloaded dataset: load the London-wide OA file, tidy the column
# names, keep the Lewisham OAs, and export OA-, LSOA- and ward-level tables.
for datname in ['occupation', 'occupation_minor_groups', 
                  'hours_worked', 'economic_activity']:
  IPython.display.display(f"=== {datname} ===")
  # Load the Nomis data
  d = pd.read_csv(f"{output_dir}/census11_{datname}_oa11_london.csv")

  # Simplify the column names
  d.columns = d.columns.str.replace(
      # Remove the redundant title prefix and qualifier suffix
      r'^.*?: (.*?); measures: Value', 
      r'\1',
      # The pattern must be applied as a regular expression. This is stated
      # explicitly because pandas >= 2.0 treats it as a literal string by
      # default, which would silently leave every column name unchanged.
      regex=True)
  # The "all categories" column of each dataset becomes a uniform 'Total'.
  d = d.rename(columns={
      'All categories: Occupation': 'Total',
      'All categories: Hours worked': 'Total',
      'All categories: Economic activity': 'Total',
  })

  # Transform to OA index data
  lbl_oa = nomis_to_oa(d, years, lbl_oa_list)
  lbl_oa.to_csv(f"{output_dir}/lbl_{datname}_oa11.csv", index=False)

  # Aggregate to LSOA level (the group code is the index, hence index=True)
  lbl_lsoa = aggregate_oa_groups(lbl_oa, oa_lsoa_join, oa_col='OA11CD', group_col='LSOA11CD')
  lbl_lsoa.to_csv(f"{output_dir}/lbl_{datname}_lsoa11.csv", index=True)
  IPython.display.display(lbl_lsoa.head())

  # Aggregate to WD22 level (the group code is the index, hence index=True)
  lbl_wd22 = aggregate_oa_groups(lbl_oa, oa_wd22_join, oa_col='OA11CD', group_col='WD22CD_proposed')
  lbl_wd22.to_csv(f"{output_dir}/lbl_{datname}_wd22.csv", index=True)
  IPython.display.display(lbl_wd22.head())

'=== occupation ==='

Unnamed: 0_level_0,Total,"1. Managers, directors and senior officials",2. Professional occupations,3. Associate professional and technical occupations,4. Administrative and secretarial occupations,5. Skilled trades occupations,"6. Caring, leisure and other service occupations",7. Sales and customer service occupations,8. Process plant and machine operatives,9. Elementary occupations
LSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
E01003189,613,43,93,57,77,63,90,65,42,83
E01003190,791,57,93,119,87,95,135,77,34,94
E01003191,662,43,91,70,92,73,82,67,49,95
E01003192,656,30,91,60,79,72,95,87,45,97
E01003193,706,64,130,73,97,80,91,44,50,77


Unnamed: 0_level_0,Total,"1. Managers, directors and senior officials",2. Professional occupations,3. Associate professional and technical occupations,4. Administrative and secretarial occupations,5. Skilled trades occupations,"6. Caring, leisure and other service occupations",7. Sales and customer service occupations,8. Process plant and machine operatives,9. Elementary occupations
WD22CD_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
E05013714,4346,274,572,479,557,465,626,471,290,612
E05013715,8208,1090,2551,1626,845,390,522,434,192,558
E05013716,9479,985,2397,2001,1006,580,761,642,208,899
E05013717,7504,681,1496,934,1086,745,738,733,379,712
E05013718,8013,873,2187,1525,947,567,592,463,269,590


'=== occupation_minor_groups ==='

Unnamed: 0_level_0,Total,"1. Managers, directors and senior officials",11. Corporate managers and directors,111. Chief Executives and Senior Officials,112. Production Managers and Directors,113. Functional Managers and Directors,115. Financial Institution Managers and Directors,116. Managers and Directors in Transport and Logistics,117. Senior Officers in Protective Services,118. Health and Social Services Managers and Directors,119. Managers and Directors in Retail and Wholesale,12. Other managers and proprietors,121. Managers and Proprietors in Agriculture Related Services,122. Managers and Proprietors in Hospitality and Leisure Services,124. Managers and Proprietors in Health and Care Services,125. Managers and Proprietors in Other Services,2. Professional occupations,"21. Science research, engineering and technology professionals",211. Natural and Social Science Professionals,212. Engineering Professionals,213. Information Technology and Telecommunications Professionals,214. Conservation and Environment Professionals,215. Research and Development Managers,22. Health professionals,221. Health Professionals,222. Therapy Professionals,223. Nursing and Midwifery Professionals,23. Teaching and educational professionals,231. Teaching and Educational Professionals,"24. Business, media and public service professionals",241. Legal Professionals,242. Research and Administrative Professionals,"243. Architects, Town Planners and Surveyors",244. Welfare Professionals,245. Librarians and Related Professionals,246. Quality and Regulatory Professionals,247. Media Professionals,3. Associate professional and technical occupations,"31. Science, engineering and technology associate professionals","311. Science, Engineering and Production Technicians",...,"6. Caring, leisure and other service occupations",61. Caring personal service occupations,612. Childcare and Related Personal Services,613. Animal Care and Control Services,614. Caring Personal Services,"62. 
Leisure, travel and related personal service occupations",621. Leisure and Travel Services,622. Hairdressers and Related Services,623. Housekeeping and Related Services,624. Cleaning and Housekeeping Managers and Supervisors,7. Sales and customer service occupations,71. Sales occupations,711. Sales Assistants and Retail Cashiers,712. Sales Related Occupations,713. Sales Supervisors,72. Customer service occupations,721. Customer Service Occupations,722. Customer Service Managers and Supervisors,"8. Process, plant and machine operatives","81. Process, plant and machine operatives",811. Process Operatives,812. Plant and Machine Operatives,813. Assemblers and Routine Operatives,814. Construction Operatives,82. Transport and mobile machine drivers and operatives,821. Road Transport Drivers,822. Mobile Machine Drivers and Operatives,823. Other Drivers and Transport Operatives,9. Elementary occupations,91. Elementary trades and related occupations,911. Elementary Agricultural Occupations,912. Elementary Construction Occupations,913. Elementary Process Plant Occupations,92. Elementary administration and service occupations,921. Elementary Administration Occupations,923. Elementary Cleaning Occupations,924. Elementary Security Occupations,925. Elementary Sales Occupations,926. Elementary Storage Occupations,927. Other Elementary Services Occupations
LSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
E01003189,613,43,24,2,2,1,2,4,0,2,11,19,0,4,1,14,93,15,2,4,9,0,0,33,5,1,27,19,19,26,1,9,4,5,0,0,7,57,6,3,...,90,70,36,0,34,20,6,5,6,3,65,50,47,0,3,15,13,2,42,6,0,0,0,6,36,29,4,3,83,2,0,0,2,81,6,25,14,7,7,22
E01003190,791,57,31,0,5,10,2,3,0,5,6,26,0,13,0,13,93,18,0,2,15,1,0,26,5,2,19,27,27,22,1,14,2,2,0,0,3,119,14,8,...,135,114,51,0,63,21,11,4,5,1,77,59,52,2,5,18,14,4,34,10,0,4,2,4,24,17,3,4,94,21,0,18,3,73,9,23,8,5,2,26
E01003191,662,43,29,0,7,4,7,1,0,5,5,14,0,1,2,11,91,21,0,6,14,1,0,20,2,2,16,25,25,25,4,7,2,10,0,0,2,70,11,5,...,82,59,25,1,33,23,4,11,7,1,67,60,55,1,4,7,7,0,49,9,0,3,2,4,40,34,0,6,95,4,0,2,2,91,12,27,17,9,6,20
E01003192,656,30,21,0,2,4,4,3,0,0,8,9,0,3,1,5,91,18,2,2,12,0,2,34,5,0,29,17,17,22,4,4,2,7,1,2,2,60,5,4,...,95,71,20,1,50,24,2,5,6,11,87,72,65,2,5,15,11,4,45,13,1,5,2,5,32,29,1,2,97,5,0,4,1,92,5,34,20,5,8,20
E01003193,706,64,32,1,5,6,3,3,0,5,9,32,0,9,3,20,130,20,2,4,13,0,1,27,4,1,22,34,34,49,11,18,3,5,1,1,10,73,3,0,...,91,66,27,0,39,25,6,9,7,3,44,31,23,4,4,13,10,3,50,22,1,3,3,15,28,23,2,3,77,2,0,2,0,75,7,23,17,3,6,19


Unnamed: 0_level_0,Total,"1. Managers, directors and senior officials",11. Corporate managers and directors,111. Chief Executives and Senior Officials,112. Production Managers and Directors,113. Functional Managers and Directors,115. Financial Institution Managers and Directors,116. Managers and Directors in Transport and Logistics,117. Senior Officers in Protective Services,118. Health and Social Services Managers and Directors,119. Managers and Directors in Retail and Wholesale,12. Other managers and proprietors,121. Managers and Proprietors in Agriculture Related Services,122. Managers and Proprietors in Hospitality and Leisure Services,124. Managers and Proprietors in Health and Care Services,125. Managers and Proprietors in Other Services,2. Professional occupations,"21. Science research, engineering and technology professionals",211. Natural and Social Science Professionals,212. Engineering Professionals,213. Information Technology and Telecommunications Professionals,214. Conservation and Environment Professionals,215. Research and Development Managers,22. Health professionals,221. Health Professionals,222. Therapy Professionals,223. Nursing and Midwifery Professionals,23. Teaching and educational professionals,231. Teaching and Educational Professionals,"24. Business, media and public service professionals",241. Legal Professionals,242. Research and Administrative Professionals,"243. Architects, Town Planners and Surveyors",244. Welfare Professionals,245. Librarians and Related Professionals,246. Quality and Regulatory Professionals,247. Media Professionals,3. Associate professional and technical occupations,"31. Science, engineering and technology associate professionals","311. Science, Engineering and Production Technicians",...,"6. Caring, leisure and other service occupations",61. Caring personal service occupations,612. Childcare and Related Personal Services,613. Animal Care and Control Services,614. Caring Personal Services,"62. 
Leisure, travel and related personal service occupations",621. Leisure and Travel Services,622. Hairdressers and Related Services,623. Housekeeping and Related Services,624. Cleaning and Housekeeping Managers and Supervisors,7. Sales and customer service occupations,71. Sales occupations,711. Sales Assistants and Retail Cashiers,712. Sales Related Occupations,713. Sales Supervisors,72. Customer service occupations,721. Customer Service Occupations,722. Customer Service Managers and Supervisors,"8. Process, plant and machine operatives","81. Process, plant and machine operatives",811. Process Operatives,812. Plant and Machine Operatives,813. Assemblers and Routine Operatives,814. Construction Operatives,82. Transport and mobile machine drivers and operatives,821. Road Transport Drivers,822. Mobile Machine Drivers and Operatives,823. Other Drivers and Transport Operatives,9. Elementary occupations,91. Elementary trades and related occupations,911. Elementary Agricultural Occupations,912. Elementary Construction Occupations,913. Elementary Process Plant Occupations,92. Elementary administration and service occupations,921. Elementary Administration Occupations,923. Elementary Cleaning Occupations,924. Elementary Security Occupations,925. Elementary Sales Occupations,926. Elementary Storage Occupations,927. Other Elementary Services Occupations
WD22CD_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
E05013714,4346,274,160,2,29,29,20,11,0,15,54,114,0,39,7,68,572,125,12,25,83,2,3,166,26,9,131,127,127,154,14,61,13,46,2,3,15,479,60,29,...,626,502,215,4,283,124,32,37,35,20,471,389,354,11,24,82,72,10,290,68,7,19,7,35,222,188,10,24,612,46,0,35,11,566,58,168,112,43,45,140
E05013715,8208,1090,764,48,73,374,132,19,6,29,83,326,1,99,16,210,2551,475,44,84,311,13,23,423,239,63,121,502,502,1151,247,446,127,50,23,36,222,1626,89,29,...,522,342,159,5,178,180,77,50,35,18,434,347,281,37,29,87,67,20,192,74,7,24,14,29,118,103,7,8,558,31,1,18,12,527,44,134,93,15,32,209
E05013716,9479,985,592,32,68,243,63,24,0,31,131,393,1,129,19,244,2397,486,44,64,353,9,16,263,103,43,117,610,610,1038,134,399,126,91,34,29,225,2001,141,57,...,761,497,208,12,277,264,101,73,44,46,642,492,434,37,21,150,128,22,208,74,9,22,18,25,134,121,7,6,899,38,5,19,14,861,73,199,127,20,46,396
E05013717,7504,681,376,18,55,107,26,23,2,58,87,305,2,93,19,191,1496,280,31,71,165,2,11,338,77,12,249,398,398,480,67,170,68,97,11,15,52,934,107,38,...,738,550,261,5,284,188,50,72,44,22,733,584,506,37,41,149,125,24,379,100,16,30,11,43,279,246,13,20,712,36,1,20,15,676,93,182,122,27,50,202
E05013718,8013,873,538,20,68,238,50,26,1,49,86,335,1,94,21,219,2187,380,47,75,228,15,15,334,113,46,175,582,582,891,113,338,92,94,14,29,211,1525,160,41,...,592,441,237,8,196,151,58,45,34,14,463,367,305,28,34,96,84,12,269,80,10,13,8,49,189,155,17,17,590,30,1,20,9,560,67,135,98,23,22,215


'=== hours_worked ==='

Unnamed: 0_level_0,Total,Part-time: Total,Part-time: 15 hours or less worked,Part-time: 16 to 30 hours worked,Full-time: Total,Full-time: 31 to 48 hours worked,Full-time: 49 or more hours worked
LSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
E01003189,613,193,55,138,420,371,49
E01003190,791,270,70,200,521,446,75
E01003191,662,190,41,149,472,421,51
E01003192,656,219,71,148,437,380,57
E01003193,706,209,68,141,497,406,91


Unnamed: 0_level_0,Total,Part-time: Total,Part-time: 15 hours or less worked,Part-time: 16 to 30 hours worked,Full-time: Total,Full-time: 31 to 48 hours worked,Full-time: 49 or more hours worked
WD22CD_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
E05013714,4346,1397,365,1032,2949,2554,395
E05013715,8208,1816,628,1188,6392,4746,1646
E05013716,9479,2456,813,1643,7023,5704,1319
E05013717,7504,2061,691,1370,5443,4575,868
E05013718,8013,1991,639,1352,6022,4939,1083


'=== economic_activity ==='

Unnamed: 0_level_0,Total,Economically active: Total,Economically active: Employee: Part-time,Economically active: Employee: Full-time,Economically active: Self-employed with employees: Part-time,Economically active: Self-employed with employees: Full-time,Economically active: Self-employed without employees: Part-time,Economically active: Self-employed without employees: Full-time,Economically active: Unemployed,Economically active: Full-time student,Economically inactive: Total,Economically inactive: Retired,Economically inactive: Student (including full-time students),Economically inactive: Looking after home or family,Economically inactive: Long-term sick or disabled,Economically inactive: Other
LSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
E01003189,1101,710,142,363,6,12,22,39,84,42,391,82,93,73,90,53
E01003190,1284,904,225,423,2,14,19,68,101,52,380,82,80,82,88,48
E01003191,1044,752,143,400,1,9,26,51,83,39,292,80,54,69,60,29
E01003192,1164,788,176,382,5,9,14,41,110,51,376,113,57,69,85,52
E01003193,1138,791,162,406,4,16,26,62,74,41,347,140,50,57,72,28


Unnamed: 0_level_0,Total,Economically active: Total,Economically active: Employee: Part-time,Economically active: Employee: Full-time,Economically active: Self-employed with employees: Part-time,Economically active: Self-employed with employees: Full-time,Economically active: Self-employed without employees: Part-time,Economically active: Self-employed without employees: Full-time,Economically active: Unemployed,Economically active: Full-time student,Economically inactive: Total,Economically inactive: Retired,Economically inactive: Student (including full-time students),Economically inactive: Looking after home or family,Economically inactive: Long-term sick or disabled,Economically inactive: Other
WD22CD_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
E05013714,7325,5013,1104,2518,30,76,126,301,574,284,2312,590,480,472,501,269
E05013715,11806,8947,1126,5373,51,233,426,700,646,392,2859,908,720,522,397,312
E05013716,13672,10448,1383,5824,33,173,491,816,707,1021,3224,653,1432,367,457,315
E05013717,11622,8348,1438,4433,58,265,281,658,690,525,3274,1105,799,583,432,355
E05013718,11400,8718,1309,4985,55,197,412,783,609,368,2682,831,689,419,414,329


Reference geographies
--

In [11]:
# For filtering of source data
reference_names = ['United Kingdom', 'Great Britain', 'England and Wales',
                   'England', 'London', 'Lewisham']
years = [2011] # Just a precaution, in case any of the downloads include multiple periods

# For every dataset with a reference download: load it, tidy the column names,
# keep the reference geographies listed above, and export one table each.
for datname in ['occupation', #'occupation_minor_groups', 
                  'hours_worked', 'economic_activity']:
  IPython.display.display(f"=== {datname} ===")
  # Load the Nomis data
  d = pd.read_csv(f"{output_dir}/census11_{datname}_references.csv")

  # Simplify the column names
  d.columns = d.columns.str.replace(
      # Remove the redundant title prefix and qualifier suffix
      r'^.*?: (.*?); measures: Value', 
      r'\1',
      # The pattern must be applied as a regular expression. This is stated
      # explicitly because pandas >= 2.0 treats it as a literal string by
      # default, which would silently leave every column name unchanged.
      regex=True)
  # The "all categories" column of each dataset becomes a uniform 'Total'.
  d = d.rename(columns={
      'All categories: Occupation': 'Total',
      'All categories: Hours worked': 'Total',
      'All categories: Economic activity': 'Total',
  })

  # Filter & transform to index data.
  # Geographies are matched by name here; any other records in the download
  # (e.g. individual nations) are dropped by the filter.
  ref = filter_nomis_data(d, years, reference_names).drop(columns=['date'])
  if 'Rural Urban' in ref.columns:
    ref = ref.drop(columns='Rural Urban') # unused
  ref = ref.rename(columns={'geography': 'Name', 'geography code': 'Code'})
  ref.to_csv(f"{output_dir}/references_{datname}.csv", index=False)
  IPython.display.display(ref.head())

'=== occupation ==='

Unnamed: 0,Name,Code,Total,"1. Managers, directors and senior officials",2. Professional occupations,3. Associate professional and technical occupations,4. Administrative and secretarial occupations,5. Skilled trades occupations,"6. Caring, leisure and other service occupations",7. Sales and customer service occupations,8. Process plant and machine operatives,9. Elementary occupations
0,United Kingdom,K02000001,29838494,3135421,5173804,3753396,3432622,3468636,2810462,2555147,2175899,3333107
1,Great Britain,K03000001,29043231,3071521,5037398,3684693,3320781,3357134,2736625,2475236,2112611,3247232
2,England,E92000001,25162721,2734900,4400375,3219067,2883230,2858680,2348650,2117477,1808024,2792318
6,England and Wales,K04000001,26526336,2860702,4615759,3366313,3034637,3041957,2492117,2240869,1919017,2954965
7,London,E12000007,3998897,464272,898018,651058,466488,332674,314023,299222,189123,384019


'=== hours_worked ==='

Unnamed: 0,Name,Code,Total,Part-time: Total,Part-time: 15 hours or less worked,Part-time: 16 to 30 hours worked,Full-time: Total,Full-time: 31 to 48 hours worked,Full-time: 49 or more hours worked
0,England,E92000001,25162721,7307083,2418518,4888565,17855638,14502713,3352925
1,London,E12000007,3998897,1025053,346106,678947,2973844,2338716,635128
2,Lewisham,E09000023,136057,36422,11762,24660,99635,82036,17599


'=== economic_activity ==='

Unnamed: 0,Name,Code,Total,Economically active: Total,Economically active: Employee: Part-time,Economically active: Employee: Full-time,Economically active: Self-employed with employees: Part-time,Economically active: Self-employed with employees: Full-time,Economically active: Self-employed without employees: Part-time,Economically active: Self-employed without employees: Full-time,Economically active: Unemployed,Economically active: Full-time student,Economically inactive: Total,Economically inactive: Retired,Economically inactive: Student (including full-time students),Economically inactive: Looking after home or family,Economically inactive: Long-term sick or disabled,Economically inactive: Other
0,England,E92000001,38881374,27183134,5333268,15016564,148074,715271,990573,1939714,1702847,1336823,11698240,5320691,2255831,1695134,1574134,852450
1,London,E12000007,6117482,4384217,666513,2437797,28553,123432,192724,368184,318500,248514,1733265,512057,477543,320880,226992,195793
2,Lewisham,E09000023,206255,151793,24079,82776,813,3293,6286,11658,12776,10112,54462,15088,15018,9179,8779,6398


Derivatives
==

Lewisham occupation groups
--

In [12]:
# Lewisham: express every occupation group as a percentage of the 'Total'
# column, for both the OA-level and the ward-level table.
for geog, geog_colname in [('oa11', 'OA11CD'), ('wd22', 'WD22CD_proposed')]:
  IPython.display.display(f"=== {geog} ===")

  og = pd.read_csv(f"{output_dir}/lbl_occupation_{geog}.csv")

  # Every column other than the geography key and the total is a group count.
  var_colnames = og.drop(columns=[geog_colname, 'Total']).columns
  shares = og[var_colnames].mul(100.).div(og['Total'], axis=0)
  d = pd.concat([og[[geog_colname]], shares], axis=1)

  d.to_csv(f"{output_dir}/lbl_occupation_share_{geog}.csv", index=False)
  IPython.display.display(d.head())

'=== oa11 ==='

Unnamed: 0,OA11CD,"1. Managers, directors and senior officials",2. Professional occupations,3. Associate professional and technical occupations,4. Administrative and secretarial occupations,5. Skilled trades occupations,"6. Caring, leisure and other service occupations",7. Sales and customer service occupations,8. Process plant and machine operatives,9. Elementary occupations
0,E00016403,6.206897,20.0,13.793103,7.586207,9.655172,16.551724,6.896552,6.206897,13.103448
1,E00016437,4.347826,9.782609,7.065217,11.413043,9.782609,21.195652,9.23913,5.434783,21.73913
2,E00016439,3.184713,15.286624,12.738854,9.55414,8.280255,15.923567,6.369427,9.55414,19.10828
3,E00016442,7.407407,12.592593,8.148148,8.148148,11.111111,12.592593,9.62963,7.407407,22.962963
4,E00016399,1.898734,11.392405,7.594937,14.556962,8.860759,15.822785,15.822785,1.898734,22.151899


'=== wd22 ==='

Unnamed: 0,WD22CD_proposed,"1. Managers, directors and senior officials",2. Professional occupations,3. Associate professional and technical occupations,4. Administrative and secretarial occupations,5. Skilled trades occupations,"6. Caring, leisure and other service occupations",7. Sales and customer service occupations,8. Process plant and machine operatives,9. Elementary occupations
0,E05013714,6.304648,13.161528,11.021629,12.816383,10.699494,14.40405,10.837552,6.672803,14.081914
1,E05013715,13.279727,31.079435,19.809942,10.294834,4.751462,6.359649,5.287524,2.339181,6.798246
2,E05013716,10.391391,25.287478,21.109822,10.612934,6.118789,8.028273,6.772866,2.194324,9.484123
3,E05013717,9.07516,19.936034,12.446695,14.472281,9.928038,9.834755,9.768124,5.05064,9.488273
4,E05013718,10.894796,27.293149,19.031574,11.818295,7.076001,7.387995,5.778111,3.357045,7.363035



Lewisham economic activity
--

In [13]:
def economic_activity_groups(ea, fixed_columns):
  """Condense economic activity counts into a few percentage groups.

  Segmentation loosely based on Observatory categories. Each group is
  expressed as a percentage of the `Total` column of `ea`.

  Parameters:
    ea: frame with a `Total` column and the Nomis economic activity columns.
    fixed_columns: list of columns (e.g. the geography code) copied unchanged
      into the result, ahead of the group columns.

  Returns a new frame with `fixed_columns` followed by 'Full-time',
  'Part-time', 'Self-employed', 'Working student', 'Unemployed' and
  'Inactive'. Values are not rounded.
  """
  total = ea['Total']

  # All self-employed variants (with/without employees, part-/full-time)
  # are added together into a single group.
  is_self_employed = ea.columns.str.contains('Economically active: Self-employed')
  self_employed = ea[ea.columns[is_self_employed]].sum(axis=1)

  # A single 'Inactive' catch-all group is used ("Option B"). The alternative
  # ("Option A", replicating the Observatory) would report 'Retired' on its
  # own and lump students, carers, sick or disabled and others into an
  # 'Other inactive' group; many of those cases are not included in the
  # Observatory.
  shares = {
      'Full-time': ea['Economically active: Employee: Full-time'] * 100. / total,
      'Part-time': ea['Economically active: Employee: Part-time'] * 100. / total,
      'Self-employed': self_employed * 100 / total,
      'Working student': ea['Economically active: Full-time student'] * 100 / total,
      'Unemployed': ea['Economically active: Unemployed'] * 100 / total,
      'Inactive': ea['Economically inactive: Total'] * 100 / total,
  }

  d = ea[fixed_columns].copy()
  for label, values in shares.items():
    d[label] = values
  return d

In [14]:
# Lewisham: derive the activity-group shares for each geography level
# (output areas and the proposed 2022 wards).
geog_id_columns = {'oa11': 'OA11CD', 'wd22': 'WD22CD_proposed'}
for geog, geog_colname in geog_id_columns.items():
  IPython.display.display(f"=== {geog} ===")

  source_path = f"{output_dir}/lbl_economic_activity_{geog}.csv"
  export_path = f"{output_dir}/lbl_economic_activity_groups_{geog}.csv"

  # Read the per-category figures, convert them to percentage shares,
  # export the result and preview its first rows.
  ea = pd.read_csv(source_path)
  d = economic_activity_groups(ea, [geog_colname])
  d.to_csv(export_path, index=False)
  IPython.display.display(d.head())

'=== oa11 ==='

Unnamed: 0,OA11CD,Full-time,Part-time,Self-employed,Working student,Unemployed,Inactive
0,E00016403,35.622318,17.167382,8.583691,2.575107,11.587983,24.463519
1,E00016437,28.279883,13.702624,5.830904,6.413994,7.28863,38.483965
2,E00016439,36.55914,11.827957,4.659498,6.09319,9.318996,31.541219
3,E00016442,30.909091,18.181818,8.636364,5.454545,13.181818,23.636364
4,E00016399,27.483444,15.562914,3.97351,9.271523,8.278146,35.430464


'=== wd22 ==='

Unnamed: 0,WD22CD_proposed,Full-time,Part-time,Self-employed,Working student,Unemployed,Inactive
0,E05013714,34.375427,15.071672,7.276451,3.877133,7.836177,31.56314
1,E05013715,45.510757,9.537523,11.94308,3.320346,5.471794,24.2165
2,E05013716,42.598011,10.115565,11.066413,7.467817,5.171153,23.581042
3,E05013717,38.143177,12.373086,10.858716,4.517295,5.937016,28.170711
4,E05013718,43.72807,11.482456,12.692982,3.22807,5.342105,23.526316


Reference geographies economic activity
--

In [15]:
# Reference geographies: same grouping as above, keeping the 'Name' and
# 'Code' columns as identifiers.
references_source_path = f"{output_dir}/references_economic_activity.csv"
references_export_path = f"{output_dir}/references_economic_activity_groups.csv"

ea = pd.read_csv(references_source_path)
d = economic_activity_groups(ea, ['Name', 'Code'])
d.to_csv(references_export_path, index=False)
IPython.display.display(d.head())

Unnamed: 0,Name,Code,Full-time,Part-time,Self-employed,Working student,Unemployed,Inactive
0,England,E92000001,38.621485,13.716768,9.756939,3.438209,4.379596,30.087003
1,London,E12000007,39.84968,10.895218,11.653373,4.062358,5.20639,28.332981
2,Lewisham,E09000023,40.132845,11.674384,10.69065,4.902669,6.194274,26.405178


Quick checks
--

In [None]:
# Let's make sure we've successfully reproduced the Observatory categories.

# Among the unchanged wards: Sydenham, Forest Hill, Crofton Park. See https://consultation.lgbce.org.uk/node/17020
# The Observatory has these numbers for Crofton Park (E05013718):
# Full-time: 43.7
# Part-time: 11.5
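# Full-time student: 3.2  (our 'Working student' column)
# Self-employed: 12.7
# Unemployed: 5.3
# Retired: 7.3  (no direct counterpart here: retired is folded into 'Inactive' under Option B)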

In [16]:
# Reload the exported ward-level shares and show the Crofton Park row
# (E05013718) as a column, for comparison with the Observatory figures.
d = pd.read_csv(f"{output_dir}/lbl_economic_activity_groups_wd22.csv")
d.loc[d['WD22CD_proposed'] == 'E05013718'].T

Unnamed: 0,4
WD22CD_proposed,E05013718
Full-time,43.7281
Part-time,11.4825
Self-employed,12.693
Working student,3.22807
Unemployed,5.34211
Inactive,23.5263


In [17]:
# These should all add up to 100.
# Sum only the numeric share columns: the ward-code column holds strings, and
# recent pandas versions raise instead of silently skipping non-numeric columns.
d.sum(axis=1, numeric_only=True)

0     100.0
1     100.0
2     100.0
3     100.0
4     100.0
5     100.0
6     100.0
7     100.0
8     100.0
9     100.0
10    100.0
11    100.0
12    100.0
13    100.0
14    100.0
15    100.0
16    100.0
17    100.0
18    100.0
dtype: float64

In [None]:
# # For reference: all new ward codes
# pd.read_csv(f"{ward_lookups_dir}/lbl_oa11_wd22_proposed.csv")[['WD22CD_proposed', 'WD22NM_proposed']].drop_duplicates().sort_values(by='WD22NM_proposed')

Results
--

In [18]:
# List the export folder to confirm that the files written above are present.
!ls -lh '{output_dir}'

total 13M
-rw------- 1 root root 2.0M Nov  2 13:39 census11_economic_activity_oa11_london.csv
-rw------- 1 root root 1.8K Nov 16 12:27 census11_economic_activity_references.csv
-rw------- 1 root root 1.5M Nov  2 13:39 census11_hours_worked_oa11_london.csv
-rw------- 1 root root  746 Nov 16 12:25 census11_hours_worked_references.csv
-rw------- 1 root root 7.4M Nov  2 13:38 census11_occupation_minor_groups_oa11_london.csv
-rw------- 1 root root 1.5M Nov  2 13:38 census11_occupation_oa11_london.csv
-rw------- 1 root root 1.7K Nov 16 12:27 census11_occupation_references.csv
-rw------- 1 root root 102K Dec  7 11:47 lbl_economic_activity_groups_oa11.csv
-rw------- 1 root root 2.4K Dec  7 11:47 lbl_economic_activity_groups_wd22.csv
-rw------- 1 root root  12K Dec  7 11:43 lbl_economic_activity_lsoa11.csv
-rw------- 1 root root  54K Dec  7 11:43 lbl_economic_activity_oa11.csv
-rw------- 1 root root 2.2K Dec  7 11:43 lbl_economic_activity_wd22.csv
-rw------- 1 root root 6.3K Dec  7 11:43 lbl_ho