In [None]:
# ONS (Census 2011) statistics for Lewisham.
# Fetches Nomis data releases and prepares them for use in Lewisham ward profiles.
#
# This requires the OA->LSOA lookup table produced by "lbl_boundaries_oa.ipynb"
# as well as the OA->WD22 lookup table produced by "lbl_boundaries_2022_wards.ipynb"

In [None]:
import pandas as pd

from google.colab import drive
import google.colab.files as files

import IPython

Tools
==
Helpers to reformat & aggregate the Nomis source data.

In [None]:
def filter_nomis_data(d, date_values, geography_values):
  """Return only the rows of `d` that match the requested dates and geographies.

  d:                DataFrame with `date` and `geography` columns.
  date_values:      accepted values for the `date` column.
  geography_values: accepted values for the `geography` column.

  A row is kept only when both columns match.
  """
  date_matches = d.date.isin(date_values)
  geography_matches = d.geography.isin(geography_values)
  return d[date_matches & geography_matches]

def format_nomis_data(d, geo_colname='OA11CD'):
  """Reshape Nomis data into a lookup-table layout.

  The `geography code` column is renamed to `geo_colname` so the geography
  key is clearly labelled, and the superfluous `date` and `geography`
  columns are removed. Returns a new DataFrame.
  """
  relabelled = d.rename(columns={'geography code': geo_colname})
  return relabelled.drop(columns=['date', 'geography'])

def nomis_to_oa(d, years, oa_list):
  """Convenience wrapper: filter Nomis data, then reformat it.

  Keeps the rows whose `date` is in `years` and whose `geography` is in
  `oa_list` (via filter_nomis_data), then applies format_nomis_data with
  its default geography column name.
  """
  selected = filter_nomis_data(d, years, oa_list)
  return format_nomis_data(selected)

In [None]:
def aggregate_oa_groups(oa_data, oa_group_table, oa_col, group_col):
  """Aggregate OA-level data to LSOAs, wards, etc. by summing within each group.

  oa_data:        DataFrame with one row per OA, keyed by `oa_col`.
  oa_group_table: lookup DataFrame mapping `oa_col` to `group_col`.
  oa_col:         name of the OA code column present in both tables.
  group_col:      name of the grouping column in `oa_group_table`.

  Returns a DataFrame indexed by `group_col` holding the per-group sums of
  the numeric columns of `oa_data`.

  Rows of `oa_data` whose OA has no entry in `oa_group_table` end up with a
  missing group after the right join and are therefore left out of the result
  (groupby skips missing keys by default).
  """
  merged = pd.merge(oa_group_table, oa_data, on=oa_col, how='right')
  # numeric_only=True: text columns carried over from the source (such as
  # 'Rural Urban') must not be aggregated. Older pandas dropped them silently;
  # pandas >= 2.0 would otherwise concatenate the strings within each group.
  return merged.\
             drop(columns=[oa_col]).\
             groupby(group_col).sum(numeric_only=True)

Data
==
GDrive mount
--

In [None]:
# Mount Google Drive at /content/gdrive; the project folders configured below live under this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Used as root folder.
project_dir = '/content/gdrive/MyDrive/WardProfiles'
# Folders holding the OA->LSOA and OA->WD22 lookup tables read in the "Lookups" section.
oa_lookups_dir = f"{project_dir}/lookups/oa"
ward_lookups_dir = f"{project_dir}/lookups/2022_wards"

# For exports. The raw Nomis downloads are saved here as well.
output_dir = f"{project_dir}/health"

In [None]:
# Create the output folder (and any missing parents); -p makes this a no-op if it already exists.
!mkdir -p '{output_dir}'

Downloads
--

In [None]:
# Health and provision of unpaid care, OAs in London 2011
# Source:
# https://www.nomisweb.co.uk/census/2011/ks301ew
# Fetches the Nomis bulk CSV export of dataset nm_617_1 and saves it under output_dir
# (wget -O overwrites an existing file of the same name).
# NOTE(review): the geography parameter presumably selects all OAs in London - confirm against the Nomis API.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_617_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299' \
  -O "{output_dir}/census11_health_and_provision_of_unpaid_care_oa11_london.csv"

--2021-12-09 11:26:44--  https://www.nomisweb.co.uk/api/v01/dataset/nm_617_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2013265927TYPE299
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.3.145
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.3.145|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/health/census11_health_and_provision_of_unpaid_care_oa11_london.csv’

/content/gdrive/MyD     [         <=>        ]   2.11M   276KB/s    in 7.8s    

2021-12-09 11:26:55 (276 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/health/census11_health_and_provision_of_unpaid_care_oa11_london.csv’ saved [2212433]



In [None]:
# Reference regions: the same Nomis dataset (nm_617_1), saved under output_dir as a separate file
# that the "Reference geographies" section reads and filters by name.
# NOTE(review): the three geography codes presumably select England, the London region and
# Lewisham (those are the rows that appear in the processed output) - confirm against the Nomis API.
!wget 'https://www.nomisweb.co.uk/api/v01/dataset/nm_617_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699,2013265927TYPE480,1946157254TYPE464' \
  -O "{output_dir}/census11_health_and_provision_of_unpaid_care_references.csv"

--2021-12-09 11:27:00--  https://www.nomisweb.co.uk/api/v01/dataset/nm_617_1.bulk.csv?time=latest&measures=20100&rural_urban=total&geography=2092957699,2013265927TYPE480,1946157254TYPE464
Resolving www.nomisweb.co.uk (www.nomisweb.co.uk)... 129.234.3.145
Connecting to www.nomisweb.co.uk (www.nomisweb.co.uk)|129.234.3.145|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/content/gdrive/MyDrive/WardProfiles/health/census11_health_and_provision_of_unpaid_care_references.csv’

/content/gdrive/MyD     [ <=>                ]   1.69K  --.-KB/s    in 0.05s   

2021-12-09 11:27:01 (32.7 KB/s) - ‘/content/gdrive/MyDrive/WardProfiles/health/census11_health_and_provision_of_unpaid_care_references.csv’ saved [1734]



Lookups
--
Used to match OAs to their LSOAs and to their 2022 electoral wards.

In [None]:
# OA -> LSOA lookup covering all OAs in Lewisham.
# Only the two code columns are kept; everything else in the file is discarded.
oa_lsoa_join = pd.read_csv(
    f"{oa_lookups_dir}/lbl_oa11_lsoa11_msoa11_lad20_rgn20_202012.csv"
)[['OA11CD', 'LSOA11CD']]
oa_lsoa_join.head()

Unnamed: 0,OA11CD,LSOA11CD
0,E00016277,E01003220
1,E00016278,E01003220
2,E00016285,E01003220
3,E00016257,E01003221
4,E00016263,E01003221


In [None]:
# OA -> 2022 ward lookup covering all OAs in Lewisham.
# Only the OA code and the (proposed) ward code columns are kept.
oa_wd22_join = pd.read_csv(
    f"{ward_lookups_dir}/lbl_oa11_wd22_proposed.csv"
)[['OA11CD', 'WD22CD_proposed']]
oa_wd22_join.head()

Unnamed: 0,OA11CD,WD22CD_proposed
0,E00016403,E05013721
1,E00016442,E05013721
2,E00016407,E05013721
3,E00016404,E05013721
4,E00016402,E05013721


Process & export
==
Lewisham
--

In [None]:
# For filtering of source data
lbl_oa_list = oa_lsoa_join.OA11CD.unique() # All OAs in Lewisham
years = [2011] # Just a precaution, in case any of the downloads include multiple periods

# For each downloaded dataset: export the Lewisham OA rows, then the LSOA and
# WD22 aggregates, each as its own CSV in output_dir.
for datname in ['health_and_provision_of_unpaid_care']:
  IPython.display.display(f"=== {datname} ===")
  # Load the Nomis data
  d = pd.read_csv(f"{output_dir}/census11_{datname}_oa11_london.csv")

  # Simplify the column names:
  # remove the redundant title prefix and qualifier suffix.
  # regex=True has to be explicit: since pandas 2.0 str.replace treats the
  # pattern as a literal string by default, which would leave the columns
  # unrenamed and break the column lookups in the "Derivatives" section.
  d.columns = d.columns.str.replace(
      r'^.*?: (.*?); measures: Value',
      r'\1',
      regex=True)

  # Transform to OA index data
  lbl_oa = nomis_to_oa(d, years, lbl_oa_list)
  lbl_oa.to_csv(f"{output_dir}/lbl_{datname}_oa11.csv", index=False)

  # Aggregate to LSOA level (index=True: the group code is the index and must be written out)
  lbl_lsoa = aggregate_oa_groups(lbl_oa, oa_lsoa_join, oa_col='OA11CD', group_col='LSOA11CD')
  lbl_lsoa.to_csv(f"{output_dir}/lbl_{datname}_lsoa11.csv", index=True)
  IPython.display.display(lbl_lsoa.head())

  # Aggregate to WD22 level
  lbl_wd22 = aggregate_oa_groups(lbl_oa, oa_wd22_join, oa_col='OA11CD', group_col='WD22CD_proposed')
  lbl_wd22.to_csv(f"{output_dir}/lbl_{datname}_wd22.csv", index=True)
  IPython.display.display(lbl_wd22.head())

'=== health_and_provision_of_unpaid_care ==='

Unnamed: 0_level_0,All categories: Long-term health problem or disability,Day-to-day activities limited a lot,Day-to-day activities limited a little,Day-to-day activities not limited,Day-to-day activities limited a lot: Age 16 to 64,Day-to-day activities limited a little: Age 16 to 64,Day-to-day activities not limited: Age 16 to 64,Very good health,Good health,Fair health,Bad health,Very bad health,Provides no unpaid care,Provides 1 to 19 hours unpaid care a week,Provides 20 to 49 hours unpaid care a week,Provides 50 or more hours unpaid care a week
LSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
E01003189,1670,144,157,1369,77,92,858,741,552,263,99,15,1495,93,28,54
E01003190,1878,136,145,1597,86,95,1043,874,650,229,87,38,1725,72,28,53
E01003191,1494,119,122,1253,80,84,813,680,558,177,65,14,1353,92,25,24
E01003192,1639,173,146,1320,108,86,876,714,541,249,102,33,1497,75,25,42
E01003193,1568,157,141,1270,80,78,868,653,600,197,93,25,1412,84,30,42


Unnamed: 0_level_0,All categories: Long-term health problem or disability,Day-to-day activities limited a lot,Day-to-day activities limited a little,Day-to-day activities not limited,Day-to-day activities limited a lot: Age 16 to 64,Day-to-day activities limited a little: Age 16 to 64,Day-to-day activities not limited: Age 16 to 64,Very good health,Good health,Fair health,Bad health,Very bad health,Provides no unpaid care,Provides 1 to 19 hours unpaid care a week,Provides 20 to 49 hours unpaid care a week,Provides 50 or more hours unpaid care a week
WD22CD_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
E05013714,10627,957,935,8735,580,571,5675,4821,3572,1478,584,172,9632,539,172,284
E05013715,15544,1038,1035,13471,476,602,9851,8211,5017,1546,578,192,14409,760,164,211
E05013716,16587,859,1003,14725,538,647,11875,8616,5732,1580,501,158,15564,712,139,172
E05013717,16097,1172,1241,13684,544,694,9386,7629,5634,2030,616,188,14523,1000,243,331
E05013718,14937,932,1020,12985,480,626,9555,7690,4959,1604,523,161,13629,851,190,267


Reference geographies
--

In [None]:
# For filtering of source data
reference_names = ['United Kingdom', 'Great Britain', 'England and Wales',
                   'England', 'London', 'Lewisham']
years = [2011] # Just a precaution, in case any of the downloads include multiple periods

# For each downloaded dataset: keep the reference geographies that are present
# in the download and export them with clearly labelled Name / Code columns.
for datname in ['health_and_provision_of_unpaid_care']:
  IPython.display.display(f"=== {datname} ===")
  # Load the Nomis data
  d = pd.read_csv(f"{output_dir}/census11_{datname}_references.csv")

  # Simplify the column names:
  # remove the redundant title prefix and qualifier suffix.
  # regex=True has to be explicit: since pandas 2.0 str.replace treats the
  # pattern as a literal string by default, which would leave the columns
  # unrenamed and break the column lookups in the "Derivatives" section.
  d.columns = d.columns.str.replace(
      r'^.*?: (.*?); measures: Value',
      r'\1',
      regex=True)

  # Filter & transform to index data
  ref = filter_nomis_data(d, years, reference_names).drop(columns=['date'])
  ref = ref.rename(columns={'geography': 'Name', 'geography code': 'Code'})
  ref.to_csv(f"{output_dir}/references_{datname}.csv", index=False)
  IPython.display.display(ref.head())

'=== health_and_provision_of_unpaid_care ==='

Unnamed: 0,Name,Code,Rural Urban,All categories: Long-term health problem or disability,Day-to-day activities limited a lot,Day-to-day activities limited a little,Day-to-day activities not limited,Day-to-day activities limited a lot: Age 16 to 64,Day-to-day activities limited a little: Age 16 to 64,Day-to-day activities not limited: Age 16 to 64,Very good health,Good health,Fair health,Bad health,Very bad health,Provides no unpaid care,Provides 1 to 19 hours unpaid care a week,Provides 20 to 49 hours unpaid care a week,Provides 50 or more hours unpaid care a week
0,England,E92000001,Total,53012456,4405394,4947192,43659870,1924080,2452742,29952269,25005712,18141457,6954092,2250446,660749,47582440,3452636,721143,1256237
1,London,E12000007,Total,8173941,551664,605501,7016776,276070,344797,5023557,4127788,2725645,915035,305343,100130,7483968,435278,105399,149296
2,Lewisham,E09000023,Total,275885,19523,20212,236150,10388,12264,169962,135428,93850,32289,10755,3563,253364,13931,3502,5088


Derivatives
==

Health and Provision of Unpaid Care
--

In [None]:
# health_and_provision_of_unpaid_care = hpuc
# Load the raw London OA download to inspect the original (unsimplified) Nomis column names.
hpuc = pd.read_csv(f"{output_dir}/census11_health_and_provision_of_unpaid_care_oa11_london.csv")
hpuc.columns

Index(['date', 'geography', 'geography code', 'Rural Urban',
       'disability/health/care: All categories: Long-term health problem or disability; measures: Value',
       'disability/health/care: Day-to-day activities limited a lot; measures: Value',
       'disability/health/care: Day-to-day activities limited a little; measures: Value',
       'disability/health/care: Day-to-day activities not limited; measures: Value',
       'disability/health/care: Day-to-day activities limited a lot: Age 16 to 64; measures: Value',
       'disability/health/care: Day-to-day activities limited a little: Age 16 to 64; measures: Value',
       'disability/health/care: Day-to-day activities not limited: Age 16 to 64; measures: Value',
       'disability/health/care: Very good health; measures: Value',
       'disability/health/care: Good health; measures: Value',
       'disability/health/care: Fair health; measures: Value',
       'disability/health/care: Bad health; measures: Value',
       'dis

In [None]:
def hpuc_groups(hpuc, fixed_columns):
  """Segment the health / disability / unpaid-care counts into percentage columns.

  hpuc:          DataFrame of counts with the simplified Nomis column names.
  fixed_columns: identifier columns to carry over unchanged (e.g. the geography code).

  Returns a new DataFrame holding `fixed_columns` followed by one percentage
  column per category. Every percentage is the category count divided by the
  'All categories: Long-term health problem or disability' count of the same
  row, times 100. Values are not rounded. The input frame is not modified.
  """
  # (output column, source column) pairs, in output order
  percentage_columns = [
      # general health
      ('Very Good Health', 'Very good health'),
      ('Good Health', 'Good health'),
      ('Fair Health', 'Fair health'),
      ('Bad Health', 'Bad health'),
      ('Very Bad Health', 'Very bad health'),
      # day-to-day activities & limitation
      ('Day-to-Day Activities Limited a Lot', 'Day-to-day activities limited a lot'),
      ('Day-to-Day Activities Limited a Little', 'Day-to-day activities limited a little'),
      ('Day-to-Day Activities Not Limited', 'Day-to-day activities not limited'),
      # provision of unpaid care
      ('Provides No Unpaid Care', 'Provides no unpaid care'),
      ('Provides 1 to 19 Hours Unpaid Care a Week', 'Provides 1 to 19 hours unpaid care a week'),
      ('Provides 20 to 49 Hours Unpaid Care a Week', 'Provides 20 to 49 hours unpaid care a week'),
      ('Provides 50 or More Hours Unpaid Care a Week', 'Provides 50 or more hours unpaid care a week'),
  ]

  result = hpuc[fixed_columns].copy()
  denominator = hpuc['All categories: Long-term health problem or disability']
  for out_name, source_name in percentage_columns:
    result[out_name] = hpuc[source_name] * 100. / denominator
  return result

In [None]:
# Lewisham: derive the percentage tables for the OA and WD22 exports.
# Each pair is (file name suffix, name of the geography code column in that file).
for geog, geog_colname in [('oa11', 'OA11CD'), ('wd22', 'WD22CD_proposed')]:
  IPython.display.display(f"=== {geog} ===")

  # Counts in, percentages out; only the geography code is carried over.
  hpuc = pd.read_csv(f"{output_dir}/lbl_health_and_provision_of_unpaid_care_{geog}.csv")
  d = hpuc_groups(hpuc, fixed_columns=[geog_colname])
  d.to_csv(f"{output_dir}/lbl_health_and_provision_of_unpaid_care_groups_{geog}.csv", index=False)
  IPython.display.display(d.head())

'=== oa11 ==='

Unnamed: 0,OA11CD,Very Good Health,Good Health,Fair Health,Bad Health,Very Bad Health,Day-to-Day Activities Limited a Lot,Day-to-Day Activities Limited a Little,Day-to-Day Activities Not Limited,Provides No Unpaid Care,Provides 1 to 19 Hours Unpaid Care a Week,Provides 20 to 49 Hours Unpaid Care a Week,Provides 50 or More Hours Unpaid Care a Week
0,E00016403,52.73224,35.519126,7.650273,3.825137,0.273224,3.825137,5.737705,90.437158,95.355191,2.459016,0.819672,1.36612
1,E00016437,57.620818,29.553903,8.921933,2.973978,0.929368,4.64684,6.505576,88.847584,93.680297,2.416357,1.301115,2.60223
2,E00016439,45.552561,38.544474,10.242588,4.312668,1.347709,7.277628,9.973046,82.749326,95.956873,1.886792,0.269542,1.886792
3,E00016442,54.519774,33.898305,7.909605,3.107345,0.564972,3.672316,5.367232,90.960452,95.480226,3.107345,0.282486,1.129944
4,E00016399,54.077253,28.969957,9.012876,5.579399,2.360515,8.154506,6.223176,85.622318,92.274678,3.218884,2.145923,2.360515


'=== wd22 ==='

Unnamed: 0,WD22CD_proposed,Very Good Health,Good Health,Fair Health,Bad Health,Very Bad Health,Day-to-Day Activities Limited a Lot,Day-to-Day Activities Limited a Little,Day-to-Day Activities Not Limited,Provides No Unpaid Care,Provides 1 to 19 Hours Unpaid Care a Week,Provides 20 to 49 Hours Unpaid Care a Week,Provides 50 or More Hours Unpaid Care a Week
0,E05013714,45.365578,33.612496,13.90797,5.495436,1.618519,9.005364,8.798344,82.196292,90.637057,5.071986,1.618519,2.672438
1,E05013715,52.824241,32.276119,9.94596,3.718477,1.235203,6.677818,6.658518,86.663664,92.698147,4.889346,1.055069,1.357437
2,E05013716,51.944294,34.557183,9.525532,3.020438,0.952553,5.178754,6.046904,88.774341,93.832519,4.292518,0.838006,1.036957
3,E05013717,47.393924,35.000311,12.611046,3.8268,1.167919,7.28086,7.709511,85.009629,90.22178,6.212338,1.509598,2.056284
4,E05013718,51.482895,33.199438,10.738435,3.501372,1.07786,6.239539,6.82868,86.93178,91.243222,5.697262,1.272009,1.787508


Reference geographies: health and provision of unpaid care
--

In [None]:
# Reference geographies
# Read the reference counts exported in the "Reference geographies" processing step,
# convert them to percentages (Name and Code are carried over as identifiers) and export.
hpuc_ref = pd.read_csv(f"{output_dir}/references_health_and_provision_of_unpaid_care.csv")
d = hpuc_groups(hpuc_ref, ['Name', 'Code'])
d.to_csv(f"{output_dir}/references_health_and_provision_of_unpaid_care_groups.csv", index=False)
IPython.display.display(d.head())


Unnamed: 0,Name,Code,Very Good Health,Good Health,Fair Health,Bad Health,Very Bad Health,Day-to-Day Activities Limited a Lot,Day-to-Day Activities Limited a Little,Day-to-Day Activities Not Limited,Provides No Unpaid Care,Provides 1 to 19 Hours Unpaid Care a Week,Provides 20 to 49 Hours Unpaid Care a Week,Provides 50 or More Hours Unpaid Care a Week
0,England,E92000001,47.169503,34.221122,13.117845,4.245127,1.246403,8.310111,9.332131,82.357758,89.757094,6.512877,1.360327,2.369702
1,London,E12000007,50.499361,33.345543,11.194539,3.735566,1.22499,6.749058,7.4077,85.843243,91.55887,5.325191,1.289451,1.826487
2,Lewisham,E09000023,49.08857,34.017797,11.70379,3.898363,1.29148,7.076499,7.326241,85.59726,91.836816,5.049568,1.269369,1.844247
