In [1]:
from argparse import ArgumentParser
from functools import reduce
from pathlib import Path
import os
from typing import Optional

import yaml
from caf.core.data_structures import DVector, IpfTarget
from caf.core.segments import SegmentsSuper
from caf.core.segmentation import Segmentation, SegmentationInput
from caf.core.zoning import TranslationWeighting
import numpy as np
import pandas as pd

# `land_use` is imported with the parent directory as the working directory
# (presumably because the package sits one level above this notebook — confirm).
# The starting directory is captured and restored in `finally`, so a failed
# import no longer leaves the kernel in the parent folder; previously each
# re-run after a failure would climb one directory higher.
_notebook_dir = Path.cwd()
os.chdir(_notebook_dir.parent)
try:
    from land_use import constants, data_processing
finally:
    os.chdir(_notebook_dir)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location (folder and file name) of the stored DVector that will be put
# through IPF; the file itself is loaded in a later cell.
# NOTE(review): absolute, machine-specific path — adjust for your environment.
input_path = Path(r'F:\Working\Land-Use\OUTPUTS_full run_final')
file = 'Output P11_EM.hdf'

In [3]:
# Read the stored DVector from the configured folder / file name.
dvector = DVector.load(input_path.joinpath(file))

In [4]:
# add required segmentation for IPF
# `dvector` is reassigned from itself, so the call is guarded to keep the cell
# idempotent: re-running it will not attempt to add `status_aps` a second time.
# (`segmentation.seg_dict` is keyed by segment name.)
if 'status_aps' not in dvector.segmentation.seg_dict:
    dvector = dvector.add_segments(new_segs=['status_aps'])

In [5]:
# Preview the first rows of the DVector's underlying table to confirm that
# `status_aps` now appears among the segment index levels.
dvector.data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,LSOA2021,E01013453,E01013454,E01013455,E01013456,E01013457,E01013458,E01013459,E01013460,E01013461,E01013462,...,E01035501,E01035502,E01035503,E01035504,E01035505,E01035506,E01035507,E01035508,E01035509,E01035510
accom_h,ns_sec,adults,car_availability,children,adult_nssec,age_9,g,economic_status,pop_emp,soc,status_aps,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1
1,1,1,1,1,1,4,1,1,1,1,1,0.018584,0.017155,0.173532,0.002436,0.031165,0.290394,0.052875,0.008162,0.360195,0.312306,...,0.002798,0.000711,0.381768,0.008655,0.024251,0.0,6.214185,0.121127,0.257242,0.115581
1,1,1,1,1,1,4,1,1,1,2,1,0.003041,0.002807,0.018171,0.000399,0.0051,0.030408,0.010575,0.001336,0.033533,0.029075,...,7.6e-05,1.9e-05,0.041496,0.000941,0.002636,0.0,0.484438,0.009443,0.018712,0.008408
1,1,1,1,1,1,4,1,1,2,1,1,0.004393,0.004055,0.048607,0.000576,0.007366,0.081341,0.015275,0.001929,0.098866,0.085721,...,0.001109,0.000282,0.082993,0.001881,0.005272,0.0,1.497864,0.029196,0.070318,0.031595
1,1,1,1,1,1,4,1,1,2,2,1,0.003717,0.003431,0.018171,0.000487,0.006233,0.030408,0.012925,0.001632,0.040471,0.035091,...,0.000227,5.8e-05,0.020748,0.00047,0.001318,0.0,0.556826,0.010854,0.018712,0.008408
1,1,1,1,1,1,4,1,2,3,4,2,0.001014,0.000936,0.00954,0.000133,0.0017,0.015964,0.0047,0.000445,0.01272,0.011028,...,5e-05,1.3e-05,0.012449,0.000282,0.000791,0.0,0.161481,0.003148,0.006303,0.002832


In [6]:
# Read in the APS targets and store them as a DVector: values are segmented by
# `g` and `status_aps` at RGN2021 level, and only the 'EM' geography subset is
# kept.
# NOTE(review): absolute network path — adjust for your environment.
target = Path(r'I:\NorMITs Land Use\2023\import\APS 2023 for IPF\Regional-based-targets\preprocessing\APS-24-regional-based-targets_revamp_t01.hdf')
target_dvector = data_processing.read_dvector_data(
    file_path=target,
    geographical_level='RGN2021',
    input_segments=['g', 'status_aps'],
    geography_subset='EM'
)

The input data at I:\NorMITs Land Use\2023\import\APS 2023 for IPF\Regional-based-targets\preprocessing\APS-24-regional-based-targets_revamp_t01.hdf started with 11 columns. Filtering to EM results in 1 columns.


In [7]:
# Preview the target values: rows are (g, status_aps) combinations, with one
# column for the zone remaining after the 'EM' filter.
target_dvector.data.head()

Unnamed: 0_level_0,RGN2021,E12000004
g,status_aps,Unnamed: 2_level_1
1,1,1269700
1,2,55400
1,3,599600
2,1,1080300
2,2,40500


In [8]:
# Inspect the segment definitions (values, exclusions, lookups) attached to the
# target, keyed by segment name.
target_dvector.segmentation.seg_dict

{'g': Segment(name='g', values={1: 'male', 2: 'female'}, exclusions=[Exclusion(other_name='gender_3', exclusions={1: {3}, 2: {2}})], lookups=[]),
 'status_aps': Segment(name='status_aps', values={1: 'Economically active employed', 2: 'Economically active unemployed', 3: 'Economically inactive'}, exclusions=[], lookups=[Exclusion(other_name='economic_status', exclusions={1: {1, 3}, 2: {2, 4}, 3: {5, 6}, -8: {-8}})])}

In [9]:
# Apply IPF to the DVector using the single (subset) APS target. The result is
# unpacked into the rebalanced DVector and the accompanying `rmse` value.
ipf_targets = [IpfTarget(target_dvector)]
rebalanced_data, rmse = dvector.ipf(
    targets=ipf_targets,
    zone_trans_cache=constants.CACHE_FOLDER,
)



Convergence met, returning DVector.




In [21]:
# Post-IPF check: collapse the rebalanced DVector to `status_aps` only and show
# the per-zone values.
rebalanced_data.aggregate(segs=['status_aps']).data

LSOA2021,E01013453,E01013454,E01013455,E01013456,E01013457,E01013458,E01013459,E01013460,E01013461,E01013462,...,E01035501,E01035502,E01035503,E01035504,E01035505,E01035506,E01035507,E01035508,E01035509,E01035510
status_aps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-8,363.097525,328.709458,350.96056,348.937733,358.040456,328.709458,503.684031,246.784947,285.218668,312.526839,...,203.294157,80.913097,86.98158,134.518024,279.150186,75.856029,344.892078,532.003615,255.887671,719.115153
1,1176.723751,694.699176,944.726165,876.003037,907.716661,805.647973,771.72754,1005.162802,865.921374,774.327205,...,1054.632762,776.186504,460.836902,539.984196,885.525655,1286.299943,2456.102085,2548.844117,652.810944,1172.598868
2,85.848898,44.808341,40.993193,73.335548,57.517238,34.085689,44.416948,66.287037,21.594424,24.48547,...,109.873906,103.927193,77.217056,70.231465,116.031297,94.331037,131.705224,161.559718,20.265239,30.911348
3,620.678884,308.578686,602.493973,497.407726,394.412455,683.45825,763.522079,463.109771,528.934587,385.941877,...,1301.002041,2104.580489,1468.245961,1058.283725,1788.055116,687.497253,2295.057882,1931.983768,396.270676,343.625524


In [22]:
# Same aggregation on the DVector that was passed into IPF, for side-by-side
# comparison with the rebalanced values.
dvector.aggregate(segs=['status_aps']).data

LSOA2021,E01013453,E01013454,E01013455,E01013456,E01013457,E01013458,E01013459,E01013460,E01013461,E01013462,...,E01035501,E01035502,E01035503,E01035504,E01035505,E01035506,E01035507,E01035508,E01035509,E01035510
status_aps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-8,363.097525,328.709458,350.96056,348.937733,358.040456,328.709458,503.684031,246.784947,285.218668,312.526839,...,203.294157,80.913097,86.98158,134.518024,279.150186,75.856029,344.892078,532.003615,255.887671,719.115153
1,1347.138544,803.562127,1088.510277,1005.086609,1046.951429,926.559535,886.870831,1149.47819,999.519832,890.933048,...,1213.458577,896.236961,528.870012,618.1334,1018.136735,1464.288477,2834.264873,2937.652786,753.18501,1355.587262
2,141.798588,75.028332,68.56242,121.219136,96.362712,56.848742,74.015513,109.461781,36.27795,40.929554,...,183.694533,174.778516,128.615711,116.498244,193.841906,154.740054,221.61495,271.406705,33.990753,52.034958
3,515.684856,255.204318,495.577318,417.111588,327.281404,562.031883,637.147329,386.630147,442.889074,316.221925,...,1081.821912,1744.760312,1225.766619,886.664546,1491.530226,582.819132,1901.058807,1605.269805,330.436395,284.47293
