### Imports and configuration


In [1]:
from uatk_spc.builder import Builder

# Region name handed to `Builder` as its second argument.
region = "rutland"
# Relative path handed to `Builder` as its first argument.
path = "../../data/output/England/2020"

#### Example with `Builder` class combining people and households

In [2]:
# Start a builder for the region (pandas backend, protobuf input) and
# add the household data to it.
households_builder = Builder(
    path, region, backend="pandas", input_type="protobuf"
).add_households()
# Unnest the "health" and "details" fields, then materialise the result.
df = households_builder.unnest(["health", "details"]).build()

In [3]:
# List the columns of the combined frame. In the output below the fields
# unnested above (e.g. `bmi`, `hid`) appear as their own columns, while
# `demographics` and `employment` remain single columns.
df.columns

Index(['id', 'household', 'workplace', 'demographics', 'employment', 'events',
       'weekday_diaries', 'weekend_diaries', 'orig_pid', 'id_tus_hh',
       'id_tus_p', 'pid_hs', 'msoa11cd', 'oa11cd', 'members', 'bmi',
       'has_cardiovascular_disease', 'has_diabetes', 'has_high_blood_pressure',
       'self_assessed_health', 'life_satisfaction', 'number_medications',
       'hid', 'nssec8', 'accommodation_type', 'num_rooms', 'central_heat',
       'tenure', 'num_cars', 'communal_type'],
      dtype='object')

In [4]:
# Preview the first five rows of the combined people/households frame.
df.head()

Unnamed: 0,id,household,workplace,demographics,employment,events,weekday_diaries,weekend_diaries,orig_pid,id_tus_hh,...,life_satisfaction,number_medications,hid,nssec8,accommodation_type,num_rooms,central_heat,tenure,num_cars,communal_type
0,0,0,1168,"{'sex': 'MALE', 'age_years': 44, 'ethnicity': ...","{'sic1d2007': 'F', 'sic2d2007': '43', 'soc2010...","{'sport': 0.255, 'rugby': 0.16065, 'concert_m'...","[1796, 2877, 4451, 5231, 5497, 6073, 6593, 669...","[1795, 2876, 4452, 5230, 5496, 6072, 6592, 669...",E02002863_0001_001,17310308,...,HIGH,0,E02002863_0001,SEMIROUTINE,DETACHED,6,True,OWNED_FULLY,1,COMMUNAL
1,1,0,155,"{'sex': 'FEMALE', 'age_years': 41, 'ethnicity'...","{'sic1d2007': 'G', 'sic2d2007': '47', 'soc2010...","{'sport': 0.255, 'rugby': 0.1887, 'concert_m':...","[289, 2183, 2190, 6542, 7132, 9122, 9730, 1035...","[288, 2184, 2189, 6543, 7133, 9123, 9729, 1035...",E02002863_0001_002,12050117,...,VERY_HIGH,1,E02002863_0001,SEMIROUTINE,DETACHED,6,True,OWNED_FULLY,1,COMMUNAL
2,2,1,0,"{'sex': 'MALE', 'age_years': 68, 'ethnicity': ...","{'sic1d2007': 'S', 'sic2d2007': '96', 'soc2010...","{'sport': 0.239, 'rugby': 0.15057, 'concert_m'...","[89, 677, 1331, 1480, 2111, 3457, 4855, 5763, ...","[88, 678, 1330, 1481, 2110, 3458, 4854, 5762, ...",E02002863_0002_001,11280408,...,LOW,0,E02002863_0002,SUPER,TERRACED,5,True,RENTED_PRIVATE,2,COMMUNAL
3,3,1,0,"{'sex': 'FEMALE', 'age_years': 62, 'ethnicity'...","{'sic1d2007': 'P', 'sic2d2007': '85', 'soc2010...","{'sport': 0.268, 'rugby': 0.09916, 'concert_m'...","[2844, 5035, 11790, 12206, 12444, 15284]","[2845, 5034, 11791, 12207, 12443, 15283]",E02002863_0002_002,16110805,...,MEDIUM,0,E02002863_0002,SUPER,TERRACED,5,True,RENTED_PRIVATE,2,COMMUNAL
4,4,1,0,"{'sex': 'MALE', 'age_years': 10, 'ethnicity': ...","{'pwkstat': 'NA', 'sic1d2007': '', 'sic2d2007'...","{'sport': 0.257, 'rugby': 0.16191, 'concert_m'...","[33, 74, 213, 264, 291, 533, 548, 549, 561, 56...","[32, 75, 212, 265, 290, 534, 547, 550, 562, 56...",E02002863_0002_003,16241112,...,LOW,0,E02002863_0002,SUPER,TERRACED,5,True,RENTED_PRIVATE,2,COMMUNAL


#### Example with `Builder` class combining people and time use diaries

In [5]:
# Fields to keep from each nested person attribute; the mapping is passed
# to `add_time_use_diaries` below. Keys name the nested attribute, values
# list the fields selected from it.
features = dict(
    health=[
        "bmi",
        "has_cardiovascular_disease",
        "has_diabetes",
        "has_high_blood_pressure",
        "self_assessed_health",
        "life_satisfaction",
    ],
    demographics=["age_years", "sex", "nssec8"],
    employment=["pwkstat", "salary_yearly"],
)

# Start a builder for the region (pandas backend, protobuf input).
diary_builder = Builder(path, region, backend="pandas", input_type="protobuf")
# Add the weekday time use diaries together with the selected features.
diary_builder = diary_builder.add_time_use_diaries(
    features, diary_type="weekday_diaries"
)
df = diary_builder.build()


In [6]:
# List the columns of the people/diaries frame. In the output below the
# selected person features come first, followed by `weekday_diaries` and
# the diary fields (`uid`, `weekday`, ...).
df.columns

Index(['id', 'household', 'bmi', 'has_cardiovascular_disease', 'has_diabetes',
       'has_high_blood_pressure', 'self_assessed_health', 'life_satisfaction',
       'age_years', 'sex', 'nssec8', 'pwkstat', 'salary_yearly',
       'weekday_diaries', 'uid', 'weekday', 'day_type', 'month', 'pworkhome',
       'phomeother', 'pwork', 'pschool', 'pshop', 'pservices', 'pleisure',
       'pescort', 'ptransport', 'phome_total', 'pnothome_total',
       'punknown_total', 'pmwalk', 'pmcycle', 'pmprivate', 'pmpublic',
       'pmunknown', 'age35g'],
      dtype='object')

In [7]:
# Preview the first five rows. In the output below the same person `id`
# repeats across rows, with person-level values repeated and diary values
# varying from row to row.
df.head()

Unnamed: 0,id,household,bmi,has_cardiovascular_disease,has_diabetes,has_high_blood_pressure,self_assessed_health,life_satisfaction,age_years,sex,...,ptransport,phome_total,pnothome_total,punknown_total,pmwalk,pmcycle,pmprivate,pmpublic,pmunknown,age35g
0,0,0,39.47709,False,False,False,GOOD,HIGH,44,MALE,...,0.125,0.81945,0.18055,0.0,0.0,0.0,0.92857,0.0,0.07143,12
1,0,0,39.47709,False,False,False,GOOD,HIGH,44,MALE,...,0.15278,0.36133,0.63867,0.0,0.0,0.0,0.95455,0.0,0.04545,12
2,0,0,39.47709,False,False,False,GOOD,HIGH,44,MALE,...,0.27778,0.31944,0.68056,0.0,0.0,0.0,1.0,0.0,0.0,12
3,0,0,39.47709,False,False,False,GOOD,HIGH,44,MALE,...,0.15278,0.47222,0.52778,0.0,0.0,0.0,1.0,0.0,0.0,12
4,0,0,39.47709,False,False,False,GOOD,HIGH,44,MALE,...,0.1875,0.68056,0.31944,0.0,0.11111,0.59259,0.96296,0.0,0.03704,12
