In [1]:
import pandas as pd

# Load the two cleaned Time Use Survey 2024 extracts. Judging by the file
# names, `pdf` holds the person-level records and `hdf` the household-level
# ones.
PERSON_PARQUET = '../../time-use-survey-2024/clean/per-tus-2024.parquet'
HOUSEHOLD_PARQUET = '../../time-use-survey-2024/clean/hh-tus-2024.parquet'

pdf = pd.read_parquet(PERSON_PARQUET)
hdf = pd.read_parquet(HOUSEHOLD_PARQUET)

# Preview the first rows of the person-level frame.
pdf.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,schedule_id,schedule,survey_year,sector,nss_region,district,stratum,sub_stratum,sub_round,fod_sub_region,...,time_to,performed_multiple_activities,performed_simultaneous_activities,major_minor_activity,activity_code,where_activity_performed,paid_unpaid_status,enterprise_type,nsc,mult
fsu_serial_no,sample_hh_no,person_serial_no,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
32223,1,1,TUS,106,2024,Rural,11,21,13,1,2,110,...,07:00,no,,major,911.0,within premises of the dwelling unit of the se...,self development / self care / self maintenanc...,,4,67243
32223,1,1,TUS,106,2024,Rural,11,21,13,1,2,110,...,07:30,no,,major,931.0,within premises of the dwelling unit of the se...,self development / self care / self maintenanc...,,4,67243
32223,1,1,TUS,106,2024,Rural,11,21,13,1,2,110,...,08:00,no,,major,921.0,within premises of the dwelling unit of the se...,self development / self care / self maintenanc...,,4,67243
32223,1,1,TUS,106,2024,Rural,11,21,13,1,2,110,...,09:30,no,,major,182.0,outside premises of the dwelling unit of the s...,self development / self care / self maintenanc...,,4,67243
32223,1,1,TUS,106,2024,Rural,11,21,13,1,2,110,...,12:00,no,,major,131.0,outside premises of the dwelling unit of the s...,regular wage/salary for production of services,proprietary,4,67243


In [2]:
# Get the workforce - people who have spent any time in work (at least one
# activity whose code starts with '1') and are of working age, i.e. 15 to 60
# years inclusive.
MIN_WORKING_AGE = 15
MAX_WORKING_AGE = 60

is_work_activity = pdf['activity_code'].astype(str).str.startswith('1')
workers = pdf[is_work_activity].index.drop_duplicates()

# Pull *all* activity rows of those people (not only their work rows), then
# apply the age filter. The copy is taken last, after filtering, so that
# workforce_df is an independent frame: new columns are assigned to it later,
# and copying before the filter would leave the result flagged as a slice of
# another frame (SettingWithCopyWarning) while also copying rows that are
# about to be discarded.
workforce_df = pdf.loc[workers]
workforce_df = workforce_df[
    workforce_df['age'].between(MIN_WORKING_AGE, MAX_WORKING_AGE)
].copy()

# Get time spent
def get_time_spent(time_from, time_to):
    """Return the length in minutes of a slot running from time_from to time_to.

    Both arguments are 'HH:MM' strings. When the end is not later than the
    start, the slot is treated as wrapping past midnight; a slot whose start
    and end coincide counts as a full day (1440 minutes).
    """
    minutes_per_day = 24 * 60

    # Identical endpoints mean the activity filled the whole day.
    if time_from == time_to:
        return minutes_per_day

    def as_minutes(clock):
        # Minutes since midnight for an 'HH:MM' string.
        hours, mins = (int(part) for part in clock.split(':'))
        return hours * 60 + mins

    start = as_minutes(time_from)
    end = as_minutes(time_to)
    elapsed = end - start

    # A non-positive difference means the slot crossed midnight.
    return elapsed if elapsed > 0 else minutes_per_day + elapsed

# Duration in minutes of every activity slot.
workforce_df['ts'] = workforce_df[['time_from', 'time_to']].apply(
    lambda slot: get_time_spent(slot['time_from'], slot['time_to']), axis=1
)

# Summarize time spent on major activity categories. The category is the
# leading digit of the activity code.
MAJOR_ACTIVITY_CODES = list(range(1, 10))
workforce_df['ACT_CODE'] = workforce_df['activity_code'].astype(str).str.get(0).astype(int)

# Fail loudly on a category outside 1-9 instead of dropping or mislabelling it.
unexpected_codes = set(workforce_df['ACT_CODE']) - set(MAJOR_ACTIVITY_CODES)
assert not unexpected_codes, f'unexpected major activity codes: {sorted(unexpected_codes)}'

time_spent = (
    workforce_df.groupby([workforce_df.index, 'ACT_CODE'])['ts']
    .sum()
    .unstack()
    # Pin the columns to categories 1-9 so that a category absent from the
    # data still gets a (zero-filled) column rather than shifting the labels.
    .reindex(columns=MAJOR_ACTIVITY_CODES)
    .fillna(value=0)
)
# Grouping by the index object yields tuple keys; restore the named levels.
time_spent.index = pd.MultiIndex.from_tuples(time_spent.index, names=pdf.index.names)
# Label each column from its own category code, not from its position.
time_spent.columns = [f'ACT_{code}' for code in time_spent.columns]

# Person-level attributes: one row per person, taking the first value found
# for each column among that person's activity rows.
PERSON_COLS = ['sector', 'gender', 'age', 'marital_status', 'education', 'nic_2008_principal_activity', 'mult']
person_groups = workforce_df.groupby(workforce_df.index)
personal_info = person_groups[PERSON_COLS].first()
# Grouping by the index object yields tuple keys; restore the named levels.
personal_info.index = pd.MultiIndex.from_tuples(personal_info.index, names=pdf.index.names)

# Household-level attributes for every household that has a workforce member.
# The household key is the person index with its last level dropped.
HOUSEHOLD_COLS = ['religion', 'social_group', 'total_monthly_ce',
                  'energy_cooking', 'energy_lighting', 'washing_type',
                  'sweeping_type', 'dwelling_unit', 'dwelling_unit_structure_type']
household_keys = workforce_df.index.droplevel(-1).drop_duplicates()
hh_info = hdf.loc[household_keys, HOUSEHOLD_COLS]

# Join on the household key: move the last index level of personal_info into
# a column so both sides share the same index, merge, then rebuild the full
# person index (verify_integrity raises if a person ends up duplicated).
persons_by_household = personal_info.reset_index(-1)
persons_with_household = persons_by_household.merge(
    hh_info, how='outer', left_index=True, right_index=True
)
wf_agg = persons_with_household.reset_index().set_index(
    workforce_df.index.names, verify_integrity=True
)

# Append the per-category time totals, aligned on the person index.
wf_agg = pd.concat([wf_agg, time_spent], axis=1)

In [3]:
# Persist both results next to the notebook: the one-row-per-person summary
# and the activity-level frame it was derived from.
for out_path, frame in (
    ('workforce-summary.parquet', wf_agg),
    ('workforce-activities.parquet', workforce_df),
):
    frame.to_parquet(out_path)