In [1]:
# Import packages
import pandas as pd
import os
import os.path as osp
import numpy as np
import json
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
# Project directories, all resolved as siblings of the notebook's working directory
currentdir = os.getcwd()
inputdir, moogaldir, outputdir = (
    osp.realpath(osp.join(currentdir, '..', folder))
    for folder in ('inputData', 'MOOGALdefs', 'outputData')
)

In [3]:
# Two-digit year/month/day stamp (YYMMDD) used in the names of the CSV files this
# notebook reads and writes. NOTE(review): because the stamp is today's date, the
# M24_* inputs loaded below must have been written on the same day - confirm.
current_date = '{:%y%m%d}'.format(datetime.today())

In [4]:
# Load files
# Date-stamped intermediate results are read back from the output directory.
economic = pd.read_csv('{}/M24_economic_activity_{}.csv'.format(outputdir, current_date))
TUS = pd.read_csv('{}/M24_TUS_and_youth_{}.csv'.format(outputdir, current_date))

# GDP per capita keyed by ISO3 code; rows with no values at all are discarded.
GDP = (
    pd.read_csv('{}/gdp_per_capita.csv'.format(inputdir))
    .set_index('country_iso3')
    .dropna(axis=0, how='all')
)

# Country index, restricted to the rows flagged include == 1.
indexfile = pd.read_csv('{}/indexfile.csv'.format(inputdir)).set_index(['countryISO3'])
indexfile = indexfile.loc[indexfile['include'] == 1.0]

# Demography table keyed by (country, year).
demography = pd.read_csv('{}/demography.csv'.format(inputdir)).set_index(['countryISO3', 'year'])

In [5]:
# WPAC reclassified, remove empty rows.
# .copy() makes the filtered frame independent of the frame read from disk, so the
# column assignment further down does not raise SettingWithCopyWarning.
economic = economic[economic['region_code'] != 'WPAC'].copy()

# Rename TUS subcat of organization to allocation, and things to artifacts.
# regex=False forces plain substring replacement whatever the installed pandas
# version uses as its default for `regex`.
TUS['subcategory'] = (
    TUS['subcategory']
    .str.replace('organization', 'allocation', regex=False)
    .str.replace('things', 'artifacts', regex=False)
)
economic['subcategory'] = economic['subcategory'].str.replace('things', 'artifacts', regex=False)

In [26]:
# Publication labels for the 24 subcategories. The three lists below are parallel:
# entry i of `categories` / `cat_num` belongs to entry i of `subcategories`.
categories = ['Experience oriented', 'Somatic maintenance', 'Organization', 'Technosphere modification',
              'Somatic maintenance', 'Deliberate neural restructuring', 'Nonfood provision', 'Food provision',
              'Somatic maintenance', 'Technosphere modification', 'Maintenance of surroundings',
              'Experience oriented', 'Nonfood provision', 'Experience oriented', 'Organization', 'Organization',
              'Experience oriented', 'Food provision', 'Food provision', 'Deliberate neural restructuring',
              'Somatic maintenance', 'Experience oriented', 'Technosphere modification',
              'Maintenance of surroundings']

# Sort key of each top-level category. Deriving `cat_num` from this single mapping
# guarantees that every subcategory of a category gets the same number; the
# previous hand-written list gave 'Waste management' (Maintenance of surroundings)
# the number 5, which belongs to 'Somatic maintenance', instead of 4.
category_order = {
    'Food provision': 1,
    'Nonfood provision': 2,
    'Technosphere modification': 3,
    'Maintenance of surroundings': 4,
    'Somatic maintenance': 5,
    'Deliberate neural restructuring': 6,
    'Organization': 7,
    'Experience oriented': 8,
}

cat_num = [category_order[category] for category in categories]

subcategories = ['Active recreation','Health care','Allocation','Buildings','Physical child care',
                'Schooling & research','Energy','Food growth & collection','Hygiene & grooming','Infrastructure',
                'Inhabited environment','Interactive','Materials','Meals','Human transportation',
                'Material transportation','Passive','Food preparation','Food processing','Religious practice',
                'Sleep & bedrest','Social','Artifacts','Waste management']

In [64]:
# Compute global human day
#
# Per country: merge time-use-survey (TUS) hours with economic-activity hours,
# rescale the economic hours so that they sum to the TUS employment time, and add
# the two. Then aggregate all countries into population-weighted global means per
# subcategory, once for the combined hours and once for the economic hours alone.

TUS_all = TUS[['countryISO3','subcategory','hoursPerDay','uncertainty','dataStatus']]
economic_all = economic.rename(columns={
    'hoursPerDay': 'hoursPerDayEconomic',
    'uncertainty': 'uncertaintyEconomic',
    'dataStatus': 'dataStatusEconomic',
})

# merge() defaults to an inner join: only (country, subcategory) pairs present in
# both tables survive.
all_data = economic_all.merge(TUS_all, on=['countryISO3','subcategory']).set_index('subcategory')
group = all_data.groupby('countryISO3')

all_data_list = []
all_data_econ_list = []

for name, grp in group:
    economic_worktime = grp['hoursPerDayEconomic'].sum()
    TUS_work_employment = grp.loc['work_employment', 'hoursPerDay']

    # Replace M24 subcategory codenames with proper publication names.
    # The three label lists are assigned by position, so this relies on the rows
    # that remain after the drop being in the same order as `subcategories` for
    # every country (a length mismatch raises ValueError; an order mismatch does
    # not). NOTE(review): confirm the row order of the input files.
    grp = grp.drop(['work_employment','work_other'], axis=0)
    grp['Subcategory'] = subcategories
    grp['Category'] = categories
    grp['catNum'] = cat_num
    grp = grp.set_index(['Category','Subcategory']).sort_values(by='catNum').drop('catNum', axis=1)

    # Assume TUS is correct estimation of total working time, scale economic working time to fit (see SI for detail)
    worktime_ratio = TUS_work_employment / economic_worktime

    # Add the worktime ratio to uncertainty in economic working time.
    # Must run before the hours are rescaled: the added term uses the unscaled hours.
    grp['uncertaintyEconomic'] = grp['uncertaintyEconomic'] + \
        abs(1 - worktime_ratio)**2 * grp['hoursPerDayEconomic']**2

    grp['hoursPerDayEconomic'] = grp['hoursPerDayEconomic'] * worktime_ratio

    # Combine TUS with economic
    grp['hoursPerDayCombined'] = grp['hoursPerDay'] + grp['hoursPerDayEconomic']
    # Uncertainty is in units of variance
    grp['uncertaintyCombined'] = grp['uncertainty'] + grp['uncertaintyEconomic']

    all_data_list.append(grp[['countryISO3','region_code','population','hoursPerDayCombined',
                              'uncertaintyCombined','dataStatus','dataStatusEconomic']])

    all_data_econ_list.append(grp[['countryISO3','region_code','population','hoursPerDayEconomic',
                                   'uncertaintyEconomic']])

# Global Human Day
all_data_combined = pd.concat(all_data_list)

# Count each country's population exactly once. De-duplicating on the country code
# (rather than on the population value, as `.unique()` would) keeps two countries
# that happen to report the same population from collapsing into one.
# Assumes `population` is constant across a country's rows - TODO confirm.
total_pop = all_data_combined.drop_duplicates(subset='countryISO3')['population'].sum()

person_hours = all_data_combined['hoursPerDayCombined'] * all_data_combined['population']
global_M24_hours = (person_hours / total_pop).groupby('Subcategory').sum()

pop_weight = all_data_combined['population'] / total_pop
var_combined = all_data_combined['uncertaintyCombined']

# Variance of the weighted mean: sum of weight^2 * variance over countries
# (treats the per-country estimates as independent).
global_M24_uncertainty = (pop_weight**2 * var_combined).groupby('Subcategory').sum()

global_M24_all = global_M24_hours.to_frame('hoursPerDay')
# Reported uncertainty is twice the square root of the summed variance.
global_M24_all['uncertainty'] = 2*np.sqrt(global_M24_uncertainty)

# Global Economic Activity
all_data_econ_combined = pd.concat(all_data_econ_list)

person_hours = all_data_econ_combined['hoursPerDayEconomic'] * all_data_econ_combined['population']
global_M24_hours = (person_hours / total_pop).groupby('Subcategory').sum()

var_combined_econ = all_data_econ_combined['uncertaintyEconomic']
pop_weight = all_data_econ_combined['population'] / total_pop

global_M24_uncertainty = (pop_weight**2 * var_combined_econ).groupby('Subcategory').sum()

global_M24_econ_all = global_M24_hours.to_frame('hoursPerDay')
global_M24_econ_all['uncertainty'] = 2*np.sqrt(global_M24_uncertainty)

In [65]:
# Rescale the global human day proportionally so that its hours total 24:
# each subcategory gives up a share of the excess equal to its share of the total.
initial_hours = global_M24_all['hoursPerDay'].sum()
difference = initial_hours - 24

unscaled_hours = global_M24_all['hoursPerDay']
adj = unscaled_hours - (difference * unscaled_hours / initial_hours)
global_M24_all['hoursPerDay'] = adj

# Round everything that gets written to disk to two decimals.
global_M24_all = global_M24_all.round(2)
global_M24_econ_all = global_M24_econ_all.round(2)
all_data_combined['hoursPerDayCombined'] = all_data_combined['hoursPerDayCombined'].round(2)

In [66]:
# Write the date-stamped result tables to the output directory, in this order:
# main GHD estimate, global economic activity, then the by-country combined data.
for frame, stem in (
    (global_M24_all, 'global_human_day'),
    (global_M24_econ_all, 'global_economic_activity'),
    (all_data_combined, 'all_countries'),
):
    frame.to_csv('{}/{}_{}.csv'.format(outputdir, stem, current_date))