In [1]:
import pandas as pd
import os

In [3]:
## MAIN FUNCTION - input

## INPUT FILE
# Only the columns used by the processing functions below are loaded.
df_input_lab = pd.read_csv(
    'laborparticipation_p.csv',
    usecols=['ref_area', 'time', 'sex', 'classif1', 'obs_value'],
)
# Text file that receives the per-series OLS fit summary (series key and R^2).
summaryfile = './laborparticipation_ols.txt'

# Country lookup table; codemap maps each 'Country Code' to its 'country' entry.
country_names = pd.read_csv('../../data/dl1_countrycodeorg_country_name.csv')
codemap = dict(zip(country_names['Country Code'], country_names['country']))

# Distinct reference-area codes present in the input.
ref_area_lab = df_input_lab['ref_area'].unique()

## OUTPUT FILE
# Historical rates (pivoted, scaled) and the projected rates, respectively.
lab_save_file_name = "laborparticipation.csv"
lab_final_file_name = "laborparticipation_final.csv"

In [None]:
# Human-readable 5-year age-band labels -> classification codes.
# The bands 0-4 ... 60-64 follow a regular pattern; 65+ is the open-ended top band.
ages_dict = {
    'Age (5-year bands): {}-{}'.format(lower, lower + 4):
        'AGE_5YRBANDS_Y{:02d}-{:02d}'.format(lower, lower + 4)
    for lower in range(0, 65, 5)
}
ages_dict['Age (5-year bands): 65+'] = 'AGE_5YRBANDS_YGE65'


In [4]:
# Classification code of each 5-year age band -> short label 'd<lower bound>'.
# Bands 0-4 ... 60-64 are generated; the open-ended 65+ band is added explicitly.
ages = {
    'AGE_5YRBANDS_Y{:02d}-{:02d}'.format(lower, lower + 4): 'd{}'.format(lower)
    for lower in range(0, 65, 5)
}
ages['AGE_5YRBANDS_YGE65'] = 'd65'

In [5]:
def process_ILO(df, country_code, ages):
    """Extract the male/female age-band rows of one country in tidy form.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Country Code', 'time', 'sex', 'classif1'
        and 'obs_value'.
    country_code : str
        Value of 'Country Code' to keep.
    ages : dict
        Maps the 'classif1' codes to keep onto the short age labels written
        to the new 'age' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Code', 'time', 'sex', 'age', 'obs_value', with
        'sex' recoded to 'M' / 'F'. The input frame is not modified.
    """
    keep = (
        (df['Country Code'] == country_code)
        & df['classif1'].isin(list(ages))
        & df['sex'].isin(['SEX_M', 'SEX_F'])
    )
    # Work on an explicit copy: assigning into a filtered slice (or calling
    # Series.replace(inplace=True) on one of its columns) is chained
    # assignment, which warns today and is a silent no-op under copy-on-write.
    selected = df.loc[keep].copy()
    selected['sex'] = selected['sex'].map({'SEX_M': 'M', 'SEX_F': 'F'})
    selected['age'] = selected['classif1'].map(ages)
    return selected[['Country Code', 'time', 'sex', 'age', 'obs_value']]

def process_all(df_input, ref_areas):
    """Build the (country, sex, age) x year table of observed values.

    `df_input` must carry a 'ref_area' column, which is renamed to
    'Country Code'. Each code in `ref_areas` is run through `process_ILO`
    with the module-level `ages` mapping, and the per-country results are
    stacked and pivoted so that every 'time' value becomes a column.
    Duplicate cells are aggregated with pivot_table's default (mean).
    """
    renamed = df_input.rename(columns={'ref_area': 'Country Code'})
    per_country = [process_ILO(renamed, code, ages) for code in ref_areas]
    stacked = pd.concat(per_country)
    return stacked.pivot_table(
        index=['Country Code', 'sex', 'age'],
        columns=['time'],
        values='obs_value',
    )
  

In [6]:
# Pivot the raw observations, rescale them by 1/100, and store the table
# rounded to three decimals.
df_output_lab = process_all(df_input_lab, ref_area_lab).div(100.0)
df_save = df_output_lab.round(3)
df_save.to_csv(lab_save_file_name)

# Project labor

In [7]:
# Reload the table saved above; after the CSV round-trip the index levels are
# ordinary columns and the year labels are strings.
df = pd.read_csv(lab_save_file_name)
print(df.columns)

Index(['Country Code', 'sex', 'age', '2010', '2011', '2012', '2013', '2014',
       '2015', '2016', '2017', '2018', '2019'],
      dtype='object')


In [8]:
import statsmodels.api as sm 
import numpy as np

# Fit a linear trend to the logit of every (country, sex, age) series over
# 2010-2019 and extrapolate that trend to 2020-2050.
df_logistic = df.set_index(['Country Code', 'sex', 'age'])
# Logit transform; the 1e-16 keeps the denominator non-zero when a rate is 1.
df_logistic = np.log(df_logistic / (1 - df_logistic + 1e-16))

# The regressors are identical for every series, so build them once.
X = sm.add_constant(np.arange(2010,2020))
X_pred = np.arange(2020,2051)

pieces = []
with open(summaryfile, 'w') as f:
    for i in range(len(df_logistic)):
        data = df_logistic.iloc[i]
        Y = data.values
        model = sm.OLS(Y, X)
        results = model.fit()
        # params[0] is the intercept, params[1] the slope per year.
        Y_pred = results.params[0]+results.params[1]* X_pred
        # Summary file: the series key followed by the R^2 of its fit.
        print(data.name, file=f)
        print(results.rsquared, file=f)
        # Series.append was removed in pandas 2.0; pd.concat is the
        # supported way to extend the observed series with the projection.
        data_project = pd.concat([data, pd.Series(Y_pred, index=X_pred)])
        data_project.name = data.name
        pieces.append(data_project.to_frame().T)
data_projection = pd.concat(pieces)
# Inverse logit, written as 1/(1+exp(-x)) so that a large positive logit
# saturates at 1 instead of evaluating inf/inf = NaN.
df_projection = 1 / (1 + np.exp(-data_projection))

  return 1 - self.ssr/self.centered_tss


In [9]:
# Display the projected rates with the index levels turned into columns
# (the levels are unnamed here, hence level_0 / level_1 / level_2).
df_projection.reset_index()

Unnamed: 0,level_0,level_1,level_2,2010,2011,2012,2013,2014,2015,2016,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,AFG,F,d15,0.125,0.128,0.132,0.139,0.146,0.153,0.161,...,0.385820,0.397303,0.408899,0.420598,0.432387,0.444252,0.456182,0.468162,0.480179,0.492219
1,AFG,F,d20,0.160,0.165,0.173,0.184,0.195,0.208,0.220,...,0.580571,0.596180,0.611596,0.626793,0.641743,0.656422,0.670808,0.684882,0.698624,0.712018
2,AFG,F,d25,0.185,0.189,0.195,0.202,0.209,0.216,0.224,...,0.434864,0.444618,0.454415,0.464248,0.474108,0.483989,0.493882,0.503780,0.513675,0.523559
3,AFG,F,d30,0.174,0.178,0.183,0.196,0.209,0.224,0.239,...,0.623218,0.638945,0.654379,0.669494,0.684268,0.698678,0.712707,0.726339,0.739560,0.752360
4,AFG,F,d35,0.184,0.189,0.194,0.208,0.223,0.239,0.255,...,0.657653,0.673270,0.688513,0.703357,0.717785,0.731778,0.745323,0.758411,0.771032,0.783182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6221,ZWE,M,d45,0.946,0.941,0.939,0.939,0.939,0.939,0.939,...,0.926191,0.925620,0.925044,0.924464,0.923880,0.923292,0.922700,0.922103,0.921502,0.920898
6222,ZWE,M,d50,0.964,0.968,0.965,0.965,0.965,0.965,0.966,...,0.964057,0.964009,0.963960,0.963912,0.963863,0.963815,0.963766,0.963717,0.963668,0.963619
6223,ZWE,M,d55,0.945,0.946,0.940,0.940,0.939,0.939,0.939,...,0.919204,0.918279,0.917345,0.916401,0.915448,0.914484,0.913511,0.912527,0.911534,0.910530
6224,ZWE,M,d60,0.886,0.889,0.887,0.888,0.889,0.889,0.890,...,0.891358,0.891475,0.891593,0.891710,0.891827,0.891944,0.892061,0.892178,0.892294,0.892411


In [10]:
# Round the projection, flatten its index into columns, and restore the
# proper names of the three index levels before writing the final CSV.
columns = {
    'level_0': 'Country Code',
    'level_1': 'sex',
    'level_2': 'age',
}
df_save = df_projection.round(3).reset_index()
df_save.rename(columns=columns).to_csv(lab_final_file_name, index=False)

In [11]:
def check_countries(filename,
                    country_names_file='../../data/dl1_countrycodeorg_country_name.csv'):
    """Print how the country codes in a CSV differ from the lookup table.

    Parameters
    ----------
    filename : str
        CSV file with a 'Country Code' column to check.
    country_names_file : str, optional
        Lookup CSV with the columns 'Country Code', 'country' and
        'WBCountry'. Defaults to the path this notebook has always used.

    Two reference lists are taken from the lookup table: codes whose
    'country' value is non-null (labelled "GBD" in the printout) and codes
    whose 'WBCountry' value is non-null (labelled "WB"). For each list the
    function prints the reference codes missing from `filename`
    ("Subtraction") and the codes in `filename` absent from the reference
    ("Plus"), each followed by its count. Nothing is returned.
    """
    df = pd.read_csv(filename)
    countries = set(df['Country Code'].unique())

    country_names = pd.read_csv(country_names_file)
    GBD_countries = set(country_names.loc[country_names['country'].notnull(), 'Country Code'])
    WB_countries = set(country_names.loc[country_names['WBCountry'].notnull(), 'Country Code'])

    plus_GBD = countries - GBD_countries
    sub_GBD = GBD_countries - countries
    plus_WB = countries - WB_countries
    sub_WB = WB_countries - countries

    print('Subtraction from WB:', sub_WB, len(sub_WB))
    print('Plus from WB:', plus_WB, len(plus_WB))
    print('Subtraction from GBD :', sub_GBD, len(sub_GBD))
    print('Plus from GBD :', plus_GBD, len(plus_GBD))
# Compare the country codes in the saved observed-rates file with the lookup table.
check_countries(lab_save_file_name)

Subtraction from WB: {'ASM', 'CUW', 'ABW', 'BMU', 'SYC', 'FRO', 'TCA', 'MNP', 'ATG', 'IMN', 'XKX', 'CHI', 'KNA', 'MCO', 'LIE', 'AND', 'FSM', 'PLW', 'GRD', 'TUV', 'NRU', 'MAF', 'SXM', 'SMR', 'MHL', 'KIR', 'DMA', 'VGB', 'GIB', 'CYM', 'GRL'} 31
Plus from WB: {'X90', 'X63', 'X49', 'X09', 'X31', 'X08', 'X54', 'X07', 'X44', 'X48', 'X24', 'X72', 'ESH', 'X57', 'X17', 'X96', 'X61', 'X14', 'X01', 'X58', 'X45', 'X84', 'X64', 'X18', 'X71', 'X60', 'X35', 'X73', 'TWN', 'X85', 'X02', 'X66', 'X68', 'X75', 'X06', 'X25', 'X77', 'X23', 'X28', 'X76', 'X40', 'X86', 'X11', 'X43', 'X37', 'X38', 'X12', 'X41', 'X27', 'X15', 'X70', 'X03', 'X30', 'X53', 'X62', 'X05', 'X65', 'X89', 'X20', 'X74', 'X22', 'X69', 'X16', 'X32', 'X91', 'X19', 'X93', 'X42', 'X83', 'X04', 'X34', 'X55', 'X59', 'X39', 'X47', 'X78', 'X33', 'X52', 'X29', 'X94', 'X88', 'X51', 'X95', 'X79', 'X46', 'CHA', 'X67', 'X21', 'PSE', 'X26', 'X81', 'X10', 'X36', 'X92', 'X56', 'X82', 'X87', 'X13'} 98
Subtraction from GBD : {'ASM', 'BMU', 'SYC', 'MNP', 'A