In [1]:
import pandas as pd
import numpy as np

## Read data

In [2]:
# Load the 'Data' sheet of the pwt1001.xlsx workbook.
pwt_raw = pd.read_excel('pwt1001.xlsx', sheet_name='Data')
# Keep only observations from 1960 onward.
data = pwt_raw.loc[pwt_raw['year'] >= 1960]

In [3]:
# Load the working-age population file and reshape it from wide to long:
# after dropping the descriptive columns, every remaining column other than
# 'Country Code' is treated as a year and melted into (year, wapop) rows.
workagepop = (
    pd.read_csv('Working age pop.csv')
    .drop(columns=['Indicator Name', 'Indicator Code', 'Country Name'])
    .melt(id_vars=['Country Code'], var_name='year', value_name='wapop')
    .rename(columns={'Country Code': 'countrycode'})
    .astype({'year': int})  # year labels come from column headers, so cast them to int
)

# Inner join on (countrycode, year): only country-years present in both sources survive.
pri_data = data.merge(workagepop, on=['countrycode', 'year'])

In [4]:
# Load the investment share of GDP file and reshape it from wide to long:
# after dropping the descriptive columns, every remaining column other than
# 'Country Code' is treated as a year and melted into (year, i_y) rows.
invest = (
    pd.read_csv('capital of gdp.csv')
    .drop(columns=['Indicator Name', 'Indicator Code', 'Country Name'])
    .melt(id_vars=['Country Code'], var_name='year', value_name='i_y')
    .rename(columns={'Country Code': 'countrycode'})
    .astype({'year': int})  # year labels come from column headers, so cast them to int
)

# Inner join on (countrycode, year): only country-years present in both frames survive.
pri_data = pri_data.merge(invest, on=['countrycode', 'year'])

## Variables of interest: preliminary setup

In [5]:
# Dependent variable: real GDP per working-age person.
# rgdpna is scaled by 10**6 before dividing by the working-age population
# (presumably because rgdpna is reported in millions -- confirm against the data docs).
pri_data['rgdpwap'] = (pri_data['rgdpna'] * 10**6) / pri_data['wapop']

# Year-over-year growth rates are computed within each country on a view sorted by
# (countrycode, year), so the result does not depend on the incoming row order.
# fill_method=None stops pct_change from forward-filling missing values, which
# would otherwise record spurious 0% growth across gaps and bias the period
# averages computed later. Results are aligned back to pri_data by index on assignment.
_by_country = pri_data.sort_values(['countrycode', 'year']).groupby('countrycode')

# Independent variable component: TFP growth.
pri_data['tfpg'] = _by_country['rtfpna'].pct_change(fill_method=None)

# Independent variable component: growth of the working-age population.
pri_data['popg'] = _by_country['wapop'].pct_change(fill_method=None)

## Make dataset for replication

In [6]:
# Replication window: keep rows up to and including 1985
# (the lower bound of 1960 was applied when the main data was loaded).
pri_data_1 = pri_data.loc[pri_data['year'] <= 1985]

# Start the regression dataset with one row per country code found in that window,
# in order of first appearance.
rep_data = pri_data_1[['countrycode']].drop_duplicates().reset_index(drop=True)

In [7]:
# Country-code lists defining the three samples used for the regressions.
non_oil=['DZA', 'AGO', 'BEN', 'BWA', 'BFA', 'BDI', 'CMR', 'CAF', 'TCD', 'COG', 'EGY', 'ETH', 'GHA', 'CIV', 'KEN', 'LBR', 'MDG', 'MWI', 'MLI', 'MRT', 'MUS', 'MAR', 'MOZ', 'NER', 'NGA', 'RWA', 'SEN', 'SLE', 'SOM', 'ZAF', 'SDN', 'TZA', 'TGO', 'TUN', 'UGA', 'COD', 'ZMB', 'ZWE', 'BGD', 'MMR', 'HKG', 'IND', 'ISR', 'JPN', 'JOR', 'KOR', 'MYS', 'NPL', 'PAK', 'PHL', 'SGP', 'LKA', 'SYR', 'THA', 'AUT', 'BEL', 'DNK', 'FIN', 'FRA', 'DEU', 'GRC', 'IRL', 'ITA', 'NLD', 'NOR', 'PRT', 'ESP', 'SWE', 'CHE', 'TUR', 'GBR', 'CAN', 'CRI', 'DOM', 'SLV', 'GTM', 'HTI', 'HND', 'JAM', 'MEX', 'NIC', 'PAN', 'TTO', 'USA', 'ARG', 'BOL', 'BRA', 'CHL', 'COL', 'ECU', 'PRY', 'PER', 'URY', 'VEN', 'AUS', 'IDN', 'NZL', 'PNG']
intermediate=['DZA', 'BWA', 'CMR', 'ETH', 'CIV', 'KEN', 'MDG', 'MWI', 'MLI', 'MAR', 'NGA', 'SEN', 'ZAF', 'TZA', 'TUN', 'ZMB', 'ZWE', 'BGD', 'MMR', 'HKG', 'IND', 'ISR', 'JPN', 'JOR', 'KOR', 'MYS', 'PAK', 'PHL', 'SGP', 'LKA', 'SYR', 'THA', 'AUT', 'BEL', 'DNK', 'FIN', 'FRA', 'DEU', 'GRC', 'IRL', 'ITA', 'NLD', 'NOR', 'PRT', 'ESP', 'SWE', 'CHE', 'TUR', 'GBR', 'CAN', 'CRI', 'DOM', 'SLV', 'GTM', 'HTI', 'HND', 'JAM', 'MEX', 'NIC', 'PAN', 'TTO', 'USA', 'ARG', 'BOL', 'BRA', 'CHL', 'COL', 'ECU', 'PRY', 'PER', 'URY', 'VEN', 'AUS', 'IDN', 'NZL']
# OECD membership has changed since the original study; the authors' list is kept as-is.
oecd=['JPN', 'AUT', 'BEL', 'DNK', 'FIN', 'FRA', 'DEU', 'GRC', 'IRL', 'ITA', 'NLD', 'NOR', 'PRT', 'ESP', 'SWE', 'CHE', 'TUR', 'GBR', 'CAN', 'USA', 'AUS', 'NZL']

# Add 0/1 membership dummies: n = non-oil, i = intermediate, o = OECD.
for dummy, members in (('n', non_oil), ('i', intermediate), ('o', oecd)):
    rep_data[dummy] = rep_data['countrycode'].isin(members).astype(int)

### Dependent variables

In [8]:
# Log GDP per working-age person at the start (1960) and end (1985) of the window.
# For each year: inner-join that year's level onto rep_data, take logs, and drop
# the temporary level column again. Countries lacking a row for the year are dropped.
for yr, log_col in ((1960, 'lgdp60'), (1985, 'lgdp85')):
    level = pri_data_1.loc[pri_data_1['year'] == yr, ['countrycode', 'rgdpwap']]
    rep_data = rep_data.merge(level, on='countrycode')
    rep_data[log_col] = np.log(rep_data['rgdpwap'])
    rep_data = rep_data.drop(columns='rgdpwap')

# Log difference of GDP per working-age person, 1960-1985.
rep_data['lgdpdiff'] = rep_data['lgdp85'] - rep_data['lgdp60']

### Independent variables

In [9]:
# Average working-age population growth per country over the replication window.
pop = pri_data_1.groupby('countrycode', as_index=False)['popg'].mean()
rep_data = rep_data.merge(pop, on='countrycode').rename(columns={'popg': 'avg_popg'})

# Average investment share of GDP per country, stored in logs; the level is not kept.
s = pri_data_1.groupby('countrycode', as_index=False)['i_y'].mean()
rep_data = (
    rep_data
    .merge(s, on='countrycode')
    .assign(ls=lambda d: np.log(d['i_y']))
    .drop(columns='i_y')
)

# Average human capital index per country; the level 'hc' stays alongside its log.
hc = pri_data_1.groupby('countrycode', as_index=False)['hc'].mean()
rep_data = rep_data.merge(hc, on='countrycode').assign(lschool=lambda d: np.log(d['hc']))

# Construct log(n + g + delta), using the authors' assumption g + delta = 0.05.
g_plus_delta = 0.05
rep_data['lgnd'] = np.log(rep_data['avg_popg'] + g_plus_delta)

  result = getattr(ufunc, method)(*inputs, **kwargs)


## Make dataset for extension

In [27]:
# Start the extension dataset with one row per country code in pri_data,
# in order of first appearance.
ext_data = pri_data[['countrycode']].drop_duplicates().reset_index(drop=True)

In [28]:
# Add 0/1 membership dummies: n = non-oil, i = intermediate, o = OECD.
for dummy, members in (('n', non_oil), ('i', intermediate), ('o', oecd)):
    ext_data[dummy] = ext_data['countrycode'].isin(members).astype(int)

### Dependent variables

In [29]:
# Log GDP per working-age person in 1960 and in 1985.
# For each year: inner-join that year's level onto ext_data, take logs, and drop
# the temporary level column again. Countries lacking a row for the year are dropped.
for yr, log_col in ((1960, 'lgdp60'), (1985, 'lgdp85')):
    level = pri_data.loc[pri_data['year'] == yr, ['countrycode', 'rgdpwap']]
    ext_data = ext_data.merge(level, on='countrycode')
    ext_data[log_col] = np.log(ext_data['rgdpwap'])
    ext_data = ext_data.drop(columns='rgdpwap')

# Log difference of GDP per working-age person, 1960-1985.
ext_data['lgdpdiff'] = ext_data['lgdp85'] - ext_data['lgdp60']

### Independent variables

In [31]:
# NOTE(review): every average below is taken over all years present in pri_data
# (no 1985 cutoff is applied here), while the dependent variable for ext_data
# ends in 1985 -- confirm that this difference in periods is intended.

# Average working-age population growth per country.
pop = pri_data.groupby('countrycode', as_index=False)['popg'].mean()
ext_data = ext_data.merge(pop, on='countrycode').rename(columns={'popg': 'avg_popg'})

# Average TFP growth per country.
tfpg = pri_data.groupby('countrycode', as_index=False)['tfpg'].mean()
ext_data = ext_data.merge(tfpg, on='countrycode').rename(columns={'tfpg': 'avg_tfpg'})

# Average depreciation rate per country.
delta = pri_data.groupby('countrycode', as_index=False)['delta'].mean()
ext_data = ext_data.merge(delta, on='countrycode').rename(columns={'delta': 'avg_delta'})

# Average investment share of GDP per country, stored in logs; the level is not kept.
s = pri_data.groupby('countrycode', as_index=False)['i_y'].mean()
ext_data = (
    ext_data
    .merge(s, on='countrycode')
    .assign(ls=lambda d: np.log(d['i_y']))
    .drop(columns='i_y')
)

# Average human capital index per country; the level 'hc' stays alongside its log.
hc = pri_data.groupby('countrycode', as_index=False)['hc'].mean()
ext_data = ext_data.merge(hc, on='countrycode').assign(lschool=lambda d: np.log(d['hc']))

# Construct log(n + g + delta) from the country-level averages
# instead of a fixed g + delta constant.
n_g_delta = ext_data['avg_popg'].add(ext_data['avg_tfpg']).add(ext_data['avg_delta'])
ext_data['lgnd'] = np.log(n_g_delta)

In [35]:
# Cross-country mean of (average TFP growth + average depreciation rate);
# rows where either term is missing are skipped by mean().
ext_data['avg_tfpg'].add(ext_data['avg_delta']).mean()

0.04658899269165173