In [1]:
import pandas as pd
import numpy as np

In [2]:
## MAIN FUNCTION - input

## INPUT FILE
# Flag kept from the original configuration; it is not referenced by the
# cells visible in this notebook.
PPP = True

# IMF World Economic Outlook workbook; ',' is parsed as a thousands separator.
df_imf = pd.read_excel('IMF_WEOApr2022all.xlsx', thousands=',')
# World Bank GDP table.
df_worldbank = pd.read_csv('gdp_raw.csv')

# Distinct country codes present in each source.
countries_imf = df_imf['ISO'].unique()
countries_worldbank = df_worldbank['Country Code'].unique()

## OUTPUT FILE
save_file_name = "GDP_ppp.csv"

In [3]:
### references, data from IMF

def _weo_series(weo, subject_code, years):
    """Return one WEO indicator as a frame indexed by 'Country Code'.

    Parameters
    ----------
    weo : DataFrame
        WEO table with 'WEO Subject Code', 'ISO' and integer year columns.
    subject_code : str
        Value of 'WEO Subject Code' to keep (e.g. 'PPPGDP').
    years : iterable of int
        Year column labels to keep, in order.

    Returns
    -------
    DataFrame
        One row per country code, one column per requested year.
    """
    rows = weo[weo['WEO Subject Code'] == subject_code]
    return (
        rows[['ISO', *years]]
        .rename(columns={'ISO': 'Country Code'})
        .set_index('Country Code')
    )


# 'PPPGDP' rows for 2010-2027, kept as a reference table.
df_ppp = _weo_series(df_imf, 'PPPGDP', range(2010, 2028))

# 'NGDP_RPCH' rows (growth in percent) for 2015-2027.
df_growth = _weo_series(df_imf, 'NGDP_RPCH', range(2015, 2028))
# '--' marks missing values. Cast to float explicitly: interpolate() does not
# accept object-dtype data, and replace() is not guaranteed to downcast the
# columns back to a numeric dtype on its own.
df_growth = df_growth.replace('--', np.nan).astype(float)
# Countries without a single growth figure cannot be projected.
df_growth = df_growth.dropna(axis=0, how='all')
# Fill remaining gaps along the year axis, including leading/trailing gaps.
df_growth = df_growth.T.interpolate(limit_direction='both').T

# print (df_growth.loc[['USA','IND','CHN']])

In [4]:
### GDP data from 2010-2020 in World Bank
# Keep only the country code plus the yearly values, fill gaps by linear
# interpolation along the year axis (in both directions), then discard any
# country that still has a missing value.
df = (
    df_worldbank
    .drop(columns=['Country Name', 'Indicator Name', 'Indicator Code'])
    .set_index(['Country Code'])
    .T.interpolate(limit_direction='both').T
    .dropna(axis=0)
)
# print (df.loc[['USA','IND','CHN']])

In [5]:
# Keep only countries present in both tables. The World Bank columns carry
# string year labels and the IMF growth columns integer year labels, so the
# two sets of year columns do not collide.
df_combine = pd.merge(df, df_growth, how='inner', on='Country Code')

In [6]:
# Inspect the merged column labels: string labels ('2010'...) come from the
# World Bank table, integer labels (2015...) from the IMF growth table.
df_combine.columns

Index(['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019', '2020',   2015,   2016,   2017,   2018,   2019,   2020,   2021,
         2022,   2023,   2024,   2025,   2026,   2027],
      dtype='object')

In [7]:
# Project GDP forward from the last World Bank year (2020).
# String column labels ('2021') hold GDP levels; integer labels (2021) hold
# the IMF growth rate in percent for that year.

# 2021-2028: compound with the IMF growth rate of the year being projected.
# The growth figure for year Y describes the change from Y-1 to Y, so the
# level of year Y must use column Y (not Y-1). The IMF columns end in 2027;
# that last rate is carried forward once to obtain 2028.
for year in range(2021, 2029):
    growth_year = min(year, 2027)
    df_combine[str(year)] = df_combine[str(year - 1)] * (df_combine[growth_year] / 100 + 1)

# 2029-2050: compound with the mean of the 2015-2019 growth rates. The rate
# does not depend on the loop variable, so it is computed once.
rate = (df_combine[2015] + df_combine[2016] + df_combine[2017] + df_combine[2018] + df_combine[2019]) / 5
for year in range(2029, 2051):
    df_combine[str(year)] = df_combine[str(year - 1)] * (rate / 100 + 1)

In [8]:
# Keep only the GDP level columns (string year labels 2010-2050); the
# integer-labelled growth columns are dropped here.
gdp_year_labels = [str(year) for year in range(2010, 2051)]
df_combine = df_combine[gdp_year_labels]

In [9]:
# Override the 'CHN' row for 2029-2050: each year is the previous year's
# value multiplied by 1.035.
for year in range(2029, 2051):
    previous_value = df_combine.loc['CHN', str(year - 1)]
    df_combine.at['CHN', str(year)] = previous_value * 1.035

In [10]:
# Write the combined GDP table to the output path set in `save_file_name`.
df_combine.to_csv(save_file_name)

In [11]:
def check_countries(filename, country_names_file='../../data/dl1_countrycodeorg_country_name.csv'):
    """Print how the country codes in a CSV differ from the reference lists.

    Parameters
    ----------
    filename : str
        CSV file with a 'Country Code' column to be checked.
    country_names_file : str, optional
        CSV reference table with a 'Country Code' column plus 'country' and
        'WBCountry' columns. A code counts as a GBD code when 'country' is
        not null and as a WB code when 'WBCountry' is not null. Defaults to
        the path this notebook has always used.

    Returns
    -------
    None
        Four lines are printed; each shows a sorted list of codes and its
        length. "Subtraction" lists reference codes missing from `filename`,
        "Plus" lists codes in `filename` that the reference does not have.
    """
    codes_in_file = set(pd.read_csv(filename)['Country Code'].unique())

    reference = pd.read_csv(country_names_file)
    gbd_codes = set(reference.loc[reference['country'].notnull(), 'Country Code'])
    wb_codes = set(reference.loc[reference['WBCountry'].notnull(), 'Country Code'])

    sub_WB = sorted(wb_codes - codes_in_file)
    plus_WB = sorted(codes_in_file - wb_codes)
    sub_GBD = sorted(gbd_codes - codes_in_file)
    plus_GBD = sorted(codes_in_file - gbd_codes)

    print('Subtraction from WB:', sub_WB, len(sub_WB))
    print('Plus from WB:', plus_WB, len(plus_WB))
    print('Subtraction from GBD :', sub_GBD, len(sub_GBD))
    print('Plus from GBD :', plus_GBD, len(plus_GBD))
# check_countries(pop_save_file_name)


In [12]:
# Compare the country codes in the saved projection file with the reference
# lists; the differences are printed below.
check_countries("GDP_ppp.csv")

Subtraction from WB: ['AND', 'ASM', 'BMU', 'CHI', 'CUB', 'CUW', 'CYM', 'ERI', 'FRO', 'GIB', 'GRL', 'GUM', 'IMN', 'LIE', 'MAF', 'MCO', 'MNP', 'NCL', 'PRK', 'PYF', 'SSD', 'SXM', 'SYR', 'TCA', 'VEN', 'VGB', 'VIR', 'XKX', 'YEM'] 29
Plus from WB: [] 0
Subtraction from GBD : ['AND', 'ASM', 'BMU', 'COK', 'CUB', 'ERI', 'GRL', 'GUM', 'MCO', 'MNP', 'NIU', 'PRK', 'PSE', 'SSD', 'SYR', 'TKL', 'TWN', 'VEN', 'VIR', 'YEM'] 20
Plus from GBD : ['ABW', 'HKG', 'MAC'] 3


## CIA 
GDP for each country, taken from the CIA World Factbook.
This source is used because the World Bank and IMF data do not cover all 204 GBD countries.
 https://www.cia.gov/the-world-factbook/field/real-gdp-purchasing-power-parity/country-comparison

In [13]:
import pandas as pd
## MAIN FUNCTION - input

## INPUT FILE
df_gdp_cia = pd.read_csv('gdp_raw_cia.csv')
country_names_cia = pd.read_csv('d2_countrycode_ciagov.csv')

## OUTPUT FILE
save_file_name_gdp_cia = "GDP_ppp_cia.csv"

# Attach country codes by matching the CIA 'name' column to the lookup
# table's 'Entity' column, then keep only the columns written to the output.
cia_output_columns = ['Country Code', 'value', 'date_of_information']
gdp_cia = df_gdp_cia.merge(country_names_cia, left_on='name', right_on='Entity')
gdp_cia = gdp_cia[cia_output_columns]

def changevalue(str):
    """Convert the argument to a float using the built-in ``float``.

    The parameter name shadows the built-in ``str`` inside this function; it
    is kept as-is so existing keyword calls keep working.
    """
    converted = float(str)
    return converted

# Strip thousands separators and the currency symbol from the text values.
# regex=False makes both patterns literal: '$' is a regex metacharacter, and
# the default for `regex` is not the same in every pandas version.
# NOTE(review): the column is still text after this step; `changevalue` is
# defined above but never applied - confirm whether a numeric column is wanted.
gdp_cia['value'] = (
    gdp_cia['value']
    .str.replace(',', '', regex=False)
    .str.replace('$', '', regex=False)
)

# Index and order the rows by country code before saving.
gdp_cia = gdp_cia.set_index('Country Code').sort_index()
gdp_cia.to_csv(save_file_name_gdp_cia)

# Report how the saved country codes differ from the reference lists.
check_countries(save_file_name_gdp_cia)

Subtraction from WB: ['CHI'] 1
Plus from WB: ['AIA', 'COK', 'FLK', 'GGY', 'JEY', 'MSR', 'NIU', 'PSE', 'SHN', 'SPM', 'TKL', 'TWN', 'WLF'] 13
Subtraction from GBD : [] 0
Plus from GBD : ['ABW', 'AIA', 'CUW', 'CYM', 'FLK', 'FRO', 'GGY', 'GIB', 'HKG', 'IMN', 'JEY', 'LIE', 'MAC', 'MAF', 'MSR', 'NCL', 'PYF', 'SHN', 'SPM', 'SXM', 'TCA', 'VGB', 'WLF', 'XKX'] 24


