# Loading packages

In [4]:
import pandas as pd
import numpy as np
import os
import requests
from weo import download, WEO
import country_converter as coco

# Shared country converter instance; later cells use it to translate country
# names into ISO3 codes and to read its country table (`converter.data`).
converter = coco.CountryConverter()

# Global variables

In [2]:
# Reference year: selects the WEO column and the rating-history year, and is
# the start period of the IMF data-service query.
year = 2015
# End period of the IMF data-service query; also used in the WEO file name and
# passed to the WEO download call. NOTE: the WEO loading cell reassigns it.
last_year = 2018

## IMF - World Economic Outlook: imf

In [5]:
# Query the IMF SDMX JSON data service (dataset GFSMAB{year}) for the listed
# area codes between `year` and `last_year`.
imf_areas = "AD+AE+AF+AG+AI+AL+AM+AN+AO+AQ+AR+AS+AT+AU+AW+AX+AZ+BA+BB+BD+BE+BF+BG+BH+BI+BJ+BL+BM+BN+BO+BQ+BR+BS+BT+BV+BW+BY+BZ+CA+CC+CD+CF+CG+CH+CI+CK+CL+CM+CN+CO+CR+CS+CU+CV+CW+CX+CY+CZ+DD+DE"
url = (
    "http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/"
    f"GFSMAB{year}/A.{imf_areas}.S1311B.XDC_R_B1GQ.GNLB__Z"
    f"?startPeriod={year}&endPeriod={last_year}"
)
print(url)
# A timeout prevents the notebook from hanging forever on an unresponsive
# server; raise_for_status reports HTTP errors directly instead of letting
# them surface as a confusing JSON decoding failure.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
data

http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/GFSMAB2015/A.AD+AE+AF+AG+AI+AL+AM+AN+AO+AQ+AR+AS+AT+AU+AW+AX+AZ+BA+BB+BD+BE+BF+BG+BH+BI+BJ+BL+BM+BN+BO+BQ+BR+BS+BT+BV+BW+BY+BZ+CA+CC+CD+CF+CG+CH+CI+CK+CL+CM+CN+CO+CR+CS+CU+CV+CW+CX+CY+CZ+DD+DE.S1311B.XDC_R_B1GQ.GNLB__Z?startPeriod=2015&endPeriod=2018


{'CompactData': {'@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
  '@xmlns:xsd': 'http://www.w3.org/2001/XMLSchema',
  '@xsi:schemaLocation': 'http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message https://registry.sdmx.org/schemas/v2_0/SDMXMessage.xsd http://dataservices.imf.org/compact/GFSMAB2015 http://dataservices.imf.org/compact/GFSMAB2015.xsd',
  '@xmlns': 'http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message',
  'Header': {'ID': 'e274413e-f1d7-4e52-89e8-dd81c8704301',
   'Test': 'false',
   'Prepared': '2020-06-02T10:14:17',
   'Sender': {'@id': '1C0',
    'Name': {'@xml:lang': 'en', '#text': 'IMF'},
    'Contact': {'URI': 'http://www.imf.org',
     'Telephone': '+ 1 (202) 623-6220'}},
   'Receiver': {'@id': 'ZZZ'},
   'DataSetID': 'GFSMAB2015'},
  'DataSet': {'@xmlns': 'http://dataservices.imf.org/compact/GFSMAB2015'}}}

In [3]:
# Fetch the WEO release once and cache it on disk; later runs reuse the file.
path = f'../data/weo_{last_year}.csv'
if os.path.exists(path):
    print('File already downloaded:', path)
else:
    download(path, last_year, 2)

w = WEO(path)
imf = w.df.set_index(['ISO', 'Country'])
# NOTE: this rebinds the global `last_year` to the value found in the first row
# of the "Estimates Start After" column.
last_year = int(imf["Estimates Start After"].iloc[0])

# Keep only the subject code and the reference-year column, re-index by ISO
# alone, then pivot so that every WEO subject code becomes a column.
year_column = str(year)
imf = imf.drop(columns=['WEO Country Code'])
imf = imf[['WEO Subject Code', year_column]]
imf = imf.reset_index().set_index(['ISO']).drop(columns=['Country'])
imf = imf.pivot(columns='WEO Subject Code', values=year_column)
# Values that cannot be parsed as numbers become NaN.
imf = imf.apply(pd.to_numeric, errors='coerce')
imf

File already downloaded: ../data/weo_2018.csv


WEO Subject Code,BCA,BCA_NGDPD,FLIBOR6,GGR,GGR_NGDP,GGSB,GGSB_NPGDP,GGX,GGXCNL,GGXCNL_NGDP,...,PCPIEPCH,PCPIPCH,PPPEX,PPPGDP,PPPPC,PPPSH,TMG_RPCH,TM_RPCH,TXG_RPCH,TX_RPCH
ISO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABW,0.112,4.147,,1.267,26.307,,,1.346,-0.078,-1.625,...,-0.921,0.481,1.207,3.992,,0.003,,,,
AFG,0.576,2.869,,301.356,24.569,,,318.255,-16.898,-1.378,...,1.131,-0.662,19.083,64.276,,0.056,24.293,15.573,13.688,-11.078
AGO,-10.273,-8.841,,,24.134,193.585,1.094,,-406.964,-2.917,...,12.091,9.159,70.497,197.885,,0.171,-20.184,-21.818,5.750,6.721
ALB,-0.978,-8.590,,377.545,26.322,,,435.698,-58.153,-4.054,...,1.937,1.910,44.041,32.567,,0.028,1.842,-0.085,3.541,5.005
ARE,17.555,4.902,,381.376,28.996,,,425.612,-44.236,-3.363,...,3.596,4.070,2.021,650.826,,0.562,0.585,0.935,1.540,4.334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WSM,-0.025,-3.093,,0.683,35.067,,,0.759,-0.075,-3.868,...,0.370,1.923,1.939,1.005,,0.001,,,,
YEM,-2.816,-6.176,,,10.680,,,,-857.270,-8.750,...,32.035,11.996,111.367,87.976,,0.076,-10.992,-7.131,-42.603,-33.830
ZAF,-14.592,-4.593,,,28.141,-167.080,-4.119,,-193.336,-4.772,...,5.281,4.575,5.543,730.903,,0.632,5.469,5.387,3.381,2.802
ZMB,-0.831,-3.911,,34.421,18.770,,,51.545,-17.124,-9.338,...,21.112,10.107,2.940,62.369,,0.054,3.060,3.060,-11.374,-11.374


## Generating quantitative data

In [4]:
# Derived indicators: two WEO series are copied as-is, three are ratios of a
# WEO series to NGDP.
derived = {
    "GDPgrowth": imf["NGDP_RPCH"],
    "CurrentAccountBal/GDP": imf["BCA_NGDPD"],
    'GrossPublicDebt/GDP': imf['GGXWDG'] / imf['NGDP'],
    'NetPublicDebt/GDP': imf['GGXWDN'] / imf['NGDP'],
    'PrimarySurplus/GDP': imf['GGXONLB'] / imf['NGDP'],
}

# Names of the indicators, in the order they are exported.
quantitative = list(derived)

# NOTE: `imf` is rebound to the subset, so the raw WEO columns are no longer
# available afterwards and this cell cannot be re-run without reloading.
imf = imf.assign(**derived)[quantitative]
imf.to_csv(f"../data/weo_{year}_subset.csv")
imf

WEO Subject Code,GDPgrowth,CurrentAccountBal/GDP,GrossPublicDebt/GDP,NetPublicDebt/GDP,PrimarySurplus/GDP
ISO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABW,-0.448,4.147,0.813200,,0.025322
AFG,0.988,2.869,,,
AGO,0.944,-8.841,,,
ALB,2.219,-8.590,,,
ARE,5.065,4.902,,,
...,...,...,...,...,...
WSM,1.635,-3.093,0.577732,,
YEM,-16.678,-6.176,,,
ZAF,1.280,-4.593,,,
ZMB,2.920,-3.911,0.622600,0.560876,-0.064892


In [9]:


# Show how many entries of the primary-surplus ratio are non-null.
imf[['PrimarySurplus/GDP']].info()

<class 'pandas.core.frame.DataFrame'>
Index: 194 entries, ABW to ZWE
Data columns (total 1 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   PrimarySurplus/GDP  97 non-null     float64
dtypes: float64(1)
memory usage: 3.0+ KB


In [8]:
# Number of rows in the IMF subset.
len(imf)

194

### To Do


- [X] Budgetary primary surplus
- [X] Ratio of public debt to GDP
- [X] Current account balance 
- [X] GDP growth rate
- [X] Export to csv

## Index of Economic Freedom: ief

In [5]:
# Page the raw export is attributed to (kept for provenance; not fetched here).
url = 'https://www.heritage.org/index/explore'
ief = pd.read_csv('../data/raw_ief.csv', index_col=['Name'])
# The frame is indexed by country name, so `[0]` would depend on the deprecated
# positional fallback of Series.__getitem__; `.iloc[0]` is explicit.
current_year = str(ief['Index Year'].iloc[0])

# Component scores grouped by category (defined once; the original cell
# repeated these four lists further down).
rule_of_law = ["Property Rights", "Government Integrity", "Judicial Effectiveness"]
government_size = ["Government Spending", "Tax Burden", "Fiscal Health"]
regulatory_efficiency = ["Business Freedom", "Labor Freedom", "Monetary Freedom"]
open_markets = ["Trade Freedom", "Investment Freedom", "Financial Freedom"]
# Currently unused subset; the duplicated "Financial Freedom" entry was removed.
unctad = ["Labor Freedom", "Financial Freedom", "Business Freedom"]

ief = ief.drop(columns=['Index Year'])
#ief = ief[unctad]
# Re-index by ISO3 code so the frame can be merged with the other sources.
ief.index = converter.convert(names=list(ief.index), to='ISO3')
# Divide every score by 100.
ief = ief / 100

# Category aggregates: row-wise mean of the component scores (NaN components
# are skipped by `mean`).
ief["Rule of Law"] = ief[rule_of_law].mean(axis='columns')
ief['Government Size'] = ief[government_size].mean(axis='columns')
ief["Regulatory Efficiency"] = ief[regulatory_efficiency].mean(axis='columns')
ief["Open Markets"] = ief[open_markets].mean(axis='columns')

aggregates = [
    "Rule of Law",
    "Government Size",
    "Regulatory Efficiency",
    "Open Markets"
]
ief[aggregates].to_csv(f'../data/index_economic_freedom_{current_year}_AggregatesSubset.csv')
ief

Unnamed: 0,Overall Score,Property Rights,Judicial Effectiveness,Government Integrity,Tax Burden,Government Spending,Fiscal Health,Business Freedom,Labor Freedom,Monetary Freedom,Trade Freedom,Investment Freedom,Financial Freedom,Rule of Law,Government Size,Regulatory Efficiency,Open Markets
AFG,,,,0.08,0.917,0.812,,0.614,0.675,0.726,,0.55,,0.080,0.8645,0.671667,0.550000
ALB,0.657,0.30,,0.31,0.872,0.761,,0.706,0.529,0.808,0.878,0.70,0.7,0.305,0.8165,0.681000,0.759333
DZA,0.489,0.30,,0.36,0.800,0.387,,0.666,0.505,0.712,0.608,0.25,0.3,0.330,0.5935,0.627667,0.386000
AGO,0.479,0.15,,0.23,0.845,0.501,,0.474,0.432,0.654,0.702,0.40,0.4,0.190,0.6730,0.520000,0.500667
ARG,0.441,0.15,,0.34,0.668,0.412,,0.528,0.433,0.596,0.688,0.30,0.3,0.245,0.5400,0.519000,0.429333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VEN,0.343,0.05,,0.20,0.750,0.520,,0.416,0.242,0.428,0.628,0.00,0.2,0.125,0.6350,0.362000,0.276000
VNM,0.517,0.15,,0.31,0.791,0.771,,0.615,0.629,0.668,0.786,0.15,0.3,0.230,0.7810,0.637333,0.412000
YEM,0.537,0.30,,0.18,0.915,0.599,,0.540,0.571,0.685,0.776,0.50,0.3,0.240,0.7570,0.598667,0.525333
ZMB,0.587,0.30,,0.38,0.719,0.780,,0.682,0.460,0.732,0.768,0.55,0.5,0.340,0.7495,0.624667,0.606000


### To Do

- [x] Create aggregate variables for categories
- [ ] Check if Fiscal Freedom == Financial Freedom
- [x] Transform countries into ISO3 format
- [x] Export csv

# Rating

In [6]:
# Second table on the page; its last column is used as the index.
categories = pd.read_html(
    'https://tradingeconomics.com/country-list/rating',
    index_col = [-1]
)[1]

# Map every grade label to a score in [0, 1] (assuming column 0 is a 0-100
# score and the remaining columns are grade labels -- TODO confirm against the
# page layout).
# The original loop re-created `fuzzies` on every iteration, so only column 1
# and the last column ended up in the mapping; here all label columns are
# accumulated, and `fuzzies` is defined even when there are no extra columns.
scores = categories.iloc[:, 0] / 100
fuzzies = {}
for j in range(1, categories.shape[1]):
    fuzzies.update(zip(categories.iloc[:, j], scores))

def fuzzy_rating(grade):
    """Return the mean fuzzy score of a Series of rating grades.

    Each grade is looked up in the module-level ``fuzzies`` mapping and the
    mean of the mapped values is returned. Grades missing from the mapping
    become NaN and are skipped by the mean.
    """
    return grade.map(fuzzies).mean()

In [7]:
# First table on the page lists the rated countries; turn the names into the
# slugs used in the per-country URLs.
countries = pd.read_html(
    'https://tradingeconomics.com/country-list/rating',
)[0]
countries = countries["Unnamed: 0"].to_list()
countries = [i.lower().strip().replace(" ", "-") for i in countries]

# Collect one single-row frame per country and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and growing a frame inside a loop
# is quadratic.
frames = []
for country in countries:
    try:
        url = f"https://tradingeconomics.com/{country}/rating"
        history = pd.read_html(
                url,
                parse_dates=True,
                index_col=[-1]
        )[0]
        # Rows of the reference year. `.loc` is used because selecting rows
        # with `df["2015"]` is no longer supported by recent pandas versions.
        yearly = history.loc[str(year)]
        if yearly.empty:
            raise KeyError(str(year))
        # One value per country: the mean fuzzy score of that year's grades.
        score = fuzzy_rating(grade=yearly["Rating"])
        iso3 = converter.convert(names = country, to = 'ISO3',)
        frames.append(
            pd.DataFrame({"Rating": [score]}, index=pd.Index([iso3], name="ISO3"))
        )
    except (KeyError, ValueError) as e:
        # Best effort: countries without a usable table for `year` are skipped.
        print(f"{country} data is not available")
        print(f"Warning message: {e}")

rating = pd.concat(frames) if frames else pd.DataFrame()
rating



cayman-islands data is not available




grenada data is not available
mali data is not available
montserrat data is not available
niger data is not available
tanzania data is not available


Unnamed: 0_level_0,Rating
ISO3,Unnamed: 1_level_1
AND,0.5750
AGO,0.3625
ARG,
ARM,0.3500
AUT,0.9500
...,...
UKR,0.1500
GBR,1.0000
URY,0.6000
VEN,0.1500


# Merging data: df

In [16]:
# Split ISO3 codes by whether the converter's OECD column is missing
# (developing / non-OECD) or filled (OECD).
developing = converter.data["ISO3"][np.isnan(converter.data["OECD"])].to_list()
oecd = converter.data["ISO3"][~np.isnan(converter.data["OECD"])].to_list()

# Inner-join ratings, economic-freedom scores and the two IMF indicators on
# the ISO3 index.
df = pd.merge(rating, ief, left_index=True, right_index=True)
df = pd.merge(df, imf[["GDPgrowth", "CurrentAccountBal/GDP"]], left_index=True, right_index=True)
df.columns = df.columns.str.replace(' ', '_')
df.index.name = 'ISO3'
# Drop every column that has a missing value, then attach the ratings again
# and drop the rows that still have a missing value.
df = df.dropna(axis='columns', )
df = pd.merge(df, rating, left_index=True, right_index=True)
df = df.dropna()
# Reverse the column order.
df = df.iloc[:,::-1]
df[df.index.isin(developing)].to_csv('../data/merged_developing.csv') # Subsetting non-OECD countries
# Fixed: this export used to filter on `developing`, which made it a copy of
# the developing-countries file.
df[df.index.isin(oecd)].to_csv('../data/merged_oecd.csv') # Subsetting OECD countries
df.to_csv('../data/merged_full.csv')
df

Unnamed: 0_level_0,Rating,CurrentAccountBal/GDP,GDPgrowth,Regulatory_Efficiency,Government_Size,Rule_of_Law,Monetary_Freedom,Labor_Freedom,Business_Freedom,Government_Spending,Government_Integrity
ISO3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AGO,0.3625,-8.841,0.944,0.520000,0.6730,0.190,0.654,0.432,0.474,0.501,0.23
ARM,0.3500,-2.581,3.254,0.725333,0.8360,0.280,0.706,0.643,0.827,0.828,0.36
AUT,0.9500,1.923,1.092,0.783333,0.3495,0.795,0.803,0.767,0.780,0.198,0.69
AZE,0.5500,-0.438,0.648,0.778000,0.7390,0.240,0.798,0.791,0.745,0.597,0.28
BHS,0.5750,-13.656,1.045,0.743333,0.9050,0.705,0.788,0.753,0.689,0.832,0.71
...,...,...,...,...,...,...,...,...,...,...,...
UKR,0.1500,1.721,-9.773,0.620333,0.5335,0.225,0.786,0.482,0.593,0.280,0.25
GBR,1.0000,-4.914,2.349,0.803667,0.4660,0.830,0.744,0.756,0.911,0.303,0.76
URY,0.6000,-0.960,0.371,0.695000,0.7110,0.715,0.716,0.643,0.726,0.651,0.73
VEN,0.1500,-6.616,-6.221,0.362000,0.6350,0.125,0.428,0.242,0.416,0.520,0.20


In [12]:
# ISO3 codes whose OECD entry is missing in the converter table, and their count.
is_non_oecd = np.isnan(converter.data["OECD"])
developing = converter.data.loc[is_non_oecd, "ISO3"].to_list()
len(developing)

215

## To Do

- [x] Merge imf and ief