In [1]:
import wbdata

In [2]:
# Names of aggregate entries (regions, income groups, lending categories, ...)
# that should not be treated as individual countries. wb_clean_year_data skips
# every record whose country name appears in this list.
# NOTE(review): matching is by exact name — presumably these mirror the names
# returned by the API; confirm the list is complete for the data in use.
wb_regions = ['Arab World', 'Caribbean small states',
              'Central Europe and the Baltics', 'Early-demographic dividend',
              'East Asia & Pacific', 'East Asia & Pacific (excluding high income)',
              'East Asia & Pacific (IDA & IBRD countries)',
              'Euro area', 'Europe & Central Asia',
              'Europe & Central Asia (excluding high income)',
              'Europe & Central Asia (IDA & IBRD countries)',
              'European Union',
              'Fragile and conflict affected situations',
              'Heavily indebted poor countries (HIPC)',
              'High income',
              'IBRD only',
              'IDA & IBRD total',
              'IDA blend',
              'IDA only',
              'IDA total',
              'Late-demographic dividend',
              'Latin America & Caribbean',
              'Latin America & Caribbean (excluding high income)',
              'Latin America & the Caribbean (IDA & IBRD countries)',
              'Least developed countries',
              'Least developed countries: UN classification',
              'Low & middle income',
              'Low income',
              'Lower middle income',
              'Middle East & North Africa',
              'Middle East & North Africa (excluding high income)',
              'Middle East & North Africa (IDA & IBRD countries)',
              'Middle income',
              'North America',
              'Not classified',
              'OECD members',
              'Other small states',
              'Pacific island small states',
              'Post-demographic dividend',
              'Pre-demographic dividend',
              'Small states',
              'South Asia',
              'South Asia (IDA & IBRD)',
              'Sub-Saharan Africa',
              'Sub-Saharan Africa (excluding high income)',
              'Sub-Saharan Africa (IDA & IBRD countries)',
              'Upper middle income',
              'World']

In [3]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

plt.style.use('ggplot')

In [4]:
# Raw records for indicator GC.DOD.TOTL.GD.ZS (presumably government debt, going by the variable name — confirm).
Governmentdebt_raw = wbdata.get_data("GC.DOD.TOTL.GD.ZS")

In [5]:
# Raw records for indicator FP.CPI.TOTL.ZG (presumably inflation, going by the variable name — confirm).
Inflation_raw = wbdata.get_data("FP.CPI.TOTL.ZG")

In [6]:
# Raw records for indicator GC.XPN.TOTL.GD.ZS (presumably government expense, going by the variable name — confirm).
Expense_raw = wbdata.get_data("GC.XPN.TOTL.GD.ZS")

In [7]:
# Raw records for indicator GC.TAX.TOTL.GD.ZS (presumably tax revenue, going by the variable name — confirm).
Tax_raw = wbdata.get_data("GC.TAX.TOTL.GD.ZS")

In [8]:
# Raw records for indicator FI.RES.TOTL.CD (presumably total reserves, going by the variable name — confirm).
Reserves_raw = wbdata.get_data("FI.RES.TOTL.CD")

In [9]:
# Raw records for indicator PA.NUS.FCRF (presumably the official exchange rate, going by the variable name — confirm).
Officialexchange_raw = wbdata.get_data("PA.NUS.FCRF")

In [10]:
def wb_clean_year_data(wb_raw, year, regions=None):
    """Return ``{country name: value}`` for one year of raw indicator records.

    Parameters
    ----------
    wb_raw : iterable of dict
        Records read as ``rec['date']``, ``rec['country']['value']`` and
        ``rec['value']``.
    year : str
        Year to keep. It is compared with ``==`` against ``rec['date']``, so it
        must have the same type as the stored dates (the notebook passes
        strings such as ``'2015'``).
    regions : collection of str, optional
        Country names to leave out. Defaults to the module-level
        ``wb_regions`` list, which preserves the original behaviour.

    Returns
    -------
    dict
        Maps each kept country name to ``float(rec['value'])``. Records whose
        value is missing (``None``) or not convertible to ``float`` are
        omitted. If a country appears more than once for the year, the last
        convertible record wins.
    """
    # A set makes the per-record membership test O(1) instead of scanning a list.
    excluded = frozenset(wb_regions if regions is None else regions)
    rv = {}
    for record in wb_raw:
        date = record['date']
        country = record['country']['value']
        if date != year or country in excluded:
            continue
        try:
            rv[country] = float(record['value'])
        except (TypeError, ValueError):
            # float(None) raises TypeError and float('n/a') raises ValueError:
            # both mean "no usable observation". Any other error is a real
            # problem and is allowed to propagate instead of being swallowed.
            continue
    return rv

In [11]:
# Keep only the '2015' observations, dropping the aggregate names listed in wb_regions.
Governmentdebt_clean = wb_clean_year_data(Governmentdebt_raw, '2015')

In [12]:
# Two-column frame: country name ('pais') and its 2015 value ('Gov.Debt_2015').
Governmentdebt_df = pd.DataFrame(
    list(Governmentdebt_clean.items()),
    columns=['pais', 'Gov.Debt_2015'],
)

In [13]:
Governmentdebt_df

Unnamed: 0,pais,Gov.Debt_2015
0,Albania,79.864445
1,Australia,0.046948
2,"Bahamas, The",48.119517
3,Barbados,141.413895
4,Belarus,37.598178
5,Bhutan,93.969228
6,Botswana,17.909159
7,Brazil,67.537554
8,Colombia,66.667819
9,El Salvador,51.278928


In [14]:
def wb_dict_to_dataframe(dict_clean, nombre_variable, año):
    """Turn a ``{country: value}`` mapping into a three-column DataFrame.

    The columns are, in order: ``'pais'`` (the mapping's keys),
    ``nombre_variable`` (the mapping's values) and ``'año'`` (the ``año``
    argument repeated on every row).
    """
    paises = list(dict_clean.keys())
    valores = list(dict_clean.values())
    columnas = {
        'pais': paises,
        nombre_variable: valores,
        'año': [año] * len(paises),
    }
    return pd.DataFrame(columnas)

In [15]:
# Indicator codes used in this notebook, listed in the same order as the get_data calls.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
variables=['GC.DOD.TOTL.GD.ZS','FP.CPI.TOTL.ZG','GC.XPN.TOTL.GD.ZS','GC.TAX.TOTL.GD.ZS','FI.RES.TOTL.CD','PA.NUS.FCRF']

In [16]:
# Same six get_data requests as cells In [4]–In [9], rebinding the same names.
# NOTE(review): presumably each call fetches the series again, so this cell
# repeats work already done above — confirm whether it can be dropped.
Governmentdebt_raw = wbdata.get_data("GC.DOD.TOTL.GD.ZS")
Inflation_raw = wbdata.get_data("FP.CPI.TOTL.ZG")
Expense_raw = wbdata.get_data("GC.XPN.TOTL.GD.ZS")
Tax_raw = wbdata.get_data("GC.TAX.TOTL.GD.ZS")
Reserves_raw = wbdata.get_data("FI.RES.TOTL.CD")
Officialexchange_raw = wbdata.get_data("PA.NUS.FCRF")

In [17]:
# Maps the column name used in the merged panel to the raw records of each
# indicator. The keys are passed to wb_dict_to_dataframe as the value-column
# name, and the dict's insertion order fixes the order in which the
# per-indicator frames are merged.
raw_lists = {'Deuda': Governmentdebt_raw,
             'Inflacion': Inflation_raw,
             'Gasto público': Expense_raw,
             'Tasa fiscal': Tax_raw,
             'Reservas':Reserves_raw,
             'Tipo de cambio':Officialexchange_raw}
            

In [19]:
# Build the country-year panel for 1989-2010: one row per (pais, año) for which
# every indicator in raw_lists has a numeric value in that year.
#
# The previous version indexed dfs_año[i + 1] for every i, relied on a bare
# `except: pass` to hide the IndexError on the last element, duplicated the
# merge logic for 1989 vs. the other years, and grew the result with
# DataFrame.append (removed in pandas 2.0). The merges below are the same
# sequence of operations without the swallowed exception.
paneles_por_año = []
for año in range(1989, 2011):
    df_otros_años = None
    for k, v in raw_lists.items():
        v_clean = wb_clean_year_data(v, str(año))
        df = wb_dict_to_dataframe(v_clean, k, str(año))
        if df_otros_años is None:
            df_otros_años = df
        else:
            # merge() defaults to an inner join, so a country lacking any one
            # indicator in this year drops out of the year entirely.
            df_otros_años = df_otros_años.merge(df, on=['pais', 'año'])
    paneles_por_año.append(df_otros_años)

# Stack all years once instead of appending inside the loop.
df_año = pd.concat(paneles_por_año, ignore_index=True)

In [20]:
# Save the merged panel to 'base.xlsx' (relative path, resolved against the kernel's working directory).
df_año.to_excel('base.xlsx')

In [21]:
df_año.head (92) 

Unnamed: 0,pais,Deuda,año,Inflacion,Gasto público,Tasa fiscal,Reservas,Tipo de cambio
0,United States,39.128400,1989,4.827003,20.863127,10.782440,1.685836e+11,1.000000
1,Australia,12.253160,1990,7.333022,21.861559,22.476879,1.931874e+10,1.281057
2,"Bahamas, The",24.418825,1990,4.669319,16.610865,13.862919,1.581654e+08,1.000000
3,Bahrain,8.727364,1990,0.929335,27.118964,5.187374,1.292613e+09,0.376000
4,Bhutan,51.188019,1990,10.000000,20.557124,4.413943,8.883421e+07,17.505325
...,...,...,...,...,...,...,...,...
87,Bhutan,45.480489,1992,15.980114,18.196810,5.048773,8.512000e+07,25.918083
88,Botswana,12.532151,1992,16.167612,30.498748,27.845540,3.793419e+09,2.109725
89,Burundi,103.991580,1992,1.823333,27.691933,15.122784,1.799118e+08,208.302667
90,Cyprus,95.833990,1992,6.510220,50.698586,30.868149,1.180838e+09,0.449549


In [22]:
df_año ['pais'].value_counts()

United States       22
Iceland             21
Thailand            21
Jordan              21
Tunisia             21
                    ..
Zambia               1
Egypt, Arab Rep.     1
Poland               1
Latvia               1
Ireland              1
Name: pais, Length: 85, dtype: int64

In [23]:
# For each country, report every year that occurs somewhere in df_año but for
# which that country has no row. Each gap is printed as "<country> <year>" and
# the year is collected in missing_years.
missing_years = []
años_en_panel = df_año['año'].unique()
for c in df_año['pais'].unique():
    años_del_pais = df_año.loc[df_año['pais'] == c, 'año']
    for y in años_en_panel:
        tiene_fila = (años_del_pais == y).any()
        if not tiene_fila:
            print(c, y)
            missing_years.append(y)

Australia 1989
Bahamas, The 1989
Bahamas, The 2001
Bahamas, The 2002
Bahamas, The 2003
Bahamas, The 2004
Bahamas, The 2005
Bahrain 1989
Bahrain 2005
Bahrain 2006
Bahrain 2007
Bahrain 2008
Bahrain 2009
Bahrain 2010
Bhutan 1989
Botswana 1989
Botswana 1997
Botswana 1998
Botswana 1999
Botswana 2000
Botswana 2001
Botswana 2002
Botswana 2003
Botswana 2004
Botswana 2005
Cyprus 1989
Cyprus 1995
Cyprus 1996
Cyprus 1997
Cyprus 1998
Cyprus 1999
Cyprus 2000
Cyprus 2001
Cyprus 2002
Cyprus 2003
Cyprus 2004
Cyprus 2005
Cyprus 2006
Cyprus 2007
Cyprus 2008
Cyprus 2009
Cyprus 2010
Ethiopia 1989
Ethiopia 2000
Ethiopia 2001
Ethiopia 2002
Ethiopia 2003
Ethiopia 2004
Ethiopia 2005
Ethiopia 2006
Ethiopia 2007
Ethiopia 2008
Ethiopia 2009
Ethiopia 2010
Finland 1989
Finland 1995
Finland 1996
Finland 1997
Finland 1998
Finland 1999
Finland 2000
Finland 2001
Finland 2002
Finland 2003
Finland 2004
Finland 2005
Finland 2006
Finland 2007
Finland 2008
Finland 2009
Finland 2010
Germany 1989
Germany 1999
Germany 2000
Ge

In [24]:
missing_years

['1989',
 '1989',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '1989',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '1989',
 '1989',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '1989',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '1989',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '1989',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '1989',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '1989',
 '1989',
 '1989',
 '1989',
 '2000',
 '2001',
 '2004',
 '2005',
 '2006',
 '2007',
 '1989',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '1989',
 '1989',
 '1991',
 '1992',
 '1993',
 

In [25]:
df_año ['Deuda'].value_counts()

22.342832    1
13.365567    1
72.252195    1
17.880003    1
72.735334    1
            ..
38.218236    1
14.183979    1
55.565645    1
31.162545    1
51.434530    1
Name: Deuda, Length: 835, dtype: int64

In [26]:
# For each country, walk over every distinct 'Deuda' value found anywhere in
# df_año and record the values that do not occur in that country's own rows.
# NOTE(review): this compares debt values across countries by float equality,
# so almost every value gets reported for every country (see the printed
# output). It does not identify missing observations. If the goal was to find
# gaps in 'Deuda', a per-(pais, año) check is probably what is needed — confirm.
missing_debt = []
for c in df_año['pais'].unique():
    c_df = df_año.loc[df_año['pais'] == c]
    for y in df_año['Deuda'].unique():
        c_y_df = c_df.loc[c_df['Deuda'] == y]
        if len(c_y_df) == 0:
            print(c, y)
            missing_debt.append(y)

United States 12.2531601688725
United States 24.4188250157928
United States 8.72736418511066
United States 51.1880192526389
United States 11.3711032492715
United States 86.6187458378789
United States 48.3529569208809
United States 10.475619549193
United States 20.9305896021553
United States 24.4806851423724
United States 29.8694677729495
United States 50.7832537948949
United States 45.7414523316285
United States 138.847728191103
United States 138.741327638319
United States 52.8940705476198
United States 128.799599970243
United States 7.54985906289088
United States 2.43434946546257
United States 46.911699356094
United States 39.650354969574
United States 46.6268573934833
United States 87.1967599759139
United States 54.80317756254
United States 19.8470381137812
United States 48.9532540146935
United States 189.246285557587
United States 51.3017098372967
United States 49.7969202729087
United States 77.855311856756
United States 35.3398824635165
United States 96.5806255127663
United States 

In [27]:
missing_debt

[12.2531601688725,
 24.4188250157928,
 8.72736418511066,
 51.1880192526389,
 11.3711032492715,
 86.6187458378789,
 48.3529569208809,
 10.475619549193,
 20.9305896021553,
 24.4806851423724,
 29.8694677729495,
 50.7832537948949,
 45.7414523316285,
 138.847728191103,
 138.741327638319,
 52.8940705476198,
 128.799599970243,
 7.54985906289088,
 2.43434946546257,
 46.911699356094,
 39.650354969574,
 46.6268573934833,
 87.1967599759139,
 54.80317756254,
 19.8470381137812,
 48.9532540146935,
 189.246285557587,
 51.3017098372967,
 49.7969202729087,
 77.855311856756,
 35.3398824635165,
 96.5806255127663,
 12.692933371507,
 18.4477538501483,
 54.7703800956018,
 30.2454942016516,
 30.2033709452908,
 31.5959630364015,
 24.9665623988782,
 11.9432153214405,
 27.9767032232351,
 16.3795586794953,
 47.4653512960086,
 12.1168020575228,
 88.7936648764972,
 96.3842337020678,
 62.1353715549511,
 50.8858787649859,
 36.8418217433888,
 17.0154334973518,
 21.9525351967505,
 17.7517913078306,
 71.2275932643299,


In [39]:
# Countries to keep for the rest of the analysis.
# NOTE(review): the value_counts output after filtering lists only United States,
# Mexico and Brazil, i.e. Argentina and China have no rows in df_año — confirm
# that this is expected.
paises_seleccionados = ['United States', 'Brazil','Argentina','Mexico','China']

In [40]:
# Keep only the rows whose 'pais' is in paises_seleccionados.
# NOTE(review): this rebinds df_año, so the full panel is no longer reachable
# under that name after this cell runs.
df_año = df_año.loc[df_año['pais'].isin(paises_seleccionados)]

In [41]:
df_año.head()

Unnamed: 0,pais,Deuda,año,Inflacion,Gasto público,Tasa fiscal,Reservas,Tipo de cambio
0,United States,39.1284,1989,4.827003,20.863127,10.78244,168583600000.0,1.0
22,Mexico,46.626857,1990,26.651673,16.918575,11.848365,10216850000.0,2.812599
38,United States,40.933944,1990,5.397956,21.558929,10.465285,173093600000.0,1.0
66,Mexico,38.218236,1991,22.662359,13.772679,11.594493,18052070000.0,3.01843
82,United States,44.061597,1991,4.234964,22.884386,10.320342,159272900000.0,1.0


In [42]:
df_año['pais'].value_counts()

United States    22
Mexico           11
Brazil            5
Name: pais, dtype: int64

In [43]:
# Same gap check as before, now on the filtered df_año: for each remaining
# country, print and collect the years present in the frame for which that
# country has no row.
missing_years = []
años_en_panel = df_año['año'].unique()
for c in df_año['pais'].unique():
    años_del_pais = df_año.loc[df_año['pais'] == c, 'año']
    for y in años_en_panel:
        tiene_fila = (años_del_pais == y).any()
        if not tiene_fila:
            print(c, y)
            missing_years.append(y)

Mexico 1989
Mexico 2001
Mexico 2002
Mexico 2003
Mexico 2004
Mexico 2005
Mexico 2006
Mexico 2007
Mexico 2008
Mexico 2009
Mexico 2010
Brazil 1989
Brazil 1990
Brazil 1991
Brazil 1992
Brazil 1993
Brazil 1994
Brazil 1995
Brazil 1996
Brazil 1997
Brazil 1998
Brazil 1999
Brazil 2000
Brazil 2001
Brazil 2002
Brazil 2003
Brazil 2004
Brazil 2005


In [51]:
# Load the defaults table (its head shows columns Country, Time, Total, default, ...).
# The location is resolved from the current user's home directory instead of a
# hard-coded /Users/<name>/... path, so the notebook is not tied to one account.
defaults_csv = Path.home() / "Desktop" / "Data_Base_Defaults.csv"
defaults = pd.read_csv(defaults_csv, encoding='latin1')

In [52]:
defaults.head()

Unnamed: 0,Country,Time,Total,default,default_previo_3aos
0,Afghanistan,1960,0,0,
1,Afghanistan,1961,0,0,
2,Afghanistan,1962,0,0,
3,Afghanistan,1963,0,0,0.0
4,Afghanistan,1964,267,1,0.0
