In [1]:
import wbdata

In [2]:
wb_regions = ['Arab World', 'Caribbean small states',
              'Central Europe and the Baltics', 'Early-demographic dividend',
              'East Asia & Pacific', 'East Asia & Pacific (excluding high income)',
              'East Asia & Pacific (IDA & IBRD countries)',
              'Euro area', 'Europe & Central Asia',
              'Europe & Central Asia (excluding high income)',
              'Europe & Central Asia (IDA & IBRD countries)',
              'European Union',
              'Fragile and conflict affected situations',
              'Heavily indebted poor countries (HIPC)',
              'High income',
              'IBRD only',
              'IDA & IBRD total',
              'IDA blend',
              'IDA only',
              'IDA total',
              'Late-demographic dividend',
              'Latin America & Caribbean',
              'Latin America & Caribbean (excluding high income)',
              'Latin America & the Caribbean (IDA & IBRD countries)',
              'Least developed countries',
              'Least developed countries: UN classification',
              'Low & middle income',
              'Low income',
              'Lower middle income',
              'Middle East & North Africa',
              'Middle East & North Africa (excluding high income)',
              'Middle East & North Africa (IDA & IBRD countries)',
              'Middle income',
              'North America',
              'Not classified',
              'OECD members',
              'Other small states',
              'Pacific island small states',
              'Post-demographic dividend',
              'Pre-demographic dividend',
              'Small states',
              'South Asia',
              'South Asia (IDA & IBRD)',
              'Sub-Saharan Africa',
              'Sub-Saharan Africa (excluding high income)',
              'Sub-Saharan Africa (IDA & IBRD countries)',
              'Upper middle income',
              'World']

In [3]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [4]:
Governmentdebt_raw = wbdata.get_data("GC.DOD.TOTL.GD.ZS")

In [5]:
Inflation_raw = wbdata.get_data("FP.CPI.TOTL.ZG")

In [6]:
Expense_raw = wbdata.get_data("GC.XPN.TOTL.GD.ZS")

In [7]:
Tax_raw = wbdata.get_data("GC.TAX.TOTL.GD.ZS")

In [8]:
Reserves_raw = wbdata.get_data("FI.RES.TOTL.CD")

In [9]:
Officialexchange_raw = wbdata.get_data("PA.NUS.FCRF")

In [10]:
Interestrate_raw = wbdata.get_data("FR.INR.RINR")

In [11]:
Unemployment_raw = wbdata.get_data("SL.UEM.TOTL.ZS")

In [12]:
def wb_clean_year_data(wb_raw, year):
    rv = {}
    for X in wb_raw:
        date = X['date']
        country = X['country']['value']
        if date == year and country not in wb_regions:
            try:
                value = float(X['value'])
                rv[country] = value
            except:
                pass
    return rv

In [13]:
Governmentdebt_clean = wb_clean_year_data(Governmentdebt_raw, '2015')

In [14]:
Governmentdebt_df = pd.DataFrame({'pais': list(Governmentdebt_clean.keys()), 'Gov.Debt_2015': list(Governmentdebt_clean.values())})

In [15]:
Governmentdebt_df

Unnamed: 0,pais,Gov.Debt_2015
0,Albania,79.864445
1,Australia,0.046948
2,"Bahamas, The",48.119517
3,Barbados,141.413895
4,Belarus,37.598178
5,Bhutan,93.969228
6,Botswana,17.909159
7,Brazil,67.537554
8,Colombia,66.667819
9,El Salvador,51.278928


In [16]:
def wb_dict_to_dataframe(dict_clean, nombre_variable, año):
    df = pd.DataFrame({'pais': list(dict_clean.keys()),
                       nombre_variable: list(dict_clean.values()),
                       'año': [año] * len(dict_clean)})
    return df

In [17]:
variables=['GC.DOD.TOTL.GD.ZS','FP.CPI.TOTL.ZG','GC.XPN.TOTL.GD.ZS','GC.TAX.TOTL.GD.ZS','FI.RES.TOTL.CD','PA.NUS.FCRF','FR.INR.RINR','SL.UEM.TOTL.ZS']

In [18]:
Governmentdebt_raw = wbdata.get_data("GC.DOD.TOTL.GD.ZS")
Inflation_raw = wbdata.get_data("FP.CPI.TOTL.ZG")
Expense_raw = wbdata.get_data("GC.XPN.TOTL.GD.ZS")
Tax_raw = wbdata.get_data("GC.TAX.TOTL.GD.ZS")
Reserves_raw = wbdata.get_data("FI.RES.TOTL.CD")
Officialexchange_raw = wbdata.get_data("PA.NUS.FCRF")
Interestrate_raw = wbdata.get_data("FR.INR.RINR")
Unemployment_raw = wbdata.get_data("SL.UEM.TOTL.ZS")

In [19]:
raw_lists = {'Deuda': Governmentdebt_raw,
             'Inflacion': Inflation_raw,
             'Gasto público': Expense_raw,
             'Tasa fiscal': Tax_raw,
             'Reservas':Reserves_raw,
             'Tipo de cambio':Officialexchange_raw,
             'Tasa de interes':Interestrate_raw,}
            

In [20]:
for año in range(1989, 2011):
    dfs_año = []
    for k, v in raw_lists.items():
        v_clean = wb_clean_year_data(v, str(año))
        df = wb_dict_to_dataframe(v_clean, k, str(año))
        dfs_año.append(df)
    if año == 1989:
        for i, df in enumerate(dfs_año):
            if i == 0:
                df_año = df.merge(dfs_año[i + 1], on=['pais', 'año'])
            else:
                try:
                    df_año = df_año.merge(dfs_año[i + 1], on=['pais', 'año'])
                except:
                    pass
    else:
        for i, df in enumerate(dfs_año):
            if i == 0:
                df_otros_años = df.merge(dfs_año[i + 1], on=['pais', 'año'])
            else:
                try:
                    df_otros_años = df_otros_años.merge(dfs_año[i + 1], on=['pais', 'año'])
                except:
                    pass
        df_año = df_año.append(df_otros_años, ignore_index=True,)

In [21]:
df_año.to_excel('base.xlsx')

In [22]:
df_año['año'].unique()

array(['1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996',
       '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010'], dtype=object)

In [23]:
df_año ['pais'].value_counts()

United States          22
Mauritius              21
United Kingdom         21
Bhutan                 21
India                  21
                       ..
Trinidad and Tobago     2
Korea, Rep.             2
Malawi                  2
Egypt, Arab Rep.        1
Zambia                  1
Name: pais, Length: 63, dtype: int64

In [24]:
paises_buenos = []
for country, counts in dict(df_año['pais'].value_counts()).items():
    if counts >= 21:
        paises_buenos.append(country)

In [25]:
df_año_paises_buenos = df_año.loc[df_año['pais'].isin(paises_buenos)]

In [26]:
df_año_paises_buenos = df_año_paises_buenos.loc[df_año_paises_buenos['año']!="1989"]

In [27]:
df_año_paises_buenos ['pais'].value_counts()

Bhutan            21
India             21
Jamaica           21
United Kingdom    21
United States     21
Iceland           21
Australia         21
Singapore         21
Mauritius         21
Name: pais, dtype: int64

In [28]:
df_año_paises_buenos

Unnamed: 0,pais,Deuda,año,Inflacion,Gasto público,Tasa fiscal,Reservas,Tipo de cambio,Tasa de interes
1,Australia,12.253160,1990,7.333022,21.861559,22.476879,1.931874e+10,1.281057,9.669791
4,Bhutan,51.188019,1990,10.000000,20.557124,4.413943,8.883421e+07,17.505325,8.789378
7,Iceland,29.869468,1990,15.510722,29.377902,23.472225,4.549824e+08,58.283775,0.878499
8,India,50.783254,1990,8.971233,15.484217,9.993591,5.637446e+09,17.503500,5.269527
10,Jamaica,138.741328,1990,21.960175,22.642856,23.486431,1.681553e+08,7.184025,4.302951
...,...,...,...,...,...,...,...,...,...
611,Jamaica,136.224335,2010,12.609508,28.901746,24.278172,2.501095e+09,87.196146,9.683364
617,Mauritius,36.787376,2010,2.932363,22.018431,18.016713,2.618621e+09,30.784400,7.662272
627,Singapore,101.449782,2010,2.823277,12.412804,12.785824,2.312597e+11,1.363508,4.230749
635,United Kingdom,81.696357,2010,2.492655,42.283512,25.142407,9.802546e+10,0.647179,-1.017853


In [29]:
df_año_paises_buenos.to_excel('base_final.xlsx')

In [35]:
Base_final = pd.read_excel('/Users/joaco/Downloads/trabajo_econometria_up_5-master 4/base_final_1.xlsx')

In [37]:
Base_final.head(100)

Unnamed: 0,Pais,Año,Deuda,Inflacion,Gasto público,Tasa fiscal,Reservas,Tipo de cambio,Defaults prev,Default previo_c_t (3 años)
0,Australia,1990,12.253160,7.333022,21.861559,22.476879,1.931874e+10,1.281057,0,0
1,Bhutan,1990,51.188019,10.000000,20.557124,4.413943,8.883421e+07,17.505325,1,0
2,Guatemala,1990,24.480685,41.221867,9.131265,6.865212,3.618105e+08,4.485758,1,1
3,Iceland,1990,29.869468,15.510722,29.377902,23.472225,4.549824e+08,58.283775,0,0
4,India,1990,50.783254,8.971233,15.484217,9.993591,5.637446e+09,17.503500,0,0
...,...,...,...,...,...,...,...,...,...,...
95,Tunisia,1996,55.296518,3.725145,28.016448,19.837619,1.977688e+09,0.973408,1,1
96,United Kingdom,1996,43.628347,2.851782,33.857007,23.134875,4.670002e+10,0.640958,0,0
97,United States,1996,46.741769,2.931204,20.005396,11.373915,1.606602e+11,1.000000,0,0
98,Australia,1997,20.911558,0.224888,23.864424,22.430942,1.758826e+10,1.347380,0,0


In [30]:
missing_years = []
for c in df_año['pais'].unique():
    c_df = df_año.loc[df_año['pais'] == c]
    for y in df_año['año'].unique():
        c_y_df = c_df.loc[c_df['año'] == y]
        if len(c_y_df) == 0:
            print(c, y)
            missing_years.append(y)

Bahamas, The 2001
Bahamas, The 2002
Bahamas, The 2003
Bahamas, The 2004
Bahamas, The 2005
Bahrain 2005
Bahrain 2006
Bahrain 2007
Bahrain 2008
Bahrain 2009
Bahrain 2010
Botswana 1997
Botswana 1998
Botswana 1999
Botswana 2000
Botswana 2001
Botswana 2002
Botswana 2003
Botswana 2004
Botswana 2005
Burundi 1996
Burundi 1997
Burundi 2000
Burundi 2001
Burundi 2002
Burundi 2003
Burundi 2004
Burundi 2005
Burundi 2006
Burundi 2007
Burundi 2008
Burundi 2009
Burundi 2010
Ethiopia 2000
Ethiopia 2001
Ethiopia 2002
Ethiopia 2003
Ethiopia 2004
Ethiopia 2005
Ethiopia 2006
Ethiopia 2007
Ethiopia 2008
Ethiopia 2009
Ethiopia 2010
Indonesia 2000
Indonesia 2001
Indonesia 2004
Indonesia 2005
Indonesia 2006
Indonesia 2007
Italy 1993
Italy 1994
Italy 1995
Italy 1996
Italy 1997
Italy 1998
Italy 1999
Italy 2000
Italy 2001
Italy 2002
Italy 2003
Italy 2004
Italy 2005
Italy 2006
Italy 2007
Italy 2008
Italy 2009
Italy 2010
Papua New Guinea 2003
Papua New Guinea 2004
Papua New Guinea 2005
Papua New Guinea 2006
Papua N

In [31]:
df_año ['Deuda'].value_counts()

72.569022     1
58.122274     1
14.183979     1
31.162545     1
117.012590    1
             ..
34.225028     1
23.691505     1
75.955713     1
55.999696     1
44.038308     1
Name: Deuda, Length: 593, dtype: int64

In [32]:
missing_debt = []
for c in df_año['pais'].unique():
    c_df = df_año.loc[df_año['pais'] == c]
    for y in df_año['Deuda'].unique():
        c_y_df = c_df.loc[c_df['Deuda'] == y]
        if len(c_y_df) == 0:
            print(c, y)
            missing_debt.append(y)

Australia 27.9767032232351
Australia 16.3795586794953
Australia 47.4653512960086
Australia 12.1168020575228
Australia 88.7936648764972
Australia 50.8858787649859
Australia 31.2681629399946
Australia 50.0100803583574
Australia 40.2756295663842
Australia 71.423088166247
Australia 232.832210578933
Australia 39.9249411464254
Australia 46.6108276015088
Australia 78.2167512063909
Australia 49.6981196479839
Australia 78.97732191875
Australia 36.9948428757177
Australia 13.3655673362757
Australia 29.1948960284953
Australia 44.0615972806026
Australia 24.5905757591411
Australia 30.6304277902863
Australia 21.2998208687864
Australia 45.4804885550539
Australia 12.5321505732804
Australia 103.991579904102
Australia 44.3811735667036
Australia 33.6867480345741
Australia 75.4756911847562
Australia 37.5923714463574
Australia 49.6809013594637
Australia 42.7093574260874
Australia 77.4636589850747
Australia 123.606951339215
Australia 31.9560090496516
Australia 44.8643144683874
Australia 80.142729037233
Austr

In [33]:
paises_seleccionados = ['United States', 'Brazil','Argentina','Mexico','China']

In [34]:
df_año = df_año.loc[df_año['pais'].isin(paises_seleccionados)]

In [35]:
df_año.head()

Unnamed: 0,pais,Deuda,año,Inflacion,Gasto público,Tasa fiscal,Reservas,Tipo de cambio,Tasa de interes,Desempleo
20,United States,44.061597,1991,4.234964,22.884386,10.320342,159272900000.0,1.0,4.915352,6.8
44,United States,46.050144,1992,3.02882,21.818384,9.984162,147525900000.0,1.0,3.88424,7.5
61,Mexico,20.375128,1993,9.75146,10.87185,9.070611,25298700000.0,3.115617,-13.173946,3.214
69,United States,48.24614,1993,2.951657,21.417473,10.304934,164620200000.0,1.0,3.546689,6.9
89,Mexico,28.156432,1994,6.965812,10.913582,8.491922,6441391000.0,3.375117,9.64015,4.248


In [36]:
df_año['pais'].value_counts()

United States    20
Mexico            8
Brazil            5
Name: pais, dtype: int64

In [37]:
missing_years = []
for c in df_año['pais'].unique():
    c_df = df_año.loc[df_año['pais'] == c]
    for y in df_año['año'].unique():
        c_y_df = c_df.loc[c_df['año'] == y]
        if len(c_y_df) == 0:
            print(c, y)
            missing_years.append(y)

Mexico 1991
Mexico 1992
Mexico 2001
Mexico 2002
Mexico 2003
Mexico 2004
Mexico 2005
Mexico 2006
Mexico 2007
Mexico 2008
Mexico 2009
Mexico 2010
Brazil 1991
Brazil 1992
Brazil 1993
Brazil 1994
Brazil 1995
Brazil 1996
Brazil 1997
Brazil 1998
Brazil 1999
Brazil 2000
Brazil 2001
Brazil 2002
Brazil 2003
Brazil 2004
Brazil 2005


In [38]:
defaults = pd.read_csv("/Users/carolinasanchez/Desktop/Data_Base_Defaults.csv", encoding='latin1')

In [39]:
defaults.head()

Unnamed: 0,Country,Time,Total,default,default_previo_3aos
0,Afghanistan,1960,0,0,
1,Afghanistan,1961,0,0,
2,Afghanistan,1962,0,0,
3,Afghanistan,1963,0,0,0.0
4,Afghanistan,1964,267,1,0.0
