In [1]:
import requests
import json
import pandas as pd
import numpy as np
from functools import reduce
import datetime
import wbdata
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "notebook" # use "pio.renderers" to see the default renderer
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA


In [2]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [3]:
import warnings
warnings.filterwarnings('ignore')

# Debt forecast

In [4]:
# load data
df = pd.read_csv("debt_data.csv",index_col="date", infer_datetime_format=True, parse_dates=True)

In [5]:
# resample to enforce index frequency
# needed for later steps
# reference to frequency strings: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
df = df.resample("AS").mean()
# fill NaNs with bfills
df = df.fillna(method="bfill")
# drop cols that only contain NaNs
df = df.dropna(how='all', axis=1)
# drop rows that only contain NaNs
df = df.dropna(how='all', axis=0)
# convert column types from np.float64 to np.int64
df = df.astype(np.int64)
df.head()

Unnamed: 0_level_0,Afghanistan,Angola,Albania,Argentina,Armenia,Azerbaijan,Burundi,Benin,Burkina Faso,Bangladesh,Bulgaria,Bosnia and Herzegovina,Belarus,Belize,Bolivia,Brazil,Bhutan,Botswana,Central African Republic,China,Cote d'Ivoire,Cameroon,"Congo, Dem. Rep.","Congo, Rep.",Colombia,Comoros,Cabo Verde,Costa Rica,Djibouti,Dominica,Dominican Republic,Algeria,Ecuador,"Egypt, Arab Rep.",Eritrea,Ethiopia,Fiji,Gabon,Georgia,Ghana,Guinea,"Gambia, The",Guinea-Bissau,Grenada,Guatemala,Guyana,Honduras,Haiti,Indonesia,India,"Iran, Islamic Rep.",Jamaica,Jordan,Kazakhstan,Kenya,Kyrgyz Republic,Cambodia,Lao PDR,Lebanon,Liberia,St. Lucia,Sri Lanka,Lesotho,Morocco,Moldova,Madagascar,Maldives,Mexico,North Macedonia,Mali,Myanmar,Montenegro,Mongolia,Mozambique,Mauritania,Mauritius,Malawi,Niger,Nigeria,Nicaragua,Nepal,Pakistan,Panama,Peru,Philippines,Papua New Guinea,Paraguay,Romania,Russian Federation,Rwanda,Sudan,Senegal,Solomon Islands,Sierra Leone,El Salvador,Somalia,Serbia,Sao Tome and Principe,Eswatini,Syrian Arab Republic,Chad,Togo,Thailand,Tajikistan,Turkmenistan,Timor-Leste,Tonga,Tunisia,Turkey,Tanzania,Uganda,Ukraine,Uzbekistan,St. Vincent and the Grenadines,Vietnam,Vanuatu,Samoa,Kosovo,"Yemen, Rep.",South Africa,Zambia,Zimbabwe
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1
1960-01-01,979344507,7288778066,511485756,5893191433,133922025,35540000,14970400,43206645,20789505,146504255,772737948,2408825764,968283686,4084800,716253874,5984162468,325908,17358400,24142239,5798013609,373548111,145926448,345217589,118823126,2337070268,1172240,39376101,320105203,2594794,14433362,389548000,939559305,402122588,1801624657,29056800,169291507,11665440,103822382,79312483,572138164,336538268,5076000,0,16048000,192905000,82870994,158332049,52164000,3432691278,8425121119,4618942613,981947821,119092816,34977844,477537524,5527412,163589000,8345623,64232860,162259998,15516307,435678442,8122200,984947643,38504010,485146240,8884310,7097271963,1041777825,247329389,163680999,879825840,354767148,1318271500,26315139,31738400,135209089,31677128,836680592,242870748,3800810,3406742576,228889000,3299813044,2196027768,208775480,144131808,30147075,79671384011,4878977,385062510,145252948,11479143,61400031,205547537,73220144,2329427795,2183745,37033600,5388704719,35194310,39801356,1000572108,9731000,276435325,76035317,24548074,603200003,2746453200,197359339,151676389,550691959,59702042,1455000,25579000,3008900,2682000,75993698,48815506,21670999666,813521836,232362560
1961-01-01,979344507,7288778066,511485756,5893191433,133922025,35540000,14970400,43206645,20789505,146504255,772737948,2408825764,968283686,4084800,716253874,5984162468,325908,17358400,24142239,5798013609,373548111,145926448,345217589,118823126,2337070268,1172240,39376101,320105203,2594794,14433362,389548000,939559305,402122588,1801624657,29056800,169291507,11665440,103822382,79312483,572138164,336538268,5076000,0,16048000,192905000,82870994,158332049,52164000,3432691278,8425121119,4618942613,981947821,119092816,34977844,477537524,5527412,163589000,8345623,64232860,162259998,15516307,435678442,8122200,984947643,38504010,485146240,8884310,7097271963,1041777825,247329389,163680999,879825840,354767148,1318271500,26315139,31738400,135209089,31677128,836680592,242870748,3800810,3406742576,228889000,3299813044,2196027768,208775480,144131808,30147075,79671384011,4878977,385062510,145252948,11479143,61400031,205547537,73220144,2329427795,2183745,37033600,5388704719,35194310,39801356,1000572108,9731000,276435325,76035317,24548074,603200003,2746453200,197359339,151676389,550691959,59702042,1455000,25579000,3008900,2682000,75993698,48815506,21670999666,813521836,232362560
1962-01-01,979344507,7288778066,511485756,5893191433,133922025,35540000,14970400,43206645,20789505,146504255,772737948,2408825764,968283686,4084800,716253874,5984162468,325908,17358400,24142239,5798013609,373548111,145926448,345217589,118823126,2337070268,1172240,39376101,320105203,2594794,14433362,389548000,939559305,402122588,1801624657,29056800,169291507,11665440,103822382,79312483,572138164,336538268,5076000,0,16048000,192905000,82870994,158332049,52164000,3432691278,8425121119,4618942613,981947821,119092816,34977844,477537524,5527412,163589000,8345623,64232860,162259998,15516307,435678442,8122200,984947643,38504010,485146240,8884310,7097271963,1041777825,247329389,163680999,879825840,354767148,1318271500,26315139,31738400,135209089,31677128,836680592,242870748,3800810,3406742576,228889000,3299813044,2196027768,208775480,144131808,30147075,79671384011,4878977,385062510,145252948,11479143,61400031,205547537,73220144,2329427795,2183745,37033600,5388704719,35194310,39801356,1000572108,9731000,276435325,76035317,24548074,603200003,2746453200,197359339,151676389,550691959,59702042,1455000,25579000,3008900,2682000,75993698,48815506,21670999666,813521836,232362560
1963-01-01,979344507,7288778066,511485756,5893191433,133922025,35540000,14970400,43206645,20789505,146504255,772737948,2408825764,968283686,4084800,716253874,5984162468,325908,17358400,24142239,5798013609,373548111,145926448,345217589,118823126,2337070268,1172240,39376101,320105203,2594794,14433362,389548000,939559305,402122588,1801624657,29056800,169291507,11665440,103822382,79312483,572138164,336538268,5076000,0,16048000,192905000,82870994,158332049,52164000,3432691278,8425121119,4618942613,981947821,119092816,34977844,477537524,5527412,163589000,8345623,64232860,162259998,15516307,435678442,8122200,984947643,38504010,485146240,8884310,7097271963,1041777825,247329389,163680999,879825840,354767148,1318271500,26315139,31738400,135209089,31677128,836680592,242870748,3800810,3406742576,228889000,3299813044,2196027768,208775480,144131808,30147075,79671384011,4878977,385062510,145252948,11479143,61400031,205547537,73220144,2329427795,2183745,37033600,5388704719,35194310,39801356,1000572108,9731000,276435325,76035317,24548074,603200003,2746453200,197359339,151676389,550691959,59702042,1455000,25579000,3008900,2682000,75993698,48815506,21670999666,813521836,232362560
1964-01-01,979344507,7288778066,511485756,5893191433,133922025,35540000,14970400,43206645,20789505,146504255,772737948,2408825764,968283686,4084800,716253874,5984162468,325908,17358400,24142239,5798013609,373548111,145926448,345217589,118823126,2337070268,1172240,39376101,320105203,2594794,14433362,389548000,939559305,402122588,1801624657,29056800,169291507,11665440,103822382,79312483,572138164,336538268,5076000,0,16048000,192905000,82870994,158332049,52164000,3432691278,8425121119,4618942613,981947821,119092816,34977844,477537524,5527412,163589000,8345623,64232860,162259998,15516307,435678442,8122200,984947643,38504010,485146240,8884310,7097271963,1041777825,247329389,163680999,879825840,354767148,1318271500,26315139,31738400,135209089,31677128,836680592,242870748,3800810,3406742576,228889000,3299813044,2196027768,208775480,144131808,30147075,79671384011,4878977,385062510,145252948,11479143,61400031,205547537,73220144,2329427795,2183745,37033600,5388704719,35194310,39801356,1000572108,9731000,276435325,76035317,24548074,603200003,2746453200,197359339,151676389,550691959,59702042,1455000,25579000,3008900,2682000,75993698,48815506,21670999666,813521836,232362560


### Functions

In [6]:
def get_ARMA_preds(country_name, train, test):
    y = train
    ARMAmodel = SARIMAX(y, order = (1, 1, 0))
    ARMAmodel = ARMAmodel.fit()
    
    y_pred = ARMAmodel.get_forecast(len(test.index))
    y_pred_df = y_pred.conf_int(alpha = 0.05) 
    y_pred_df["Predictions"] = ARMAmodel.predict(start = y_pred_df.index[0], end = y_pred_df.index[-1])
    y_pred_df.index = test.index
    y_pred_out = y_pred_df["Predictions"] 

    rmse = np.sqrt(mean_squared_error(test.values, y_pred_df["Predictions"]))
    #print(country_name, rmse)
    return (rmse, y_pred_out)

In [7]:
def get_ARIMA_preds(country_name, train, test):
    y = train
    ARIMAmodel = ARIMA(y, order = (2, 2, 2))
    ARIMAmodel = ARIMAmodel.fit()
    
    y_pred = ARIMAmodel.get_forecast(len(test.index))
    y_pred_df = y_pred.conf_int(alpha = 0.05) 
    y_pred_df["Predictions"] = ARIMAmodel.predict(start = y_pred_df.index[0], end = y_pred_df.index[-1])
    y_pred_df.index = test.index
    y_pred_out = y_pred_df["Predictions"] 

    rmse = np.sqrt(mean_squared_error(test.values, y_pred_df["Predictions"]))
    #print(country_name, rmse)
    return (rmse, y_pred_out)

In [8]:
def get_SARIMA_preds(country_name, train, test):
    y = train
    SARIMAXmodel = SARIMAX(y, order = (1, 1, 0), seasonal_order=(2,2,2,6))
    SARIMAXmodel = SARIMAXmodel.fit()
    
    y_pred = SARIMAXmodel.get_forecast(len(test.index))
    y_pred_df = y_pred.conf_int(alpha = 0.05) 
    y_pred_df["Predictions"] = SARIMAXmodel.predict(start = y_pred_df.index[0], end = y_pred_df.index[-1])
    y_pred_df.index = test.index
    y_pred_out = y_pred_df["Predictions"] 

    rmse = np.sqrt(mean_squared_error(test.values, y_pred_df["Predictions"]))
    #print(country_name, rmse)
    return (rmse, y_pred_out)

### Method
- For each country:
    - run each model and save in dict: ```{
        "country_name": country_name,
        tests:[
            {"preds": preds, "score": score}, 
            {"preds": preds, "score": score},
            {"preds": preds, "score": score}
        ]
    }```
    - 
    in dict: {"country_name": country_name, "preds":preds}

In [9]:
# find index of min value in list
def find_min_index(number_list):
    max_value = min(number_list)
    return number_list.index(max_value)

In [11]:
# for train/test split
date_split = pd.to_datetime("2019-01-01", format='%Y-%m-%d')

- format axis
- fix points
- save plots to folder

In [16]:
total_preds = {}
for col in tqdm(df.columns.unique()[:4]):
    country_name = col
    
    # split series
    train = df[country_name][df.index <= date_split]
    test = df[country_name][df.index >= date_split]
    
    # get predictions
    arma_score, arma_preds = get_ARMA_preds(country_name, train, test)
    arima_score, arima_preds = get_ARIMA_preds(country_name, train, test)
    sarima_score, sarima_preds = get_SARIMA_preds(country_name, train, test)

    # select predictions with lowest score
    scores = [arma_score, arima_score, sarima_score]
    preds = [arma_preds, arima_preds, sarima_preds]
    best_preds = preds[find_min_index(scores)]
    #total_preds[country_name] = best_preds
    total_preds[country_name] = {"scores": scores, "preds": preds, "best_preds": best_preds}

    fig, ax = plt.subplots()
    plt.plot(train, color = "black", label = 'Train')
    plt.plot(test, color = "red", label = 'Test')
    plt.plot(best_preds, color='Blue', label = 'Forecast')
    # format y axis
    func = lambda y, pos: f"${y:,.0f}"
    ax.yaxis.set_major_formatter(func)
    # format x axis
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.xaxis.set_major_locator(mdates.YearLocator(5))
    plt.ylabel('Debt in current USD')
    plt.xlabel('Date')
    plt.xticks(rotation=45)
    plt.title("Debt forecast: "+ country_name)
    plt.grid()
    plt.legend()
    plt.savefig(country_name +'.png',dpi=300, transparent=True, bbox_inches='tight')
    print("test", fig, ax)
    plt.close()
    
total_preds

 25%|██▌       | 1/4 [00:00<00:01,  2.23it/s]

test Figure(432x288) AxesSubplot(0.125,0.125;0.775x0.755)


 50%|█████     | 2/4 [00:00<00:00,  2.14it/s]

test Figure(432x288) AxesSubplot(0.125,0.125;0.775x0.755)


 75%|███████▌  | 3/4 [00:01<00:00,  2.00it/s]

test Figure(432x288) AxesSubplot(0.125,0.125;0.775x0.755)


100%|██████████| 4/4 [00:01<00:00,  2.01it/s]

test Figure(432x288) AxesSubplot(0.125,0.125;0.775x0.755)





{'Afghanistan': {'scores': [265751895.0226802,
   234287390.1451851,
   187033520.60740814],
  'preds': [date
   2019-01-01    2.660609e+09
   2020-01-01    2.660541e+09
   Freq: AS-JAN, Name: Predictions, dtype: float64,
   date
   2019-01-01    2.670578e+09
   2020-01-01    2.705156e+09
   Freq: AS-JAN, Name: Predictions, dtype: float64,
   date
   2019-01-01    2.596439e+09
   2020-01-01    2.780038e+09
   Freq: AS-JAN, Name: Predictions, dtype: float64],
  'best_preds': date
  2019-01-01    2.596439e+09
  2020-01-01    2.780038e+09
  Freq: AS-JAN, Name: Predictions, dtype: float64},
 'Angola': {'scores': [1361509423.205981,
   737288392.1792337,
   1425213045.2437143],
  'preds': [date
   2019-01-01    6.511623e+10
   2020-01-01    6.548169e+10
   Freq: AS-JAN, Name: Predictions, dtype: float64,
   date
   2019-01-01    6.444789e+10
   2020-01-01    6.624411e+10
   Freq: AS-JAN, Name: Predictions, dtype: float64,
   date
   2019-01-01    6.435416e+10
   2020-01-01    6.527334e+10
 