In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from scipy.stats import linregress
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
!ls

CIR-model.ipynb   DGS10_cleaned.csv IR-data.xlsx      README.md


In [92]:
ir = pd.read_excel('IR-data.xlsx')
ir['delta'] = ir['Interest rate'].diff()
# ir.loc[:, ['Interest rate', 'delta']] = ir.loc[:, ['Interest rate', 'delta']].multiply(0.01)
ir['ir_sqrt'] = np.sqrt(ir['Interest rate'].shift(1))
ir['const'] = 1 / np.sqrt(ir['Interest rate'].shift(1))
ir = ir.rename(columns={'Interest rate': 'interest_rate'})
# ir = ir.bfill()
ir = ir.dropna()
ir

Unnamed: 0,Date,interest_rate,delta,ir_sqrt,const
1,2011-01-06,4.13,-0.07,2.049390,0.487950
2,2011-01-13,4.08,-0.05,2.032240,0.492068
3,2011-01-20,4.05,-0.03,2.019901,0.495074
4,2011-01-27,4.09,0.04,2.012461,0.496904
5,2011-02-03,4.08,-0.01,2.022375,0.494468
...,...,...,...,...,...
582,2022-02-24,3.14,-0.01,1.774824,0.563436
583,2022-03-03,3.01,-0.13,1.772005,0.564333
584,2022-03-10,3.09,0.08,1.734935,0.576390
585,2022-03-17,3.39,0.30,1.757840,0.568880


In [6]:
ir = pd.read_csv('DGS10_cleaned.csv')
ir['delta'] = ir['interest_rate'].diff()
ir['ir_sqrt'] = np.sqrt(ir['interest_rate'].shift(1))
ir['const'] = 1 / np.sqrt(ir['interest_rate'].shift(1))
ir = ir.dropna()
ir

Unnamed: 0,Date,interest_rate,delta,ir_sqrt,const
1,2007-03-07,4.50,-0.03,2.128380,0.469841
2,2007-03-08,4.51,0.01,2.121320,0.471405
3,2007-03-09,4.59,0.08,2.123676,0.470882
4,2007-03-12,4.56,-0.03,2.142429,0.466760
5,2007-03-13,4.50,-0.06,2.135416,0.468293
...,...,...,...,...,...
4377,2024-08-29,3.87,0.03,1.959592,0.510310
4378,2024-08-30,3.91,0.04,1.967232,0.508329
4379,2024-09-03,3.84,-0.07,1.977372,0.505722
4380,2024-09-04,3.77,-0.07,1.959592,0.510310


In [8]:
train = ir.loc[ir['Date'] < '2021-01-01', :]
test = ir.loc[ir['Date'] >= '2021-01-01', :]

x_train = train.loc[:, ['const', 'ir_sqrt']]
y_train = train.loc[:, 'delta'] / train.loc[:, 'ir_sqrt']

x_test = test.loc[:, ['const', 'ir_sqrt']]
y_test = test.loc[:, 'delta'] / test.loc[:, 'ir_sqrt']

x_train

Unnamed: 0,const,ir_sqrt
1,0.469841,2.128380
2,0.471405,2.121320
3,0.470882,2.123676
4,0.466760,2.142429
5,0.468293,2.135416
...,...,...
3456,1.020621,0.979796
3457,1.031421,0.969536
3458,1.031421,0.969536
3459,1.031421,0.969536


In [9]:
linreg = smf.ols('delta ~ const + ir_sqrt - 1', data=train).fit()
linreg.summary()

0,1,2,3
Dep. Variable:,delta,R-squared (uncentered):,0.001
Model:,OLS,Adj. R-squared (uncentered):,0.001
Method:,Least Squares,F-statistic:,2.297
Date:,"Mon, 09 Sep 2024",Prob (F-statistic):,0.101
Time:,22:37:19,Log-Likelihood:,5018.3
No. Observations:,3460,AIC:,-10030.0
Df Residuals:,3458,BIC:,-10020.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0056,0.004,1.608,0.108,-0.001,0.013
ir_sqrt,-0.0030,0.001,-2.044,0.041,-0.006,-0.000

0,1,2,3
Omnibus:,304.03,Durbin-Watson:,2.039
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1841.482
Skew:,-0.148,Prob(JB):,0.0
Kurtosis:,6.562,Cond. No.,6.74


In [10]:
a_value = -1 * linreg.params[1]
b_value = linreg.params[0] / a_value
ir_t = train.loc[len(train)-1:, 'interest_rate'].values[0] # last value of interest rate on training set
sigma = linreg.mse_resid # MSE for Residuals / Variance for the model
print(f' a: {a_value} \n b: {b_value} \n σ: {sigma} \n last training data interest rate: {ir_t}')

 a: 0.003035615593512996 
 b: 1.857822651428973 
 σ: 0.0032209956337104547 
 last training data interest rate: 0.93


In [13]:
# forecasting with Vasicek model formula
cir_pred = []
for day in tqdm(range(len(test))):
    new_ir = ir_t * np.exp(-1 * a_value * day) + b_value * (1 - np.exp(-1 * a_value * day))
    new_var = (1 - np.exp(-2 * a_value * day)) * sigma / (2 * a_value) + ((np.exp(-1 * a_value * day)) - (np.exp(-2 * a_value * day))) * sigma * ir_t / a_value
    cir_pred.append([new_ir, new_var])

cir_pred_df = pd.DataFrame(cir_pred, columns=['interest_rate', 'Variance'])
cir_pred_df['Date'] = test['Date'].values
cir_pred_df = cir_pred_df[['Date', 'interest_rate', 'Variance']]
cir_pred_df

100%|██████████| 921/921 [00:00<00:00, 66124.96it/s]


Unnamed: 0,Date,interest_rate,Variance
0,2021-01-04,0.930000,0.000000
1,2021-01-05,0.932812,0.006193
2,2021-01-06,0.935616,0.012340
3,2021-01-07,0.938411,0.018440
4,2021-01-08,0.941198,0.024495
...,...,...,...
916,2024-08-29,1.800298,0.585883
917,2024-08-30,1.800472,0.585732
918,2024-09-03,1.800646,0.585583
919,2024-09-04,1.800819,0.585433


In [14]:
# long run forecast
ir_long = b_value
vol_long = sigma / (2 * a_value)
print(f'Long run interest rate: {ir_long} \nLong run variance: {vol_long}')

Long run interest rate: 1.857822651428973 
Long run variance: 0.5305341757687649


In [15]:
# OLS Prediction / forecasting with linear regression model
pred = linreg.predict(x_test)
linreg_mse = np.mean((pred - y_test) ** 2)
pred, linreg_mse, linreg.rsquared

(3461    0.002921
 3462    0.002921
 3463    0.002782
 3464    0.002434
 3465    0.002272
           ...   
 4377   -0.003071
 4378   -0.003105
 4379   -0.003150
 4380   -0.003071
 4381   -0.002990
 Length: 921, dtype: float64,
 0.0014621421381115678,
 0.00132673707784714)

In [16]:
ir4ols = ir.loc[len(train) - 1:, ['Date', 'interest_rate']]
ir4ols['interest_rate'] = ir4ols['interest_rate'].shift(1)
ir4ols.loc[:, 'interest_rate'] = ir4ols.loc[:, 'interest_rate'] + pred.values[1]
ir4ols = ir4ols.dropna()
ir4ols

Unnamed: 0,Date,interest_rate
3460,2020-12-31,0.932921
3461,2021-01-04,0.932921
3462,2021-01-05,0.932921
3463,2021-01-06,0.962921
3464,2021-01-07,1.042921
...,...,...
4377,2024-08-29,3.842921
4378,2024-08-30,3.872921
4379,2024-09-03,3.912921
4380,2024-09-04,3.842921


In [17]:
ols_cir = pd.merge(ir4ols, cir_pred_df, on='Date', suffixes=('_ols', '_cir'))
ols_cir

Unnamed: 0,Date,interest_rate_ols,interest_rate_cir,Variance
0,2021-01-04,0.932921,0.930000,0.000000
1,2021-01-05,0.932921,0.932812,0.006193
2,2021-01-06,0.962921,0.935616,0.012340
3,2021-01-07,1.042921,0.938411,0.018440
4,2021-01-08,1.082921,0.941198,0.024495
...,...,...,...,...
916,2024-08-29,3.842921,1.800298,0.585883
917,2024-08-30,3.872921,1.800472,0.585732
918,2024-09-03,3.912921,1.800646,0.585583
919,2024-09-04,3.842921,1.800819,0.585433


In [18]:
mse = np.mean((ols_cir['interest_rate_ols'] - ols_cir['interest_rate_cir']) ** 2)
mse

3.2070276649066716

In [20]:
og_ols_cir = pd.merge(ols_cir, test, on='Date').drop(columns= ['delta'])
og_ols_cir

Unnamed: 0,Date,interest_rate_ols,interest_rate_cir,Variance,interest_rate,ir_sqrt,const
0,2021-01-04,0.932921,0.930000,0.000000,0.93,0.964365,1.036952
1,2021-01-05,0.932921,0.932812,0.006193,0.96,0.964365,1.036952
2,2021-01-06,0.962921,0.935616,0.012340,1.04,0.979796,1.020621
3,2021-01-07,1.042921,0.938411,0.018440,1.08,1.019804,0.980581
4,2021-01-08,1.082921,0.941198,0.024495,1.13,1.039230,0.962250
...,...,...,...,...,...,...,...
916,2024-08-29,3.842921,1.800298,0.585883,3.87,1.959592,0.510310
917,2024-08-30,3.872921,1.800472,0.585732,3.91,1.967232,0.508329
918,2024-09-03,3.912921,1.800646,0.585583,3.84,1.977372,0.505722
919,2024-09-04,3.842921,1.800819,0.585433,3.77,1.959592,0.510310


In [21]:
# OLS did better than Vasicek
og_vas_mse = np.mean((og_ols_cir['interest_rate'] - og_ols_cir['interest_rate_cir']) ** 2)
og_ols_mse = np.mean((og_ols_cir['interest_rate'] - og_ols_cir['interest_rate_ols']) ** 2)
og_vas_mse, og_ols_mse

(3.2039032001027556, 0.004313464369810357)