# Libraries

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Dataset

In [2]:
inflation_df = pd.read_excel('../Dataset/Inflation Dataset.xlsx')
inflation_df.head()

Unnamed: 0,Date,Exchange Rate,Inflation,Unemployment,Interest Rate,GDP
0,1991-01-01,120.911342,9.419058,7.866,19.3125,1556.806664
1,1992-01-01,117.321483,7.523517,8.242,15.9875,1629.847978
2,1993-01-01,121.661275,9.671893,8.565,10.6175,1706.952365
3,1994-01-01,120.982708,8.532005,13.108,10.294167,1805.603885
4,1995-01-01,117.086225,9.420323,14.013,14.111667,1922.352118


In [3]:
inflation_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           33 non-null     object 
 1   Exchange Rate  33 non-null     float64
 2   Inflation      33 non-null     float64
 3   Unemployment   33 non-null     float64
 4   Interest Rate  33 non-null     float64
 5   GDP            33 non-null     float64
dtypes: float64(5), object(1)
memory usage: 1.7+ KB


In [4]:
cols = ['Exchange Rate', 'Inflation', 'Unemployment', 'Interest Rate', 'GDP']

inflation_df[cols].describe()

Unnamed: 0,Exchange Rate,Inflation,Unemployment,Interest Rate,GDP
count,33.0,33.0,33.0,33.0,33.0
mean,104.431038,8.516708,16.789909,10.824975,2628.89126
std,13.994224,9.74742,4.380326,8.3898,832.780018
min,57.422148,1.56013,7.866,3.520833,1556.806664
25%,100.0,4.209464,14.053,6.0,1922.352118
50%,104.827658,6.394925,16.047,8.604167,2377.254993
75%,115.394983,9.420323,19.177,13.819167,3322.581679
max,125.827775,58.451044,26.381,49.324167,4247.853155


# Stationary Test

In [5]:
def adf_test(series, column_name):
    result = adfuller(series, autolag='AIC')
    return {
        'Variable': column_name,
        'ADF Statistic': result[0],
        'p-value': result[1],
        'Stationary': 'Yes' if result[1] < 0.05 else 'No'
    }

adf_results = []

for column in cols:
    adf_results.append(adf_test(inflation_df[column], column))

adf_results_df = pd.DataFrame(adf_results)

adf_results_df

Unnamed: 0,Variable,ADF Statistic,p-value,Stationary
0,Exchange Rate,-2.31846,0.166083,No
1,Inflation,-1.843719,0.358982,No
2,Unemployment,-2.057269,0.26204,No
3,Interest Rate,-2.700521,0.073948,No
4,GDP,1.644433,0.997985,No


## Differencing

In [6]:
def ensure_stationarity(data, columns, max_diff=5):
    differencing_count = {col: 0 for col in columns}
    differenced_data = data.copy()
    adf_results = []

    for col in columns:
        series = data[col]
        while differencing_count[col] < max_diff:
            result = adf_test(series, col)
            if result['Stationary'] == 'Yes':
                adf_results.append(result)
                break
            differencing_count[col] += 1
            series = series.diff().dropna()
            differenced_data[col] = series
        else:
            result = adf_test(series, col)
            adf_results.append(result)

    differenced_data = differenced_data.dropna()
    adf_results_df = pd.DataFrame(adf_results)

    return differenced_data, adf_results_df, differencing_count

data_stationary, adf_results_df, diff_counts = ensure_stationarity(inflation_df, cols)

print("Number of Differencing Steps for Stationarity:")
for col, count in diff_counts.items():
    print(f"{col}: {count} times")

print("\nADF Results:")
adf_results_df

Number of Differencing Steps for Stationarity:
Exchange Rate: 3 times
Inflation: 2 times
Unemployment: 1 times
Interest Rate: 3 times
GDP: 1 times

ADF Results:


Unnamed: 0,Variable,ADF Statistic,p-value,Stationary
0,Exchange Rate,-3.610019,0.005572355,Yes
1,Inflation,-7.846121,5.76312e-12,Yes
2,Unemployment,-5.052213,1.747622e-05,Yes
3,Interest Rate,-6.001554,1.652735e-07,Yes
4,GDP,-3.983258,0.001500759,Yes


In [7]:
data_stationary.head()

Unnamed: 0,Date,Exchange Rate,Inflation,Unemployment,Interest Rate,GDP
3,1994-01-01,-12.948008,-3.288264,4.543,7.091667,98.651521
4,1995-01-01,1.800442,2.028206,0.905,-0.905833,116.748233
5,1996-01-01,15.85595,-2.33536,0.938,-8.250833,117.010871
6,1997-01-01,-27.869312,-0.300097,0.007,5.080833,61.891161
7,1998-01-01,-40.194892,53.972042,1.799,33.1775,-304.170228


# Cointegration Test (Coint Johansen)

In [8]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen

def johansen_cointegration_test(data, det_order=0, k_ar_diff=1):
    johansen_result = coint_johansen(data, det_order, k_ar_diff)
    
    eigenvalues = johansen_result.eig
    trace_stat = johansen_result.lr1
    critical_values_1 = johansen_result.cvt[:, 0]
    critical_values_5 = johansen_result.cvt[:, 1]
    critical_values_10 = johansen_result.cvt[:, 2]
    
    results = []
    for i in range(len(eigenvalues)):
        result = {
            'Cointegration Relation': i + 1,
            'Eigenvalue': eigenvalues[i],
            'Trace Statistic': trace_stat[i],
            'Critical Value (1%)': critical_values_1[i],
            'Critical Value (5%)': critical_values_5[i],
            'Critical Value (10%)': critical_values_10[i],
            'Cointegration': 'Yes' if trace_stat[i] > critical_values_5[i] else 'No'
        }
        results.append(result)
    
    return pd.DataFrame(results)

cointegration_results_df = johansen_cointegration_test(data_stationary[cols])

cointegration_results_df

Unnamed: 0,Cointegration Relation,Eigenvalue,Trace Statistic,Critical Value (1%),Critical Value (5%),Critical Value (10%),Cointegration
0,1,0.896321,171.832903,65.8202,69.8189,77.8202,Yes
1,2,0.874855,108.372204,44.4929,47.8545,54.6815,Yes
2,3,0.6443,50.180254,27.0669,29.7961,35.4628,Yes
3,4,0.402037,21.237588,13.4294,15.4943,19.9349,Yes
4,5,0.216715,6.839247,2.7055,3.8415,6.6349,Yes


# Error Correction Model

## Long Term Model

In [9]:
data = data_stationary.copy()
X_col = ['Inflation', 'Unemployment', 'Interest Rate', 'GDP']

long_term_model = sm.OLS(data['Exchange Rate'], sm.add_constant(data[X_col])).fit()
data['ECT'] = long_term_model.resid
print(long_term_model.summary())

                            OLS Regression Results                            
Dep. Variable:          Exchange Rate   R-squared:                       0.867
Model:                            OLS   Adj. R-squared:                  0.846
Method:                 Least Squares   F-statistic:                     40.91
Date:                Fri, 29 Nov 2024   Prob (F-statistic):           1.27e-10
Time:                        23:02:50   Log-Likelihood:                -121.31
No. Observations:                  30   AIC:                             252.6
Df Residuals:                      25   BIC:                             259.6
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -1.9817      4.107     -0.483

## Short Term Model

In [10]:
lags = [1, 2, 3]
best_lag = None
best_pvalue = float('inf')
best_model = None

for lag in lags:
    data['ECT_lag'] = data['ECT'].shift(lag)
    data.dropna(inplace=True)
    
    X_col = ['Inflation', 'Unemployment', 'Interest Rate', 'GDP', 'ECT_lag']
    ecm_model = sm.OLS(data['Exchange Rate'], sm.add_constant(data[X_col])).fit()

    if ecm_model.pvalues['ECT_lag'] < best_pvalue:
        best_pvalue = ecm_model.pvalues['ECT_lag']
        best_lag = lag
        best_model = ecm_model

print(f"Best Lag for ECT: {best_lag}")
print(best_model.summary())


Best Lag for ECT: 2
                            OLS Regression Results                            
Dep. Variable:          Exchange Rate   R-squared:                       0.880
Model:                            OLS   Adj. R-squared:                  0.852
Method:                 Least Squares   F-statistic:                     30.85
Date:                Fri, 29 Nov 2024   Prob (F-statistic):           5.38e-09
Time:                        23:02:50   Log-Likelihood:                -109.11
No. Observations:                  27   AIC:                             230.2
Df Residuals:                      21   BIC:                             238.0
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -1.8477  