In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import skew, kurtosis, jarque_bera
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
from arch.unitroot import PhillipsPerron

In [2]:
# Load the raw series from dataset.csv; `df` is the frame every later cell works on.
df = pd.read_csv('dataset.csv')
# Summary statistics of the raw values, before any transformation is applied.
df.describe()

Unnamed: 0,OMR3M,BHD3M,KWD3M,QAR3M,SAR3M,AED3M,EUR3M,EIBO3M,Gold,Oil,MSCI EM,SPX,USDL3M,GAS
count,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0,3914.0
mean,0.388481,0.38002,0.297868,3.648441,3.752079,3.673406,1.208145,1.845811,1480.835984,71.887241,1020.410507,2532.565324,0.012992,3.429253
std,0.006969,0.002022,0.011122,0.018029,0.003757,0.000974,0.120243,1.31946,300.635917,21.439877,126.90221,1150.320374,0.015958,1.269002
min,0.376,0.375,0.2739,3.61685,3.7447,3.66425,0.96844,0.2526,908.5,-37.63,688.519,879.13,0.001141,1.1905
25%,0.38406,0.37842,0.290275,3.642,3.750025,3.672875,1.10944,0.82886,1241.95,53.27,952.12025,1512.3825,0.002681,2.621
50%,0.38658,0.37979,0.298035,3.6436,3.75075,3.673312,1.178455,1.51333,1366.5,72.64,1005.627,2186.32,0.004679,3.0615
75%,0.390977,0.38165,0.307685,3.648548,3.7533,3.674,1.312795,2.3425,1746.1875,89.49,1079.39325,3344.5775,0.018944,4.02225
max,0.43513,0.38615,0.32395,3.86465,3.77805,3.6773,1.50879,5.60264,2401.5,123.7,1444.93,5254.35,0.056874,9.68


In [3]:
# Show the rows with a negative Oil price: a log return cannot be computed
# from a negative level, so these observations need special handling.
df[df['Oil'].lt(0)]

Unnamed: 0,Date,OMR3M,BHD3M,KWD3M,QAR3M,SAR3M,AED3M,EUR3M,EIBO3M,Gold,Oil,MSCI EM,SPX,USDL3M,GAS
2852,2020/4/20,0.42025,0.37705,0.3179,3.6542,3.75725,3.674875,1.09061,1.57708,1686.2,-37.63,899.326,2823.16,0.010976,1.924


In [4]:
# Oil price on the negative day (row 2852) and on the days immediately before and after.
# Label-based lookup instead of the positional column index 10; at this point
# `df` still has the default integer row labels from read_csv, so the labels
# 2851..2853 are the same rows as positions 2851..2853.
df.loc[2851:2853, 'Oil']

2851    18.27
2852   -37.63
2853     8.91
Name: Oil, dtype: float64

In [5]:
# Simple percentage changes of Oil into and out of the negative price at row 2852.
# These replace the log returns for rows 2852 and 2853, which are undefined
# because log(negative price) is NaN.
# Formula: (new - old) / |old| * 100. Dividing by the absolute value of the
# previous price keeps the sign of the return equal to the direction of the
# move even when the previous price is negative (row 2853).
rate2852 = (df.loc[2852, 'Oil'] - df.loc[2851, 'Oil']) / abs(df.loc[2851, 'Oil']) * 100
rate2853 = (df.loc[2853, 'Oil'] - df.loc[2852, 'Oil']) / abs(df.loc[2852, 'Oil']) * 100

In [6]:
# Convert every series from levels to log returns in percent:
#     r_t = (ln x_t - ln x_{t-1}) * 100
# NOTE: this cell overwrites `df` in place and is therefore not idempotent;
# re-run the loading cell before running it a second time.
df['Date'] = pd.to_datetime(df['Date'])
return_cols = df.columns[1:]  # every column except the leading 'Date'

# The single negative Oil price makes np.log return NaN for rows 2852/2853 and
# emit an "invalid value" RuntimeWarning. That is expected here, so the warning
# is silenced and the two values are overwritten just below.
with np.errstate(invalid='ignore'):
    df[return_cols] = np.log(df[return_cols]).diff() * 100

# Replace the two undefined Oil log returns with the simple percentage changes
# computed in the previous cell (rate2852, rate2853).
df.loc[2852, 'Oil'] = rate2852
df.loc[2853, 'Oil'] = rate2853

# The first row has no previous observation, so its returns are NaN: drop it.
df = df.iloc[1:, :]

# Sanity check: no missing values should remain.
df.isna().sum()

  result = getattr(ufunc, method)(*inputs, **kwargs)


Date       0
OMR3M      0
BHD3M      0
KWD3M      0
QAR3M      0
SAR3M      0
AED3M      0
EUR3M      0
EIBO3M     0
Gold       0
Oil        0
MSCI EM    0
SPX        0
USDL3M     0
GAS        0
dtype: int64

In [7]:
# Names of the return series, i.e. every column except the leading 'Date'.
list(df.columns[1:])

['OMR3M',
 'BHD3M',
 'KWD3M',
 'QAR3M',
 'SAR3M',
 'AED3M',
 'EUR3M',
 'EIBO3M',
 'Gold',
 'Oil',
 'MSCI EM',
 'SPX',
 'USDL3M',
 'GAS']

In [8]:
# Descriptive statistics and diagnostic tests, one row per return series.
# NOTE: the 'Jarque-Bera', 'ADF' and 'PP' columns hold the *p-values* of the
# respective tests, not the test statistics. 'Kurtosis' uses the default of
# scipy.stats.kurtosis (Fisher definition, i.e. excess kurtosis).
# The column labels are kept exactly as they were (including the
# 'Maximun'/'Minimun' spellings) in case other cells select on them.
columns = ['Name', 'Mean', 'Maximun', 'Minimun', 'Std. Dev.', 'Skewness', 'Kurtosis', 'Jarque-Bera', 'ADF', 'PP']
ls = [
    [
        column,
        df[column].mean(),
        df[column].max(),
        df[column].min(),
        df[column].std(),
        skew(df[column]),
        kurtosis(df[column]),
        jarque_bera(df[column])[1],         # p-value of the normality test
        adfuller(df[column])[1],            # p-value of the ADF unit-root test
        PhillipsPerron(df[column]).pvalue,  # p-value of the PP unit-root test
    ]
    for column in df.columns[1:]
]
df_stats = pd.DataFrame.from_records(ls, columns=columns)

In [9]:
# Pairwise correlation matrix of all return series (every column except 'Date').
cor_matrix = df.iloc[:, 1:].corr()
# Leave the frame as the cell's last expression so the notebook renders it as a
# table; print() wraps a wide frame into several hard-to-read text chunks.
cor_matrix

            OMR3M     BHD3M     KWD3M     QAR3M     SAR3M     AED3M     EUR3M  \
OMR3M    1.000000  0.064175 -0.004342 -0.032026  0.075297  0.073529  0.007711   
BHD3M    0.064175  1.000000  0.048295  0.014369  0.038707  0.018838  0.010918   
KWD3M   -0.004342  0.048295  1.000000  0.021553  0.016946  0.087568 -0.202587   
QAR3M   -0.032026  0.014369  0.021553  1.000000  0.024471  0.002695  0.010328   
SAR3M    0.075297  0.038707  0.016946  0.024471  1.000000  0.064088  0.000726   
AED3M    0.073529  0.018838  0.087568  0.002695  0.064088  1.000000 -0.025512   
EUR3M    0.007711  0.010918 -0.202587  0.010328  0.000726 -0.025512  1.000000   
EIBO3M  -0.011979 -0.014450 -0.010113 -0.013793  0.013951 -0.004707  0.021393   
Gold     0.037293  0.004764 -0.084953  0.002930  0.003426 -0.016769  0.363884   
Oil      0.058072  0.036930 -0.018069 -0.002890  0.082453  0.014696  0.049036   
MSCI EM -0.000011 -0.000820 -0.064095 -0.016388 -0.003269  0.012727  0.292853   
SPX      0.047305 -0.009077 

In [10]:
# Endogenous variables of the VAR: all 14 return series.
# (VAR is imported in the notebook's import cell.)
var_cols = [
    'OMR3M', 'BHD3M', 'KWD3M', 'QAR3M', 'SAR3M', 'AED3M', 'EUR3M',
    'EIBO3M', 'Gold', 'Oil', 'MSCI EM', 'SPX', 'USDL3M', 'GAS',
]
var_data = df[var_cols]

# Unfitted VAR model; the lag order is chosen and the model is fitted in the next cells.
model = VAR(var_data)

In [11]:
# Select the optimal lag order using information criteria
lag_order = model.select_order(maxlags=10)

# Print the selected lag order
print("Selected Lag Order:", lag_order)

Selected Lag Order: <statsmodels.tsa.vector_ar.var_model.LagOrderResults object. Selected orders are: AIC -> 9, BIC -> 2, FPE -> 9, HQIC ->  3>


In [12]:
# Fit the VAR model
results = model.fit(2)

# Print the summary of the VAR model
print(results.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Mon, 03, Jun, 2024
Time:                     21:01:35
--------------------------------------------------------------------
No. of Equations:         14.0000    BIC:                   -19.9734
Nobs:                     3911.00    HQIC:                  -20.3934
Log likelihood:          -36955.4    FPE:                1.10384e-09
AIC:                     -20.6245    Det(Omega_mle):     9.95380e-10
--------------------------------------------------------------------
Results for equation OMR3M
                coefficient       std. error           t-stat            prob
-----------------------------------------------------------------------------
const              0.001316         0.004769            0.276           0.783
L1.OMR3M          -0.388544         0.016134          -24.082           0.000
L1.BHD3M           0.093795         0.033614            2.790           

In [31]:
# Alternative fit of the same VAR with four lags, kept under a separate name
# so that `results` from the main fit is not overwritten.
# NOTE(review): a lag order of 4 matches none of the criteria printed by
# select_order (AIC 9, BIC 2, FPE 9, HQIC 3) - document why it was chosen.
original_results = model.fit(maxlags=4)
print(original_results.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Tue, 04, Jun, 2024
Time:                     21:43:18
--------------------------------------------------------------------
No. of Equations:         14.0000    BIC:                   -19.5543
Nobs:                     3909.00    HQIC:                  -20.3802
Log likelihood:          -36133.8    FPE:                8.94756e-10
AIC:                     -20.8345    Det(Omega_mle):     7.30610e-10
--------------------------------------------------------------------
Results for equation OMR3M
                coefficient       std. error           t-stat            prob
-----------------------------------------------------------------------------
const              0.002090         0.004721            0.443           0.658
L1.OMR3M          -0.405079         0.016120          -25.129           0.000
L1.BHD3M           0.108953         0.034037            3.201           

In [30]:
def granger_pvalue(data, caused, causing, lag=2):
    """Return the SSR F-test p-value for "`causing` Granger-causes `caused`".

    grangercausalitytests checks whether the series in the SECOND column helps
    to predict the series in the FIRST column, so the frame is passed in the
    order [caused, causing]. The result is keyed by lag; element [0] of each
    entry is the dict of test results and 'ssr_ftest'[1] is its p-value.
    """
    tests = grangercausalitytests(data[[caused, causing]], maxlag=lag, verbose=False)
    return tests[lag][0]['ssr_ftest'][1]


# Interest-rate series tested against Oil
# (grangercausalitytests is imported in the notebook's import cell).
lll = ['AED3M', 'BHD3M', 'OMR3M', 'QAR3M', 'SAR3M', 'KWD3M', 'EIBO3M']

# One row per rate, one column per direction of causality, so each p-value is
# labeled instead of being printed as an anonymous alternating list.
granger_pvalues = pd.DataFrame(
    {
        'rate -> Oil': [granger_pvalue(df, 'Oil', rate) for rate in lll],
        'Oil -> rate': [granger_pvalue(df, rate, 'Oil') for rate in lll],
    },
    index=lll,
)
granger_pvalues.round(2)


0.67
0.31
0.03
0.48
0.0
0.01
0.03
0.11
0.67
0.0
0.12
0.0
0.03
0.48




In [19]:
# Get the p-values for each coefficient
p_values = results.pvalues

# Filter the significant parameters based on a significance level (e.g., 0.05)
significant_params = p_values[p_values < 0.05]

# List the significant parameters
significant_params_list = significant_params.index.tolist()
print("Significant Parameters:", significant_params_list)

Significant Parameters: ['const', 'L1.OMR3M', 'L1.BHD3M', 'L1.KWD3M', 'L1.QAR3M', 'L1.SAR3M', 'L1.AED3M', 'L1.EUR3M', 'L1.EIBO3M', 'L1.Gold', 'L1.Oil', 'L1.MSCI EM', 'L1.SPX', 'L1.USDL3M', 'L1.GAS', 'L2.OMR3M', 'L2.BHD3M', 'L2.KWD3M', 'L2.QAR3M', 'L2.SAR3M', 'L2.AED3M', 'L2.EUR3M', 'L2.EIBO3M', 'L2.Gold', 'L2.Oil', 'L2.MSCI EM', 'L2.SPX', 'L2.USDL3M', 'L2.GAS']


In [59]:
# Filter the parameters based on significance level
significance_level = 0.05
significant_parameters = results.params[results.pvalues < significance_level]

# Print the significant parameters
significant_parameters.dropna(how='all', inplace=True)
significant_parameters = significant_parameters.fillna('')
significant_parameters

Unnamed: 0,OMR3M,BHD3M,KWD3M,QAR3M,SAR3M,AED3M,EUR3M,EIBO3M,Gold,Oil,MSCI EM,SPX,USDL3M,GAS
const,,,,,,,,,,,,0.048239,,
L1.OMR3M,-0.388544,0.026061,0.035832,,0.00262,,,,-0.25742,-0.773951,-0.306342,-0.438931,,
L1.BHD3M,0.093795,-0.415733,,,,,,,,,,,,
L1.KWD3M,0.043305,,-0.258733,,,-0.001989,,,-0.166321,,,0.232385,0.320108,0.534497
L1.QAR3M,0.036584,,,-0.445284,,,,0.469332,,,,,,
L1.SAR3M,0.584286,0.256249,,,-0.428796,0.016081,,,,,,,,
L1.AED3M,,,-1.053435,,,-0.559541,2.0848,,2.956387,,,,,
L1.EUR3M,,,-0.040752,,,,,,,,0.13215,,,
L1.EIBO3M,-0.002523,,,,,,,-0.367572,,,,,0.027283,
L1.Gold,,,,,,,,,,0.227864,,,,


In [64]:
# Coefficient table restricted to the estimates significant at the 1% level.
significance_level = 0.01

# Hide every coefficient whose p-value is not below the threshold, remove the
# parameters left without any significant estimate, and show the remaining
# gaps as empty cells.
significant_parameters = (
    results.params
    .where(results.pvalues < significance_level)
    .dropna(how='all')
    .fillna('')
)
significant_parameters

Unnamed: 0,OMR3M,BHD3M,KWD3M,QAR3M,SAR3M,AED3M,EUR3M,EIBO3M,Gold,Oil,MSCI EM,SPX,USDL3M,GAS
const,,,,,,,,,,,,0.048239,,
L1.OMR3M,-0.388544,0.026061,0.035832,,,,,,-0.25742,,-0.306342,-0.438931,,
L1.BHD3M,0.093795,-0.415733,,,,,,,,,,,,
L1.KWD3M,,,-0.258733,,,-0.001989,,,,,,0.232385,0.320108,
L1.QAR3M,,,,-0.445284,,,,,,,,,,
L1.SAR3M,0.584286,0.256249,,,-0.428796,,,,,,,,,
L1.AED3M,,,-1.053435,,,-0.559541,2.0848,,,,,,,
L1.EUR3M,,,-0.040752,,,,,,,,0.13215,,,
L1.EIBO3M,,,,,,,,-0.367572,,,,,0.027283,
L1.Oil,,,,,,,,,,-0.253198,0.009668,0.01004,,
