### Investigating Oil Price fluctuations on the Saudi Arabian economy 

In [218]:
import numpy as np 
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.ardl import ardl_select_order
from statsmodels.tsa.ardl import ARDL
import math
from statsmodels.tsa.seasonal import seasonal_decompose

In [5]:
# Parse the XML file
tree = ET.parse('opec_daily.xml')
root = tree.getroot()
namespace = {'ns': 'http://tempuri.org/basketDayArchives.xsd'}

# Initialize lists to store the data
dates = []
values = []

# Loop through and extract the date (data) and price (val)
for basket in root.findall('ns:BasketList', namespace): 
    date = basket.get('data') 
    value = basket.get('val')  
    
    # Append the values to the lists
    dates.append(date)
    values.append(float(value))  # Convert to float for numeric operations

# Create a pandas DataFrame
opec = pd.DataFrame({'Date': dates, 'Oil_Price': values})

# Convert 'Date' to a pandas datetime format
opec['Date'] = pd.to_datetime(opec['Date'])

# Display the DataFrame
print(opec.head())

        Date  Oil_Price
0 2003-01-02      30.05
1 2003-01-03      30.83
2 2003-01-06      30.71
3 2003-01-07      29.72
4 2003-01-08      28.86


In [8]:
opec.to_csv('oil_prices.csv', index=False)

In [6]:
opec = pd.read_csv('oil_prices.csv')

In [7]:
opec['Date'] = pd.to_datetime(opec['Date'])
opec.set_index('Date', inplace=True)
q_opec = opec.resample('QE').mean()
q_opec = q_opec.loc['2010-01-01':'2023-09-30']
q_opec.reset_index(inplace=True)
q_opec['Quarter'] = q_opec['Date'].dt.to_period('Q')
q_opec.head()

Unnamed: 0,Date,Oil_Price,Quarter
0,2010-03-31,75.490476,2010Q1
1,2010-06-30,76.530937,2010Q2
2,2010-09-30,73.764545,2010Q3
3,2010-12-31,83.881061,2010Q4
4,2011-03-31,101.270781,2011Q1


In [8]:
core_data = pd.read_csv("core_data.csv")
core_data['Quarter'] = pd.PeriodIndex(core_data['Date'], freq='Q')
core_data['Quarter_Start_Date'] = core_data['Quarter'].dt.start_time
core_data.head()

Unnamed: 0,Date,Gross Domestic Product,GDP Oil Activities,GDP Non-Oil Activities,observation_date,exports_percent_gdp,Exports,CPI,Quarter,Quarter_Start_Date
0,2010-Q1,480428.86,201371.13,208194.93,01/01/2010,45.398501,21810749.91,80.878583,2010Q1,2010-01-01
1,2010-Q2,477912.07,212310.07,199158.91,01/04/2010,47.999415,22939499.88,81.904799,2010Q2,2010-04-01
2,2010-Q3,479432.42,210994.12,204003.49,01/07/2010,47.232413,22644749.98,83.448397,2010Q3,2010-07-01
3,2010-Q4,543004.01,250929.96,198283.07,01/10/2010,49.324221,26783249.77,84.534475,2010Q4,2010-10-01
4,2011-Q1,603134.11,283149.93,240588.73,01/01/2011,50.55592,30492000.06,86.538667,2011Q1,2011-01-01


In [13]:
reer = pd.read_csv("reer_r.csv")

In [15]:
reer['date'] = pd.to_datetime(reer['date'])
reer.set_index('date', inplace=True)
reer = reer.resample('QE').mean()
reer = reer.reset_index()
reer['Quarter'] = reer['date'].dt.to_period('Q')
reer = reer.head(-4)
reer.head()

Unnamed: 0,date,REER,Quarter
0,2010-03-31,84.77,2010Q1
1,2010-06-30,87.746667,2010Q2
2,2010-09-30,87.71,2010Q3
3,2010-12-31,85.02,2010Q4
4,2011-03-31,85.13,2011Q1


In [179]:
total_exports = pd.read_csv('total_exports_only.csv')

In [180]:
# Ensuring the dataframes are the same length (use same dates)
print(len(reer))
print(len(core_data))
print(len(q_opec))
print(len(total_exports))

55
55
55
55


In [251]:
varz = pd.DataFrame(data = { 'Quarter': reer['Quarter'], 'REER': reer["REER"], "GDP": core_data['Gross Domestic Product'], 'GDP Non-Oil' : core_data['GDP Non-Oil Activities'],
                            'GDP Oil': core_data['GDP Oil Activities'], 'CPI': core_data['CPI'], 'OPEC' : q_opec['Oil_Price'], 'Exports': total_exports['Total_Exports']})

In [244]:
varz.head()

Unnamed: 0,Quarter,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Exports
0,2010Q1,84.77,480428.86,208194.93,201371.13,80.878583,75.490476,225340.0
1,2010Q2,87.746667,477912.07,199158.91,212310.07,81.904799,76.530937,237786.0
2,2010Q3,87.71,479432.42,204003.49,210994.12,83.448397,73.764545,236480.0
3,2010Q4,85.02,543004.01,198283.07,250929.96,84.534475,83.881061,282260.0
4,2011Q1,85.13,603134.11,240588.73,283149.93,86.538667,101.270781,246709.6


In [245]:
dfcorr = varz[['REER', 'GDP', 'GDP Non-Oil', 'GDP Oil', 'CPI', 'OPEC', 'Exports']].corr()
dfcorr.style.background_gradient(cmap='coolwarm').format(precision=2)

Unnamed: 0,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Exports
REER,1.0,0.5,0.88,-0.34,0.86,-0.66,0.42
GDP,0.5,1.0,0.81,0.6,0.8,0.27,0.53
GDP Non-Oil,0.88,0.81,1.0,0.02,0.97,-0.32,0.48
GDP Oil,-0.34,0.6,0.02,1.0,0.04,0.92,0.29
CPI,0.86,0.8,0.97,0.04,1.0,-0.28,0.44
OPEC,-0.66,0.27,-0.32,0.92,-0.28,1.0,0.01
Exports,0.42,0.53,0.48,0.29,0.44,0.01,1.0


Need to convert nominal prices to real

In [252]:
varz_re = varz[['Quarter', 'GDP', 'GDP Non-Oil', 'GDP Oil', 'REER', 'CPI', 'OPEC', 'Exports']]
varz_re['GDP'] = 109.501439 * varz['GDP'] / varz['CPI']
varz_re['GDP Non-Oil'] = 109.501439 * varz['GDP Non-Oil'] / varz['CPI']
varz_re['GDP Oil'] = 109.501439 * varz['GDP Oil'] / varz['CPI']
varz_re['OPEC'] = 109.501439 * varz['OPEC'] / varz['CPI']
varz_re.head()

Unnamed: 0,Quarter,GDP,GDP Non-Oil,GDP Oil,REER,CPI,OPEC,Exports
0,2010Q1,650452.18814,281874.922706,272636.186212,84.77,80.878583,102.206486,225340.0
1,2010Q2,638937.64874,266262.632122,283844.885796,87.746667,81.904799,102.316933,237786.0
2,2010Q3,629113.819797,267694.48517,276867.627742,87.71,83.448397,96.794236,236480.0
3,2010Q4,703378.361966,256845.287353,325041.253808,85.02,84.534475,108.655041,282260.0
4,2011Q1,763173.914001,304428.21869,358282.903826,85.13,86.538667,128.142675,246709.6


Then remove the seasonality 

In [253]:
varz_ds = pd.DataFrame()
varz_ds['Quarter'] = varz['Quarter']

columns_to_ds = ['GDP', 'GDP Non-Oil', 'GDP Oil', 'REER', 'CPI', 'OPEC', 'Exports']

for col in columns_to_ds:
    result = seasonal_decompose(varz_re[col], model='additive', period=4)  
    varz_ds[col] = varz_re[col] - result.seasonal
    
#varz_ds.index = varz.index

varz_ds.head()

Unnamed: 0,Quarter,GDP,GDP Non-Oil,GDP Oil,REER,CPI,OPEC,Exports
0,2010Q1,648224.248621,270740.787008,280820.127266,84.910097,80.807196,102.583773,233321.241927
1,2010Q2,638209.329087,273623.489461,278404.643673,88.136985,82.075866,100.753092,242658.369411
2,2010Q3,632283.555277,268249.783939,274238.79133,87.349167,83.255991,96.458066,248071.761158
3,2010Q4,703164.885657,260063.266943,324926.391289,84.850417,84.627201,110.177765,257814.627504
4,2011Q1,760945.974482,293294.082992,366466.84488,85.270097,86.467279,128.519963,254690.841927


In [249]:
dfcorrds = varz_ds[['REER', 'GDP', 'GDP Non-Oil', 'GDP Oil', 'CPI', 'OPEC', 'Exports']].corr()
dfcorrds.style.background_gradient(cmap='coolwarm').format(precision=2)

Unnamed: 0,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Exports
REER,1.0,0.24,0.88,-0.57,0.86,-0.8,0.5
GDP,0.24,1.0,0.59,0.61,0.6,0.32,0.62
GDP Non-Oil,0.88,0.59,1.0,-0.26,0.94,-0.54,0.62
GDP Oil,-0.57,0.61,-0.26,1.0,-0.22,0.93,0.19
CPI,0.86,0.6,0.94,-0.22,1.0,-0.49,0.54
OPEC,-0.8,0.32,-0.54,0.93,-0.49,1.0,-0.11
Exports,0.5,0.62,0.62,0.19,0.54,-0.11,1.0


In [None]:
varz['log_GDP'] = np.log(varz['GDP'])
varz['log_Non_oil_GDP'] = np.log(varz['GDP Non-Oil'])
varz['log_Oil_GDP'] = np.log(varz['GDP Oil'])
varz['log_REER'] = np.log(varz['REER'])
varz['log_CPI'] = np.log(varz['CPI'])
varz['log_OPEC'] = np.log(varz['OPEC'])
varz['log_Exports'] = np.log(varz['Exports'])

varz['dlog_GDP'] = varz['log_GDP'].diff()
varz['dlog_Non_oil_GDP'] = varz['log_Non_oil_GDP'].diff()
varz['dlog_Oil_GDP'] = varz['log_Oil_GDP'].diff()
varz['dlog_REER'] = varz['log_REER'].diff()
varz['dlog_CPI'] = varz['log_CPI'].diff()
varz['dlog_OPEC'] = varz['log_OPEC'].diff()
varz['dlog_Exports'] = varz['log_Exports'].diff()

varz = varz.drop(0, axis=0)

varz.head()


In [254]:
varz_ds['log_GDP'] = np.log(varz_ds['GDP'])
varz_ds['log_Non_oil_GDP'] = np.log(varz_ds['GDP Non-Oil'])
varz_ds['log_Oil_GDP'] = np.log(varz_ds['GDP Oil'])
varz_ds['log_REER'] = np.log(varz_ds['REER'])
varz_ds['log_CPI'] = np.log(varz_ds['CPI'])
varz_ds['log_OPEC'] = np.log(varz_ds['OPEC'])
varz_ds['log_Exports'] = np.log(varz_ds['Exports'])

varz_ds['dlog_GDP'] = varz_ds['log_GDP'].diff()
varz_ds['dlog_Non_oil_GDP'] = varz_ds['log_Non_oil_GDP'].diff()
varz_ds['dlog_Oil_GDP'] = varz_ds['log_Oil_GDP'].diff()
varz_ds['dlog_REER'] = varz_ds['log_REER'].diff()
varz_ds['dlog_CPI'] = varz_ds['log_CPI'].diff()
varz_ds['dlog_OPEC'] = varz_ds['log_OPEC'].diff()
varz_ds['dlog_Exports'] = varz_ds['log_Exports'].diff()

varz_ds = varz_ds.drop(0, axis=0)

varz_ds.head()

Unnamed: 0,Quarter,GDP,GDP Non-Oil,GDP Oil,REER,CPI,OPEC,Exports,log_GDP,log_Non_oil_GDP,...,log_CPI,log_OPEC,log_Exports,dlog_GDP,dlog_Non_oil_GDP,dlog_Oil_GDP,dlog_REER,dlog_CPI,dlog_OPEC,dlog_Exports
1,2010Q2,638209.329087,273623.489461,278404.643673,88.136985,82.075866,100.753092,242658.369411,13.366422,12.519508,...,4.407644,4.612673,12.39941,-0.01557,0.010591,-0.008639,0.037299,0.015578,-0.018007,0.039238
2,2010Q3,632283.555277,268249.783939,274238.79133,87.349167,83.255991,96.458066,248071.761158,13.357093,12.499674,...,4.42192,4.569108,12.421473,-0.009328,-0.019834,-0.015076,-0.008979,0.014276,-0.043565,0.022063
3,2010Q4,703164.885657,260063.266943,324926.391289,84.850417,84.627201,110.177765,257814.627504,13.463347,12.46868,...,4.438256,4.702095,12.459996,0.106253,-0.030994,0.169599,-0.029024,0.016336,0.132987,0.038523
4,2011Q1,760945.974482,293294.082992,366466.84488,85.270097,86.467279,128.519963,254690.841927,13.542318,12.588931,...,4.459766,4.856084,12.447806,0.078971,0.120251,0.120309,0.004934,0.02151,0.153989,-0.01219
5,2011Q2,799515.561814,297099.540262,395689.693438,83.286985,87.362036,139.318089,262463.269411,13.591761,12.601823,...,4.470061,4.93676,12.477866,0.049444,0.012891,0.076722,-0.023532,0.010295,0.080675,0.030061


In [256]:
varz_ds.tail()

Unnamed: 0,Quarter,GDP,GDP Non-Oil,GDP Oil,REER,CPI,OPEC,Exports,log_GDP,log_Non_oil_GDP,...,log_CPI,log_OPEC,log_Exports,dlog_GDP,dlog_Non_oil_GDP,dlog_Oil_GDP,dlog_REER,dlog_CPI,dlog_OPEC,dlog_Exports
50,2022Q3,1087525.0,456806.323588,426998.13048,103.402501,107.377594,103.312952,329040.761158,13.899415,13.032015,...,4.676352,4.637763,12.703937,-0.028282,0.035557,-0.111878,0.026315,0.007008,-0.100883,0.03684
51,2022Q4,1035324.0,473288.50637,356406.497475,104.527084,108.389393,90.208948,312188.127504,13.850225,13.06746,...,4.68573,4.502129,12.651361,-0.04919,0.035446,-0.180708,0.010817,0.009379,-0.135634,-0.052576
52,2023Q1,1016472.0,464066.505479,338102.389772,101.126763,108.648612,81.517897,318832.141927,13.831848,13.047783,...,4.688119,4.400823,12.67242,-0.018377,-0.019677,-0.052723,-0.033071,0.002389,-0.101306,0.021059
53,2023Q2,978934.5,471059.653733,301200.471111,101.970319,109.541067,76.60139,317247.969411,13.79422,13.06274,...,4.6963,4.338615,12.667439,-0.037628,0.014957,-0.115573,0.008307,0.008181,-0.062207,-0.004981
54,2023Q3,998168.3,466422.5916,315572.137573,102.149167,109.510927,87.1559,275334.861158,13.813677,13.052847,...,4.696024,4.467698,12.525743,0.019457,-0.009893,0.046611,0.001752,-0.000275,0.129083,-0.141696


In [214]:
def adf_test(series, name):
    result = adfuller(series)
    print(f'ADF Test for {name}')
    print(f'p-value: {result[1]}')
    
# Exports are I(0) 
adf_test(varz_ds['log_GDP'], 'log GDP')
adf_test(varz_ds['log_Non_oil_GDP'], 'log Non-oil GDP')
adf_test(varz_ds['log_REER'], 'log REER')
adf_test(varz_ds['log_CPI'], 'log CPI')
adf_test(varz_ds['log_OPEC'], 'log OPEC Basket Price')
adf_test(varz_ds['log_Exports'], 'log Exports')

ADF Test for log GDP
p-value: 0.34271179148584174
ADF Test for log Non-oil GDP
p-value: 0.6917599454647636
ADF Test for log REER
p-value: 0.5793356650016118
ADF Test for log CPI
p-value: 0.42173497387941067
ADF Test for log OPEC Basket Price
p-value: 0.40299314137945436
ADF Test for log Exports
p-value: 0.0004345647855202614


In [255]:
varz_ds.to_csv('metrics_data2.csv', index=False)

Adding Non-oil and Oil Exports. This reduces the sample size

In [17]:
exports = pd.read_csv('exports_r.csv')

In [18]:
exports['Date'] = pd.to_datetime(exports['Date'], format='%Y / %m')
exports.set_index('Date', inplace=True)
exports= exports.resample('QE').sum()
exports = exports.reset_index()
exports['Quarter'] = exports['Date'].dt.to_period('Q')
exports.head()


  exports= exports.resample('Q').sum()


Unnamed: 0,Date,Total Exports,Oil Exports,Non-oil Exports,Quarter
0,2015-03-31,192849.70485,145116.05921,47733.64565,2015Q1
1,2015-06-30,220661.9457,172703.50772,47958.438,2015Q2
2,2015-09-30,184956.45927,138794.18878,46162.27048,2015Q3
3,2015-12-31,164844.9527,116934.80679,47910.14591,2015Q4
4,2016-03-31,140185.63677,97158.41141,43027.22536,2016Q1


'Total Exports': exports['Total Exports'], 'Oil Exports': exports['Oil Exports'],
                            'Non-Oil Exports': exports['Non-oil Exports']

In [240]:
varz = pd.DataFrame(data = { 'Quarter': reer['Quarter'], 'REER': reer["REER"], "GDP": core_data['Gross Domestic Product'], 'GDP Non-Oil' : core_data['GDP Non-Oil Activities'],
                            'GDP Oil': core_data['GDP Oil Activities'], 'CPI': core_data['CPI'],
                            'OPEC' : q_opec['Oil_Price']})
varz2 = pd.merge(varz, exports, on='Quarter')
varz2 = varz2.drop(labels = 'Date', axis=1)
varz2.head()

Unnamed: 0,Quarter,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Total Exports,Oil Exports,Non-oil Exports
0,2015Q1,97.29,654710.35,347095.72,163823.38,95.96,50.27381,192849.70485,145116.05921,47733.64565
1,2015Q2,97.493333,652337.08,334515.58,192997.26,96.236667,59.895,220661.9457,172703.50772,47958.438
2,2015Q3,99.21,619818.18,338921.32,159034.1,96.576667,48.291061,184956.45927,138794.18878,46162.27048
3,2015Q4,100.5,583700.82,336757.86,136359.71,96.95,39.706615,164844.9527,116934.80679,47910.14591
4,2016Q1,102.646667,594201.06,354955.37,109462.65,98.42,30.159219,140185.63677,97158.41141,43027.22536


In [241]:
varz2['GDP'] = 109.501439 * varz2['GDP'] / varz2['CPI']
varz2['GDP Non-Oil'] = 109.501439 * varz2['GDP Non-Oil'] / varz2['CPI']
varz2['GDP Oil'] = 109.501439 * varz2['GDP Oil'] / varz2['CPI']
varz2['OPEC'] = 109.501439 * varz2['OPEC'] / varz2['CPI']
varz2['Total Exports'] = 109.501439 * varz2['Total Exports'] / varz2['CPI']
varz2['Oil Exports'] = 109.501439 * varz2['Oil Exports'] / varz2['CPI']
varz2['Non-oil Exports'] = 109.501439 * varz2['Non-oil Exports'] / varz2['CPI']
varz2.head()

Unnamed: 0,Quarter,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Total Exports,Oil Exports,Non-oil Exports
0,2015Q1,97.29,747100.098512,396076.290233,186941.390703,95.96,57.368221,220063.778572,165594.17784,54469.600744
1,2015Q2,97.493333,742251.902988,380623.505005,219599.020044,96.236667,68.150622,251076.86522,196508.080237,54568.785006
2,2015Q3,99.21,702767.914534,384278.869083,180317.496974,96.576667,54.753812,209709.023315,157368.895824,52340.12748
3,2015Q4,100.5,659268.486183,380355.546824,154013.248753,96.95,44.847153,186186.276767,132073.539069,54112.737698
4,2016Q1,102.646667,661104.156933,394920.989593,121787.418124,98.42,33.554947,155969.609362,108097.803905,47871.805457


In [242]:
dfcorr1 = varz2.drop('Quarter', axis=1).corr()
dfcorr1.style.background_gradient(cmap='coolwarm').format(precision=2)

Unnamed: 0,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Total Exports,Oil Exports,Non-oil Exports
REER,1.0,0.17,0.32,0.13,0.46,0.01,0.06,0.06,-0.0
GDP,0.17,1.0,0.86,0.96,0.75,0.94,0.93,0.92,0.89
GDP Non-Oil,0.32,0.86,1.0,0.74,0.75,0.71,0.7,0.69,0.67
GDP Oil,0.13,0.96,0.74,1.0,0.62,0.97,0.98,0.98,0.87
CPI,0.46,0.75,0.75,0.62,1.0,0.6,0.56,0.52,0.69
OPEC,0.01,0.94,0.71,0.97,0.6,1.0,0.98,0.97,0.89
Total Exports,0.06,0.93,0.7,0.98,0.56,0.98,1.0,1.0,0.89
Oil Exports,0.06,0.92,0.69,0.98,0.52,0.97,1.0,1.0,0.85
Non-oil Exports,-0.0,0.89,0.67,0.87,0.69,0.89,0.89,0.85,1.0


In [47]:
varz2['log_GDP'] = np.log(varz2['GDP'])
varz2['log_Non_oil_GDP'] = np.log(varz2['GDP Non-Oil'])
varz2['log_Oil_GDP'] = np.log(varz2['GDP Oil'])
varz2['log_REER'] = np.log(varz2['REER'])
varz2['log_CPI'] = np.log(varz2['CPI'])
varz2['log_OPEC'] = np.log(varz2['OPEC'])
varz2['log_Total_Exports'] = np.log(varz2['Total Exports'])
varz2['log_Oil_Exports'] = np.log(varz2['Oil Exports'])
varz2['log_Non_oil_Exports'] = np.log(varz2['Non-oil Exports'])

varz2['dlog_GDP'] = varz2['log_GDP'].diff()
varz2['dlog_Non_oil_GDP'] = varz2['log_Non_oil_GDP'].diff()
varz2['dlog_Oil_GDP'] = varz2['log_Oil_GDP'].diff()
varz2['dlog_REER'] = varz2['log_REER'].diff()
varz2['dlog_CPI'] = varz2['log_CPI'].diff()
varz2['dlog_OPEC'] = varz2['log_OPEC'].diff()
varz2['dlog_Total_Exports'] = varz2['log_Total_Exports'].diff()
varz2['dlog_Oil_Exports'] = varz2['log_Oil_Exports'].diff()
varz2['dlog_Non_oil_Exports'] = varz2['log_Non_oil_Exports'].diff()

varz2 = varz2.drop(0,axis=0)
varz2.head()


Unnamed: 0,Quarter,REER,GDP,GDP Non-Oil,GDP Oil,CPI,OPEC,Total Exports,Oil Exports,Non-oil Exports,...,log_Non_oil_Exports,dlog_GDP,dlog_Non_oil_GDP,dlog_Oil_GDP,dlog_REER,dlog_CPI,dlog_OPEC,dlog_Total_Exports,dlog_Oil_Exports,dlog_Non_oil_Exports
0,2015Q1,97.29,654710.35,347095.72,163823.38,95.96,50.27381,192849.70485,145116.05921,47733.64565,...,10.773392,,,,,,,,,
1,2015Q2,97.493333,652337.08,334515.58,192997.26,96.236667,59.895,220661.9457,172703.50772,47958.438,...,10.77809,-0.003632,-0.036917,0.163887,0.002088,0.002879,0.175109,0.134721,0.174042,0.004698
2,2015Q3,99.21,619818.18,338921.32,159034.1,96.576667,48.291061,184956.45927,138794.18878,46162.27048,...,10.739918,-0.051135,0.013085,-0.193557,0.017455,0.003527,-0.215347,-0.176511,-0.218584,-0.038172
3,2015Q4,100.5,583700.82,336757.86,136359.71,96.95,39.706615,164844.9527,116934.80679,47910.14591,...,10.777083,-0.060038,-0.006404,-0.153822,0.012919,0.003858,-0.195729,-0.115115,-0.171376,0.037164
4,2016Q1,102.646667,594201.06,354955.37,109462.65,98.42,30.159219,140185.63677,97158.41141,43027.22536,...,10.669588,0.017829,0.052628,-0.219713,0.021135,0.015049,-0.275027,-0.162038,-0.185274,-0.107494


In [51]:
def adf_test(series, name):
    result = adfuller(series)
    print(f'ADF Test for {name}')
    print(f'p-value: {result[1]}')
    

adf_test(varz2['log_GDP'], 'log GDP')
adf_test(varz2['log_Non_oil_GDP'], 'log Non-oil GDP')
adf_test(varz2['log_REER'], 'log REER')
adf_test(varz2['log_CPI'], 'log CPI')
adf_test(varz2['log_OPEC'], 'log OPEC Basket Price')
adf_test(varz2['log_Non_oil_Exports'], 'log Non-oil Exports')

ADF Test for log GDP
p-value: 0.8427522365222927
ADF Test for log Non-oil GDP
p-value: 0.9572193275465566
ADF Test for log REER
p-value: 0.3922375815766411
ADF Test for log CPI
p-value: 0.9394330173502607
ADF Test for log OPEC Basket Price
p-value: 0.48645346853966814
ADF Test for log Non-oil Exports
p-value: 0.9906428363462889


Bounds test for Cointegration

In [72]:
dloggdp_order = ardl_select_order(endog = varz2['dlog_GDP'], 
                                  exog = varz2[['dlog_OPEC', 'dlog_CPI', 'dlog_REER']], 
                                  maxlag=4, maxorder=4)

print(dloggdp_order.ar_lags)
print(dloggdp_order.dl_lags)

[1, 2, 3, 4]
{'dlog_OPEC': [0, 1, 2], 'dlog_CPI': [0, 1, 2, 3, 4]}


In [81]:
gdp_ardl = ARDL(endog= varz2['dlog_GDP'], lags= 4, exog= varz2[['dlog_OPEC', 'dlog_CPI', 'log_OPEC', 'log_CPI']],
                order= {'dlog_OPEC': 2, 'dlog_CPI': 4, 'log_OPEC': 1, 'log_CPI': 1}, causal= False, trend= 'c')

gdp_ardl_results = gdp_ardl.fit()
gdp_ardl_results.summary()

TypeError: ARDL.fit() got an unexpected keyword argument 'exog_oos'

In [121]:
varz2.to_csv('metric_data.csv',index=False)

In [119]:
dlogoilgdp_order = ardl_select_order(endog = varz2['GDP'], 
                                  exog = varz2[['OPEC', 'CPI']], 
                                  maxlag=0, maxorder=0)

print(dlogoilgdp_order.ar_lags)
print(dlogoilgdp_order.dl_lags)

None
{'OPEC': [0], 'CPI': [0]}


In [120]:
dlogoilgdp_order.model.fit().summary()

ValueError: exog_oos must be provided when out-of-sample observations require values of the exog not in the original sample