In [24]:
import pandas as pd
import os
import numpy as np
!pip install openpyxl

# Define file path
excel_file = os.path.join(r'ImportFile.xlsx')

# Import data
GDP_dat = pd.read_excel(excel_file, sheet_name='GDP')
Emp_dat = pd.read_excel(excel_file, sheet_name='Emp')
Cons_dat = pd.read_excel(excel_file, sheet_name='Cons')
IPExp_dat = pd.read_excel(excel_file, sheet_name='IPExp')

# Convert 'Date' columns to datetime
GDP_dat['Date'] = pd.to_datetime(GDP_dat['Date'])
Emp_dat['Date'] = pd.to_datetime(Emp_dat['Date'])
Cons_dat['Date'] = pd.to_datetime(Cons_dat['Date'])
IPExp_dat['Date'] = pd.to_datetime(IPExp_dat['Date'])

# Merge all datasets on 'Date'
data = GDP_dat[['Date', 'GDP']].merge(Emp_dat, on='Date', how='left')
data = data.merge(Cons_dat, on='Date', how='left')
data = data.merge(IPExp_dat, on='Date', how='left')

# Set 'Date' as index if desired
data.set_index('Date', inplace=True)

# Check data
print(data.head())


                                    GDP    U  Claims      Cons  Sentiment  \
Date                                                                        
1970-01-01 00:00:00.000001995  0.898704  5.6  333500 -0.004842       97.6   
1970-01-01 00:00:00.000001995  0.898704  5.6  333500 -0.004842       97.6   
1970-01-01 00:00:00.000001995  0.898704  5.6  333500 -0.004842       97.6   
1970-01-01 00:00:00.000001995  0.898704  5.6  333500 -0.004842       97.6   
1970-01-01 00:00:00.000001995  0.898704  5.6  333500 -0.004842       97.6   

                                     IP       Exp  
Date                                               
1970-01-01 00:00:00.000001995  0.182841  0.466375  
1970-01-01 00:00:00.000001995 -0.162622 -0.064958  
1970-01-01 00:00:00.000001995  0.144061  2.016583  
1970-01-01 00:00:00.000001995 -0.063889  1.126669  
1970-01-01 00:00:00.000001995  0.339689  1.046501  


In [25]:
data = data[~data.index.duplicated(keep='first')]
data.head()

Unnamed: 0_level_0,GDP,U,Claims,Cons,Sentiment,IP,Exp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01 00:00:00.000001995,0.898704,5.6,333500,-0.004842,97.6,0.182841,0.466375
1970-01-01 00:00:00.000001996,1.233592,5.6,374000,-0.451704,89.3,-0.65367,-1.11769
1970-01-01 00:00:00.000001997,1.245604,5.3,339000,0.473086,97.4,0.141756,0.093353
1970-01-01 00:00:00.000001998,1.14732,4.6,321200,-0.088385,106.6,0.513545,-0.037956
1970-01-01 00:00:00.000001999,1.326373,4.3,326200,-0.143855,103.9,0.468131,0.116098


In [26]:
from statsmodels.tsa.stattools import adfuller

def test_stationarity(series, signif=0.05):
    result = adfuller(series, autolag='AIC')
    print(f"ADF Statistic: {result[0]}")
    print(f"p-value: {result[1]}")
    if result[1] < signif:
        print("Series is stationary")
    else:
        print("Series is not stationary")

for col in data.columns:
    print(f"Testing stationarity for {col}")
    test_stationarity(data[col])


Testing stationarity for GDP
ADF Statistic: -2.738982125202568
p-value: 0.06753213969984018
Series is not stationary
Testing stationarity for U
ADF Statistic: -2.4197438449834996
p-value: 0.13624121735107497
Series is not stationary
Testing stationarity for Claims
ADF Statistic: -2.0642359947718107
p-value: 0.2591364335970392
Series is not stationary
Testing stationarity for Cons
ADF Statistic: -4.771310383674473
p-value: 6.169196035797275e-05
Series is stationary
Testing stationarity for Sentiment
ADF Statistic: -1.992699716415523
p-value: 0.2897831642920363
Series is not stationary
Testing stationarity for IP
ADF Statistic: -4.5218419933929574
p-value: 0.0001795220495981995
Series is stationary
Testing stationarity for Exp
ADF Statistic: -2.029698128901216
p-value: 0.2737051939553059
Series is not stationary


In [27]:
# First differencing of non-stationary series
data_diff = data.copy()  # Create a copy to store the differenced data
non_stationary_columns = ['GDP', 'U', 'Claims', 'Sentiment', 'Exp']  # List non-stationary columns based on ADF test results

for col in non_stationary_columns:
    data_diff[col] = data[col].diff().dropna()  # Apply differencing

# Drop the first row after differencing (it will be NaN)
data_diff = data_diff.dropna()

# Check stationarity again
for col in data_diff.columns:
    print(f"Re-testing stationarity for {col}")
    test_stationarity(data_diff[col])


Re-testing stationarity for GDP
ADF Statistic: -4.559666033792099
p-value: 0.0001531829494081604
Series is stationary
Re-testing stationarity for U
ADF Statistic: -3.399748388603772
p-value: 0.010967452014455316
Series is stationary
Re-testing stationarity for Claims
ADF Statistic: -5.392131194274992
p-value: 3.5185167789439313e-06
Series is stationary
Re-testing stationarity for Cons
ADF Statistic: -2.899673986150529
p-value: 0.0453859215834954
Series is stationary
Re-testing stationarity for Sentiment
ADF Statistic: -5.50904405528348
p-value: 1.991315403717978e-06
Series is stationary
Re-testing stationarity for IP
ADF Statistic: -4.536997408237253
p-value: 0.0001684885144366308
Series is stationary
Re-testing stationarity for Exp
ADF Statistic: -6.141125285480566
p-value: 7.966634521832025e-08
Series is stationary


In [28]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
import matplotlib.pyplot as plt

In [38]:
print("Initial data overview:")
print(data_diff.head())
print("Data types:", data_diff.dtypes)
print("Data shape:", data_diff.shape)

# Define the window size
window_size = 12  # Adjust based on your dataset size

# Prepare to store FEVD results
fevd_results = []

# Rolling window analysis
for start in range(len(data_diff) - window_size + 1):
    end = start + window_size
    window_data = data_diff.iloc[start:end]

    # Check if there is enough data in each window
    if window_data.dropna().shape[0] < window_size or window_data.shape[1] < 2:
        print(f"Skipping window due to insufficient data or variables: Start {start}, End {end}")
        continue

    try:
        # Fit the VAR model
        model = VAR(window_data)
        results = model.fit(maxlags=1, ic='aic')  # Using AIC to determine the best number of lags

        # Compute FEVD and store results
        fevd = results.fevd(10)  # Compute FEVD for up to 10 periods
        fevd_results.append(fevd.decomp[-1, :, :])  # Store the FEVD results for the longest horizon

        # Optionally, print summary for the last window
        if start == len(data_diff) - window_size - 1:
            print(results.summary())

    except Exception as e:
        print(f"Error in VAR model fitting for window starting at index {start}: {e}")

# Convert list of FEVD results to an array
fevd_array = np.array(fevd_results)
print("FEVD array shape:", fevd_array.shape)

# Calculate the FEVD index as the mean across all variables and time periods
if fevd_array.ndim == 3:
    index_values = np.mean(fevd_array, axis=(1, 2))
    # Plotting the index over time
    plt.figure(figsize=(14, 7))
    plt.plot(range(len(index_values)), index_values, label='FEVD Index', marker='o')
    plt.title('Rolling Window FEVD Index Over Time')
    plt.xlabel('Window Starting Index')
    plt.ylabel('FEVD Index Value')
    plt.legend()
    plt.grid(True)
    plt.show()
else:
    print("FEVD array has unexpected dimensions:", fevd_array.shape)

Initial data overview:
                                    GDP    U   Claims      Cons  Sentiment  \
Date                                                                         
1970-01-01 00:00:00.000001996  0.334888  0.0  40500.0 -0.451704       -8.3   

                                    IP       Exp  
Date                                              
1970-01-01 00:00:00.000001996 -0.65367 -1.584065  
Data types: GDP          float64
U            float64
Claims       float64
Cons         float64
Sentiment    float64
IP           float64
Exp          float64
dtype: object
Data shape: (1, 7)
FEVD array shape: (0,)
FEVD array has unexpected dimensions: (0,)


In [39]:

data_diff.index = pd.to_datetime(data_diff.index)  # Ensure the index is datetime
if not data_diff.index.freq:
    data_diff = data_diff.asfreq('D')  # Setting to daily frequency, adjust as needed

# Print initial data setup
print("Initial data overview:")
print(data_diff.head())
print("Data types:", data_diff.dtypes)
print("Data shape:", data_diff.shape)

# Define the window size
window_size = 12  # Set a realistic window size based on your dataset size

# Prepare to store FEVD results
fevd_results = []

# Rolling window analysis
for start in range(len(data_diff) - window_size + 1):
    end = start + window_size
    window_data = data_diff.iloc[start:end].dropna()

    # Ensure each window has enough data points and variables
    if window_data.shape[0] < window_size or window_data.shape[1] < 2:
        print(f"Skipping window due to insufficient data: Start {start}, End {end}")
        continue

    try:
        # Fit the VAR model
        model = VAR(window_data)
        results = model.fit(maxlags=min(window_size-1, 2), ic='aic')  # Use appropriate maxlags

        # Compute FEVD and store results
        fevd = results.fevd(10)  # Compute FEVD for up to 10 periods
        fevd_results.append(fevd.decomp[-1, :, :])  # Store the FEVD results for the longest horizon

    except Exception as e:
        print(f"Error in VAR model fitting for window starting at index {start}: {e}")

# Convert list of FEVD results to an array
if fevd_results:
    fevd_array = np.array(fevd_results)
    if fevd_array.ndim == 3:
        index_values = np.mean(fevd_array, axis=(1, 2))
        # Plotting the index over time
        plt.figure(figsize=(14, 7))
        plt.plot(range(len(index_values)), index_values, label='FEVD Index', marker='o')
        plt.title('Rolling Window FEVD Index Over Time')
        plt.xlabel('Window Start Index')
        plt.ylabel('Index Value')
        plt.legend()
        plt.grid(True)
        plt.show()
else:
    print("No FEVD results were computed. Check model fitting steps.")


Initial data overview:
                                    GDP    U   Claims      Cons  Sentiment  \
Date                                                                         
1970-01-01 00:00:00.000001996  0.334888  0.0  40500.0 -0.451704       -8.3   

                                    IP       Exp  
Date                                              
1970-01-01 00:00:00.000001996 -0.65367 -1.584065  
Data types: GDP          float64
U            float64
Claims       float64
Cons         float64
Sentiment    float64
IP           float64
Exp          float64
dtype: object
Data shape: (1, 7)
No FEVD results were computed. Check model fitting steps.
