## Lee TH., Seregina E.: "Combining Forecasts under Structural Breaks Using Graphical LASSO"

### This Python notebook can be used to reproduce the values for RD-FGL time and state breaks in Table 1 and Table 2 for both empirical applications

#### (Please refer to the R notebook to reproduce all other competing methods in both tables)

In [None]:
#install regain if not already installed
%pip install regain==0.3.9

#### Please make sure to place "GL.py" and "TVGL.py" in the same directory as this notebook

In [None]:
#import packages
from __future__ import division
from GL import GraphicalLasso
from TVGL import TimeGraphicalLasso
from regain.datasets import make_dataset
from regain.utils import error_norm_time
import numpy as np 
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy
import scipy.linalg   # SciPy Linear Algebra Library
import pandas as pd
import statistics
import statsmodels.api as sm

import warnings
warnings.filterwarnings("ignore")
from numpy import savetxt

In [None]:
####################FUNCTIONS FOR EMPIRICAL APPLICATION#####################

def portfolios(X,estsigm):
    """Return the global-minimum-variance (GMV) combination weights.

    The weights are ``estsigm @ 1 / (1' @ estsigm @ 1)`` where ``1`` is a
    column of ones, so they sum to one by construction.

    Parameters
    ----------
    X : pandas.DataFrame or 2-D array-like, shape (T, p)
        Only its number of columns ``p`` is used (to size the ones vector).
    estsigm : array-like, shape (p, p)
        Matrix plugged into the formula above.  NOTE(review): despite the
        name, every call site in this notebook passes a precision
        (inverse-covariance) estimate, which is what the GMV formula needs.

    Returns
    -------
    list
        One-element list ``[gmv]`` with ``gmv`` of shape (p, 1).
    """
    # The data themselves are irrelevant for GMV weights; only p is needed.
    p = np.asarray(X).shape[1]
    one = np.ones([p, 1])
    phi = one.T @ estsigm @ one  # 1' * estsigm * 1, shape (1, 1)
    gmv = (estsigm @ one) / phi
    return [gmv]

def CVlasso(X,Y, y2, k1, q, window, forecasters, truers, betas, covariate, alpha_set, beta_set): #X is returns, Y is residuals, K=k,
    """Grid-search the TimeGraphicalLasso penalties (alpha, beta) by hold-out error.

    For every pair in ``alpha_set`` x ``beta_set`` the function fits
    ``TimeGraphicalLasso`` (imported from the local ``TVGL`` module) on the
    first ``window - q`` rows of ``Y``/``y2``, turns the last fitted
    precision matrix into combination weights via ``portfolios``, and scores
    the weights by the mean squared error of the combined forecast on the
    rows of ``forecasters``/``truers`` from position ``window - q`` onward.

    Parameters
    ----------
    X : pandas.DataFrame
        Sliced to its first ``window - q`` rows and handed to ``portfolios``
        (which only uses the number of columns).
    Y : row-sliceable 2-D data
        Data the graphical lasso is fitted on (factor-model residuals
        according to the inline comment on the signature).
    y2 : 1-D array
        Second argument of ``TimeGraphicalLasso.fit`` -- presumably the
        regime/class label of each row; confirm against TVGL.py.
    k1 : int
        Number of factors; ``k1 == 1`` switches to a scalar reciprocal of the
        factor variance instead of a matrix inverse.
    q : int
        Number of trailing rows held out for evaluation.
    window : int
        Rolling-window size.
    forecasters, truers : pandas.DataFrame
        Individual forecasts and realised values; rows from ``window - q``
        onward are the evaluation sample.
    betas : ndarray, shape (k1, p)
        Factor loadings (transposed).
    covariate : 2-D data, k1 columns
        Estimated factors; used in full (it is NOT sliced to ``window - q``).
    alpha_set, beta_set : ndarray
        Candidate penalty values (must support array indexing).

    Returns
    -------
    numpy.ndarray
        ``np.array([alphaopt1, betaopt1])``; callers read the selected
        values as ``result[0][0]`` and ``result[1][0]``.
    """
    # Training sample: the first (window - q) rows.
    inX = X[0:(window-q)]
    inY = Y[0:(window-q)]
    iny2 = y2[0:(window-q)]
    #################SFEs ARE HERE##################
    # meanSFE[a, b] = mean squared hold-out error for (alpha_set[a], beta_set[b]).
    meanSFE=np.zeros([len(alpha_set),len(beta_set)])
    for alpha in range(0,len(alpha_set)):
        for beta in range(0,len(beta_set)):
            tvfgl = TimeGraphicalLasso(max_iter=100, alpha = alpha_set[alpha], beta = beta_set[beta]).fit(inY, iny2)
        
            # Combine the last fitted precision matrix with the factor part:
            #   Theta - Theta B' [cov(F)^-1 + B Theta B']^-1 B Theta
            # where Theta = tvfgl.precision_[-1], B = betas, F = covariate.
            if k1==1:
                # np.cov of a single column is a scalar, so **(-1) is a plain reciprocal.
                bracket = (np.cov(covariate, y=None, rowvar=False))**(-1)+ betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T
                bracket = bracket.astype(np.float64)
                Thetatvfgl=tvfgl.precision_[tvfgl.precision_.shape[0]-1] - tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T@np.linalg.inv( bracket)@betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]  
            else:
                bracket = np.linalg.inv(np.cov(covariate, y=None, rowvar=False))+betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T
                bracket = bracket.astype(np.float64)
                Thetatvfgl=tvfgl.precision_[tvfgl.precision_.shape[0]-1] - tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T@np.linalg.inv(bracket)@betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]
    ###############################################################

            portfolio_tvfgl = portfolios(inX, Thetatvfgl) #IMPORTANT: use non-standardized returns when computing portfolio weights!!!
            weight_global = portfolio_tvfgl[0].T
            # Hold-out sample: everything from row (window - q) onward.
            competing = forecasters[(window-q):]
            true_y = truers.iloc[(window-q):]
            SFE = []
            w1 = weight_global.T  # back to a (p, 1) column
            for kappa in range(competing.shape[0]):
                # Combined forecast for hold-out row kappa.
                FE=w1.T@competing.iloc[kappa,:]
    #################SFEs ARE HERE##################
                SFE1=(true_y.iloc[kappa,:] - FE)**2
                SFE.append(SFE1)

            meanSFE[alpha,beta] = np.mean(SFE)   
            # Mask NaN cells so they cannot be picked as the minimum.  The array is
            # re-wrapped on every grid step; the mask is recomputed from the data each time.
            meanSFE = np.ma.array(meanSFE, mask=np.isnan(meanSFE)) #NEW (in case any na values are generated)
   
    same = []
    meanSFE_0 = []
    # NOTE(review): `same` is overwritten on every pass, so after the loop it only
    # tells whether the LAST two labels of iny2 differ (it stays [] when iny2 has
    # fewer than two entries).  If the intent was "does iny2 contain any regime
    # change", this should be an any()-style check -- confirm.
    for d in range(0,(len(iny2)-1)):
        if iny2[d] ==iny2[d+1]:
            same = 0
        else:
            same = 1
    if same == 1:     
        # Every (alpha, beta) pair attaining the global minimum is returned (ties give >1 entry).
        [alphaopt1, betaopt1] = [alpha_set[np.where(meanSFE == np.min(meanSFE))[0]], beta_set[np.where(meanSFE == np.min(meanSFE))[1]]]
    else:
        # alpha is chosen within column 1 of the grid (i.e. beta_set[1]) only;
        # this requires len(beta_set) >= 2.
        meanSFE_0 = meanSFE[:,1]
        # beta is taken from the first location of the GLOBAL minimum, not from column 1.
        [alphaopt1, betaopt1] = [alpha_set[np.where(meanSFE_0 == np.min(meanSFE_0))[0]], np.ravel(np.asarray(beta_set[np.where(meanSFE == np.min(meanSFE))[1]][0]))]
    return np.array([alphaopt1,betaopt1])


def CV_gamma(gamma_set,r, r1_cv, r2_cv, k):
    """Pick the down-weighting factor gamma by leave-one-out cross-validation.

    For each candidate ``gamma`` the pre-break block ``r1_cv`` is scaled by
    ``gamma`` and stacked on top of the post-break block ``r2_cv`` with one
    row left out.  The top-``k`` eigenvectors of the stacked Gram matrix are
    used as loadings, and the criterion is the sum over left-out rows of the
    mean squared difference between ``r2_cv`` and the rank-``k`` fit.

    Parameters
    ----------
    gamma_set : numpy.ndarray
        Candidate gamma values (array indexing is applied to it).
    r : object with ``.shape``
        Only ``r.shape[1]`` (number of series entering the criterion) is used.
    r1_cv : numpy.ndarray, shape (T1, N)
        Pre-break observations.
    r2_cv : numpy.ndarray, shape (T2, N)
        Post-break observations.
    k : int
        Number of factors / eigenvectors kept.

    Returns
    -------
    numpy.ndarray
        All entries of ``gamma_set`` attaining the minimum criterion (more
        than one element when there are exact ties).

    Raises
    ------
    ValueError
        If the stacked sample has fewer rows than ``r2_cv`` or ``r`` has more
        columns than ``r2_cv`` (the criterion cannot be evaluated).
    """
    n_series = r.shape[1]
    n_post = r2_cv.shape[0]
    if n_series > r2_cv.shape[1]:
        raise ValueError("r has more columns than r2_cv")

    err_gamma = np.zeros(len(gamma_set))
    for g_idx in range(len(gamma_set)):
        # Scaling the pre-break block does not depend on the left-out row.
        r1_upd = r1_cv * gamma_set[g_idx]
        err_loo = np.zeros(n_post)
        for loo in range(n_post):
            # Leave one post-break observation out and stack the two blocks.
            r2_upd = np.delete(r2_cv, loo, axis=0)
            r_cv = np.concatenate((r1_upd, r2_upd), axis=0)
            if r_cv.shape[0] < n_post:
                raise ValueError(
                    "stacked sample has fewer rows than r2_cv; r1_cv is too short"
                )

            # Principal-component loadings: top-k eigenvectors of r_cv' r_cv.
            eigvals, eigvecs = np.linalg.eigh(np.dot(r_cv.T, r_cv))
            order = eigvals.argsort()[::-1]
            lmb = eigvecs[:, order][:, 0:k]  # N x k
            Fhat = np.dot(r_cv, lmb)         # rows x k

            # NOTE(review): as in the original loops, r2_cv[j] is compared with
            # the fit built from row j of the STACKED sample (whose leading
            # rows are the re-weighted r1_cv rows) -- confirm this pairing.
            fitted = Fhat[:n_post] @ lmb.T
            resid = r2_cv[:, :n_series] - fitted[:, :n_series]
            err_loo[loo] = np.sum(resid ** 2) / (n_series * n_post)
        # Sum (not mean) over left-out rows; the argmin is the same either way.
        err_gamma[g_idx] = np.sum(err_loo)
    return gamma_set[np.where(err_gamma == np.min(err_gamma))[0]]


def CV_break(j, t1_set,t2_set, trying, set1, window, forecasters, truers, k1):       
    """Search a grid of candidate break dates (t1, t2) for rolling window ``j``.

    For every pair in ``t1_set`` x ``t2_set`` the function
      1. labels each row of ``trying`` with regime 0 / 1 / 2,
      2. cross-validates the down-weighting factor via ``CV_gamma`` when the
         window ``[j, j + window)`` straddles a candidate break,
      3. re-weights the pre-break rows, extracts ``k1`` principal-component
         loadings and computes factor-model residuals,
      4. tunes and fits ``TimeGraphicalLasso`` on the residuals,
      5. records (a) the mean squared factor-model residual and (b) the
         squared error of the combined forecast.

    Besides its arguments the function reads the module-level names
    ``gamma_set``, ``alpha_set``, ``beta_set``, ``q``, ``y_frac`` and
    ``Yhat``; they must exist before it is called.

    Parameters
    ----------
    j : int
        Start position of the rolling window within ``trying``.
    t1_set, t2_set : numpy.ndarray
        Candidate positions of the first and second break.
    trying : pandas.DataFrame
        Full sample from which pre-/post-break blocks are cut.
    set1 : pandas.DataFrame
        Data of the current window (``.to_numpy()`` and ``.iloc`` are used).
    window : int
        Rolling-window size.
    forecasters, truers : pandas.DataFrame
        Passed through to ``CVlasso``.
    k1 : int
        Number of factors.

    Returns
    -------
    numpy.ndarray
        ``[t1 minimising (a), t1 minimising (b), t2 minimising (a),
        t2 minimising (b)]``.
    """
    # Criterion (a): factor-model fit; criterion (b): squared forecast error.
    err_break_loadings=np.zeros([len(t1_set),len(t2_set)])
    err_break_precision=np.zeros([len(t1_set),len(t2_set)])
    count1 = -1
    for t1 in t1_set:
        count1 += 1
        count2 = -1
        for t2 in t2_set:
            count2 += 1
            # Regime label per row: 0 up to t1, 1 in (t1, t2], 2 afterwards.
            breaks = np.zeros((trying.shape[0],1))
            for jjj in range(0,trying.shape[0]):
                if jjj <= t1:
                    breaks[jjj,]=0
                elif t1 < jjj <= t2:
                    breaks[jjj,]=1
                else:
                    breaks[jjj,]=2
          
            breaks = breaks.astype(int)
            breaks = np.ravel(breaks)
            # Blocks before/after each candidate break.
            err1 = trying.iloc[0:t1,]
            err11 = trying.iloc[t1:,]
            err2 = trying.iloc[0:t2,]
            err22 = trying.iloc[t2:,]
                          
            # Labels for rows j+1 .. j+window-1 (window - 1 rows).
            yset = breaks[(j+1):(j+window),].astype(int)
            y2 = yset
            y2 = y2.astype(int)
            y2 = np.ravel(y2)  #otherwise tvfgl function will complain and ask to reshape
          ###############################################################    
    
            set2 = set1.to_numpy()
            
            # Cross-validate gamma depending on which break(s) fall inside the window.
            # gamma_set is a module-level name.  CV_gamma returns an array sliced
            # from gamma_set, not a scalar.
            if t1 < (j+window) < t2 and j < t1:
                # Window straddles t1 only.
                r1_cv = err1.iloc[j:t1,:]
                r11_cv = err11.iloc[0:(j+window-t1),:]
                r1_cv = r1_cv.to_numpy()
                r11_cv = r11_cv.to_numpy()
                gamma_opt = CV_gamma(gamma_set,set1, r1_cv, r11_cv, k1)
            elif (j+window) > t2 and t1 < j < t2:
                # Window straddles t2 only.
                r2_cv = err2.iloc[j:t2,:]
                r22_cv = err22.iloc[0:(j+window-t2),:]
                r2_cv = r2_cv.to_numpy()
                r22_cv = r22_cv.to_numpy()
                gamma_opt = CV_gamma(gamma_set,set1, r2_cv, r22_cv, k1)
            elif (j+window) > t2 and j < t1:
                # Window straddles both breaks: one gamma per break.
                r1_cv = err1.iloc[j:t1,:]
                r11_cv = err11.iloc[0:(j+window-t1),:]
                r1_cv = r1_cv.to_numpy()
                r11_cv = r11_cv.to_numpy()
                gamma_opt1 = CV_gamma(gamma_set,set1, r1_cv, r11_cv, k1)
                r2_cv = err2.iloc[j:t2,:]
                r22_cv = err22.iloc[0:(j+window-t2),:]
                r2_cv = r2_cv.to_numpy()
                r22_cv = r22_cv.to_numpy()
                gamma_opt2 = CV_gamma(gamma_set,set1, r2_cv, r22_cv, k1)
            else:
                # No break inside the window (or a boundary coincides with it): no re-weighting.
                gamma_opt = 1
        
            ####modified returns for time-varying loadings only!!!
            set2_load = set2.copy()
            for row in range(set2_load.shape[0]):
                for col in range(set2_load.shape[1]):
                    if row < (t1-j) and t1 < (j+window) < t2 and j < t1:
                        set2_load[row,col]=gamma_opt* set2_load[row,col] 
                    elif row < (t2-j) and (j+window) > t2 and t1 < j < t2:
                        set2_load[row,col]=gamma_opt* set2_load[row,col] 
                    elif (j+window) > t2 and j < t1:
                        if row < (t1-j):
                            set2_load[row,col]=gamma_opt1* set2_load[row,col] 
                        # NOTE(review): both bounds are strict, so row == t1-j is left
                        # unweighted -- confirm this is intended.
                        elif (t1-j) < row < (t2-j):
                            set2_load[row,col]=gamma_opt2* set2_load[row,col] 
                        
        
            # Loadings: top-k1 eigenvectors of the re-weighted Gram matrix.
            L_load, V_load = np.linalg.eigh(np.dot(set2_load.T, set2_load))
            idx_load = L_load.argsort()[::-1]
            L_load = L_load[idx_load]  # eigenvalues, Nx1
            V_load = V_load[:, idx_load]  # eigenvectors columns, NxN
            lmb = V_load[:, 0:k1]  # kx1
        
            ###According to Su (2017, JoE) if we obtain Fhat
            ###as usual they are only consistent for a rotational version
            ###hence, to get a consistent estimator use a two-stage procedure (OLS)
            # NOTE(review): the factors are regressed from the UNWEIGHTED data `set2`,
            # whereas the time-breaks cell of this notebook uses the re-weighted
            # data at the corresponding step -- confirm which is intended.
            Fhat = set2@lmb@np.linalg.inv(lmb.T@lmb)
            Y = set1 - Fhat@lmb.T ##these are the residuals
            
            # Criterion (a): mean squared factor-model residual over the window.
            sum_i = np.zeros([set1.shape[1],1])
            for i in range(set1.shape[1]):
                sum_j = np.zeros([set1.shape[0],1])
                ##computing sum of squared errors for the post-break period
                for jj in range(0,set1.shape[0]):
                    sum_j[jj] = (set1.iloc[jj,i]-Fhat[jj,:]@lmb.T[:,i])**2
                sum_i[i] = np.sum(sum_j) 
            err_break_loadings[count1, count2] = np.sum(sum_i)/(set1.shape[1]*(set1.shape[0]))   
            
            covariate = Fhat
            betas = lmb.T
            
            #q=10 for GDP, q=5 for unemployment and CPI   
            
            # q, alpha_set and beta_set are module-level names, not parameters.
            tuning = CVlasso(X=set1,Y=Y,y2=y2,k1=k1, q=q, window=window, forecasters = forecasters, truers = truers, betas = betas, covariate=covariate, alpha_set=alpha_set, beta_set=beta_set)
            tvfgl = TimeGraphicalLasso(max_iter=50, alpha = tuning[0][0], beta = tuning[1][0]).fit(Y, y2)
            
            # Theta - Theta B' [cov(F)^-1 + B Theta B']^-1 B Theta with Theta = last fitted precision.
            if k1==1:
                # np.cov of a single column is a scalar, so **(-1) is a plain reciprocal.
                bracket = (np.cov(covariate, y=None, rowvar=False))**(-1)+ betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T
                bracket = bracket.astype(np.float64)
                Thetatvfgl=tvfgl.precision_[tvfgl.precision_.shape[0]-1] - tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T@np.linalg.inv( bracket)@betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]  
            else:
                bracket = np.linalg.inv(np.cov(covariate, y=None, rowvar=False))+betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T
                bracket = bracket.astype(np.float64)
                Thetatvfgl=tvfgl.precision_[tvfgl.precision_.shape[0]-1] - tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas.T@np.linalg.inv(bracket)@betas@tvfgl.precision_[tvfgl.precision_.shape[0]-1]
###############################################################
            
            portfolio_tvfgl = portfolios(set1, Thetatvfgl) #IMPORTANT: use non-standardized returns when computing portfolio weights!!!
            weightTVFGL_global = portfolio_tvfgl[0].T
           #######MODEL FORECASTS ARE HERE######
            # y_frac and Yhat are module-level names.
            # NOTE(review): the error is evaluated at row j, while the main loops of
            # this notebook evaluate at row window + j -- confirm the intended row.
            FETVFGL=y_frac.iloc[j,:] - weightTVFGL_global@Yhat.iloc[j,:]
        #################SFEs ARE HERE##################
            # Criterion (b): squared combined-forecast error.
            err_break_precision[count1, count2]=float((FETVFGL)**2 )
        
            # Mask NaN cells so they cannot be selected as the minimum (re-wrapped on every grid step).
            err_break_precision = np.ma.array(err_break_precision, mask=np.isnan(err_break_precision)) #NEW (in case any na values are generated)

    # First grid location attaining each minimum; element [0] of np.where is the t1 axis, [1] the t2 axis.
    [topt1_load, topt1_prec] = [t1_set[np.where(err_break_loadings == np.min(err_break_loadings))[0]][0], t1_set[np.where(err_break_precision == np.min(err_break_precision))[0][0]]]
    [topt2_load, topt2_prec] = [t2_set[np.where(err_break_loadings == np.min(err_break_loadings))[1]][0], t2_set[np.where(err_break_precision == np.min(err_break_precision))[1][0]]]    
    return np.array([topt1_load,topt1_prec, topt2_load,topt2_prec])
 ########################################################

In [None]:
##IMPORTING THE SERIES
## Please refer to README.txt for detailed data loading/description instructions
#(to avoid confusion, we kept the names of the imported files the same for both applications.
#So just make sure you insert the csv files that correspond to your application/series of interest)

### For h=1. (For h>1 the csv file names carry the forecast horizon as a suffix, e.g., yhat2, forERR2.)
#importing series
#first, predictors
# Individual forecasts.  The first column of each file is discarded
# (presumably a row index written when the csv was saved -- confirm against the data).
Yhat = pd.read_csv(r'yhat.csv')
Yhat = Yhat.drop(columns=Yhat.columns[[0]])

# Forecast errors of the individual forecasts; `data`, `forERR` and `trying`
# are three names bound to the same DataFrame.
data = pd.read_csv(r'forERR.csv')
data = data.drop(columns=data.columns[[0]])
trying = forERR = data

# Second, the actual (realised) series.
y_frac = pd.read_csv(r'ytrue.csv')
y_frac = y_frac.drop(columns=y_frac.columns[[0]])

### ECB SPF State Breaks

In [None]:
##############(STATE BREAKS)#################################################
########PARAMETERS FOR ECB SPF################################################
#(use breaks.csv ONLY for STATE-DEPENDENT breaks, because for TIME breaks the code estimates the break locations)
# State (regime) indicator of every observation, read from file.
breaks = pd.read_csv(r'breaks.csv')

# Rolling-window sizes R reported in Table 1.
iterations = range(30, 55, 10)

# Candidate grids.  For a quick check use a single value for gamma, alpha and
# beta: the full grids below increase computation time considerably.
# NOTE: np.arange with a fractional step may or may not produce a value at the
# stop (1.05) because of floating-point rounding.
gamma_set = np.arange(0.7,1.05,0.05)
alpha_set = np.array([0, 0.1, 0.25, 0.5, 0.7, 1, 3], dtype=float)
beta_set = np.array([0, 0.1, 0.25, 0.5, 0.7, 1, 3], dtype=float)
gamma_opt_zero = 0

# One MSFE per window size.
n_windows = len(iterations)
MSFE_tvfgl = np.zeros((n_windows, 1))
MSFE_tvfgl_load = np.zeros((n_windows, 1))

# Loop counter start, forecast horizon, exponential down-weighting factor.
count, h, lamb = -1, 1, 0.98

In [None]:
### ECB SPF APPLICATION (STATE BREAKS)

# Out-of-sample forecast errors, keyed by rolling-window size R.  A dict is used
# because the number of forecasts (T - R - h + 1) differs across window sizes,
# so the per-window error vectors cannot be stored as rows of one 2-D array.
# (The original code assigned into an array `FE` that does not exist when this
# cell runs, which raised NameError after the first window was finished.)
FE_by_window = {}

# `count` indexes the rows of MSFE_tvfgl; taking it from enumerate makes the
# cell safe to re-run without re-running the parameter cell.
for count, l in enumerate(iterations):
    T=trying.shape[0]
    window = l  # rolling-window size R
    m2 =T-window  # number of out-of-sample (forecasting) observations
    ####FORECAST ERRORS MATRICES####
    FETVFGL= np.zeros(((m2-h+1),1))
    FETVFGL_load= np.zeros(((m2-h+1),1))
    #####SFEs####
    SFETVFGL= np.zeros(((m2-h+1),1))
    SFETVFGL_load= np.zeros(((m2-h+1),1))
    for j in range(0, (m2-h+1)):
        print('m2,j =', m2, ',', j)
        # Current window: rows j .. j+window-1, re-indexed from 0.
        set1 = trying.copy()
        set1 = set1.iloc[j:(j+window),:]
        set1.columns = range(trying.shape[1])
        set1.reset_index(drop=True, inplace=True)

        # Remove an AR(1)-based time-varying mean from each series.
        # NOTE(review): the fitted mean uses the SAME rows (1..window-1) that are
        # being demeaned rather than their lagged values, and the first ARIMA
        # parameter is used as the intercept -- confirm both are intended.
        tv_mean_matrix=np.zeros((set1.shape[0]-1,set1.shape[1]))
        for i in range(0,set1.shape[1]):
            data = set1.iloc[:,i]
            # Fit the AR(1) model
            model = sm.tsa.ARIMA(data, order=(1, 0, 0))
            results = model.fit()
            # Positional access through a plain array: `results.params` is a
            # label-indexed Series here, for which params[0] is deprecated.
            ar_params = np.asarray(results.params)
            tv_mean = ar_params[0] + ar_params[1]*set1.iloc[1:(set1.shape[0]),i]
            tv_mean_matrix[:,i] = tv_mean

        set1_demeaned = set1.iloc[1:,:]-tv_mean_matrix
        ##################################################################

        set2 = set1_demeaned.copy()
        set2_nodown = set2.copy() #when only precision is time-varying, not loadings --> no downweighting
        # Exponential down-weighting: the most recent row gets weight lamb**0.
        for dd in range(0,(window-1)):
            set2.iloc[dd,:] = set2.iloc[dd,:]*lamb**(window-dd-2)
        forecasters = Yhat.iloc[(j+1):(j+window),:]
        truers = y_frac.iloc[(j+1):(j+window),:]

        # State labels.  After the first pass `breaks` is already a flat int array,
        # so these two lines are idempotent.
        # NOTE(review): unlike the other csv files, no column is dropped from
        # breaks.csv before flattening -- confirm the file has a single column.
        breaks = breaks.astype(int)
        breaks = np.ravel(breaks)

        # Labels of rows j+1 .. j+window-1 (aligned with the demeaned data).
        yset = breaks[(j+1):(j+window),].astype(int)
        y2 = yset
        y2 = y2.astype(int)
        y2 = np.ravel(y2)  #otherwise tvfgl function will complain and ask to reshape
    ###############################################################
        # gamma is not used in the state-breaks computation below; the value is
        # kept only as the documented quick-check default.  Cross-validating
        # gamma applies to the time-breaks cell of this notebook.
        gamma_opt = 0.8
    ##################################################################
        k1=2  # number of factors

        # Principal components of the down-weighted data.
        L, V = np.linalg.eigh(np.dot(set2.T, set2))
        idx = L.argsort()[::-1]
        L = L[idx]  # eigenvalues, Nx1
        V = V[:, idx]  # eigenvectors columns, NxN
        lmb = V[:, 0:k1]  # loadings, N x k1
        Fhat = np.dot(set2, lmb)  # factors, (window-1) x k1

        Y = set1_demeaned - Fhat@lmb.T ##these are the residuals

        covariate = Fhat
        betas = lmb.T

        ##########################################################

        q=10 #for GDP, q=5 for unemployment and CPI   for ECB SPF
        ###############################################################################################################
        ########Time-Varying TVFGL#########
        tuning = CVlasso(X=set1,Y=Y,y2=y2,k1=k1, q=q, window=window, forecasters = forecasters, truers = truers, betas = betas, covariate=covariate, alpha_set=alpha_set, beta_set=beta_set)
        print(tuning.T)
        tvfgl = TimeGraphicalLasso(max_iter=50, alpha = tuning[0][0], beta = tuning[1][0]).fit(Y, y2)
        # Precision of the residuals in the most recent regime.
        theta_eps = tvfgl.precision_[tvfgl.precision_.shape[0]-1]
        # Theta - Theta B' [cov(F)^-1 + B Theta B']^-1 B Theta
        if k1==1:
            # np.cov of a single column is a scalar, so **(-1) is a plain reciprocal.
            bracket = (np.cov(covariate, y=None, rowvar=False))**(-1)+ betas@theta_eps@betas.T
        else:
            bracket = np.linalg.inv(np.cov(covariate, y=None, rowvar=False))+betas@theta_eps@betas.T
        bracket = bracket.astype(np.float64)
        theta_TVFGL=theta_eps - theta_eps@betas.T@np.linalg.inv(bracket)@betas@theta_eps

    ###############################################################
        portfolio_tvfgl = portfolios(set1_demeaned, theta_TVFGL)

        mu = set1.mean(axis=0) #mean of errors; only needed for the optional "- weightTVFGL@mu" correction below
        #IMPORTANT: use non-standardized returns when computing portfolio weights!!!
        weightTVFGL = portfolio_tvfgl[0].T
       #######MODEL FORECASTS ARE HERE######
        FETVFGL[j]=y_frac.iloc[window+j,:] - weightTVFGL@Yhat.iloc[window+j,:] #- weightTVFGL@mu
    #################SFEs ARE HERE##################
        SFETVFGL[j]=(FETVFGL[j])**2
    ####################
    FE_by_window[window] = FETVFGL.reshape(-1,)
    MSFE_tvfgl[count]=np.nanmean(SFETVFGL)
    print('MSFE_tvfgl =', MSFE_tvfgl[count])

#This csv saves the MSFEs for RD-FGL for all R in iterations (R=30,40,50) as in Table 1.
#To get the competing MSFEs in Table 1 please use "IJF_empirical_competing.ipynb", which is written in R
#(most packages for the competing methods, such as CLIME, LW, nodewise regression and POET, were issued by
#the original authors in R).
savetxt('RDFGL_MSFE_ECB_state_breaks.csv', MSFE_tvfgl, delimiter=',')

### ECB SPF Time breaks

In [None]:
###############(TIME BREAKS)################################################
########PARAMETERS FOR ECB SPF################################################

#1) ONLY REQUIRED FOR TIME BREAKS (don't use for STATE BREAKS)
#uncomment the t1_set and t2_set lines that correspond to the series of interest


# Candidate positions of the first and second break (GDP).
t1_set = np.arange(34, 41)
t2_set = np.arange(81, 89)

# CPI break candidates:
# t1_set = np.arange(32,39,1)
# t2_set = np.arange(83,93,1)

# UNEMP break candidates:
# t1_set = np.arange(42,50,1)
# t2_set = np.arange(80,86,1)

#2) For a quick check use a single value for gamma, alpha and beta:
#   the full grids below increase computation time considerably.
# NOTE: np.arange with a fractional step may or may not produce a value at the
# stop (1.05) because of floating-point rounding.
gamma_set = np.arange(0.7,1.05,0.05)
alpha_set = np.array([0, 0.1, 0.25, 0.5, 0.7, 1, 3], dtype=float)
beta_set = np.array([0, 0.1, 0.25, 0.5, 0.7, 1, 3], dtype=float)
gamma_opt_zero = 0

# Rolling-window sizes R reported in Table 1.
iterations = range(30, 55, 10)
########################################

# One MSFE per window size.
n_windows = len(iterations)
MSFE_tvfgl = np.zeros((n_windows, 1))
MSFE_tvfgl_load = np.zeros((n_windows, 1))

# Loop counter start, forecast horizon, exponential down-weighting factor.
count, h, lamb = -1, 1, 0.98


In [None]:
### ECB SPF APPLICATION (TIME BREAKS)

for l in iterations:
# PRESS TAB selecting all lines after this if uncomment line above
    count = count + 1
    T=trying.shape[0]
    # l=50
    window = l #12/31/2020 window is R
    m2 =T-window  #Forecasting observations
    ####FORECAST ERRORS MATRICES####
    FETVFGL= np.zeros(((m2-h+1),1))
    FETVFGL_load= np.zeros(((m2-h+1),1))
    #####SFEs####
    SFETVFGL= np.zeros(((m2-h+1),1))
    SFETVFGL_load= np.zeros(((m2-h+1),1))
    for j in range(0, (m2-h+1)):  
        print('m2,j =', m2, ',', j)
        set1 = trying.copy()
        set1 = set1.iloc[j:(j+window),:] 
        set1.columns = range(trying.shape[1])
        set1.reset_index(drop=True, inplace=True)

        tv_mean_matrix=np.zeros((set1.shape[0]-1,set1.shape[1]))
        for i in range(0,set1.shape[1]):
            data = set1.iloc[:,i]
            # Fit the AR(1) model
            model = sm.tsa.ARIMA(data, order=(1, 0, 0))
            results = model.fit()
            tv_mean = results.params[0] + results.params[1]*set1.iloc[1:(set1.shape[0]),i]
            tv_mean_matrix[:,i] = tv_mean

        set1_demeaned = set1.iloc[1:,:]-tv_mean_matrix
        ##################################################################

        set2 = set1_demeaned.copy()
        set2_nodown = set2.copy() #when only precision is time-varying, not loadings --> no downweighting
        #         for dd in range(0,(window-1)): ##downweighting (alternative way)
        #             set2.iloc[dd,:] = set2.iloc[dd,:]*lamb**(window-1-dd)
        for dd in range(0,(window-1)): ##downweighting
            set2.iloc[dd,:] = set2.iloc[dd,:]*lamb**(window-dd-2) #used to be set2.iloc[dd,:] = set2.iloc[dd,:]*lamb**(window-dd-1)
        forecasters = Yhat.iloc[(j+1):(j+window),:]
        truers = y_frac.iloc[(j+1):(j+window),:]
        
        #IF USING KNOWN BREAKS please insert the values below and ignore CV_break function
        t1_loadings = t1_precision = 34 #INSERT YOUR KNOWN BRAKE(S)
        t2_loadings = t2_precision = 81 #INSERT YOUR KNOWN BRAKE(S)

        #IF USING UNKNOWN breaks please make sure you specified t_1 and t_2 rangers in the PARAMETERS section
        #and proceed using the CV_break search tool for breaks below

#         t_opt =  CV_break(j=j, t1_set = t1_set,t2_set = t2_set, trying=trying, set1=set1_demeaned, window=window, forecasters=forecasters, truers=truers, k1=1)

#         t1_loadings = t_opt[0]
#         t1_precision = t_opt[1]
#         t2_loadings = t_opt[2]
#         t2_precision = t_opt[3]
        
        breaks = np.zeros((trying.shape[0],1))
        for jjj in range(0,trying.shape[0]):
            if jjj <= t1_precision:
                breaks[jjj,]=0
            elif t1_precision < jjj <= t2_precision:
                breaks[jjj,]=1
            else:
                breaks[jjj,]=2
        
        breaks = breaks.astype(int)
        breaks = np.ravel(breaks)

        err1 = trying.iloc[0:t1_loadings,]
        err11 = trying.iloc[t1_loadings:,]
        err2 = trying.iloc[0:t2_loadings,]
        err22 = trying.iloc[t2_loadings:,]

        yset = breaks[(j+1):(j+window),].astype(int)
        y2 = yset
        y2 = y2.astype(int)
        y2 = np.ravel(y2)  #otherwise tvfgl function will complain and ask to reshape
    ###############################################################
# UNLOCK THE BLOCK BELOW IF USING TIME BREAKS, LEAVE COMMENTED OUT OTHERWISE#####
    ### CROSS-VALIDATED VALUE OF GAMMA #####
        if t1_loadings < (j+window) < t2_loadings and j < t1_loadings:
            r1_cv = err1.iloc[j:t1_loadings,:]
            r11_cv = err11.iloc[0:(j+window-t1_loadings),:]
            r1_cv = r1_cv.to_numpy()
            r11_cv = r11_cv.to_numpy()
            gamma_opt = CV_gamma(gamma_set,set1, r1_cv, r11_cv, k=1)
            print('gamma_opt =', gamma_opt)
        elif (j+window) > t2_loadings and t1_loadings < j < t2_loadings:
            r2_cv = err2.iloc[j:t2_loadings,:]
            r22_cv = err22.iloc[0:(j+window-t2_loadings),:]
            r2_cv = r2_cv.to_numpy()
            r22_cv = r22_cv.to_numpy()
            gamma_opt = CV_gamma(gamma_set,set1, r2_cv, r22_cv, k=1)
            print('gamma_opt =', gamma_opt)
        elif (j+window) > t2_loadings and j < t1_loadings:
            r1_cv = err1.iloc[j:t1_loadings,:]
            r11_cv = err11.iloc[0:(j+window-t1_loadings),:]
            r1_cv = r1_cv.to_numpy()
            r11_cv = r11_cv.to_numpy()
            gamma_opt1 = CV_gamma(gamma_set,set1, r1_cv, r11_cv, k=1)
            print('gamma_opt =', gamma_opt1)
            r2_cv = err2.iloc[j:t2_loadings,:]
            r22_cv = err22.iloc[0:(j+window-t2_loadings),:]
            r2_cv = r2_cv.to_numpy()
            r22_cv = r22_cv.to_numpy()
            gamma_opt2 = CV_gamma(gamma_set,set1, r2_cv, r22_cv, k=1)
            print('gamma_opt =', gamma_opt2)
        else:
            gamma_opt = 1
            print('gamma_opt =', gamma_opt)
    ## END OF CROSS-VALIDATED VALUE OF GAMMA #####
    ##################################################################

        ####modified returns for time-varying loadings only!!!
        set2_load = set1_demeaned.copy()
        for row in range(set2_load.shape[0]):
            for col in range(set2_load.shape[1]):
                if row < (t1_loadings-j) and t1_loadings < (j+window) < t2_loadings and j < t1_loadings:
                    set2_load.iloc[row,col]=gamma_opt* set2_load.iloc[row,col] 
                elif row < (t2_loadings-j) and (j+window) > t2_loadings and t1_loadings < j < t2_loadings:
                    set2_load.iloc[row,col]=gamma_opt* set2_load.iloc[row,col] 
                elif (j+window) > t2_loadings and j < t1_loadings:
                    if row < (t1_loadings-j):
                        set2_load.iloc[row,col]=gamma_opt1* set2_load.iloc[row,col] 
                    elif (t1_loadings-j) < row < (t2_loadings-j):
                        set2_load.iloc[row,col]=gamma_opt2* set2_load.iloc[row,col] 
            #########################################################################################################################
  #END OF UNLOCK THE BLOCK BELOW IF USING TIME BREAKS, LEAVE COMMENTED OUT OTHERWISE#####      
        k1=2

        L_load, V_load = np.linalg.eigh(np.dot(set2_load.T, set2_load))
        idx_load = L_load.argsort()[::-1]
        L_load = L_load[idx_load]  # eigenvalues, Nx1
        V_load = V_load[:, idx_load]  # eigenvectors columns, NxN
        lmb_load = V_load[:, 0:k1]  # kx1

        ###According to Su (2017, JoE) if we obtain Fhat
        ###as usual they are only consistent for a rotational version
        ###hence, to get a consistent estimator use a two-stage procedure (OLS)
        Fhat_load = set2_load@lmb_load@np.linalg.inv(lmb_load.T@lmb_load)
        Y_load = set1_demeaned - Fhat_load@lmb_load.T ##these are the residuals

        covariate_load = Fhat_load
        betas_load = lmb_load.T  
        
        ##########################################################              
       
        q=10 #for GDP, q=5 for unemployment and CPI   for ECB SPF
        ###############################################################################################################

    ###############################################################     
    ###############################################################
     ########Time-Varying TVFGL(BOTH precision and loadings time-varying)#########        
        tuning = CVlasso(X=set2_load,Y=Y_load,k1=k1, y2=y2, q=q, window=window, forecasters = forecasters, truers = truers, betas = betas_load, covariate=covariate_load, alpha_set=alpha_set, beta_set=beta_set)
        print(tuning.T)
        tvfgl = TimeGraphicalLasso(max_iter=50, alpha = tuning[0][0], beta = tuning[1][0]).fit(Y_load, y2)
        if k1==1:
            bracket = (np.cov(covariate_load, y=None, rowvar=False))**(-1)+ betas_load@tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas_load.T
            bracket = bracket.astype(np.float64)
            theta_TVFGL_load=tvfgl.precision_[tvfgl.precision_.shape[0]-1] - tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas_load.T@np.linalg.inv( bracket)@betas_load@tvfgl.precision_[tvfgl.precision_.shape[0]-1]  
        else:
            bracket = np.linalg.inv(np.cov(covariate_load, y=None, rowvar=False))+betas_load@tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas_load.T
            bracket = bracket.astype(np.float64)
            theta_TVFGL_load=tvfgl.precision_[tvfgl.precision_.shape[0]-1] - tvfgl.precision_[tvfgl.precision_.shape[0]-1]@betas_load.T@np.linalg.inv(bracket)@betas_load@tvfgl.precision_[tvfgl.precision_.shape[0]-1]
     ###############################################################           
        
        portfolio_tvfgl_load = portfolios(set1_demeaned, theta_TVFGL_load)
        
        mu = set1.mean(axis=0) #subtracting mean of errors bc factors and loadings
        #used to get weights were computed for demeaned data
        #IMPORTANT: use non-standardized returns when computing portfolio weights!!!
        weightTVFGL_load = portfolio_tvfgl_load[0].T
       #######MODEL FORECASTS ARE HERE######
        FETVFGL_load[j]=y_frac.iloc[window+j,:] - weightTVFGL_load@Yhat.iloc[window+j,:]#- weightTVFGL_load@mu
    #################SFEs ARE HERE##################
        SFETVFGL_load[j]=(FETVFGL_load[j])**2
    #################### 
    FE_load[count,] = FETVFGL_load.reshape(-1,)
    MSFE_tvfgl_load[count]=np.nanmean(SFETVFGL_load)
    print( 'MSFE_tvfgl_load =', MSFE_tvfgl_load[count])
    
# This CSV stores the RD-FGL MSFEs for every window size R in `iterations` (R = 30, 40, 50), as reported in Table 1.
# The MSFEs of the competing methods in Table 1 are produced by "IJF_empirical_competing.ipynb", which is written in R
# (most packages for the competing methods, e.g. CLIME, LW, nodewise regression and POET, were released by
# their original authors in R).
savetxt('RDFGL_MSFE_ECB_time_break.csv', MSFE_tvfgl_load, delimiter=',')

### FRED MD State breaks

In [None]:
# ---------------------------------------------------------------------------
# FRED-MD, STATE BREAKS: parameters
# ---------------------------------------------------------------------------
# Regime labels are read from file ONLY for state-dependent breaks; in the
# time-break setting the labels are built from the break dates instead.
breaks = pd.read_csv(r'breaks.csv')

# ----- FRED-MD candidate grids -----
gamma_opt_zero = 0
# NOTE(review): np.arange with a float step may or may not include a value
# next to the stop (1.05) because of rounding -- confirm the intended grid.
gamma_set = np.arange(0.7, 1.05, 0.05)

# Penalty grids handed to CVlasso as alpha_set / beta_set (same candidates).
alpha_set = np.array([0, 0.1, 0.25, 0.5, 0.7, 1, 3, 5], dtype=float)
beta_set = alpha_set.copy()

# ----- rolling-window design -----
# range(400, 450, 100) contains the single window length 400.
iterations = range(400, 450, 100)
T = len(trying)
window = iterations[0]  # window is R
m2 = T - window  # forecasting observations

# ----- scalars shared with the application cell -----
count, h = -1, 1
lamb, q = 0.98, 150  # q is used for tuning in FRED-MD

# ----- result containers: one row per entry of `iterations` -----
MSFE_tvfgl = np.zeros((len(iterations), 1))
MSFE_tvfgl_load = np.zeros_like(MSFE_tvfgl)

# NOTE(review): the number of columns is taken from the first window only.
FE = np.zeros((len(iterations), m2 - h + 1))
FE_load = np.zeros_like(FE)


In [None]:
### FRED MD APPLICATION (STATE BREAKS)
# Rolling-window evaluation of RD-FGL when the regime of every observation is
# given by the labels in `breaks`. For each window length in `iterations` and
# each forecast origin j, the cell
#   1. demeans the window of `trying` column by column with a fitted AR(1),
#   2. down-weights older rows by powers of `lamb` and extracts k1 principal
#      component loadings/factors,
#   3. fits TimeGraphicalLasso to the factor residuals with the penalties
#      returned by CVlasso,
#   4. rebuilds the precision matrix, obtains weights from `portfolios` and
#      stores the forecast error for observation window+j.

# The regime labels depend on neither the window nor j, so flatten them once
# instead of on every iteration (the conversion is idempotent on re-runs).
breaks = np.ravel(breaks.astype(int))

for l in iterations:
    count = count + 1
    T = trying.shape[0]
    window = l  # window is R
    m2 = T - window  # forecasting observations
    #### forecast-error containers ####
    FETVFGL = np.zeros(((m2-h+1), 1))
    FETVFGL_load = np.zeros(((m2-h+1), 1))
    #### squared-forecast-error containers ####
    SFETVFGL = np.zeros(((m2-h+1), 1))
    SFETVFGL_load = np.zeros(((m2-h+1), 1))
    for j in range(0, (m2-h+1)):
        print('m2,j =', m2, ',', j)
        # Rows j .. j+window-1 of `trying`; only the slice is copied, and the
        # row/column labels are reset to plain positions.
        set1 = trying.iloc[j:(j+window), :].copy()
        set1.columns = range(trying.shape[1])
        set1.reset_index(drop=True, inplace=True)

        # Column-wise AR(1) fit used to build the mean that is removed below.
        tv_mean_matrix = np.zeros((set1.shape[0]-1, set1.shape[1]))
        for i in range(0, set1.shape[1]):
            data = set1.iloc[:, i]
            # Fit the AR(1) model
            model = sm.tsa.ARIMA(data, order=(1, 0, 0))
            results = model.fit()
            # Index the coefficients through a plain array: integer keys on a
            # label-indexed Series are deprecated in recent pandas, and this
            # also works if `params` is already an ndarray.
            ar_params = np.asarray(results.params)
            # NOTE(review): the mean is evaluated at the same-period
            # observation (rows 1..end), not at the lagged one -- confirm.
            tv_mean = ar_params[0] + ar_params[1]*set1.iloc[1:(set1.shape[0]), i]
            tv_mean_matrix[:, i] = tv_mean

        # The first row is dropped, so window-1 rows remain.
        set1_demeaned = set1.iloc[1:, :] - tv_mean_matrix
        ##################################################################

        set2 = set1_demeaned.copy()
        set2_nodown = set2.copy()  # when only precision is time-varying, not loadings --> no downweighting
        # Down-weighting: row dd is multiplied by lamb**(window-dd-2), so the
        # most recent row (dd = window-2) keeps weight 1.
        for dd in range(0, (window-1)):
            set2.iloc[dd, :] = set2.iloc[dd, :]*lamb**(window-dd-2)
        forecasters = Yhat.iloc[(j+1):(j+window), :]
        truers = y_frac.iloc[(j+1):(j+window), :]

        # Regime labels aligned with the window-1 rows of the demeaned window;
        # 1-D, otherwise the tvfgl fit complains and asks to reshape.
        yset = breaks[(j+1):(j+window)].astype(int)
        y2 = np.ravel(yset)
        ###############################################################
        # gamma_opt is fixed here (the average value 0.8, for a quick check);
        # it is not read again in this cell. The cross-validated choice via
        # CV_gamma is implemented in the FRED-MD time-breaks cell.
        gamma_opt = 0.8
        ##################################################################
        k1 = 2  # number of principal components kept

        ###############################################################################
        # Eigen-decomposition of set2'set2, sorted by decreasing eigenvalue.
        L, V = np.linalg.eigh(np.dot(set2.T, set2))
        idx = L.argsort()[::-1]
        L = L[idx]  # eigenvalues
        V = V[:, idx]  # eigenvectors as columns
        lmb = V[:, 0:k1]  # leading k1 eigenvectors (loadings)
        Fhat = np.dot(set2, lmb)  # factors from the down-weighted data

        Y = set1_demeaned - Fhat@lmb.T  # these are the residuals

        covariate = Fhat
        betas = lmb.T

        ##########################################################
        q = 150  # for FRED MD
        ###############################################################################
        ######## Time-Varying TVFGL #########
        tuning = CVlasso(X=set1, Y=Y, y2=y2, k1=k1, q=q, window=window, forecasters=forecasters, truers=truers, betas=betas, covariate=covariate, alpha_set=alpha_set, beta_set=beta_set)
        print(tuning.T)
        tvfgl = TimeGraphicalLasso(max_iter=50, alpha=tuning[0][0], beta=tuning[1][0]).fit(Y, y2)

        # Last precision matrix along the first axis of `precision_`, read
        # once instead of re-indexing it in every term of the formula.
        theta_last = tvfgl.precision_[-1]
        factor_cov = np.cov(covariate, y=None, rowvar=False)
        if k1 == 1:
            # The k1 == 1 case uses an elementwise reciprocal of the factor
            # covariance instead of a matrix inverse.
            bracket = factor_cov**(-1) + betas@theta_last@betas.T
        else:
            bracket = np.linalg.inv(factor_cov) + betas@theta_last@betas.T
        bracket = bracket.astype(np.float64)
        theta_TVFGL = theta_last - theta_last@betas.T@np.linalg.inv(bracket)@betas@theta_last

        ###############################################################
        portfolio_tvfgl = portfolios(set1_demeaned, theta_TVFGL)

        # `mu` is only needed by the optional mean correction that is
        # commented out at the end of the forecast-error line below.
        mu = set1.mean(axis=0)
        weightTVFGL = portfolio_tvfgl[0].T
        ####### MODEL FORECASTS ARE HERE ######
        FETVFGL[j] = y_frac.iloc[window+j, :] - weightTVFGL@Yhat.iloc[window+j, :]  # - weightTVFGL@mu
        ################# SFEs ARE HERE ##################
        SFETVFGL[j] = (FETVFGL[j])**2
    ####################
    FE[count, :] = FETVFGL.reshape(-1,)
    MSFE_tvfgl[count] = np.nanmean(SFETVFGL)
    print('MSFE_tvfgl =', MSFE_tvfgl[count])

# This CSV stores the RD-FGL MSFEs for Table 2.
# The MSFEs of the competing methods in Table 2 are produced by
# "IJF_empirical_competing.ipynb", which is written in R (most packages for
# the competing methods, e.g. CLIME, LW, nodewise regression and POET, were
# released by their original authors in R).
savetxt('RDFGL_MSFE_FRED_state_breaks.csv', MSFE_tvfgl, delimiter=',')

### FRED MD Time breaks

In [None]:
# ---------------------------------------------------------------------------
# FRED-MD, TIME BREAKS: parameters
# ---------------------------------------------------------------------------

# ----- break-date candidates -----
# 1) ONLY REQUIRED FOR TIME BREAKS (do not use for STATE BREAKS).
# Activate the t1_set / t2_set pair that corresponds to the series of interest.
#
# Searching over the ranges below substantially increases computation time,
# so the breaks identified in our results are listed here:
#   INDPROD: t1 = 353, t2 = 494
#   CPI:     t1 = 320, t2 = 433
#   PCEPI:   t1 = 353, t2 = 495
#   UNEMP:   t1 = 348, t2 = 492

# INDPROD break candidates
t1_set = np.arange(350, 361)
t2_set = np.arange(490, 501)

# CPI break candidates
# t1_set = np.arange(315,350,1)
# t2_set = np.arange(425,460,1)

# PCEPI break candidates
# t1_set = np.arange(345,361,1)
# t2_set = np.arange(490,501,1)

# UNEMP break candidates
# t1_set = np.arange(340,361,1)
# t2_set = np.arange(490,501,1)

# ----- tuning grids -----
gamma_opt_zero = 0
# NOTE(review): np.arange with a float step may or may not include a value
# next to the stop (1.05) because of rounding -- confirm the intended grid.
gamma_set = np.arange(0.7, 1.05, 0.05)

# Penalty grids handed to CVlasso as alpha_set / beta_set (same candidates).
alpha_set = np.array([0, 0.1, 0.25, 0.5, 0.7, 1, 3, 5], dtype=float)
beta_set = alpha_set.copy()

# ----- rolling-window design -----
# range(400, 450, 100) contains the single window length 400.
iterations = range(400, 450, 100)
T = len(trying)
window = iterations[0]  # window is R
m2 = T - window  # forecasting observations

# ----- scalars shared with the application cell -----
count, h = -1, 1
lamb, q = 0.98, 150  # q is used for tuning in FRED-MD

# ----- result containers: one row per entry of `iterations` -----
MSFE_tvfgl = np.zeros((len(iterations), 1))
MSFE_tvfgl_load = np.zeros_like(MSFE_tvfgl)

# NOTE(review): the number of columns is taken from the first window only.
FE = np.zeros((len(iterations), m2 - h + 1))
FE_load = np.zeros_like(FE)


In [None]:
### FRED MD APPLICATION (TIME BREAKS)
# Rolling-window evaluation of RD-FGL with breaks at the dates t1/t2. For each
# window length in `iterations` and each forecast origin j, the cell
#   1. demeans the window of `trying` column by column with a fitted AR(1),
#   2. labels every observation with its regime (0/1/2) from the break dates,
#   3. picks the pre-break row weight(s) gamma with CV_gamma when the window
#      contains a break and rescales the pre-break rows accordingly,
#   4. extracts k1 principal-component loadings, fits TimeGraphicalLasso to
#      the factor residuals with the penalties returned by CVlasso,
#   5. rebuilds the precision matrix, obtains weights from `portfolios` and
#      stores the forecast error for observation window+j.

for l in iterations:
    count = count + 1
    T = trying.shape[0]
    window = l  # window is R
    m2 = T - window  # forecasting observations
    #### forecast-error containers ####
    FETVFGL = np.zeros(((m2-h+1), 1))
    FETVFGL_load = np.zeros(((m2-h+1), 1))
    #### squared-forecast-error containers ####
    SFETVFGL = np.zeros(((m2-h+1), 1))
    SFETVFGL_load = np.zeros(((m2-h+1), 1))
    for j in range(0, (m2-h+1)):
        print('m2,j =', m2, ',', j)
        # Rows j .. j+window-1 of `trying`; only the slice is copied, and the
        # row/column labels are reset to plain positions.
        set1 = trying.iloc[j:(j+window), :].copy()
        set1.columns = range(trying.shape[1])
        set1.reset_index(drop=True, inplace=True)

        # Column-wise AR(1) fit used to build the mean that is removed below.
        tv_mean_matrix = np.zeros((set1.shape[0]-1, set1.shape[1]))
        for i in range(0, set1.shape[1]):
            data = set1.iloc[:, i]
            # Fit the AR(1) model
            model = sm.tsa.ARIMA(data, order=(1, 0, 0))
            results = model.fit()
            # Index the coefficients through a plain array: integer keys on a
            # label-indexed Series are deprecated in recent pandas, and this
            # also works if `params` is already an ndarray.
            ar_params = np.asarray(results.params)
            # NOTE(review): the mean is evaluated at the same-period
            # observation (rows 1..end), not at the lagged one -- confirm.
            tv_mean = ar_params[0] + ar_params[1]*set1.iloc[1:(set1.shape[0]), i]
            tv_mean_matrix[:, i] = tv_mean

        # The first row is dropped, so window-1 rows remain.
        set1_demeaned = set1.iloc[1:, :] - tv_mean_matrix
        ##################################################################

        # NOTE(review): set2 / set2_nodown are not read again in this cell
        # (the loadings below come from set2_load); kept in case other cells
        # inspect them.
        set2 = set1_demeaned.copy()
        set2_nodown = set2.copy()  # when only precision is time-varying, not loadings --> no downweighting
        for dd in range(0, (window-1)):  # down-weighting by powers of lamb
            set2.iloc[dd, :] = set2.iloc[dd, :]*lamb**(window-dd-2)
        forecasters = Yhat.iloc[(j+1):(j+window), :]
        truers = y_frac.iloc[(j+1):(j+window), :]

        # IF USING KNOWN BREAKS: insert the values below and ignore CV_break.
        t1_loadings = t1_precision = 353  # insert your known break(s)
        t2_loadings = t2_precision = 494  # insert your known break(s)

        # IF USING UNKNOWN BREAKS: make sure the t1_set and t2_set ranges are
        # specified in the PARAMETERS cell, then uncomment the CV_break search
        # below.

#         t_opt =  CV_break(j=j, t1_set = t1_set,t2_set = t2_set, trying=trying, set1=set1_demeaned, window=window, forecasters=forecasters, truers=truers, k1=1)

#         t1_loadings = t_opt[0]
#         t1_precision = t_opt[1]
#         t2_loadings = t_opt[2]
#         t2_precision = t_opt[3]

        # Regime label per observation of `trying`:
        #   0 up to and including t1_precision, 1 up to and including
        #   t2_precision, 2 afterwards.
        time_index = np.arange(trying.shape[0])
        breaks = np.where(time_index <= t1_precision, 0,
                          np.where(time_index <= t2_precision, 1, 2))
        breaks = np.ravel(breaks.astype(int))

        # Pre-/post-break pieces of `trying` handed to the gamma search.
        err1 = trying.iloc[0:t1_loadings, ]
        err11 = trying.iloc[t1_loadings:, ]
        err2 = trying.iloc[0:t2_loadings, ]
        err22 = trying.iloc[t2_loadings:, ]

        # Regime labels aligned with the window-1 rows of the demeaned window;
        # 1-D, otherwise the tvfgl fit complains and asks to reshape.
        yset = breaks[(j+1):(j+window)].astype(int)
        y2 = np.ravel(yset)
        ###############################################################
        # Position of the window [j, j+window) relative to the two breaks.
        # The three cases are mutually exclusive and are reused for the row
        # rescaling further down.
        window_end = j + window
        spans_t1_only = t1_loadings < window_end < t2_loadings and j < t1_loadings
        spans_t2_only = window_end > t2_loadings and t1_loadings < j < t2_loadings
        spans_both = window_end > t2_loadings and j < t1_loadings

        ### CROSS-VALIDATED VALUE OF GAMMA (required for time breaks) #####
        if spans_t1_only:
            r1_cv = err1.iloc[j:t1_loadings, :].to_numpy()
            r11_cv = err11.iloc[0:(window_end-t1_loadings), :].to_numpy()
            gamma_opt = CV_gamma(gamma_set, set1, r1_cv, r11_cv, k=1)
            print('gamma_opt =', gamma_opt)
        elif spans_t2_only:
            r2_cv = err2.iloc[j:t2_loadings, :].to_numpy()
            r22_cv = err22.iloc[0:(window_end-t2_loadings), :].to_numpy()
            gamma_opt = CV_gamma(gamma_set, set1, r2_cv, r22_cv, k=1)
            print('gamma_opt =', gamma_opt)
        elif spans_both:
            # Two separate weights; gamma_opt itself is not updated here.
            r1_cv = err1.iloc[j:t1_loadings, :].to_numpy()
            r11_cv = err11.iloc[0:(window_end-t1_loadings), :].to_numpy()
            gamma_opt1 = CV_gamma(gamma_set, set1, r1_cv, r11_cv, k=1)
            print('gamma_opt =', gamma_opt1)
            r2_cv = err2.iloc[j:t2_loadings, :].to_numpy()
            r22_cv = err22.iloc[0:(window_end-t2_loadings), :].to_numpy()
            gamma_opt2 = CV_gamma(gamma_set, set1, r2_cv, r22_cv, k=1)
            print('gamma_opt =', gamma_opt2)
        else:
            gamma_opt = 1
            print('gamma_opt =', gamma_opt)
        ## END OF CROSS-VALIDATED VALUE OF GAMMA #####
        ##################################################################

        #### modified data for time-varying loadings only!!!
        # One multiplier per row (1 = untouched) replaces the per-cell
        # .iloc loop; the row masks are the same as in the original loop.
        set2_load = set1_demeaned.copy()
        row_pos = np.arange(set2_load.shape[0])
        row_scale = np.ones(set2_load.shape[0])
        if spans_t1_only:
            row_scale[row_pos < (t1_loadings-j)] = gamma_opt
        elif spans_t2_only:
            row_scale[row_pos < (t2_loadings-j)] = gamma_opt
        elif spans_both:
            row_scale[row_pos < (t1_loadings-j)] = gamma_opt1
            # NOTE(review): both bounds are strict, so the row at position
            # t1_loadings-j keeps weight 1 -- confirm this is intended.
            row_scale[(row_pos > (t1_loadings-j)) & (row_pos < (t2_loadings-j))] = gamma_opt2
        # Rebuild with the original labels so later label-aligned arithmetic
        # against set1_demeaned still matches.
        set2_load = pd.DataFrame(set2_load.to_numpy()*row_scale[:, None],
                                 index=set2_load.index, columns=set2_load.columns)
        ##################################################################
        k1 = 2  # number of principal components kept

        # Eigen-decomposition of set2_load'set2_load, sorted by decreasing
        # eigenvalue.
        L_load, V_load = np.linalg.eigh(np.dot(set2_load.T, set2_load))
        idx_load = L_load.argsort()[::-1]
        L_load = L_load[idx_load]  # eigenvalues
        V_load = V_load[:, idx_load]  # eigenvectors as columns
        lmb_load = V_load[:, 0:k1]  # leading k1 eigenvectors (loadings)

        ### According to Su (2017, JoE), factors obtained in the usual way are
        ### only consistent for a rotated version; hence a two-stage (OLS)
        ### procedure is used to get a consistent estimator.
        Fhat_load = set2_load@lmb_load@np.linalg.inv(lmb_load.T@lmb_load)
        Y_load = set1_demeaned - Fhat_load@lmb_load.T  # these are the residuals

        covariate_load = Fhat_load
        betas_load = lmb_load.T

        ##########################################################
        q = 150  # for FRED MD
        ###############################################################
        ######## Time-Varying TVFGL (BOTH precision and loadings time-varying) #########
        tuning = CVlasso(X=set2_load, Y=Y_load, k1=k1, y2=y2, q=q, window=window, forecasters=forecasters, truers=truers, betas=betas_load, covariate=covariate_load, alpha_set=alpha_set, beta_set=beta_set)
        print(tuning.T)
        tvfgl = TimeGraphicalLasso(max_iter=50, alpha=tuning[0][0], beta=tuning[1][0]).fit(Y_load, y2)

        # Last precision matrix along the first axis of `precision_`, read
        # once instead of re-indexing it in every term of the formula.
        theta_last = tvfgl.precision_[-1]
        factor_cov = np.cov(covariate_load, y=None, rowvar=False)
        if k1 == 1:
            # The k1 == 1 case uses an elementwise reciprocal of the factor
            # covariance instead of a matrix inverse.
            bracket = factor_cov**(-1) + betas_load@theta_last@betas_load.T
        else:
            bracket = np.linalg.inv(factor_cov) + betas_load@theta_last@betas_load.T
        bracket = bracket.astype(np.float64)
        theta_TVFGL_load = theta_last - theta_last@betas_load.T@np.linalg.inv(bracket)@betas_load@theta_last
        ###############################################################

        portfolio_tvfgl_load = portfolios(set1_demeaned, theta_TVFGL_load)

        # `mu` is only needed by the optional mean correction that is
        # commented out at the end of the forecast-error line below.
        mu = set1.mean(axis=0)
        weightTVFGL_load = portfolio_tvfgl_load[0].T
        ####### MODEL FORECASTS ARE HERE ######
        FETVFGL_load[j] = y_frac.iloc[window+j, :] - weightTVFGL_load@Yhat.iloc[window+j, :]  # - weightTVFGL_load@mu
        ################# SFEs ARE HERE ##################
        SFETVFGL_load[j] = (FETVFGL_load[j])**2
    ####################
    FE_load[count, :] = FETVFGL_load.reshape(-1,)
    MSFE_tvfgl_load[count] = np.nanmean(SFETVFGL_load)
    print( 'MSFE_tvfgl_load =', MSFE_tvfgl_load[count])

# This CSV stores the RD-FGL MSFEs for Table 2.
# The MSFEs of the competing methods in Table 2 are produced by
# "IJF_empirical_competing.ipynb", which is written in R (most packages for
# the competing methods, e.g. CLIME, LW, nodewise regression and POET, were
# released by their original authors in R).
savetxt('RDFGL_MSFE_FRED_time_break.csv', MSFE_tvfgl_load, delimiter=',')