# Defined functions

In [36]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.arima_process import ArmaProcess
from matplotlib import pyplot as plt
from scipy.linalg import sqrtm
def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((point1 - point2)**2)) 
# equal slice
def sir_1(X, y, H, K):
    Z = X-np.mean(X, axis=0)
    width = (np.max(y) - np.min(y)) / H
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    for h in range(H):
        h_index = np.logical_and(np.min(y)+h*width <= y, y < np.min(y)+(h+1)*width)
        ph_hat = np.mean(h_index)
        if ph_hat == 0:
            continue
        mh = np.mean(Z[h_index, :], axis=0)
        V_hat = np.add(V_hat,ph_hat * np.matmul(mh[:, np.newaxis], mh[np.newaxis, :]))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors       
    return  edr_est
    
# equal slice
def sir_1_time_series(X, y, s, H, K):
    from collections import deque
    def move_left(queue, steps):
        # Length of the queue
        n = len(queue)
        # Create a new list filled with 0
        new_queue = [0] * n
        
        # Move elements to the left by the specified number of steps
        for i in range(n):
            new_position = i - steps
            if new_position >= 0:
                new_queue[new_position] = queue[i]
        
        return new_queue
       
    Z = X-np.mean(X, axis=0)
    width = (np.max(y) - np.min(y)) / H
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    for h in range(H):
        h_index = np.logical_and(np.min(y)+h*width <= y, y < np.min(y)+(h+1)*width)
        # queue moves s locations backward
        h_index = move_left(h_index, s) 
        # h_index = h_index[len(y) - X.shape[0]:]
        # h_index = h_index[:X.shape[0]]
        ph_hat = np.mean(h_index)
        if ph_hat == 0:
            continue
        # if np.all((h_index == 1) < X.shape[0]):
        mh = np.mean(Z[h_index, :], axis=0)
        V_hat = np.add(V_hat,ph_hat * np.matmul(mh[:, np.newaxis], mh[np.newaxis, :]))
        # else:
        #     h_index = h_index
        #     mh = np.mean(Z[h_index, :], axis=0)
        #     V_hat = np.add(V_hat,ph_hat * np.matmul(mh[:, np.newaxis], mh[np.newaxis, :]))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors         
    return  (V_hat, edr_est, eigenvalues ** 2)
    
#equal number in each slice
#queue structure
def sir_1_time_series1(X, y, s, num_slices, K):
    n_samples, n_features = X.shape
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    # Step 1: Sort the data by the response variable
    sorted_indices = np.argsort(y)
    # sorted_indices1 = np.argsort(y[0:X.shape[0]])
    # if len(sorted_indices) > X.shape[0]:
    #     X_sorted = X[0:X.shape[0]][sorted_indices1]
    # else:
    X_sorted = X[sorted_indices]
    y_sorted = y[sorted_indices]
    # Step 2: Divide the data into slices
    slice_size = n_samples // num_slices
    ph_hat = slice_size/n_samples
    slices = []
    
    for i in range(num_slices):
        start_idx = i * slice_size
        if i < num_slices - 1:
            end_idx = (i + 1) * slice_size
        else:  # Last slice includes any remaining samples
            end_idx = n_samples
        if start_idx-s>0:
            slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
        else:
            slices.append((X[sorted_indices[0:end_idx-s]], y_sorted[start_idx:end_idx]))
    
    # Step 3: Compute the means of the predictors within each slice
    X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])
    
    # Step 4: Center the predictor means
    X_centered = X_means - np.mean(X_means, axis=0)
    # Check for NaN or inf values in V_hat
    if np.isnan(V_hat).any() or np.isinf(V_hat).any():
        # Handle NaN or inf values appropriately
        # For example, replace NaN values with 0 and inf values with a large number
        V_hat[np.isnan(V_hat)] = 0
        V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat
        
    V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors
    
    return (V_hat, edr_est, eigenvalues ** 2)

In [7]:
#equal number in each slice for SIR
def sir_11(X, y, num_slices, K):
    n_samples, n_features = X.shape
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    # Step 1: Sort the data by the response variable
    sorted_indices = np.argsort(y)
    X_sorted = X[sorted_indices]
    y_sorted = y[sorted_indices]
    
    # Step 2: Divide the data into slices
    slice_size = n_samples // num_slices
    ph_hat = slice_size/n_samples
    slices = []
    
    for i in range(num_slices):
        start_idx = i * slice_size
        if i < num_slices - 1:
            end_idx = (i + 1) * slice_size
        else:  # Last slice includes any remaining samples
            end_idx = n_samples
        slices.append((X_sorted[start_idx:end_idx], y_sorted[start_idx:end_idx]))
    
    # Step 3: Compute the means of the predictors within each slice
    X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])
    
    # Step 4: Center the predictor means
    X_centered = X_means - np.mean(X_means, axis=0)
    
    V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors
    
    return edr_est

### Above are functions for SIR, SIR different output(V_hat, edr_est, eigenvalues ** 2 and edr_est), SIR for equal number in each slice and SIR with equal slice.

# SIR

For model
\begin{align}
   & y_t = 2z_{1,t-1} + 3z_{2,t-1} +\epsilon_t
    \label{model:time series 2}
\end{align}
with four components $z_1 \sim \text{AR}(1)$ with $\phi_1$, $z_2 \sim \text{AR}(1)$ with $\phi_2$, $z_3 \sim \text{ARMA}(1, 1)$ with $\phi = 0.3$ and $\theta = 0.4$ and $z_4 \sim \text{MA}(1)$ with $\theta = -0.4$. Dimension $p = 4$. $K = 1$.

## Results under Sir_1(equal slice) and Sir_11(equal number in each slice)

### coefficient 0.2 0.2, correct direction

In [72]:
#0.2 0.2 correct direction
num_N = 5
n_obs = 10000
ar_coeff1 = 0.2 
ar_coeff2 = 0.2
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
Hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff1 * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff2 * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
        ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 0.55022728]
 [ 0.82705784]
 [-0.00266489]
 [-0.00193875]]
Equal slice ratio:[0.66528271]
Equal number in each slice direction:[[ 5.47185641e-01]
 [ 8.29002510e-01]
 [-3.05165667e-03]
 [-7.34620269e-04]]
Equal number in each slice ratio:[0.66005306]


### coefficient 0.2 0.8, incorrect direction

In [73]:
#0.2 0.8 incorrect direction
num_N = 5
n_obs = 10000
ar_coeff1 = 0.2 
ar_coeff2 = 0.8
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
Hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff1 * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff2 * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
        ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 6.04709463e-02]
 [ 9.97990092e-01]
 [-6.48172944e-04]
 [-3.21339522e-04]]
Equal slice ratio:[0.06059273]
Equal number in each slice direction:[[ 6.05792852e-02]
 [ 9.97980320e-01]
 [-5.72619709e-04]
 [-2.59907093e-04]]
Equal number in each slice ratio:[0.06070188]


### coefficient 0.8 0.8, correct direction

In [74]:
#0.8 0.8 correct direction
num_N = 5
n_obs = 10000
ar_coeff1 = 0.8 
ar_coeff2 = 0.8
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
Hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff1 * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff2 * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
        ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 5.51164249e-01]
 [ 8.33834171e-01]
 [ 7.05916426e-04]
 [-2.35679570e-04]]
Equal slice ratio:[0.66099983]
Equal number in each slice direction:[[ 5.51118853e-01]
 [ 8.33869763e-01]
 [ 7.60820593e-04]
 [-2.48872414e-04]]
Equal number in each slice ratio:[0.66091718]


## SIR for model
\begin{align}
   & y_t = 2z_{1,t} + 3z_{2,t} +\epsilon_t
\end{align}
with four components $z_i \sim \text{AR}(1)$ with $\phi_i, i = 1, \ldots, 4$. I denote the model as NO(0.2, 0.2, 0.2, 0.2) if $\phi_i = 0.2, i = 1, \ldots, 4$. $K = 1$.

### ar_coeff = [0.2, 0.2, 0.2, 0.2]

In [87]:
#ar_coeff = [0.2, 0.2, 0.2, 0.2]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.2, 0.2, 0.2]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 5.55096135e-01]
 [ 8.31595927e-01]
 [ 9.87298312e-04]
 [-1.54833445e-04]]
Equal slice ratio:[0.66750704]
Equal number in each slice direction:[[1.10950932e+00]
 [1.66323899e+00]
 [1.83722916e-03]
 [3.52549362e-04]]
Equal number in each slice ratio:[0.66707751]


### ar_coeff = [0.2, 0.2, 0.8, 0.8]

In [88]:
#ar_coeff = [0.2, 0.2, 0.8, 0.8]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.2, 0.8, 0.8]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 5.54614799e-01]
 [ 8.31551840e-01]
 [ 2.93844972e-03]
 [-3.30158714e-04]]
Equal slice ratio:[0.66696359]
Equal number in each slice direction:[[ 1.66417676e+00]
 [ 2.49475284e+00]
 [ 4.59979919e-03]
 [-1.02698341e-04]]
Equal number in each slice ratio:[0.6670708]


### ar_coeff = [0.2, 0.5, 0.8, 0.8]

In [75]:
#ar_coeff = [0.2, 0.5, 0.8, 0.8]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.5, 0.8, 0.8]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[0.46189184]
 [0.88630596]
 [0.0042448 ]
 [0.00288614]]
Equal slice ratio:[0.52114266]
Equal number in each slice direction:[[1.01290085]
 [1.7202301 ]
 [0.00497268]
 [0.00291084]]
Equal number in each slice ratio:[0.58881707]


### ar_coeff = [0.2, 0.5, 0.5, 0.8]

In [76]:
#ar_coeff = [0.2, 0.5, 0.5, 0.8]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.5, 0.5, 0.8]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):    
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result 
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 4.63183556e-01]
 [ 8.85775180e-01]
 [ 1.06089700e-04]
 [-9.76717593e-04]]
Equal slice ratio:[0.52291323]
Equal number in each slice direction:[[1.47604378e+00]
 [2.60602530e+00]
 [5.14360204e-03]
 [1.92624909e-03]]
Equal number in each slice ratio:[0.56639656]


# SIR for Time series new objective(Double sum)

Code below is for this derivation. When taking the sum across all $q$, it is easily seen that 
$$
\sum_q diag\left[ (\mathbf{W} \circ \Phi^{\circ q})^T Cov(\mathbb{E}(\mathbf{X}_{t-q}|y) (\mathbf{W} \circ\Phi^{\circ q})\right]=diag(\mathbf{W}^T\sum_q \mathbf{V}^{[1:q]}\mathbf{W}),
$$
where
$$
\mathbf{V}^{[1:Q]}_{jk}=\sum_{q=1}^Q\mathbf{V}^q_{jk}\cdot \phi_j^q\cdot \phi_k^q.
$$
I'm wondering, is it $\text{diag}(W^T V^{[1: q]} W)$, not $\text{diag}(\mathbf{W}^T\sum_q \mathbf{V}^{[1:q]}\mathbf{W}).$

## Use equal slice

In [129]:
#from 0 to Q double sum
from tabulate import tabulate
def NEWSIR(ar_coeff, T):
    num_N = 5
    n_obs = 1000
    noise = np.zeros((num_N, n_obs))
    n = 100
    H = 50
    P = 4
    K = 1
    S = 20
    hat = [np.zeros((P, 1)) for i in range(S)]
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    g = np.zeros((S, 1))
    for w in range(n):    
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs))
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        V = []
        for a in range(0,S+1):
            M, edr, lam1 = T(X, y, a, H, K)
            V.append(M)
        for q in range(1, S+1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j,k] = sum(sum(phi[j]**a * V[a][j,k] * phi[k]**a for a in range(0,l)) for l in range(1, q+1)) #double sum
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P-K) >= P-K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est =  K_largest_eigenvectors
            if edr_est[0]<0:
                edr_est = -edr_est
            edr_est = edr_est/np.linalg.norm(edr_est)
            hat[q-1] += edr_est       
    for i in range(S):
        hat[i] = hat[i]/n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

### ar_coeff = [0.2,0.2,0.2,0.2]

In [130]:
ar_coeff = [0.2,0.2,0.2,0.2]
NEWSIR(ar_coeff, sir_1_time_series)

\begin{tabular}{rrrr}
\hline
 0.554219 & 0.830436 & 0.0011332  & 0.00473857 \\
 0.549475 & 0.832778 & 0.00308414 & 0.00395092 \\
 0.547393 & 0.83338  & 0.00374569 & 0.00358167 \\
 0.546266 & 0.833628 & 0.00407116 & 0.00336832 \\
 0.545562 & 0.833758 & 0.00426425 & 0.00322942 \\
 0.545082 & 0.833836 & 0.00439198 & 0.00313179 \\
 0.544733 & 0.833888 & 0.00448267 & 0.00305942 \\
 0.544468 & 0.833924 & 0.00455039 & 0.00300364 \\
 0.544261 & 0.833951 & 0.00460286 & 0.00295934 \\
 0.544093 & 0.833972 & 0.0046447  & 0.00292329 \\
 0.543956 & 0.833988 & 0.00467886 & 0.0028934  \\
 0.54384  & 0.834002 & 0.00470725 & 0.0028682  \\
 0.543742 & 0.834012 & 0.00473124 & 0.00284668 \\
 0.543658 & 0.834022 & 0.00475176 & 0.00282809 \\
 0.543585 & 0.834029 & 0.00476952 & 0.00281186 \\
 0.54352  & 0.834036 & 0.00478504 & 0.00279757 \\
 0.543464 & 0.834042 & 0.00479872 & 0.00278489 \\
 0.543413 & 0.834047 & 0.00481087 & 0.00277357 \\
 0.543368 & 0.834051 & 0.00482173 & 0.0027634  \\
 0.543327 & 0.834055 

### ar_coeff = [0.2,0.2,0.8,0.8]

In [131]:
ar_coeff = [0.2,0.2,0.8,0.8]
NEWSIR(ar_coeff,sir_1_time_series)

\begin{tabular}{rrrr}
\hline
 0.547153 & 0.830722 & -0.0114239  & -0.0049901   \\
 0.419096 & 0.632559 & -0.0360375  & -0.000328941 \\
 0.331373 & 0.480581 & -0.0566771  &  0.00678603  \\
 0.300021 & 0.421163 & -0.0214941  & -0.0191334   \\
 0.282287 & 0.395401 & -0.00811907 & -0.0246749   \\
 0.269339 & 0.375329 & -0.0271879  & -0.0412054   \\
 0.259358 & 0.361838 & -0.0289973  & -0.0423478   \\
 0.251284 & 0.351705 & -0.0284046  & -0.0415472   \\
 0.24255  & 0.341601 & -0.0254017  & -0.038518    \\
 0.238017 & 0.326509 & -0.0385891  & -0.0498715   \\
 0.235093 & 0.322698 & -0.0386271  & -0.0498593   \\
 0.232531 & 0.319394 & -0.0385321  & -0.0497285   \\
 0.230352 & 0.316598 & -0.0384197  & -0.0495571   \\
 0.228492 & 0.31422  & -0.0383301  & -0.0493624   \\
 0.22689  & 0.312175 & -0.0382808  & -0.0491494   \\
 0.225492 & 0.310391 & -0.038279   & -0.0489195   \\
 0.224258 & 0.308815 & -0.0383264  & -0.0486731   \\
 0.223155 & 0.307403 & -0.0384214  & -0.0484104   \\
 0.222158 & 0.306

### ar_coeff = [0.2,0.5,0.8,0.8]

In [132]:
ar_coeff = [0.2,0.5,0.8,0.8]
NEWSIR(ar_coeff,sir_1_time_series)

\begin{tabular}{rrrr}
\hline
 0.459295 & 0.880704 & -0.00714909 & 0.00991775 \\
 0.339796 & 0.730175 &  0.021341   & 0.0268801  \\
 0.264865 & 0.578714 &  0.0433002  & 0.0586561  \\
 0.225784 & 0.488499 &  0.0400155  & 0.0408523  \\
 0.205847 & 0.449643 &  0.0562121  & 0.0513033  \\
 0.192463 & 0.425626 &  0.0527989  & 0.0319704  \\
 0.183164 & 0.406909 &  0.0702575  & 0.0168734  \\
 0.17638  & 0.397186 &  0.0735386  & 0.0160639  \\
 0.171211 & 0.389644 &  0.0761837  & 0.0154874  \\
 0.167149 & 0.383234 &  0.0607322  & 0.024511   \\
 0.163877 & 0.378327 &  0.0625266  & 0.0241779  \\
 0.161201 & 0.369238 &  0.0566731  & 0.0418287  \\
 0.159029 & 0.3576   &  0.0736535  & 0.0507315  \\
 0.157197 & 0.354882 &  0.0747357  & 0.0506117  \\
 0.155629 & 0.35254  &  0.0756595  & 0.0505135  \\
 0.154299 & 0.34054  &  0.0867034  & 0.0364445  \\
 0.153145 & 0.335085 &  0.0734339  & 0.0225382  \\
 0.152155 & 0.319755 &  0.0878027  & 0.0268339  \\
 0.151319 & 0.318527 &  0.0884297  & 0.0267761  \\
 0

### ar_coeff = [0.2,0.5,0.5,0.8]

In [133]:
ar_coeff = [0.2,0.5,0.5,0.8]
NEWSIR(ar_coeff,sir_1_time_series)

\begin{tabular}{rrrr}
\hline
 0.453712 & 0.886373 & 0.0033857 & -0.00442161 \\
 0.37972  & 0.80695  & 0.033548  & -0.0210339  \\
 0.321667 & 0.703135 & 0.0314533 & -0.0626375  \\
 0.288137 & 0.641239 & 0.038019  & -0.0820253  \\
 0.271496 & 0.599427 & 0.0479411 & -0.0886612  \\
 0.261843 & 0.579628 & 0.0473643 & -0.110542   \\
 0.255181 & 0.564687 & 0.037699  & -0.149698   \\
 0.250305 & 0.557468 & 0.0346084 & -0.169822   \\
 0.243017 & 0.545432 & 0.0392208 & -0.163043   \\
 0.239634 & 0.540279 & 0.0398052 & -0.163908   \\
 0.237118 & 0.536426 & 0.0403717 & -0.164838   \\
 0.235022 & 0.533084 & 0.0409626 & -0.165758   \\
 0.233204 & 0.530064 & 0.0415325 & -0.166684   \\
 0.231584 & 0.527291 & 0.0420304 & -0.167627   \\
 0.230119 & 0.524735 & 0.042427  & -0.168591   \\
 0.228785 & 0.522377 & 0.042722  & -0.169573   \\
 0.227563 & 0.520198 & 0.0429333 & -0.170561   \\
 0.226439 & 0.518174 & 0.0430835 & -0.171539   \\
 0.225401 & 0.516285 & 0.0431911 & -0.172484   \\
 0.224441 & 0.510067 

## Use equal number in each slice

In [106]:
# #from 0 to Q double sum
# from tabulate import tabulate
# import numpy as np

# def uu2(ar_coeff):
#     import numpy as np
#     from tabulate import tabulate
    
#     def sir_1_time_series1(X, y, s, num_slices, K):
#         n_samples, n_features = X.shape
#         V_hat = np.zeros([X.shape[1], X.shape[1]])
#         # Step 1: Sort the data by the response variable
#         sorted_indices = np.argsort(y)
#         X_sorted = X[sorted_indices]
#         y_sorted = y[sorted_indices]
#         # Step 2: Divide the data into slices
#         slice_size = n_samples // num_slices
#         ph_hat = slice_size / n_samples
#         slices = []
        
#         for i in range(num_slices):
#             start_idx = i * slice_size
#             if i < num_slices - 1:
#                 end_idx = (i + 1) * slice_size
#             else:  # Last slice includes any remaining samples
#                 end_idx = n_samples
#             if start_idx - s > 0:
#                 slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
#             else:
#                 slices.append((X[sorted_indices[0:end_idx - s]], y_sorted[start_idx:end_idx]))
        
#         # Step 3: Compute the means of the predictors within each slice
#         X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])
        
#         # Step 4: Center the predictor means
#         X_centered = X_means - np.mean(X_means, axis=0)
        
#         V_hat = np.add(V_hat, ph_hat * np.matmul(X_centered.T, X_centered))

#         # Check for NaN or inf values in V_hat
#         if np.isnan(V_hat).any() or np.isinf(V_hat).any():
#             # Handle NaN or inf values appropriately
#             # For example, replace NaN values with 0 and inf values with a large number
#             V_hat[np.isnan(V_hat)] = 0
#             V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat
    
#     # Compute eigenvalues and eigenvectors
#         eigenvalues, eigenvectors = np.linalg.eig(V_hat)
#         eigenvalues, eigenvectors = np.linalg.eig(V_hat)
#         K_index = np.argpartition(np.abs(eigenvalues), X.shape[1] - K) >= X.shape[1] - K
#         K_largest_eigenvectors = eigenvectors[:, K_index]
#         edr_est = K_largest_eigenvectors
        
#         return V_hat, edr_est, eigenvalues ** 2
    
#     num_N = 5
#     n_obs = 1000
#     noise = np.zeros((num_N, n_obs))
#     n = 100
#     H = 50
#     P = 4
#     K = 1
#     S = 20
#     hat = [np.zeros((P, 1)) for _ in range(S)]
#     g = np.zeros((S, 1))
#     # ar_coeff = [0.2, 0.2, 0.2, 0.2]
#     n1 = 0
#     l = 1  # Initialize `l` outside the loop
#     while n1 < 100:
#         for h in range(num_N):
#             noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
#         ar_series = np.zeros((num_N, n_obs))
#         for t in range(0, n_obs):
#             ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1] + noise[0][t]
#             ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1] + noise[1][t]
#             ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1] + noise[2][t]
#             ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1] + noise[3][t]
#             ar_series[4][t] = 2 * ar_series[0][t] + 3 * ar_series[1][t] + noise[4][t]
#         y = ar_series[4]
#         X = np.concatenate([ar_series[i].reshape(-1, 1) for i in range(4)], axis=1)
#         V = []
#         for a in range(0, S + 1):
#             M, _, _ = sir_1_time_series1(X, y, a, H, K)
#             V.append(M)
#         for q in range(1, S + 1):
#             Q = np.zeros((P, P))
#             phi = ar_coeff
#             for j in range(P):
#                 for k in range(P):
#                     Q[j, k] = sum(sum(phi[j] ** a * V[a][j, k] * phi[k] ** a for a in range(0, l)) for l in range(1, q + 1))
#             eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
#             K_index = np.argpartition(np.abs(eigenvalues1), P - K) >= P - K
#             K_largest_eigenvectors = eigenvectors1[:, K_index]
#             edr_est = K_largest_eigenvectors
#             if edr_est[0] < 0:
#                 edr_est = -edr_est
#             edr_est = edr_est / np.linalg.norm(edr_est)
#             hat[q - 1] += edr_est
#             n1 += 1
    
#     for i in range(S):
#         hat[i] = hat[i] / n
#         g[i] = hat[i][0] / hat[i][1]
#     array = np.array(hat)
#     print(tabulate(array, tablefmt='latex'))
#     print(g)

# # Example usage
# ar_coeff = [0.2, 0.2, 0.2, 0.2]
# uu2(ar_coeff)

### ar_coeff = [0.2,0.2,0.2,0.2]

In [None]:
ar_coeff = [0.2,0.2,0.2,0.2]
NEWSIR(ar_coeff, sir_1_time_series1)

### ar_coeff = [0.2,0.2,0.8,0.8]

In [80]:
ar_coeff = [0.2,0.2,0.8,0.8]
NEWSIR(ar_coeff, sir_1_time_series1)

\begin{tabular}{rrrr}
\hline
 0.0261015 & 0.0424326 & 0.00158348 & 0.000551446 \\
 0.0260885 & 0.0424096 & 0.00172348 & 0.000470322 \\
 0.0260789 & 0.0423897 & 0.00183951 & 0.000430528 \\
 0.026071  & 0.0423736 & 0.00192171 & 0.000405332 \\
 0.026064  & 0.0423599 & 0.00198741 & 0.000387352 \\
 0.026058  & 0.0423481 & 0.0020394  & 0.000376529 \\
 0.0260529 & 0.0423383 & 0.00208166 & 0.000368662 \\
 0.0260486 & 0.0423299 & 0.00211583 & 0.000362479 \\
 0.0260448 & 0.0423228 & 0.00214411 & 0.000357546 \\
 0.0260416 & 0.0423167 & 0.00216775 & 0.000353525 \\
 0.0260388 & 0.0423113 & 0.00218789 & 0.000350162 \\
 0.0260364 & 0.0423067 & 0.00220526 & 0.000347249 \\
 0.0260342 & 0.0423026 & 0.00222038 & 0.000344689 \\
 0.0260323 & 0.042299  & 0.00223359 & 0.000342406 \\
 0.0260306 & 0.0422959 & 0.00224522 & 0.000340366 \\
 0.0260291 & 0.042293  & 0.00225552 & 0.00033852  \\
 0.0260278 & 0.0422904 & 0.00226469 & 0.000336845 \\
 0.0260266 & 0.0422881 & 0.00227292 & 0.000335322 \\
 0.0260254 & 0.04

### ar_coeff = [0.2,0.5,0.8,0.8]

In [70]:
ar_coeff = [0.2,0.5,0.8,0.8]
NEWSIR(ar_coeff, sir_1_time_series1)

\begin{tabular}{rrrr}
\hline
 0.0242404 & 0.0432713 & 0.00111159 & 0.000576685 \\
 0.0236342 & 0.043481  & 0.00131628 & 0.000700589 \\
 0.0233527 & 0.0435416 & 0.00144546 & 0.000766658 \\
 0.0231901 & 0.0435552 & 0.00153747 & 0.000827364 \\
 0.0230834 & 0.0435519 & 0.00160634 & 0.000876656 \\
 0.0230082 & 0.0435431 & 0.00165926 & 0.000912523 \\
 0.0229521 & 0.0435329 & 0.00169987 & 0.000939351 \\
 0.0229086 & 0.0435226 & 0.00173293 & 0.000960179 \\
 0.0228738 & 0.043513  & 0.00176026 & 0.00097653  \\
 0.0228455 & 0.0435043 & 0.00178293 & 0.000989822 \\
 0.0228219 & 0.0434965 & 0.0018021  & 0.0010007   \\
 0.0228021 & 0.0434895 & 0.00181849 & 0.00100975  \\
 0.0227851 & 0.0434834 & 0.00183263 & 0.00101734  \\
 0.0227704 & 0.0434778 & 0.00184493 & 0.00102382  \\
 0.0227576 & 0.0434729 & 0.00185571 & 0.00102944  \\
 0.0227464 & 0.0434684 & 0.00186521 & 0.00103435  \\
 0.0227364 & 0.0434644 & 0.00187365 & 0.00103868  \\
 0.0227275 & 0.0434607 & 0.0018812  & 0.00104252  \\
 0.0227195 & 0.04

### ar_coeff = [0.2,0.5,0.5,0.8]

In [71]:
ar_coeff = [0.2,0.5,0.5,0.8]
NEWSIR(ar_coeff, sir_1_time_series1)

\begin{tabular}{rrrr}
\hline
 0.021882  & 0.0445643 & 0.00130569 & -0.00163376 \\
 0.0212254 & 0.0447941 & 0.00138485 & -0.00177341 \\
 0.0209237 & 0.0448746 & 0.00141902 & -0.00188339 \\
 0.0207576 & 0.0449085 & 0.00143795 & -0.00195949 \\
 0.0206533 & 0.0449241 & 0.00144985 & -0.00201511 \\
 0.020582  & 0.0449317 & 0.00145783 & -0.00205717 \\
 0.0205302 & 0.0449356 & 0.00146355 & -0.00209    \\
 0.0204908 & 0.0449374 & 0.00146785 & -0.00211639 \\
 0.0204599 & 0.0449382 & 0.0014712  & -0.00213772 \\
 0.0204349 & 0.0449385 & 0.00147389 & -0.00215533 \\
 0.0204144 & 0.0449385 & 0.00147609 & -0.00216993 \\
 0.0203972 & 0.0449383 & 0.00147792 & -0.00218226 \\
 0.0203827 & 0.0449381 & 0.00147947 & -0.00219281 \\
 0.0203701 & 0.0449378 & 0.0014808  & -0.00220195 \\
 0.0203593 & 0.0449374 & 0.00148195 & -0.00220992 \\
 0.0203497 & 0.0449371 & 0.00148296 & -0.00221693 \\
 0.0203413 & 0.0449368 & 0.00148385 & -0.00222315 \\
 0.0203338 & 0.0449365 & 0.00148464 & -0.00222868 \\
 0.020327  & 0.04

# SIR for Time series new objective(Single sum)

If the objective is $$\text{diag}(W^T V^{[1: Q]} W)$$

## Use equal slice

In [104]:
#from 0 to Q single sum
from tabulate import tabulate
def NEWSIRsingle(ar_coeff):
    num_N = 5
    n_obs = 1000
    noise = np.zeros((num_N, n_obs))
    n = 100
    H = 50
    P = 4
    K = 1
    S = 20
    hat = [np.zeros((P, 1)) for i in range(S)]
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    g = np.zeros((S, 1))
    for w in range(n):    
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs))
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        V = []
        for a in range(0,S+1):
            M, edr, lam1 = sir_1_time_series(X, y, a, H, K)
            V.append(M)
        for q in range(1, S+1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j,k] = sum(phi[j]**a * V[a][j,k] * phi[k]**a for a in range(0,q)) #single sum
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P-K) >= P-K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est =  K_largest_eigenvectors
            if edr_est[0]<0:
                edr_est = -edr_est
            edr_est = edr_est/np.linalg.norm(edr_est)
            hat[q-1] += edr_est       
    for i in range(S):
        hat[i] = hat[i]/n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

### ar_coeff = [0.2,0.2,0.2,0.2]

In [105]:
ar_coeff = [0.2,0.2,0.2,0.2]
NEWSIRsingle(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.55235  & 0.831289 & -0.00652422 &  0.00176025 \\
 0.554268 & 0.825945 & -0.0128939  & -0.00945419 \\
 0.554282 & 0.825614 & -0.0131315  & -0.00992433 \\
 0.554283 & 0.825601 & -0.0131409  & -0.00994309 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994384 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  

### ar_coeff = [0.2,0.2,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.2,0.8,0.8]
NEWSIRsingle(ar_coeff)

### ar_coeff = [0.2,0.5,0.5,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.5,0.8]
NEWSIRsingle(ar_coeff)

### ar_coeff = [0.2,0.5,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.8,0.8]
NEWSIRsingle(ar_coeff)

## Use equal number in each slice

In [134]:
#from 0 to Q single sum
from tabulate import tabulate
import numpy as np

def uuu2(ar_coeff):
    import numpy as np
    from tabulate import tabulate
    
    def sir_1_time_series1(X, y, s, num_slices, K):
        n_samples, n_features = X.shape
        V_hat = np.zeros([X.shape[1], X.shape[1]])
        # Step 1: Sort the data by the response variable
        sorted_indices = np.argsort(y)
        X_sorted = X[sorted_indices]
        y_sorted = y[sorted_indices]
        # Step 2: Divide the data into slices
        slice_size = n_samples // num_slices
        ph_hat = slice_size / n_samples
        slices = []
        
        for i in range(num_slices):
            start_idx = i * slice_size
            if i < num_slices - 1:
                end_idx = (i + 1) * slice_size
            else:  # Last slice includes any remaining samples
                end_idx = n_samples
            if start_idx - s > 0:
                slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
            else:
                slices.append((X[sorted_indices[0:end_idx - s]], y_sorted[start_idx:end_idx]))
        
        # Step 3: Compute the means of the predictors within each slice
        X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])
        
        # Step 4: Center the predictor means
        X_centered = X_means - np.mean(X_means, axis=0)
        
        V_hat = np.add(V_hat, ph_hat * np.matmul(X_centered.T, X_centered))

        #report wrong, there's  NaN or inf value in V_hat, so add this
        # Check for NaN or inf values in V_hat
        if np.isnan(V_hat).any() or np.isinf(V_hat).any():
            # Handle NaN or inf values appropriately
            # For example, replace NaN values with 0 and inf values with a large number
            V_hat[np.isnan(V_hat)] = 0
            V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat
    
    # Compute eigenvalues and eigenvectors
        eigenvalues, eigenvectors = np.linalg.eig(V_hat)
        eigenvalues, eigenvectors = np.linalg.eig(V_hat)
        K_index = np.argpartition(np.abs(eigenvalues), X.shape[1] - K) >= X.shape[1] - K
        K_largest_eigenvectors = eigenvectors[:, K_index]
        edr_est = K_largest_eigenvectors
        
        return V_hat, edr_est, eigenvalues ** 2
    
    num_N = 5
    n_obs = 1000
    noise = np.zeros((num_N, n_obs))
    n = 100
    H = 50
    P = 4
    K = 1
    S = 20
    hat = [np.zeros((P, 1)) for _ in range(S)]
    g = np.zeros((S, 1))
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    n1 = 0
    while n1 < 100:
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs))
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1] + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1] + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1] + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1] + noise[3][t]
            ar_series[4][t] = 2 * ar_series[0][t] + 3 * ar_series[1][t] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1, 1) for i in range(4)], axis=1)
        V = []
        for a in range(0, S + 1):
            M, _, _ = sir_1_time_series1(X, y, a, H, K)
            V.append(M)
        for q in range(1, S + 1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j, k] = sum(phi[j] ** a * V[a][j, k] * phi[k] ** a for a in range(0, q))
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P - K) >= P - K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est = K_largest_eigenvectors
            if edr_est[0] < 0:
                edr_est = -edr_est
            edr_est = edr_est / np.linalg.norm(edr_est)
            hat[q - 1] += edr_est
            n1 += 1
    
    for i in range(S):
        hat[i] = hat[i] / n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

In [None]:
ar_coeff = [0.2, 0.2, 0.2, 0.2]
uuu2(ar_coeff)

In [93]:
ar_coeff = [0.2,0.5,0.8,0.8]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0233308 & 0.0439774 & 0.00148481 & -0.000284376 \\
 0.0221942 & 0.0444542 & 0.00192512 & -0.000589303 \\
 0.0219856 & 0.0444877 & 0.00210631 & -0.000800864 \\
 0.0219211 & 0.0444724 & 0.0022261  & -0.00096454  \\
 0.0219007 & 0.0444587 & 0.00228155 & -0.00103267  \\
 0.0218916 & 0.0444483 & 0.00231401 & -0.00105981  \\
 0.0218865 & 0.0444405 & 0.00233926 & -0.00107817  \\
 0.0218835 & 0.0444349 & 0.00235877 & -0.00108906  \\
 0.0218817 & 0.0444316 & 0.00236771 & -0.00109702  \\
 0.0218806 & 0.0444295 & 0.00237299 & -0.00110342  \\
 0.0218799 & 0.0444281 & 0.0023764  & -0.00110694  \\
 0.0218794 & 0.0444272 & 0.00237852 & -0.00110875  \\
 0.0218791 & 0.0444266 & 0.00238001 & -0.00111006  \\
 0.0218789 & 0.0444262 & 0.00238104 & -0.00111085  \\
 0.0218788 & 0.044426  & 0.00238187 & -0.00111133  \\
 0.0218787 & 0.0444259 & 0.00238226 & -0.00111153  \\
 0.0218787 & 0.0444258 & 0.0023826  & -0.00111165  \\
 0.0218786 & 0.0444257 & 0.00238281 & -0.00111177  \\

In [94]:
ar_coeff = [0.2,0.2,0.8,0.8]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0281901 & 0.0407746 & 0.000983024 & -0.00229931 \\
 0.0281302 & 0.0406785 & 0.00108717  & -0.00267873 \\
 0.0280782 & 0.0405959 & 0.00119454  & -0.00292771 \\
 0.0280312 & 0.0405244 & 0.00132309  & -0.00306977 \\
 0.0279983 & 0.0404748 & 0.00143155  & -0.00316628 \\
 0.0279731 & 0.0404372 & 0.00151865  & -0.00323207 \\
 0.0279567 & 0.0404124 & 0.00156755  & -0.0032817  \\
 0.0279465 & 0.0403974 & 0.00160851  & -0.0033108  \\
 0.0279403 & 0.0403882 & 0.00163071  & -0.00332418 \\
 0.0279365 & 0.0403826 & 0.00164288  & -0.00333221 \\
 0.0279336 & 0.0403781 & 0.00165054  & -0.00333886 \\
 0.0279319 & 0.0403755 & 0.00165522  & -0.00334191 \\
 0.0279309 & 0.040374  & 0.0016585   & -0.0033438  \\
 0.0279302 & 0.040373  & 0.0016607   & -0.00334503 \\
 0.0279297 & 0.0403722 & 0.00166182  & -0.00334607 \\
 0.0279294 & 0.0403718 & 0.00166244  & -0.00334668 \\
 0.0279292 & 0.0403715 & 0.00166287  & -0.00334692 \\
 0.0279291 & 0.0403713 & 0.0016633   & -0.00334719 \\

In [95]:
ar_coeff = [0.2,0.5,0.5,0.8]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0219593 & 0.04474   & -0.000577489 & -0.00238238 \\
 0.0207578 & 0.0452431 & -0.000575591 & -0.00293161 \\
 0.0205502 & 0.0453095 & -0.00058722  & -0.00322293 \\
 0.0204969 & 0.0453141 & -0.000588143 & -0.00338837 \\
 0.0204803 & 0.0453096 & -0.000589038 & -0.00347471 \\
 0.0204743 & 0.0453051 & -0.000588676 & -0.00352741 \\
 0.0204716 & 0.0453019 & -0.000588435 & -0.00355672 \\
 0.0204698 & 0.0452991 & -0.000588162 & -0.00358144 \\
 0.0204685 & 0.0452969 & -0.000587925 & -0.00359898 \\
 0.0204677 & 0.0452956 & -0.000587791 & -0.00361018 \\
 0.0204672 & 0.0452947 & -0.000587693 & -0.00361719 \\
 0.020467  & 0.0452942 & -0.000587637 & -0.00362133 \\
 0.0204668 & 0.0452938 & -0.000587589 & -0.00362406 \\
 0.0204666 & 0.0452936 & -0.000587557 & -0.00362584 \\
 0.0204666 & 0.0452935 & -0.000587538 & -0.00362686 \\
 0.0204665 & 0.0452934 & -0.000587528 & -0.00362753 \\
 0.0204665 & 0.0452933 & -0.000587521 & -0.00362791 \\
 0.0204665 & 0.0452933 & -0.00058751

In [96]:
ar_coeff = [0.2,0.2,0.2,0.2]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0276621 & 0.0415087 & -0.00187225 & 0.00016349  \\
 0.0276669 & 0.0415049 & -0.00186133 & 0.000174439 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174786 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174795 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\

# TSIR

In [124]:
#0.2 0.2 can find edr direction
def TSIR(ar_coeff, T):
    n = 100
    S = 12  
    H = 50
    K = 1
    P = 4
    num_N = 5
    n_x = 2
    n_obs = 1000
    QW = np.zeros((P,4))
    for w in range(n):
        noise = np.zeros((num_N, n_obs))
        ar_series = np.zeros((num_N, n_obs))
        Newar_series = np.zeros((n_obs, num_N - 1))
        M = np.zeros((num_N, n_obs))
        X = []
        num_dataframes = S
        y = []
        ar_seriesT1 = pd.DataFrame({})
        ar_seriesT = pd.DataFrame({})
        X1 = np.zeros((P,P))
        for l in range(num_dataframes):    
            dg = pd.DataFrame({})
            X.append(dg)    
        scaler = StandardScaler()  
        lam = pd.DataFrame({})
        # ar_coeff1 = 0.3  # Autoregressive coefficient
        # ar_coeff2 = 0.7
        B = []
        edr1 = []
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        # Generate the additive AR time series
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
            ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        X = scaler.fit_transform(X)
        # for t in range(0, n_obs):
        #     ar_seriesT1 = pd.concat([ar_seriesT1, pd.DataFrame([[ar_series[0][t], ar_series[1][t], ar_series[2][t], ar_series[3][t]]])], axis = 0, ignore_index=True)
        #     X1 = X1 + ar_seriesT1.values[t].reshape(-1, 1) @ ar_seriesT1.values[t].reshape(-1, 1).T
        # X1 = 1/n_obs*X1    
        # eigenvalues, eigenvectors = np.linalg.eig(X1)
        # # Compute reciprocal square root of eigenvalues
        # recip_sqrt_eigenvalues = np.diag(1 / np.sqrt(eigenvalues))
        # # Reconstruct the matrix
        # A_neg_half = eigenvectors @ recip_sqrt_eigenvalues @ np.linalg.inv(eigenvectors)
        # for t in range(0, n_obs):
        #     # M = np.array([Newar_series[i][t] for i in range(num_N - 1)])
        #     Newar_series[t] = A_neg_half @ ar_seriesT1.values[t]
        for j in range(1,S + 1):
            # for T in range(1,n_obs + 1):   
            #     if T + j >= (n_obs):
            #         break
            #     else:
                    # y[j-1] = pd.concat([y[j-1], pd.Series([ar_series[4][T+j]])], ignore_index=True)
                    # M = pd.DataFrame({})
                    # for i in range(0,5):
                    #     M[i] = ar_series[i][T]
            # ar_seriesT = pd.DataFrame([[Newar_series[T][0], Newar_series[T][1], Newar_series[T][2], Newar_series[T][3]]])    
            V, edr, lam1 = T(X, y, j, H, K=K)
            B.append(V)
            edr1.append(edr)
        from scipy.stats import ortho_group
        def create_random_orthogonal_matrix(rows, cols):
            # Create a random matrix
            random_matrix = np.random.rand(rows, cols)    
            # Perform QR decomposition
            q, r = np.linalg.qr(random_matrix)    
            return q
        rows = 4
        cols = 4
        P1 = create_random_orthogonal_matrix(rows, cols)
        #W_i
        Gam = np.zeros((4,4))
        r = np.zeros((4,4))
        n1 = 0
        while n1<=10000:
            for i in range(K):
                for k in range(S):
                    Gam[i] += (P1[i] @ B[k] @ P1[i]) * B[k] @ P1[i] 
            U, S1, VT = np.linalg.svd(Gam)
            QQ = U @ VT
            P1 = QQ
            n1 = n1 + 1 
        for i in range(len(QQ)):
            if QQ[i][0]<0:
                QQ[i] = -QQ[i]
        QQ = QQ/np.linalg.norm(QQ)
        # sum(lam.apply(find_largest, axis=1).values[i]*edr1[i] for i in range(len(edr1)))
        QW += QQ
    
        # LAM = pd.DataFrame({})
        # for j in range(1, S + 1):
        #     for i in range(4):
        #         LAM1= (QQ[i] @ B[j-1] @ QQ[i].T)**2
        #         LAM = pd.concat([LAM, pd.DataFrame([LAM1])], ignore_index=True)
        # total_sum = LAM.sum().sum()
        # LAM = LAM/total_sum
        # reshaped_data = LAM.values.reshape(S, 4)
        # # Convert reshaped data back to DataFrame
        # LAM = pd.DataFrame(reshaped_data)
        # sorted_columns = LAM.sum().sort_values().index
        # Reorder DataFrame columns based on sorted column names
        # LAM = LAM[sorted_columns]
        
    QW = QW/n
    
    return QW

In [122]:
TSIR([0.2,0.2], sir_1_time_series1)

array([[ 2.74463098e-01,  4.16778804e-01, -3.52710953e-04,
         2.12060850e-03],
       [ 4.17685428e-01, -2.73863926e-01,  3.77812782e-04,
        -1.52171407e-03],
       [ 0.00000000e+00,  5.02152914e-04,  4.99345206e-01,
         2.06553633e-05],
       [ 0.00000000e+00, -2.61130959e-03,  2.06553633e-05,
         4.99567541e-01]])

In [123]:
TSIR([0.2,0.8], sir_1_time_series1)

array([[ 1.22438364e-01,  4.82752552e-01,  3.08550664e-03,
        -2.66403375e-03],
       [ 4.84052887e-01, -1.22117915e-01, -8.74153785e-04,
         7.29552118e-04],
       [ 0.00000000e+00, -3.21321710e-03,  4.98983577e-01,
         2.85369021e-05],
       [ 0.00000000e+00,  2.76436169e-03,  2.85369021e-05,
         4.99675328e-01]])

In [125]:
TSIR([0.2,0.2], sir_1_time_series)

array([[ 0.21742315, -0.00966039,  0.01254573,  0.03445188],
       [ 0.42838298, -0.00425975, -0.00105679, -0.0245955 ],
       [ 0.        ,  0.03597271,  0.4086829 ,  0.0046153 ],
       [ 0.        ,  0.02906866,  0.0046153 ,  0.36736469]])

In [126]:
TSIR([0.2,0.8], sir_1_time_series)

array([[ 2.33343568e-01, -2.49433031e-03, -6.40901368e-03,
         3.76527795e-02],
       [ 4.12871385e-01, -3.52583083e-03,  4.42063147e-03,
        -7.69900231e-04],
       [ 0.00000000e+00, -4.12862559e-02,  3.69667869e-01,
         2.30828716e-03],
       [ 0.00000000e+00,  7.89737522e-05,  2.30828716e-03,
         3.42968195e-01]])