<a href="https://colab.research.google.com/github/weirdmini/SIR-time-series/blob/main/Summary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Defined functions

In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.arima_process import ArmaProcess
from matplotlib import pyplot as plt
from scipy.linalg import sqrtm
def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((point1 - point2)**2))
# equal slice
def sir_1(X, y, H, K):
    Z = X-np.mean(X, axis=0)
    width = (np.max(y) - np.min(y)) / H
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    for h in range(H):
        h_index = np.logical_and(np.min(y)+h*width <= y, y < np.min(y)+(h+1)*width)
        ph_hat = np.mean(h_index)
        if ph_hat == 0:
            continue
        mh = np.mean(Z[h_index, :], axis=0)
        V_hat = np.add(V_hat,ph_hat * np.matmul(mh[:, np.newaxis], mh[np.newaxis, :]))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors
    return  edr_est

# equal slice
def sir_1_time_series(X, y, s, H, K):
    from collections import deque
    def move_left(queue, steps):
        # Length of the queue
        n = len(queue)
        # Create a new list filled with 0
        new_queue = [0] * n

        # Move elements to the left by the specified number of steps
        for i in range(n):
            new_position = i - steps
            if new_position >= 0:
                new_queue[new_position] = queue[i]

        return new_queue

    Z = X-np.mean(X, axis=0)
    width = (np.max(y) - np.min(y)) / H
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    for h in range(H):
        h_index = np.logical_and(np.min(y)+h*width <= y, y < np.min(y)+(h+1)*width)
        # queue moves s locations backward
        h_index = move_left(h_index, s)
        # h_index = h_index[len(y) - X.shape[0]:]
        # h_index = h_index[:X.shape[0]]
        ph_hat = np.mean(h_index)
        if ph_hat == 0:
            continue
        # if np.all((h_index == 1) < X.shape[0]):
        mh = np.mean(Z[h_index, :], axis=0)
        V_hat = np.add(V_hat,ph_hat * np.matmul(mh[:, np.newaxis], mh[np.newaxis, :]))
        # else:
        #     h_index = h_index
        #     mh = np.mean(Z[h_index, :], axis=0)
        #     V_hat = np.add(V_hat,ph_hat * np.matmul(mh[:, np.newaxis], mh[np.newaxis, :]))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors
    return  (V_hat, edr_est, eigenvalues ** 2)

#equal number in each slice
#queue structure
def sir_1_time_series1(X, y, s, num_slices, K):
    n_samples, n_features = X.shape
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    # Step 1: Sort the data by the response variable
    sorted_indices = np.argsort(y)
    # sorted_indices1 = np.argsort(y[0:X.shape[0]])
    # if len(sorted_indices) > X.shape[0]:
    #     X_sorted = X[0:X.shape[0]][sorted_indices1]
    # else:
    X_sorted = X[sorted_indices]
    y_sorted = y[sorted_indices]
    # Step 2: Divide the data into slices
    slice_size = n_samples // num_slices
    ph_hat = slice_size/n_samples
    slices = []
    # original indices are the order of t, the sorted indices are the order of y from its size
    # argsort gives the indices for the order of y from its size
    for i in range(num_slices):
        start_idx = i * slice_size
        if i < num_slices - 1:
            end_idx = (i + 1) * slice_size
        else:  # Last slice includes any remaining samples
            end_idx = n_samples
        if start_idx-s>0:
            slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
        else:
            slices.append((X[sorted_indices[0:end_idx-s]], y_sorted[start_idx:end_idx]))

    # Step 3: Compute the means of the predictors within each slice
    X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

    # Step 4: Center the predictor means
    X_centered = X_means - np.mean(X_means, axis=0)
    V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
    # Check for NaN or inf values in V_hat
    if np.isnan(V_hat).any() or np.isinf(V_hat).any():
        # Handle NaN or inf values appropriately
        # For example, replace NaN values with 0 and inf values with a large number
        V_hat[np.isnan(V_hat)] = 0
        V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat

    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors

    return (V_hat, edr_est, eigenvalues ** 2)

# def sir_1_time_series2(X, y, num_slices, K):
#     n_samples, n_features = X.shape
#     V_hat = np.zeros([X.shape[1], X.shape[1]])
#     # Step 1: Sort the data by the response variable
#     sorted_indices = np.argsort(y)
#     # sorted_indices1 = np.argsort(y[0:X.shape[0]])
#     # if len(sorted_indices) > X.shape[0]:
#     #     X_sorted = X[0:X.shape[0]][sorted_indices1]
#     # else:
#     X_sorted = X[sorted_indices]
#     y_sorted = y[sorted_indices]
#     # Step 2: Divide the data into slices
#     slice_size = n_samples // num_slices
#     ph_hat = slice_size/n_samples
#     slices = []
#     # original indices are the order of t, the sorted indices are the order of y from its size
#     # argsort gives the indices for the order of y from its size
#     for i in range(num_slices):
#         start_idx = i * slice_size
#         if i < num_slices - 1:
#             end_idx = (i + 1) * slice_size
#         else:  # Last slice includes any remaining samples
#             end_idx = n_samples
#         if start_idx-s>0:
#             slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
#         else:
#             slices.append((X[sorted_indices[0:end_idx-s]], y_sorted[start_idx:end_idx]))

#     # Step 3: Compute the means of the predictors within each slice
#     X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

#     # Step 4: Center the predictor means
#     X_centered = X_means - np.mean(X_means, axis=0)
#     V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
#     # Check for NaN or inf values in V_hat
#     if np.isnan(V_hat).any() or np.isinf(V_hat).any():
#         # Handle NaN or inf values appropriately
#         # For example, replace NaN values with 0 and inf values with a large number
#         V_hat[np.isnan(V_hat)] = 0
#         V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat

#     eigenvalues, eigenvectors = np.linalg.eig(V_hat)
#     K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
#     K_largest_eigenvectors = eigenvectors[:, K_index]
#     edr_est =  K_largest_eigenvectors

#     return (V_hat, edr_est, eigenvalues ** 2)

In [None]:
import numpy as np
X = [1,4,5,3,7]
sort1 = np.argsort(X)
sort1

array([0, 3, 1, 2, 4], dtype=int64)

In [None]:
#equal number in each slice for SIR
def sir_11(X, y, num_slices, K):
    n_samples, n_features = X.shape
    V_hat = np.zeros([X.shape[1], X.shape[1]])
    # Step 1: Sort the data by the response variable
    sorted_indices = np.argsort(y)
    X_sorted = X[sorted_indices]
    y_sorted = y[sorted_indices]

    # Step 2: Divide the data into slices
    slice_size = n_samples // num_slices
    ph_hat = slice_size/n_samples
    slices = []

    for i in range(num_slices):
        start_idx = i * slice_size
        if i < num_slices - 1:
            end_idx = (i + 1) * slice_size
        else:  # Last slice includes any remaining samples
            end_idx = n_samples
        slices.append((X_sorted[start_idx:end_idx], y_sorted[start_idx:end_idx]))

    # Step 3: Compute the means of the predictors within each slice
    X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

    # Step 4: Center the predictor means
    X_centered = X_means - np.mean(X_means, axis=0)

    V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
    eigenvalues, eigenvectors = np.linalg.eig(V_hat)
    K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
    K_largest_eigenvectors = eigenvectors[:, K_index]
    edr_est =  K_largest_eigenvectors

    return edr_est

### Above are functions for SIR, SIR different output(V_hat, edr_est, eigenvalues ** 2 and edr_est), SIR for equal number in each slice and SIR with equal slice.

# SIR
## The results are shown by 1. directions; 2. ratio between first two coefficients

# Curious why SIR fails when given time series data

# For iid data

In [None]:
#average
# example 1 : y = x_1 + x_2 + x_3 + x_4 + 0x_5 + epsilon
hat = 0
for a in range(100):
    n = 100
    H = 5
    K = 1
    beta = np.array([1, 1, 1, 1, 0])
#     print('beta : ', beta)
    X = np.random.normal(0, 1, [n, 5])
    epsilon = np.random.normal(0, 1, n)
    y = np.matmul(X, beta) + epsilon

#     print('SIR Method 1, beta_hat : ')
    sir_1_result = sir_1(X, y, H=H, K=K)
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result

hat = hat/100
print(hat)
# print('SIR Method 1, projection matrix F norm : ')
# print(projection_matrix_F_norm(beta[:, np.newaxis], sir_1_result))

[[ 0.4822566 ]
 [ 0.4923153 ]
 [ 0.47870703]
 [ 0.48558868]
 [-0.02222339]]


# For time series data

For model
\begin{align}
   & y_t = 2z_{1,t-1} + 3z_{2,t-1} +\epsilon_t
    \label{model:time series 2}
\end{align}
with four components $z_1 \sim \text{AR}(1)$ with $\phi_1$, $z_2 \sim \text{AR}(1)$ with $\phi_2$, $z_3 \sim \text{ARMA}(1, 1)$ with $\phi = 0.3$ and $\theta = 0.4$ and $z_4 \sim \text{MA}(1)$ with $\theta = -0.4$. Dimension $p = 4$. $K = 1$.

## Results under equal slice and equal number in each slice

### coefficient 0.2 0.2, correct direction

In [None]:
#0.2 0.2 correct direction
num_N = 5
n_obs = 10000
ar_coeff1 = 0.2
ar_coeff2 = 0.2
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
Hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff1 * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff2 * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
        ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 0.55022728]
 [ 0.82705784]
 [-0.00266489]
 [-0.00193875]]
Equal slice ratio:[0.66528271]
Equal number in each slice direction:[[ 5.47185641e-01]
 [ 8.29002510e-01]
 [-3.05165667e-03]
 [-7.34620269e-04]]
Equal number in each slice ratio:[0.66005306]


### coefficient 0.2 0.8, incorrect direction

In [None]:
#0.2 0.8 incorrect direction
num_N = 5
n_obs = 10000
ar_coeff1 = 0.2
ar_coeff2 = 0.8
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
Hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff1 * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff2 * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
        ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result


print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 2.41554690e-01]
 [ 9.70260558e-01]
 [-7.19133248e-04]
 [ 2.93346564e-04]]
Equal slice ratio:[0.24895858]
Equal number in each slice direction:[[ 2.41509793e-01]
 [ 9.70272399e-01]
 [-7.28361284e-04]
 [ 3.32651879e-04]]
Equal number in each slice ratio:[0.24890927]


### coefficient 0.8 0.8, correct direction

In [None]:
#0.8 0.8 correct direction
num_N = 5
n_obs = 10000
ar_coeff1 = 0.8
ar_coeff2 = 0.8
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
Hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff1 * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff2 * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
        ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 5.51164249e-01]
 [ 8.33834171e-01]
 [ 7.05916426e-04]
 [-2.35679570e-04]]
Equal slice ratio:[0.66099983]
Equal number in each slice direction:[[ 5.51118853e-01]
 [ 8.33869763e-01]
 [ 7.60820593e-04]
 [-2.48872414e-04]]
Equal number in each slice ratio:[0.66091718]


## SIR for model
\begin{align}
   & y_t = 2z_{1,t} + 3z_{2,t} +\epsilon_t
\end{align}
with four components $z_i \sim \text{AR}(1)$ with $\phi_i, i = 1, \ldots, 4$. I denote the model as NO(0.2, 0.2, 0.2, 0.2) if $\phi_i = 0.2, i = 1, \ldots, 4$. $K = 1$.

### ar_coeff = [0.2, 0.2, 0.2, 0.2]

In [None]:
#ar_coeff = [0.2, 0.2, 0.2, 0.2]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.2, 0.2, 0.2]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 5.55096135e-01]
 [ 8.31595927e-01]
 [ 9.87298312e-04]
 [-1.54833445e-04]]
Equal slice ratio:[0.66750704]
Equal number in each slice direction:[[1.10950932e+00]
 [1.66323899e+00]
 [1.83722916e-03]
 [3.52549362e-04]]
Equal number in each slice ratio:[0.66707751]


### ar_coeff = [0.2, 0.2, 0.8, 0.8]

In [None]:
#ar_coeff = [0.2, 0.2, 0.8, 0.8]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.2, 0.8, 0.8]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 5.54614799e-01]
 [ 8.31551840e-01]
 [ 2.93844972e-03]
 [-3.30158714e-04]]
Equal slice ratio:[0.66696359]
Equal number in each slice direction:[[ 1.66417676e+00]
 [ 2.49475284e+00]
 [ 4.59979919e-03]
 [-1.02698341e-04]]
Equal number in each slice ratio:[0.6670708]


### ar_coeff = [0.2, 0.5, 0.8, 0.8]

In [None]:
#ar_coeff = [0.2, 0.5, 0.8, 0.8]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.5, 0.8, 0.8]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[0.46189184]
 [0.88630596]
 [0.0042448 ]
 [0.00288614]]
Equal slice ratio:[0.52114266]
Equal number in each slice direction:[[1.01290085]
 [1.7202301 ]
 [0.00497268]
 [0.00291084]]
Equal number in each slice ratio:[0.58881707]


### ar_coeff = [0.2, 0.5, 0.5, 0.8]

In [None]:
#ar_coeff = [0.2, 0.5, 0.5, 0.8]
num_N = 5
n_obs = 10000
ar_coeff = [0.2, 0.5, 0.5, 0.8]
noise = np.zeros((num_N, n_obs))
n = 100
H = 50
K = 1
hat = 0
for a in range(n):
    for h in range(num_N):
        noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
    ar_series = np.zeros((num_N, n_obs))
    for t in range(0, n_obs):
        ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
        ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
        ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
        ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
        ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
    y = ar_series[4]
    X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
    sir_1_result = sir_1(X, y, H=H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_1_result[0]<0:
        sir_1_result = -sir_1_result
    sir_1_result = sir_1_result/np.linalg.norm(sir_1_result)
    hat = hat + sir_1_result
    sir_11_result = sir_11(X, y, H, K=K)
    #after adding normalization and modifying the first coefficient
    if sir_11_result[0]<0:
        sir_11_result = -sir_11_result
    sir_11_result = sir_11_result/np.linalg.norm(sir_11_result)
    Hat = Hat + sir_11_result
print(f"Equal slice direction:{hat/n}")
hat1 = hat/n
print(f"Equal slice ratio:{hat1[0]/hat1[1]}")
print(f"Equal number in each slice direction:{Hat/n}")
Hat1 = Hat/n
print(f"Equal number in each slice ratio:{Hat1[0]/Hat1[1]}")

Equal slice direction:[[ 4.63183556e-01]
 [ 8.85775180e-01]
 [ 1.06089700e-04]
 [-9.76717593e-04]]
Equal slice ratio:[0.52291323]
Equal number in each slice direction:[[1.47604378e+00]
 [2.60602530e+00]
 [5.14360204e-03]
 [1.92624909e-03]]
Equal number in each slice ratio:[0.56639656]


# SIR for Time series new objective(Double sum)
## The results are shown by 1. directions 2. ratio between first two coefficients

Code below is for this derivation. When taking the sum across all $q$, it is easily seen that
$$
\sum_q diag\left[ (\mathbf{W} \circ \Phi^{\circ q})^T Cov(\mathbb{E}(\mathbf{X}_{t-q}|y) (\mathbf{W} \circ\Phi^{\circ q})\right]=diag(\mathbf{W}^T\sum_q \mathbf{V}^{[1:q]}\mathbf{W}),
$$
where
$$
\mathbf{V}^{[1:Q]}_{jk}=\sum_{q=1}^Q\mathbf{V}^q_{jk}\cdot \phi_j^q\cdot \phi_k^q.
$$
I'm wondering, is it $\text{diag}(W^T V^{[1: q]} W)$, not $\text{diag}(\mathbf{W}^T\sum_q \mathbf{V}^{[1:q]}\mathbf{W}).$

## Use equal slice

In [None]:
#from 0 to Q double sum
from tabulate import tabulate
def NEWSIR(ar_coeff, T, n_obs, S):
    num_N = 5
    # n_obs = 10000
    noise = np.zeros((num_N, n_obs))
    n = 100
    H = 50
    P = 4
    K = 1
    # S = 20
    hat = [np.zeros((P, 1)) for i in range(S)]
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    g = np.zeros((S, 1))
    for w in range(n):
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs))
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        V = []
        for a in range(0,S+1):
            M, edr, lam1 = T(X, y, a, H, K)
            V.append(M)
        for q in range(1, S+1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j,k] = sum(sum(phi[j]**a * V[a][j,k] * phi[k]**a for a in range(0,l)) for l in range(1, q+1)) #double sum
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P-K) >= P-K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est =  K_largest_eigenvectors
            if edr_est[0]<0:
                edr_est = -edr_est
            edr_est = edr_est/np.linalg.norm(edr_est)
            hat[q-1] += edr_est
    for i in range(S):
        hat[i] = hat[i]/n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

### ar_coeff = [0.2,0.2,0.2,0.2]

In [None]:
ar_coeff = [0.2,0.2,0.2,0.2]
NEWSIR(ar_coeff, sir_1_time_series, 10000, 50)

\begin{tabular}{rrrr}
\hline
 0.554585 & 0.831906 & -0.00187916 &  0.000159399 \\
 0.553301 & 0.831914 &  0.00227951 & -0.00094284  \\
 0.552559 & 0.831664 &  0.00359749 & -0.00122537  \\
 0.55212  & 0.83149  &  0.00421676 & -0.00134183  \\
 0.551832 & 0.831369 &  0.00457534 & -0.00140373  \\
 0.551629 & 0.831283 &  0.00480933 & -0.00144168  \\
 0.551478 & 0.831218 &  0.00497417 & -0.00146715  \\
 0.551362 & 0.831167 &  0.00509663 & -0.00148537  \\
 0.55127  & 0.831127 &  0.00519122 & -0.001499    \\
 0.551194 & 0.831094 &  0.00526651 & -0.00150956  \\
 0.551132 & 0.831067 &  0.00532787 & -0.00151797  \\
 0.551079 & 0.831044 &  0.00537885 & -0.00152483  \\
 0.551034 & 0.831024 &  0.00542188 & -0.00153052  \\
 0.550996 & 0.831007 &  0.00545869 & -0.00153531  \\
 0.550962 & 0.830992 &  0.00549053 & -0.0015394   \\
 0.550932 & 0.830979 &  0.00551836 & -0.00154293  \\
 0.550905 & 0.830967 &  0.00554289 & -0.00154601  \\
 0.550882 & 0.830957 &  0.00556467 & -0.00154872  \\
 0.55086  & 0.830

### ar_coeff = [0.2,0.2,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.2,0.8,0.8]
NEWSIR(ar_coeff,sir_1_time_series, 10000, 10)

\begin{tabular}{rrrr}
\hline
 0.556049 & 0.830562 & -0.000267665 &  0.00171431 \\
 0.438066 & 0.636593 &  0.0359006   &  0.0236571  \\
 0.358612 & 0.494733 &  0.0704215   &  0.0450812  \\
 0.314949 & 0.417433 &  0.0534683   &  0.0482798  \\
 0.288643 & 0.375582 &  0.0478755   &  0.00580257 \\
 0.26829  & 0.335881 &  0.0445573   &  0.00342528 \\
 0.254685 & 0.317757 &  0.0499964   &  0.00205482 \\
 0.244467 & 0.300952 &  0.0536248   &  0.00287382 \\
 0.236596 & 0.287178 &  0.0750268   & -0.00512007 \\
 0.230491 & 0.279271 &  0.0767333   & -0.00679534 \\
\hline
\end{tabular}
[[0.66948495]
 [0.68814155]
 [0.72485901]
 [0.75448867]
 [0.76852045]
 [0.79876349]
 [0.80150804]
 [0.81231039]
 [0.82386804]
 [0.82533116]]


### ar_coeff = [0.2,0.5,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.8,0.8]
NEWSIR(ar_coeff,sir_1_time_series, 10000, 50)

\begin{tabular}{rrrr}
\hline
 0.461232 & 0.886648 &  0.00185602 & -0.000684399 \\
 0.367788 & 0.790847 & -0.0118667  &  0.00408115  \\
 0.284692 & 0.609771 &  0.00277821 & -0.0236595   \\
 0.25131  & 0.523449 & -0.00225777 & -0.0700011   \\
 0.233338 & 0.481689 &  0.0145163  & -0.0859569   \\
 0.221394 & 0.465216 &  0.0157676  & -0.0873892   \\
 0.212442 & 0.435712 &  0.0292717  & -0.0684612   \\
 0.205988 & 0.416691 &  0.049366   & -0.0684726   \\
 0.201292 & 0.410014 &  0.05121    & -0.0679493   \\
 0.1977   & 0.40493  &  0.0525372  & -0.0677423   \\
 0.194837 & 0.400871 &  0.0535773  & -0.0676911   \\
 0.192482 & 0.397517 &  0.0544213  & -0.0677207   \\
 0.190503 & 0.394682 &  0.0551135  & -0.067789    \\
 0.188813 & 0.392251 &  0.0556808  & -0.0678707   \\
 0.187353 & 0.390146 &  0.0561444  & -0.0679515   \\
 0.186094 & 0.381477 &  0.0528152  & -0.0495969   \\
 0.185007 & 0.379947 &  0.0531147  & -0.0496304   \\
 0.184051 & 0.37484  &  0.071263   & -0.0415754   \\
 0.18321  & 0.368

### ar_coeff = [0.2,0.5,0.5,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.5,0.8]
NEWSIR(ar_coeff,sir_1_time_series, 10000, 50)

NameError: name 'NEWSIR' is not defined

## Use equal number in each slice

In [None]:
# #from 0 to Q double sum
# from tabulate import tabulate
# import numpy as np

# def uu2(ar_coeff):
#     import numpy as np
#     from tabulate import tabulate

#     def sir_1_time_series1(X, y, s, num_slices, K):
#         n_samples, n_features = X.shape
#         V_hat = np.zeros([X.shape[1], X.shape[1]])
#         # Step 1: Sort the data by the response variable
#         sorted_indices = np.argsort(y)
#         X_sorted = X[sorted_indices]
#         y_sorted = y[sorted_indices]
#         # Step 2: Divide the data into slices
#         slice_size = n_samples // num_slices
#         ph_hat = slice_size / n_samples
#         slices = []

#         for i in range(num_slices):
#             start_idx = i * slice_size
#             if i < num_slices - 1:
#                 end_idx = (i + 1) * slice_size
#             else:  # Last slice includes any remaining samples
#                 end_idx = n_samples
#             if start_idx - s > 0:
#                 slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
#             else:
#                 slices.append((X[sorted_indices[0:end_idx - s]], y_sorted[start_idx:end_idx]))

#         # Step 3: Compute the means of the predictors within each slice
#         X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

#         # Step 4: Center the predictor means
#         X_centered = X_means - np.mean(X_means, axis=0)

#         V_hat = np.add(V_hat, ph_hat * np.matmul(X_centered.T, X_centered))

#         # Check for NaN or inf values in V_hat
#         if np.isnan(V_hat).any() or np.isinf(V_hat).any():
#             # Handle NaN or inf values appropriately
#             # For example, replace NaN values with 0 and inf values with a large number
#             V_hat[np.isnan(V_hat)] = 0
#             V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat

#     # Compute eigenvalues and eigenvectors
#         eigenvalues, eigenvectors = np.linalg.eig(V_hat)
#         eigenvalues, eigenvectors = np.linalg.eig(V_hat)
#         K_index = np.argpartition(np.abs(eigenvalues), X.shape[1] - K) >= X.shape[1] - K
#         K_largest_eigenvectors = eigenvectors[:, K_index]
#         edr_est = K_largest_eigenvectors

#         return V_hat, edr_est, eigenvalues ** 2

#     num_N = 5
#     n_obs = 1000
#     noise = np.zeros((num_N, n_obs))
#     n = 100
#     H = 50
#     P = 4
#     K = 1
#     S = 20
#     hat = [np.zeros((P, 1)) for _ in range(S)]
#     g = np.zeros((S, 1))
#     # ar_coeff = [0.2, 0.2, 0.2, 0.2]
#     n1 = 0
#     l = 1  # Initialize `l` outside the loop
#     while n1 < 100:
#         for h in range(num_N):
#             noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
#         ar_series = np.zeros((num_N, n_obs))
#         for t in range(0, n_obs):
#             ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1] + noise[0][t]
#             ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1] + noise[1][t]
#             ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1] + noise[2][t]
#             ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1] + noise[3][t]
#             ar_series[4][t] = 2 * ar_series[0][t] + 3 * ar_series[1][t] + noise[4][t]
#         y = ar_series[4]
#         X = np.concatenate([ar_series[i].reshape(-1, 1) for i in range(4)], axis=1)
#         V = []
#         for a in range(0, S + 1):
#             M, _, _ = sir_1_time_series1(X, y, a, H, K)
#             V.append(M)
#         for q in range(1, S + 1):
#             Q = np.zeros((P, P))
#             phi = ar_coeff
#             for j in range(P):
#                 for k in range(P):
#                     Q[j, k] = sum(sum(phi[j] ** a * V[a][j, k] * phi[k] ** a for a in range(0, l)) for l in range(1, q + 1))
#             eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
#             K_index = np.argpartition(np.abs(eigenvalues1), P - K) >= P - K
#             K_largest_eigenvectors = eigenvectors1[:, K_index]
#             edr_est = K_largest_eigenvectors
#             if edr_est[0] < 0:
#                 edr_est = -edr_est
#             edr_est = edr_est / np.linalg.norm(edr_est)
#             hat[q - 1] += edr_est
#             n1 += 1

#     for i in range(S):
#         hat[i] = hat[i] / n
#         g[i] = hat[i][0] / hat[i][1]
#     array = np.array(hat)
#     print(tabulate(array, tablefmt='latex'))
#     print(g)

# # Example usage
# ar_coeff = [0.2, 0.2, 0.2, 0.2]
# uu2(ar_coeff)

In [None]:
#from 0 to Q double sum
from tabulate import tabulate
import numpy as np

def uu2(ar_coeff):
    import numpy as np
    from tabulate import tabulate

    def sir_1_time_series2(X, y, num_slices, K):
        n_samples, n_features = X.shape
        V_hat = np.zeros([X.shape[1], X.shape[1]])
        # Step 1: Sort the data by the response variable
        sorted_indices = np.argsort(y)
        X_sorted = X[sorted_indices]
        y_sorted = y[sorted_indices]

        # Step 2: Divide the data into slices
        slice_size = n_samples // num_slices
        ph_hat = slice_size/n_samples
        slices = []

        for i in range(num_slices):
            start_idx = i * slice_size
            if i < num_slices - 1:
                end_idx = (i + 1) * slice_size
            else:  # Last slice includes any remaining samples
                end_idx = n_samples
            slices.append((X_sorted[start_idx:end_idx], y_sorted[start_idx:end_idx]))

        # Step 3: Compute the means of the predictors within each slice
        X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

        # Step 4: Center the predictor means
        X_centered = X_means - np.mean(X_means, axis=0)

        V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
        eigenvalues, eigenvectors = np.linalg.eig(V_hat)
        K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
        K_largest_eigenvectors = eigenvectors[:, K_index]
        edr_est =  K_largest_eigenvectors

        return V_hat, edr_est, eigenvalues ** 2

    num_N = 5
    n_obs = 1000
    S = 20
    noise = np.zeros((num_N, n_obs+S))
    n = 100
    H = 50
    P = 4
    K = 1

    hat = [np.zeros((P, 1)) for _ in range(S)]
    g = np.zeros((S, 1))
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    n1 = 0
    l = 1  # Initialize `l` outside the loop
    while n1 < 100:
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=(n_obs+S))  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs+S))
        for t in range(0, n_obs+S):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1] + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1] + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1] + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1] + noise[3][t]
            ar_series[4][t] = 2 * ar_series[0][t] + 3 * ar_series[1][t] + noise[4][t]
        y = ar_series[4][S:n_obs+S]
        X = np.concatenate([ar_series[i][0:n_obs].reshape(-1, 1) for i in range(4)], axis=1)
        V = []
        for a in range(0, S + 1):
            M, _, _ = sir_1_time_series2(X, y, H, K)
            V.append(M)
        for q in range(1, S + 1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j, k] = sum(sum(phi[j] ** a * V[a][j, k] * phi[k] ** a for a in range(0, l)) for l in range(1, q + 1))
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P - K) >= P - K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est = K_largest_eigenvectors
            if edr_est[0] < 0:
                edr_est = -edr_est
            edr_est = edr_est / np.linalg.norm(edr_est)
            hat[q - 1] += edr_est
            n1 += 1

    for i in range(S):
        hat[i] = hat[i] / n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

# Example usage
ar_coeff = [0.2, 0.2, 0.2, 0.2]
uu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 & 0.000418829 \\
 0.0187047 & 0.0101912 & 0.0106083 

In [None]:
#from 0 to Q double sum
from tabulate import tabulate
import numpy as np

def uu2(ar_coeff):
    import numpy as np
    from tabulate import tabulate

    def sir_1_time_series2(X, y, num_slices, K):
        n_samples, n_features = X.shape
        V_hat = np.zeros([X.shape[1], X.shape[1]])
        # Step 1: Sort the data by the response variable
        sorted_indices = np.argsort(y)
        X_sorted = X[sorted_indices]
        y_sorted = y[sorted_indices]

        # Step 2: Divide the data into slices
        slice_size = n_samples // num_slices
        ph_hat = slice_size/n_samples
        slices = []

        for i in range(num_slices):
            start_idx = i * slice_size
            if i < num_slices - 1:
                end_idx = (i + 1) * slice_size
            else:  # Last slice includes any remaining samples
                end_idx = n_samples
            slices.append((X_sorted[start_idx:end_idx], y_sorted[start_idx:end_idx]))

        # Step 3: Compute the means of the predictors within each slice
        X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

        # Step 4: Center the predictor means
        X_centered = X_means - np.mean(X_means, axis=0)

        V_hat = np.add(V_hat,ph_hat * np.matmul(X_centered.T, X_centered))
        eigenvalues, eigenvectors = np.linalg.eig(V_hat)
        K_index = np.argpartition(np.abs(eigenvalues), X.shape[1]-K) >= X.shape[1]-K
        K_largest_eigenvectors = eigenvectors[:, K_index]
        edr_est =  K_largest_eigenvectors

        return V_hat, edr_est, eigenvalues ** 2

    num_N = 5
    n_obs = 10000
    S = 21
    noise = np.zeros((num_N, n_obs+S))
    n = 100
    H = 50
    P = 4
    K = 1
    y = [np.zeros((num_N, n_obs+i)) for i in range(S+1)]
    hat = [np.zeros((P, 1)) for _ in range(S)]
    g = np.zeros((S, 1))
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    n1 = 0
    l = 1  # Initialize `l` outside the loop
    while n1 < 100:
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=(n_obs+S))  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs+S))
        for t in range(0, n_obs+S):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1] + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1] + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1] + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1] + noise[3][t]
            ar_series[4][t] = 2 * ar_series[0][t] + 3 * ar_series[1][t] + noise[4][t]
        for a in range(0, S+1):
            y[a] = ar_series[4][a:n_obs+a]
        X = np.concatenate([ar_series[i][0:n_obs].reshape(-1, 1) for i in range(4)], axis=1)
        V = []
        for a in range(0, S + 1):
            M, _, _ = sir_1_time_series2(X, y[a], H, K)
            V.append(M)
        for q in range(1, S + 1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j, k] = sum(sum(phi[j] ** a * V[a][j, k] * phi[k] ** a for a in range(0, l)) for l in range(1, q + 1))
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P - K) >= P - K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est = K_largest_eigenvectors
            if edr_est[0] < 0:
                edr_est = -edr_est
            edr_est = edr_est / np.linalg.norm(edr_est)
            hat[q - 1] += edr_est
            n1 += 1

    for i in range(S):
        hat[i] = hat[i] / n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

# Example usage
ar_coeff = [0.2, 0.2, 0.2, 0.2]
uu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0273562 & 0.0418388 & -0.000515076 & -0.00010586  \\
 0.0273562 & 0.0418387 & -0.000514755 & -0.000106752 \\
 0.0273563 & 0.0418387 & -0.000514634 & -0.000107049 \\
 0.0273563 & 0.0418387 & -0.000514574 & -0.000107197 \\
 0.0273563 & 0.0418387 & -0.000514537 & -0.000107286 \\
 0.0273563 & 0.0418387 & -0.000514513 & -0.000107345 \\
 0.0273563 & 0.0418387 & -0.000514496 & -0.000107388 \\
 0.0273563 & 0.0418387 & -0.000514483 & -0.000107419 \\
 0.0273563 & 0.0418387 & -0.000514473 & -0.000107444 \\
 0.0273563 & 0.0418387 & -0.000514465 & -0.000107464 \\
 0.0273563 & 0.0418387 & -0.000514458 & -0.00010748  \\
 0.0273563 & 0.0418387 & -0.000514453 & -0.000107493 \\
 0.0273563 & 0.0418387 & -0.000514448 & -0.000107505 \\
 0.0273563 & 0.0418387 & -0.000514444 & -0.000107514 \\
 0.0273563 & 0.0418387 & -0.000514441 & -0.000107523 \\
 0.0273563 & 0.0418387 & -0.000514438 & -0.00010753  \\
 0.0273563 & 0.0418387 & -0.000514435 & -0.000107537 \\
 0.0273563 & 0.0418

In [None]:
ar_coeff = [0.2, 0.2, 0.8, 0.8]
uu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.027449  & 0.0417737 & -0.000453235 & -0.000342125 \\
 0.0274497 & 0.0417728 & -0.000463653 & -0.000339915 \\
 0.0274499 & 0.0417725 & -0.000467409 & -0.000339694 \\
 0.0274499 & 0.0417724 & -0.000469634 & -0.000339755 \\
 0.02745   & 0.0417722 & -0.000471234 & -0.000339896 \\
 0.02745   & 0.0417721 & -0.000472444 & -0.000340047 \\
 0.02745   & 0.0417721 & -0.000473371 & -0.000340192 \\
 0.02745   & 0.041772  & -0.000474106 & -0.000340328 \\
 0.02745   & 0.041772  & -0.000474703 & -0.000340449 \\
 0.02745   & 0.041772  & -0.000475196 & -0.000340555 \\
 0.02745   & 0.0417719 & -0.000475607 & -0.000340645 \\
 0.02745   & 0.0417719 & -0.000475955 & -0.000340722 \\
 0.02745   & 0.0417719 & -0.000476252 & -0.000340789 \\
 0.0274501 & 0.0417719 & -0.000476508 & -0.000340847 \\
 0.0274501 & 0.0417719 & -0.000476731 & -0.000340897 \\
 0.0274501 & 0.0417718 & -0.000476927 & -0.000340941 \\
 0.0274501 & 0.0417718 & -0.000477101 & -0.000340981 \\
 0.0274501 & 0.0417

In [None]:
ar_coeff = [0.2, 0.5, 0.8, 0.8]
uu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0234576 & 0.0441193 & 0.000726897 & 0.000598505 \\
 0.0230671 & 0.0443212 & 0.000767077 & 0.000634939 \\
 0.0229203 & 0.0443954 & 0.000787399 & 0.000653782 \\
 0.0228462 & 0.0444324 & 0.000798911 & 0.000664151 \\
 0.0228018 & 0.0444545 & 0.000806466 & 0.000670612 \\
 0.0227723 & 0.0444691 & 0.000811727 & 0.000675134 \\
 0.0227512 & 0.0444796 & 0.000815587 & 0.000678459 \\
 0.0227355 & 0.0444873 & 0.000818536 & 0.000681016 \\
 0.0227232 & 0.0444934 & 0.000820859 & 0.000683036 \\
 0.0227134 & 0.0444982 & 0.000822734 & 0.000684668 \\
 0.0227054 & 0.0445021 & 0.00082428  & 0.000686014 \\
 0.0226987 & 0.0445054 & 0.000825577 & 0.000687142 \\
 0.022693  & 0.0445082 & 0.000826679 & 0.0006881   \\
 0.0226882 & 0.0445106 & 0.000827625 & 0.000688923 \\
 0.022684  & 0.0445126 & 0.000828448 & 0.000689638 \\
 0.0226803 & 0.0445144 & 0.000829168 & 0.000690263 \\
 0.0226771 & 0.044516  & 0.000829805 & 0.000690816 \\
 0.0226742 & 0.0445174 & 0.000830371 & 0.000691307 \\

In [None]:
ar_coeff = [0.2, 0.5, 0.5, 0.8]
uu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.00904701 & 0.0178282 & -0.000129318 & -0.000399291 \\
 0.00888127 & 0.0179103 & -0.00012743  & -0.000431598 \\
 0.00881848 & 0.0179406 & -0.000126918 & -0.000450806 \\
 0.00878687 & 0.0179558 & -0.000126609 & -0.00046164  \\
 0.00876795 & 0.0179648 & -0.000126413 & -0.000468342 \\
 0.00875537 & 0.0179707 & -0.000126283 & -0.000472882 \\
 0.00874641 & 0.017975  & -0.00012619  & -0.000476162 \\
 0.00873969 & 0.0179782 & -0.000126121 & -0.00047865  \\
 0.00873447 & 0.0179806 & -0.000126067 & -0.000480597 \\
 0.0087303  & 0.0179826 & -0.000126024 & -0.000482164 \\
 0.00872689 & 0.0179842 & -0.000125988 & -0.000483451 \\
 0.00872405 & 0.0179856 & -0.000125959 & -0.000484528 \\
 0.00872165 & 0.0179867 & -0.000125934 & -0.000485441 \\
 0.00871959 & 0.0179877 & -0.000125913 & -0.000486225 \\
 0.00871781 & 0.0179885 & -0.000125894 & -0.000486906 \\
 0.00871625 & 0.0179892 & -0.000125878 & -0.000487502 \\
 0.00871487 & 0.0179899 & -0.000125864 & -0.000488028 \\
 0

### ar_coeff = [0.2,0.2,0.2,0.2]

In [None]:
ar_coeff = [0.2,0.2,0.2,0.2]
NEWSIR(ar_coeff, sir_1_time_series1, 10000, 20)

\begin{tabular}{rrrr}
\hline
 0.553517 & 0.832631 & 0.00104219 & 0.000139102 \\
 0.553509 & 0.832636 & 0.00104897 & 0.00013629  \\
 0.553506 & 0.832637 & 0.00105138 & 0.000135233 \\
 0.553505 & 0.832638 & 0.00105258 & 0.000134698 \\
 0.553504 & 0.832639 & 0.00105331 & 0.000134378 \\
 0.553504 & 0.832639 & 0.00105379 & 0.000134164 \\
 0.553503 & 0.832639 & 0.00105413 & 0.000134011 \\
 0.553503 & 0.83264  & 0.00105439 & 0.000133897 \\
 0.553503 & 0.83264  & 0.00105459 & 0.000133808 \\
 0.553503 & 0.83264  & 0.00105475 & 0.000133737 \\
 0.553502 & 0.83264  & 0.00105488 & 0.000133679 \\
 0.553502 & 0.83264  & 0.00105499 & 0.00013363  \\
 0.553502 & 0.83264  & 0.00105509 & 0.000133589 \\
 0.553502 & 0.83264  & 0.00105516 & 0.000133554 \\
 0.553502 & 0.83264  & 0.00105523 & 0.000133524 \\
 0.553502 & 0.83264  & 0.00105529 & 0.000133497 \\
 0.553502 & 0.83264  & 0.00105535 & 0.000133474 \\
 0.553502 & 0.83264  & 0.00105539 & 0.000133453 \\
 0.553502 & 0.83264  & 0.00105544 & 0.000133434 \\
 0

### ar_coeff = [0.2,0.2,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.2,0.8,0.8]
NEWSIR(ar_coeff, sir_1_time_series1, 10000, 20)

\begin{tabular}{rrrr}
\hline
 0.555403 & 0.830972 & -0.00119026 & 0.00466392 \\
 0.555396 & 0.83095  & -0.00118676 & 0.0047071  \\
 0.555392 & 0.83094  & -0.00118428 & 0.00473366 \\
 0.555389 & 0.830934 & -0.00118354 & 0.00475167 \\
 0.555387 & 0.83093  & -0.00118326 & 0.00476499 \\
 0.555386 & 0.830927 & -0.00118336 & 0.00477535 \\
 0.555385 & 0.830925 & -0.00118363 & 0.00478352 \\
 0.555384 & 0.830923 & -0.00118395 & 0.00478999 \\
 0.555383 & 0.830922 & -0.00118427 & 0.00479526 \\
 0.555383 & 0.83092  & -0.00118455 & 0.0047996  \\
 0.555382 & 0.830919 & -0.00118479 & 0.00480323 \\
 0.555382 & 0.830919 & -0.001185   & 0.0048063  \\
 0.555381 & 0.830918 & -0.00118518 & 0.00480893 \\
 0.555381 & 0.830917 & -0.00118534 & 0.00481121 \\
 0.555381 & 0.830917 & -0.00118548 & 0.00481319 \\
 0.55538  & 0.830916 & -0.0011856  & 0.00481494 \\
 0.55538  & 0.830916 & -0.0011857  & 0.00481648 \\
 0.55538  & 0.830916 & -0.0011858  & 0.00481785 \\
 0.55538  & 0.830915 & -0.00118589 & 0.00481908 \\
 0

### ar_coeff = [0.2,0.5,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.8,0.8]
NEWSIR(ar_coeff, sir_1_time_series1, 10000, 20)

\begin{tabular}{rrrr}
\hline
 0.460693 & 0.886914 & -0.000132539 & 0.000892078 \\
 0.450926 & 0.891848 & -0.000215107 & 0.000951643 \\
 0.446836 & 0.893864 & -0.000274556 & 0.000975757 \\
 0.444678 & 0.894915 & -0.000313526 & 0.000986374 \\
 0.443366 & 0.89555  & -0.000339783 & 0.000992    \\
 0.442491 & 0.895971 & -0.000358299 & 0.000995843 \\
 0.441866 & 0.896271 & -0.000371721 & 0.00099858  \\
 0.441398 & 0.896495 & -0.000381876 & 0.00100075  \\
 0.441035 & 0.896669 & -0.000389784 & 0.00100248  \\
 0.440745 & 0.896807 & -0.00039614  & 0.00100391  \\
 0.440508 & 0.89692  & -0.00040136  & 0.00100509  \\
 0.44031  & 0.897014 & -0.000405714 & 0.00100608  \\
 0.440143 & 0.897094 & -0.0004094   & 0.00100692  \\
 0.44     & 0.897162 & -0.000412562 & 0.00100765  \\
 0.439877 & 0.897221 & -0.000415302 & 0.00100829  \\
 0.439768 & 0.897272 & -0.000417701 & 0.00100885  \\
 0.439673 & 0.897318 & -0.000419818 & 0.00100934  \\
 0.439588 & 0.897358 & -0.0004217   & 0.00100978  \\
 0.439512 & 0.897

### ar_coeff = [0.2,0.5,0.5,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.5,0.8]
NEWSIR(ar_coeff, sir_1_time_series1, 10000, 20)

\begin{tabular}{rrrr}
\hline
 0.461182 & 0.886941 & -0.000348751 & -0.000559072 \\
 0.451528 & 0.891867 & -0.000304097 & -0.000635605 \\
 0.44749  & 0.893885 & -0.000279163 & -0.000672299 \\
 0.44536  & 0.894939 & -0.000265587 & -0.000692759 \\
 0.444067 & 0.895576 & -0.000257224 & -0.000705709 \\
 0.443203 & 0.895999 & -0.000251622 & -0.000714455 \\
 0.442587 & 0.8963   & -0.000247625 & -0.000720833 \\
 0.442126 & 0.896526 & -0.000244632 & -0.000725688 \\
 0.441768 & 0.8967   & -0.000242307 & -0.000729496 \\
 0.441482 & 0.89684  & -0.000240449 & -0.000732561 \\
 0.441249 & 0.896953 & -0.000238931 & -0.000735085 \\
 0.441054 & 0.897048 & -0.000237667 & -0.000737199 \\
 0.44089  & 0.897128 & -0.000236599 & -0.000738995 \\
 0.440749 & 0.897196 & -0.000235683 & -0.000740539 \\
 0.440627 & 0.897256 & -0.00023489  & -0.000741878 \\
 0.44052  & 0.897308 & -0.000234197 & -0.00074305  \\
 0.440426 & 0.897353 & -0.000233585 & -0.000744086 \\
 0.440342 & 0.897394 & -0.000233042 & -0.000745007 \\

# SIR for Time series new objective(Single sum)
## The results are shown by 1. directions 2. ratio between first two coefficients

If the objective is $$\text{diag}(W^T V^{[1: Q]} W)$$

## Use equal slice

In [None]:
#from 0 to Q single sum
from tabulate import tabulate
def NEWSIRsingle(ar_coeff):
    num_N = 5
    n_obs = 1000
    noise = np.zeros((num_N, n_obs))
    n = 100
    H = 50
    P = 4
    K = 1
    S = 20
    hat = [np.zeros((P, 1)) for i in range(S)]
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    g = np.zeros((S, 1))
    for w in range(n):
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs))
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1]  + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1]  + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t] + 3*ar_series[1][t] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        V = []
        for a in range(0,S+1):
            M, edr, lam1 = sir_1_time_series(X, y, a, H, K)
            V.append(M)
        for q in range(1, S+1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j,k] = sum(phi[j]**a * V[a][j,k] * phi[k]**a for a in range(0,q)) #single sum
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P-K) >= P-K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est =  K_largest_eigenvectors
            if edr_est[0]<0:
                edr_est = -edr_est
            edr_est = edr_est/np.linalg.norm(edr_est)
            hat[q-1] += edr_est
    for i in range(S):
        hat[i] = hat[i]/n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

### ar_coeff = [0.2,0.2,0.2,0.2]

In [None]:
ar_coeff = [0.2,0.2,0.2,0.2]
NEWSIRsingle(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.55235  & 0.831289 & -0.00652422 &  0.00176025 \\
 0.554268 & 0.825945 & -0.0128939  & -0.00945419 \\
 0.554282 & 0.825614 & -0.0131315  & -0.00992433 \\
 0.554283 & 0.825601 & -0.0131409  & -0.00994309 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994384 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  & -0.00994387 \\
 0.554283 & 0.8256   & -0.0131413  

### ar_coeff = [0.2,0.2,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.2,0.8,0.8]
NEWSIRsingle(ar_coeff)

### ar_coeff = [0.2,0.5,0.5,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.5,0.8]
NEWSIRsingle(ar_coeff)

### ar_coeff = [0.2,0.5,0.8,0.8]

In [None]:
ar_coeff = [0.2,0.5,0.8,0.8]
NEWSIRsingle(ar_coeff)

## Use equal number in each slice

In [None]:
#from 0 to Q single sum
from tabulate import tabulate
import numpy as np

def uuu2(ar_coeff):
    import numpy as np
    from tabulate import tabulate

    def sir_1_time_series1(X, y, s, num_slices, K):
        n_samples, n_features = X.shape
        V_hat = np.zeros([X.shape[1], X.shape[1]])
        # Step 1: Sort the data by the response variable
        sorted_indices = np.argsort(y)
        X_sorted = X[sorted_indices]
        y_sorted = y[sorted_indices]
        # Step 2: Divide the data into slices
        slice_size = n_samples // num_slices
        ph_hat = slice_size / n_samples
        slices = []

        for i in range(num_slices):
            start_idx = i * slice_size
            if i < num_slices - 1:
                end_idx = (i + 1) * slice_size
            else:  # Last slice includes any remaining samples
                end_idx = n_samples
            if start_idx - s > 0:
                slices.append((X[[x - s for x in sorted_indices[start_idx:end_idx]]], y_sorted[start_idx:end_idx]))
            else:
                slices.append((X[sorted_indices[0:end_idx - s]], y_sorted[start_idx:end_idx]))

        # Step 3: Compute the means of the predictors within each slice
        X_means = np.array([np.mean(slice_X, axis=0) for slice_X, _ in slices])

        # Step 4: Center the predictor means
        X_centered = X_means - np.mean(X_means, axis=0)

        V_hat = np.add(V_hat, ph_hat * np.matmul(X_centered.T, X_centered))

        #report wrong, there's  NaN or inf value in V_hat, so add this
        # Check for NaN or inf values in V_hat
        if np.isnan(V_hat).any() or np.isinf(V_hat).any():
            # Handle NaN or inf values appropriately
            # For example, replace NaN values with 0 and inf values with a large number
            V_hat[np.isnan(V_hat)] = 0
            V_hat[np.isinf(V_hat)] = np.nanmax(np.abs(V_hat))  # Replace inf with maximum absolute value in V_hat

    # Compute eigenvalues and eigenvectors
        eigenvalues, eigenvectors = np.linalg.eig(V_hat)
        eigenvalues, eigenvectors = np.linalg.eig(V_hat)
        K_index = np.argpartition(np.abs(eigenvalues), X.shape[1] - K) >= X.shape[1] - K
        K_largest_eigenvectors = eigenvectors[:, K_index]
        edr_est = K_largest_eigenvectors

        return V_hat, edr_est, eigenvalues ** 2

    num_N = 5
    n_obs = 1000
    noise = np.zeros((num_N, n_obs))
    n = 100
    H = 50
    P = 4
    K = 1
    S = 20
    hat = [np.zeros((P, 1)) for _ in range(S)]
    g = np.zeros((S, 1))
    # ar_coeff = [0.2, 0.2, 0.2, 0.2]
    n1 = 0
    while n1 < 100:
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        ar_series = np.zeros((num_N, n_obs))
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1] + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1] + noise[1][t]
            ar_series[2][t] = ar_coeff[2] * ar_series[2][t - 1] + noise[2][t]
            ar_series[3][t] = ar_coeff[3] * ar_series[3][t - 1] + noise[3][t]
            ar_series[4][t] = 2 * ar_series[0][t] + 3 * ar_series[1][t] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1, 1) for i in range(4)], axis=1)
        V = []
        for a in range(0, S + 1):
            M, _, _ = sir_1_time_series1(X, y, a, H, K)
            V.append(M)
        for q in range(1, S + 1):
            Q = np.zeros((P, P))
            phi = ar_coeff
            for j in range(P):
                for k in range(P):
                    Q[j, k] = sum(phi[j] ** a * V[a][j, k] * phi[k] ** a for a in range(0, q))
            eigenvalues1, eigenvectors1 = np.linalg.eig(Q)
            K_index = np.argpartition(np.abs(eigenvalues1), P - K) >= P - K
            K_largest_eigenvectors = eigenvectors1[:, K_index]
            edr_est = K_largest_eigenvectors
            if edr_est[0] < 0:
                edr_est = -edr_est
            edr_est = edr_est / np.linalg.norm(edr_est)
            hat[q - 1] += edr_est
            n1 += 1

    for i in range(S):
        hat[i] = hat[i] / n
        g[i] = hat[i][0] / hat[i][1]
    array = np.array(hat)
    print(tabulate(array, tablefmt='latex'))
    print(g)

In [None]:
ar_coeff = [0.2, 0.2, 0.2, 0.2]
uuu2(ar_coeff)

In [None]:
ar_coeff = [0.2,0.5,0.8,0.8]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0233308 & 0.0439774 & 0.00148481 & -0.000284376 \\
 0.0221942 & 0.0444542 & 0.00192512 & -0.000589303 \\
 0.0219856 & 0.0444877 & 0.00210631 & -0.000800864 \\
 0.0219211 & 0.0444724 & 0.0022261  & -0.00096454  \\
 0.0219007 & 0.0444587 & 0.00228155 & -0.00103267  \\
 0.0218916 & 0.0444483 & 0.00231401 & -0.00105981  \\
 0.0218865 & 0.0444405 & 0.00233926 & -0.00107817  \\
 0.0218835 & 0.0444349 & 0.00235877 & -0.00108906  \\
 0.0218817 & 0.0444316 & 0.00236771 & -0.00109702  \\
 0.0218806 & 0.0444295 & 0.00237299 & -0.00110342  \\
 0.0218799 & 0.0444281 & 0.0023764  & -0.00110694  \\
 0.0218794 & 0.0444272 & 0.00237852 & -0.00110875  \\
 0.0218791 & 0.0444266 & 0.00238001 & -0.00111006  \\
 0.0218789 & 0.0444262 & 0.00238104 & -0.00111085  \\
 0.0218788 & 0.044426  & 0.00238187 & -0.00111133  \\
 0.0218787 & 0.0444259 & 0.00238226 & -0.00111153  \\
 0.0218787 & 0.0444258 & 0.0023826  & -0.00111165  \\
 0.0218786 & 0.0444257 & 0.00238281 & -0.00111177  \\

In [None]:
ar_coeff = [0.2,0.2,0.8,0.8]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0281901 & 0.0407746 & 0.000983024 & -0.00229931 \\
 0.0281302 & 0.0406785 & 0.00108717  & -0.00267873 \\
 0.0280782 & 0.0405959 & 0.00119454  & -0.00292771 \\
 0.0280312 & 0.0405244 & 0.00132309  & -0.00306977 \\
 0.0279983 & 0.0404748 & 0.00143155  & -0.00316628 \\
 0.0279731 & 0.0404372 & 0.00151865  & -0.00323207 \\
 0.0279567 & 0.0404124 & 0.00156755  & -0.0032817  \\
 0.0279465 & 0.0403974 & 0.00160851  & -0.0033108  \\
 0.0279403 & 0.0403882 & 0.00163071  & -0.00332418 \\
 0.0279365 & 0.0403826 & 0.00164288  & -0.00333221 \\
 0.0279336 & 0.0403781 & 0.00165054  & -0.00333886 \\
 0.0279319 & 0.0403755 & 0.00165522  & -0.00334191 \\
 0.0279309 & 0.040374  & 0.0016585   & -0.0033438  \\
 0.0279302 & 0.040373  & 0.0016607   & -0.00334503 \\
 0.0279297 & 0.0403722 & 0.00166182  & -0.00334607 \\
 0.0279294 & 0.0403718 & 0.00166244  & -0.00334668 \\
 0.0279292 & 0.0403715 & 0.00166287  & -0.00334692 \\
 0.0279291 & 0.0403713 & 0.0016633   & -0.00334719 \\

In [None]:
ar_coeff = [0.2,0.5,0.5,0.8]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0219593 & 0.04474   & -0.000577489 & -0.00238238 \\
 0.0207578 & 0.0452431 & -0.000575591 & -0.00293161 \\
 0.0205502 & 0.0453095 & -0.00058722  & -0.00322293 \\
 0.0204969 & 0.0453141 & -0.000588143 & -0.00338837 \\
 0.0204803 & 0.0453096 & -0.000589038 & -0.00347471 \\
 0.0204743 & 0.0453051 & -0.000588676 & -0.00352741 \\
 0.0204716 & 0.0453019 & -0.000588435 & -0.00355672 \\
 0.0204698 & 0.0452991 & -0.000588162 & -0.00358144 \\
 0.0204685 & 0.0452969 & -0.000587925 & -0.00359898 \\
 0.0204677 & 0.0452956 & -0.000587791 & -0.00361018 \\
 0.0204672 & 0.0452947 & -0.000587693 & -0.00361719 \\
 0.020467  & 0.0452942 & -0.000587637 & -0.00362133 \\
 0.0204668 & 0.0452938 & -0.000587589 & -0.00362406 \\
 0.0204666 & 0.0452936 & -0.000587557 & -0.00362584 \\
 0.0204666 & 0.0452935 & -0.000587538 & -0.00362686 \\
 0.0204665 & 0.0452934 & -0.000587528 & -0.00362753 \\
 0.0204665 & 0.0452933 & -0.000587521 & -0.00362791 \\
 0.0204665 & 0.0452933 & -0.00058751

In [None]:
ar_coeff = [0.2,0.2,0.2,0.2]
uuu2(ar_coeff)

\begin{tabular}{rrrr}
\hline
 0.0276621 & 0.0415087 & -0.00187225 & 0.00016349  \\
 0.0276669 & 0.0415049 & -0.00186133 & 0.000174439 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174786 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174795 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\
 0.0276667 & 0.041505  & -0.0018606  & 0.000174796 \\

# TSIR
## The results are shown by 1. directions

In [None]:
#0.2 0.2 can find edr direction
def TSIR(ar_coeff, T):
    n = 100
    S = 12
    H = 50
    K = 1
    P = 4
    num_N = 5
    n_x = 2
    n_obs = 1000
    QW = np.zeros((P,4))
    for w in range(n):
        noise = np.zeros((num_N, n_obs))
        ar_series = np.zeros((num_N, n_obs))
        Newar_series = np.zeros((n_obs, num_N - 1))
        M = np.zeros((num_N, n_obs))
        X = []
        num_dataframes = S
        y = []
        ar_seriesT1 = pd.DataFrame({})
        ar_seriesT = pd.DataFrame({})
        X1 = np.zeros((P,P))
        for l in range(num_dataframes):
            dg = pd.DataFrame({})
            X.append(dg)
        scaler = StandardScaler()
        lam = pd.DataFrame({})
        # ar_coeff1 = 0.3  # Autoregressive coefficient
        # ar_coeff2 = 0.7
        B = []
        edr1 = []
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        # Generate the additive AR time series
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
            ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        X = scaler.fit_transform(X)
        # for t in range(0, n_obs):
        #     ar_seriesT1 = pd.concat([ar_seriesT1, pd.DataFrame([[ar_series[0][t], ar_series[1][t], ar_series[2][t], ar_series[3][t]]])], axis = 0, ignore_index=True)
        #     X1 = X1 + ar_seriesT1.values[t].reshape(-1, 1) @ ar_seriesT1.values[t].reshape(-1, 1).T
        # X1 = 1/n_obs*X1
        # eigenvalues, eigenvectors = np.linalg.eig(X1)
        # # Compute reciprocal square root of eigenvalues
        # recip_sqrt_eigenvalues = np.diag(1 / np.sqrt(eigenvalues))
        # # Reconstruct the matrix
        # A_neg_half = eigenvectors @ recip_sqrt_eigenvalues @ np.linalg.inv(eigenvectors)
        # for t in range(0, n_obs):
        #     # M = np.array([Newar_series[i][t] for i in range(num_N - 1)])
        #     Newar_series[t] = A_neg_half @ ar_seriesT1.values[t]
        for j in range(1,S + 1):
            # for T in range(1,n_obs + 1):
            #     if T + j >= (n_obs):
            #         break
            #     else:
                    # y[j-1] = pd.concat([y[j-1], pd.Series([ar_series[4][T+j]])], ignore_index=True)
                    # M = pd.DataFrame({})
                    # for i in range(0,5):
                    #     M[i] = ar_series[i][T]
            # ar_seriesT = pd.DataFrame([[Newar_series[T][0], Newar_series[T][1], Newar_series[T][2], Newar_series[T][3]]])
            V, edr, lam1 = T(X, y, j, H, K=K)
            B.append(V)
            edr1.append(edr)
        from scipy.stats import ortho_group
        def create_random_orthogonal_matrix(rows, cols):
            # Create a random matrix
            random_matrix = np.random.rand(rows, cols)
            # Perform QR decomposition
            q, r = np.linalg.qr(random_matrix)
            return q
        rows = 4
        cols = 4
        P1 = create_random_orthogonal_matrix(rows, cols)
        #W_i
        Gam = np.zeros((4,4))
        r = np.zeros((4,4))
        n1 = 0
        while n1<=10000:
            for i in range(K):
                for k in range(S):
                    Gam[i] += (P1[i] @ B[k] @ P1[i]) * B[k] @ P1[i]
            U, S1, VT = np.linalg.svd(Gam)
            QQ = U @ VT
            P1 = QQ
            n1 = n1 + 1
        for i in range(len(QQ)):
            if QQ[i][0]<0:
                QQ[i] = -QQ[i]
        QQ = QQ/np.linalg.norm(QQ)
        # sum(lam.apply(find_largest, axis=1).values[i]*edr1[i] for i in range(len(edr1)))
        QW += QQ

        # LAM = pd.DataFrame({})
        # for j in range(1, S + 1):
        #     for i in range(4):
        #         LAM1= (QQ[i] @ B[j-1] @ QQ[i].T)**2
        #         LAM = pd.concat([LAM, pd.DataFrame([LAM1])], ignore_index=True)
        # total_sum = LAM.sum().sum()
        # LAM = LAM/total_sum
        # reshaped_data = LAM.values.reshape(S, 4)
        # # Convert reshaped data back to DataFrame
        # LAM = pd.DataFrame(reshaped_data)
        # sorted_columns = LAM.sum().sort_values().index
        # Reorder DataFrame columns based on sorted column names
        # LAM = LAM[sorted_columns]

    QW = QW/n

    return QW

In [None]:
TSIR([0.2,0.2], sir_1_time_series1)

array([[ 2.74463098e-01,  4.16778804e-01, -3.52710953e-04,
         2.12060850e-03],
       [ 4.17685428e-01, -2.73863926e-01,  3.77812782e-04,
        -1.52171407e-03],
       [ 0.00000000e+00,  5.02152914e-04,  4.99345206e-01,
         2.06553633e-05],
       [ 0.00000000e+00, -2.61130959e-03,  2.06553633e-05,
         4.99567541e-01]])

In [None]:
TSIR([0.2,0.8], sir_1_time_series1)

array([[ 1.22438364e-01,  4.82752552e-01,  3.08550664e-03,
        -2.66403375e-03],
       [ 4.84052887e-01, -1.22117915e-01, -8.74153785e-04,
         7.29552118e-04],
       [ 0.00000000e+00, -3.21321710e-03,  4.98983577e-01,
         2.85369021e-05],
       [ 0.00000000e+00,  2.76436169e-03,  2.85369021e-05,
         4.99675328e-01]])

In [None]:
TSIR([0.2,0.2], sir_1_time_series)

array([[ 0.21742315, -0.00966039,  0.01254573,  0.03445188],
       [ 0.42838298, -0.00425975, -0.00105679, -0.0245955 ],
       [ 0.        ,  0.03597271,  0.4086829 ,  0.0046153 ],
       [ 0.        ,  0.02906866,  0.0046153 ,  0.36736469]])

In [None]:
TSIR([0.2,0.8], sir_1_time_series)

array([[ 2.33343568e-01, -2.49433031e-03, -6.40901368e-03,
         3.76527795e-02],
       [ 4.12871385e-01, -3.52583083e-03,  4.42063147e-03,
        -7.69900231e-04],
       [ 0.00000000e+00, -4.12862559e-02,  3.69667869e-01,
         2.30828716e-03],
       [ 0.00000000e+00,  7.89737522e-05,  2.30828716e-03,
         3.42968195e-01]])

# My idea

In [None]:
#0.2 0.2 can find edr direction
def find_largest(row):
        return max(row)
def IDEA(ar_coeff, T):
    n = 100
    S = 12
    H = 50
    K = 1
    P = 4
    num_N = 5
    n_x = 2
    n_obs = 10000
    QW = np.zeros((P,4))
    W1 = np.zeros((P, 1))
    W2 = np.zeros((P, 1))
    for w in range(n):
        noise = np.zeros((num_N, n_obs))
        ar_series = np.zeros((num_N, n_obs))
        Newar_series = np.zeros((n_obs, num_N - 1))
        M = np.zeros((num_N, n_obs))
        X = []
        num_dataframes = S
        y = []
        ar_seriesT1 = pd.DataFrame({})
        ar_seriesT = pd.DataFrame({})
        X1 = np.zeros((P,P))
        for l in range(num_dataframes):
            dg = pd.DataFrame({})
            X.append(dg)
        scaler = StandardScaler()
        lam = pd.DataFrame({})
        # ar_coeff1 = 0.3  # Autoregressive coefficient
        # ar_coeff2 = 0.7
        B = []
        edr1 = []
        for h in range(num_N):
            noise[h] = np.random.normal(0, 1, size=n_obs)  # Normally distributed noise
        # Generate the additive AR time series
        for t in range(0, n_obs):
            ar_series[0][t] = ar_coeff[0] * ar_series[0][t - 1]  + noise[0][t]
            ar_series[1][t] = ar_coeff[1] * ar_series[1][t - 1]  + noise[1][t]
            ar_series[2][t] = 0.3 * ar_series[2][t - 1] + 0.4 * noise[2][t-1] + noise[2][t]
            ar_series[3][t] = -0.4 * noise[3][t-1] + noise[3][t]
            ar_series[4][t] = 2*ar_series[0][t - 1] + 3*ar_series[1][t - 1] + noise[4][t]
        y = ar_series[4]
        X = np.concatenate([ar_series[i].reshape(-1,1) for i in range(4)], axis = 1)
        X = scaler.fit_transform(X)
        # for t in range(0, n_obs):
        #     ar_seriesT1 = pd.concat([ar_seriesT1, pd.DataFrame([[ar_series[0][t], ar_series[1][t], ar_series[2][t], ar_series[3][t]]])], axis = 0, ignore_index=True)
        #     X1 = X1 + ar_seriesT1.values[t].reshape(-1, 1) @ ar_seriesT1.values[t].reshape(-1, 1).T
        # X1 = 1/n_obs*X1
        # eigenvalues, eigenvectors = np.linalg.eig(X1)
        # # Compute reciprocal square root of eigenvalues
        # recip_sqrt_eigenvalues = np.diag(1 / np.sqrt(eigenvalues))
        # # Reconstruct the matrix
        # A_neg_half = eigenvectors @ recip_sqrt_eigenvalues @ np.linalg.inv(eigenvectors)
        # for t in range(0, n_obs):
        #     # M = np.array([Newar_series[i][t] for i in range(num_N - 1)])
        #     Newar_series[t] = A_neg_half @ ar_seriesT1.values[t]
        for j in range(0,S + 1):
            # for T in range(1,n_obs + 1):
            #     if T + j >= (n_obs):
            #         break
            #     else:
                    # y[j-1] = pd.concat([y[j-1], pd.Series([ar_series[4][T+j]])], ignore_index=True)
                    # M = pd.DataFrame({})
                    # for i in range(0,5):
                    #     M[i] = ar_series[i][T]
            # ar_seriesT = pd.DataFrame([[Newar_series[T][0], Newar_series[T][1], Newar_series[T][2], Newar_series[T][3]]])
            V, edr, lam1 = T(X, y, j, H, K=K)
            B.append(V)
            edr1.append(edr)
            lam = pd.concat([lam, pd.DataFrame([lam1])], ignore_index=True)
            total_sum = lam.sum().sum()
            lam = lam/total_sum
            sorted_columns = lam.sum().sort_values().index
            lam = lam[sorted_columns]

    #add weights for each lag?
        W1 += sum(lam.apply(find_largest, axis=1).values[i]*edr1[i] for i in range(len(edr1)))
        W2 += sum(edr1[i] for i in range(len(edr1)))

    W1 = W1/n
    W2 = W2/n

    return W1, W2, lam

In [None]:
#add s=0?
ar_coeff = [0.2, 0.2]
IDEA(ar_coeff, sir_1_time_series1)

(array([[ 0.14199556],
        [ 0.21071697],
        [-0.00119924],
        [ 0.00485361]]),
 array([[ 2.78978284],
        [ 3.72508242],
        [-0.02227834],
        [ 0.0352592 ]]),
            1         2         3         0
 0   0.001262  0.006521  0.010868  0.574571
 1   0.000041  0.000013  0.000011  0.402316
 2   0.000010  0.000027  0.000054  0.002410
 3   0.000011  0.000018  0.000032  0.000173
 4   0.000009  0.000029  0.000051  0.000116
 5   0.000042  0.000014  0.000024  0.000118
 6   0.000008  0.000021  0.000014  0.000099
 7   0.000008  0.000032  0.000025  0.000111
 8   0.000008  0.000033  0.000044  0.000110
 9   0.000023  0.000012  0.000015  0.000148
 10  0.000015  0.000026  0.000030  0.000142
 11  0.000022  0.000015  0.000009  0.000117
 12  0.000034  0.000009  0.000014  0.000114)

In [None]:
ar_coeff = [0.2, 0.8]
IDEA(ar_coeff, sir_1_time_series1)

(array([[ 1.22006264e-01],
        [ 7.36171451e-01],
        [-3.04964495e-03],
        [-5.41659550e-04]]),
 array([[ 0.7085896 ],
        [ 6.04233114],
        [-0.01717659],
        [ 0.01058967]]),
            2         3         1         0
 0   0.000016  0.000023  0.000054  0.316986
 1   0.000004  0.000007  0.000030  0.267329
 2   0.000007  0.000012  0.000020  0.160367
 3   0.000024  0.000040  0.000008  0.097039
 4   0.000006  0.000009  0.000031  0.054261
 5   0.000010  0.000017  0.000052  0.031691
 6   0.000028  0.000015  0.000053  0.020322
 7   0.000027  0.000032  0.000046  0.014128
 8   0.000028  0.000044  0.000074  0.010252
 9   0.000026  0.000021  0.000009  0.008068
 10  0.000045  0.000039  0.000011  0.007074
 11  0.000037  0.000041  0.000009  0.005991
 12  0.000017  0.000011  0.000029  0.005477)

In [None]:
ar_coeff = [0.8, 0.2]
IDEA(ar_coeff, sir_1_time_series1)

(array([[ 5.49373512e-01],
        [ 1.72398312e-01],
        [ 1.19854428e-03],
        [-5.44625445e-05]]),
 array([[ 6.87318977],
        [ 1.23273416],
        [ 0.0537869 ],
        [-0.01108501]]),
            3         2         1         0
 0   0.000080  0.000119  0.000023  0.437500
 1   0.000009  0.000006  0.000094  0.332962
 2   0.000011  0.000014  0.000028  0.103827
 3   0.000009  0.000014  0.000041  0.051413
 4   0.000029  0.000037  0.000012  0.026473
 5   0.000020  0.000014  0.000044  0.014612
 6   0.000010  0.000009  0.000028  0.008355
 7   0.000023  0.000011  0.000057  0.006085
 8   0.000019  0.000010  0.000028  0.004677
 9   0.000020  0.000024  0.000045  0.003891
 10  0.000009  0.000017  0.000065  0.003200
 11  0.000021  0.000020  0.000007  0.002960
 12  0.000019  0.000013  0.000027  0.002958)