In [None]:
# SIR with (approximately) the same number of observations in each slice
from tabulate import tabulate
import numpy as np
from scipy.linalg import solve_triangular
def sir_modified(X, y, num_slices, K):
    """Sliced inverse regression (SIR) with equally sized slices.

    The predictors are centered and standardized through a reduced QR
    decomposition (``X_centered = Q R``, ``Z = sqrt(n) * Q``), the rows of
    ``Z`` are ordered by the response and cut into ``num_slices`` consecutive
    slices, and the eigenvectors of the between-slice matrix of slice means
    are mapped back to the original predictor scale by solving against ``R``.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Predictor matrix. Must have at least as many rows as columns.
    y : array_like, shape (n_samples,)
        Response values; flattened to 1-D before sorting.
    num_slices : int
        Number of slices, ``1 <= num_slices <= n_samples``. Each slice holds
        ``n_samples // num_slices`` rows; the last slice also takes the
        remainder.
    K : int
        Number of directions to return, ``1 <= K <= n_features``.

    Returns
    -------
    edr_est : ndarray, shape (K, n_features)
        Estimated directions, one per row, ordered by decreasing eigenvalue.
        The whole array is sign-flipped so that ``edr_est[0, 0] >= 0`` and is
        scaled to unit Frobenius norm (for ``K == 1`` this is a unit vector).
    V_hat : ndarray, shape (n_features, n_features)
        Between-slice matrix of the centered slice means of ``Z``, weighted by
        ``(n_samples // num_slices) / n_samples``.
    R : ndarray, shape (n_features, n_features)
        Upper-triangular factor of the QR decomposition of the centered ``X``.

    Raises
    ------
    ValueError
        If the shapes of ``X`` and ``y`` are inconsistent, or ``num_slices`` /
        ``K`` are outside their valid ranges.
    """
    X = np.asarray(X)
    y = np.ravel(y)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        raise ValueError("X and y must contain the same number of samples")
    if n_samples < n_features:
        raise ValueError("n_samples must be at least n_features")
    if not 1 <= num_slices <= n_samples:
        raise ValueError("num_slices must satisfy 1 <= num_slices <= n_samples")
    if not 1 <= K <= n_features:
        raise ValueError("K must satisfy 1 <= K <= n_features")

    # Standardize via the reduced QR decomposition of the centered predictors.
    X = X - np.mean(X, axis=0)
    Q, R = np.linalg.qr(X)
    Z = np.sqrt(n_samples) * Q

    # Step 1: order the standardized predictors by the response.
    Z_sorted = Z[np.argsort(y)]

    # Step 2: slice boundaries; the last slice absorbs any leftover rows.
    slice_size = n_samples // num_slices
    ph_hat = slice_size / n_samples
    starts = np.arange(num_slices) * slice_size
    stops = np.append(starts[1:], n_samples)

    # Step 3: mean of the standardized predictors within each slice.
    Z_means = np.array(
        [np.mean(Z_sorted[lo:hi], axis=0) for lo, hi in zip(starts, stops)]
    )

    # Step 4: center the slice means and form the between-slice matrix.
    Z_centered = Z_means - np.mean(Z_means, axis=0)
    V_hat = ph_hat * np.matmul(Z_centered.T, Z_centered)

    # V_hat is symmetric, so eigh is the appropriate solver: it guarantees
    # real eigenvalues and orthonormal eigenvectors.
    eigenvalues, eigenvectors = np.linalg.eigh(V_hat)

    # Keep the eigenvectors of the K largest eigenvalues, largest first.
    top = np.argsort(eigenvalues)[::-1][:K]
    leading_vectors = eigenvectors[:, top]

    # Map back to the original predictor scale: solve R @ b = eigenvector.
    edr_est = solve_triangular(R, leading_vectors)

    # Resolve the sign ambiguity using the first entry, then normalize.
    if edr_est[0, 0] < 0:
        edr_est = -edr_est
    edr_est = edr_est / np.linalg.norm(edr_est)

    return edr_est.T, V_hat, R

In [None]:
import sliced
from sliced import SlicedInverseRegression
# --- Monte Carlo configuration ---------------------------------------------
num_N = 5                          # number of simulated series (rows of ar_series)
n_obs = 10000
ar_coeff = [0.2, 0.8, 0.5, 0.8]    # AR(1) coefficients of series 0..3
n = 100                            # number of Monte Carlo replications
H = 50                             # number of slices passed to sir_modified
K = 1                              # number of directions requested
SEED = 0                           # fixed seed so the run is reproducible
rng = np.random.default_rng(SEED)

n_steps = n_obs + 3
noise = np.zeros((num_N, n_steps))
hat = 0
Hat = 0  # not used in this cell; kept in case another cell accumulates into it

phi = np.asarray(ar_coeff)
n_ar = phi.size

for a in range(n):
    # Standard normal innovations for every series.
    noise = rng.normal(0, 1, size=(num_N, n_steps))
    ar_series = np.zeros((num_N, n_steps))

    # Series 0..3 are AR(1) processes started from a zero pre-sample value,
    # so the first observation is just the innovation.
    ar_series[:n_ar, 0] = noise[:n_ar, 0]
    for t in range(1, n_steps):
        ar_series[:n_ar, t] = phi * ar_series[:n_ar, t - 1] + noise[:n_ar, t]

    # Series 4: 2 * (series 0 at lag 1) + 3 * (series 1 at lag 2) + noise.
    # Lags that fall before the start of the sample are taken as zero
    # explicitly instead of relying on negative-index wraparound.
    lag1_series0 = np.concatenate(([0.0], ar_series[0, :-1]))
    lag2_series1 = np.concatenate(([0.0, 0.0], ar_series[1, :-2]))
    ar_series[4] = 2 * lag1_series0 + 3 * lag2_series1 + noise[4]

    # Response at times t = 2..n_obs+2; predictors are series 0 at t-1,
    # series 1 at t-2, and series 2 and 3 at t.
    y = ar_series[4][2:n_obs + 3]
    X = np.concatenate(
        [
            ar_series[0][1:n_obs + 2].reshape(-1, 1),
            ar_series[1][0:n_obs + 1].reshape(-1, 1),
            ar_series[2][2:n_obs + 3].reshape(-1, 1),
            ar_series[3][2:n_obs + 3].reshape(-1, 1),
        ],
        axis=1,
    )
    edr, _, R = sir_modified(X, y, H, K=K)
    hat += edr

# Average direction over the replications.
hat = hat / n
# Ratio of the first two components; the response was generated with
# coefficients 2 and 3 on the first two predictor columns.
g = hat[0][0] / hat[0][1]
g