In [1]:
import numpy as np
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
from scipy.stats import norm
import pandas as pd

In [2]:
class OLS_loglike(GenericLikelihoodModel):
    
    def __init__(self, *args, ols=False, **kwargs):
        super(OLS_loglike, self).__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        y = self.endog
        x = self.exog
        mu_y = np.dot(x, params)
        resid = y - mu_y
        sigma = np.sqrt(np.sum(resid**2) / resid.shape[0])
        pr_y = norm.logpdf(resid, loc=0, scale=sigma)
        return pr_y

In [3]:
# Function to generate synthetic data
def generate_data(n, break_point, alpha0, alpha1, alpha2, beta0, gamma0):
    index = np.arange(n).reshape(-1, 1)
    c2 = np.random.normal(size=n)
    e = np.random.normal(size=n)
    u1 = np.random.normal(0, 0.5, size=n)  # Shared random factor for correlation
    
    d1 = beta0 * (index > break_point).astype(float).reshape(-1) + u1 + np.random.normal(scale=1, size=n)
    d2 = gamma0 * (index > (break_point + n * 0.4)).astype(float).reshape(-1) + u1 + np.random.normal(scale=1, size=n)
    controls = np.random.normal(scale=2, size=(n, 5)) + u1[:, np.newaxis]  # d3 to d10 with added shared variation
    
    c1 = alpha0 + alpha1 * c2 + alpha2 * d1 + e
    
    data = np.column_stack((index, c2, d1, d2, controls))
    return c1, data

# Function to compute delta and return model parameters
def compute_delta(c1, data, theta, break_point):
    index = data[:, 0]
    c2 = data[:, 1]
    dj = data[:, theta]
    
    X = sm.add_constant(np.column_stack((c2, c2 * (index >= break_point), dj)))
    model = np.linalg.lstsq(X, c1, rcond=None)
    coeffs = model[0]
    delta = coeffs[2]
    
    return delta, coeffs

# Function to calculate log-likelihood for model parameters
def calculate_log_likelihood(c1, X, params):
    model = OLS_loglike(c1, X)
    log_like_val = model.loglikeobs(params).sum()
    return log_like_val

# Function to rank deltas and create result dictionary
def rank_deltas(c1, data, theta_values, break_point):
    results = []
    for theta in theta_values:
        delta, params = compute_delta(c1, data, theta, break_point)
        
        index = data[:, 0]
        c2 = data[:, 1]
        dj = data[:, theta]
        X = sm.add_constant(np.column_stack((c2, c2 * (index >= break_point), dj)))
        
        log_likelihood = calculate_log_likelihood(c1, X, params)
        
        result = {
            'Original_Index': theta,
            'Delta': delta,
            'Log_Likelihood': log_likelihood
        }
        results.append(result)
    
    # Sort results based on absolute delta
    results_sorted = sorted(results, key=lambda x: np.abs(x['Delta']))
    
    # Add ranking to each result
    for rank, entry in enumerate(results_sorted, start=1):
        entry['Rank'] = rank
    
    return results_sorted

# Generate a sample data
np.random.seed(42)
n = 100
break_point = 50
alpha0, alpha1, alpha2 = 1.0, 0.5, 0.3
beta0, gamma0 = 1.0, -1.0

c1, data = generate_data(n, break_point, alpha0, alpha1, alpha2, beta0, gamma0)

# Calculate rankings based on deltas
theta_values = range(2, data.shape[1])
results = rank_deltas(c1, data, theta_values, break_point)

# Print the sorted results
for result in results:
    print(result)

{'Original_Index': 2, 'Delta': -0.05973123790690391, 'Log_Likelihood': -135.6676001512741, 'Rank': 1}
{'Original_Index': 3, 'Delta': -0.07212115200793941, 'Log_Likelihood': -138.90323215665148, 'Rank': 2}
{'Original_Index': 4, 'Delta': -0.13606142849898198, 'Log_Likelihood': -141.65357815305615, 'Rank': 3}
{'Original_Index': 5, 'Delta': -0.1407495056215835, 'Log_Likelihood': -141.6708619166173, 'Rank': 4}
{'Original_Index': 6, 'Delta': -0.1424646189581829, 'Log_Likelihood': -140.91607462003904, 'Rank': 5}
{'Original_Index': 7, 'Delta': -0.14767115320700236, 'Log_Likelihood': -141.8251104197545, 'Rank': 6}
{'Original_Index': 8, 'Delta': -0.16233449561815072, 'Log_Likelihood': -141.55098813813356, 'Rank': 7}


In [4]:
import numpy as np
from scipy.stats import norm
from statsmodels.base.model import GenericLikelihoodModel
import statsmodels.api as sm

# Function to generate synthetic data
def generate_data(n, break_point, alpha0, alpha1, alpha2, beta0, gamma0):
    index = np.arange(n).reshape(-1, 1)
    c2 = np.random.normal(size=n)
    e = np.random.normal(size=n)
    u1 = np.random.normal(0, 0.5, size=n)

    d1 = beta0 * (index > break_point).astype(float).reshape(-1) + u1 + np.random.normal(scale=1, size=n)
    d2 = gamma0 * (index > (break_point + n * 0.4)).astype(float).reshape(-1) + u1 + np.random.normal(scale=1, size=n)
    controls = np.random.normal(scale=2, size=(n, 5)) + u1[:, np.newaxis]

    c1 = alpha0 + alpha1 * c2 + alpha2 * d1 + e

    data = np.column_stack((index, c2, d1, d2, controls))
    return c1, data

# Custom OLS Log-Likelihood model
class OLS_loglike(GenericLikelihoodModel):
    def __init__(self, *args, ols=False, **kwargs):
        super(OLS_loglike, self).__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        y = self.endog
        x = self.exog
        mu_y = np.dot(x, params)
        resid = y - mu_y
        sigma = np.sqrt(np.sum(resid**2) / resid.shape[0])
        pr_y = norm.logpdf(resid, loc=0, scale=sigma)
        return pr_y

# Simulation setup
n = 250
true_break_point = int(n * 0.5)
alpha0, alpha1 = 1.0, 0.5
alpha2, beta0, gamma0 = 0.3, 1.0, -1.0
mu_delta, sigma_delta = 2.0, 1.0
bootstrap_iterations = 100
num_draws = 100

# Generate base data
np.random.seed(42)
c1, data = generate_data(n, true_break_point, alpha0, alpha1, alpha2, beta0, gamma0)
indices = np.arange(data.shape[1])[2:]

# Function to simulate the deltas and calculate the likelihood ratio for configurations
def calculate_likelihood_distribution(c1, data, indices, true_break_point, num_draws, bootstrap_iterations):
    likelihood_ratios = np.zeros((len(indices), num_draws * bootstrap_iterations))
    
    for i, theta in enumerate(indices):
        for draw in range(num_draws):
            np.random.seed(draw)
            
            # Simulate deltas
            deltas = np.hstack([
                np.random.normal(mu_delta, sigma_delta, size=bootstrap_iterations),
                np.random.normal(-mu_delta, sigma_delta, size=bootstrap_iterations)
            ])
            
            # Bootstrap data and parameters
            for j in range(bootstrap_iterations):
                delta = deltas[j]
                
                # Bootstrap sample
                data_bootstrap = data[np.random.choice(n, n, replace=True)]
                
                # Use a model with fixed parameters except for delta
                index = data_bootstrap[:, 0]
                c2 = data_bootstrap[:, 1]
                cj = data_bootstrap[:, theta]
                
                X_fixed = sm.add_constant(np.column_stack((c2, cj)))
                params_fixed = np.linalg.lstsq(X_fixed, c1, rcond=None)[0]
                
                # Calculate c1 using bootstrap draws and compute likelihood
                c1_bootstrap = params_fixed[0] + params_fixed[1]*c2 + alpha2*cj + delta*c2*(index > true_break_point) + np.random.normal(size=n)
                X_full = sm.add_constant(np.column_stack((c2, (index > true_break_point), cj)))
                
                likelihood_no_delta = calculate_log_likelihood(c1_bootstrap, X_fixed, params_fixed)
                
                # Apply the model with the current estimated parameters (including delta)
                params_full = np.hstack([params_fixed, delta])
                likelihood_with_delta = calculate_log_likelihood(c1_bootstrap, X_full, params_full)
                
                # Calculate and store the likelihood ratio
                likelihood_ratio = likelihood_with_delta - likelihood_no_delta
                likelihood_ratios[i, draw * bootstrap_iterations + j] = likelihood_ratio
    
    return likelihood_ratios

# Function to calculate log-likelihood
def calculate_log_likelihood(c1_bootstrap, X, params):
    model = OLS_loglike(c1_bootstrap, X)
    log_like_val = model.loglikeobs(params).sum()
    return log_like_val

# Perform simulation and calculate likelihood distributions
likelihood_distributions = calculate_likelihood_distribution(c1, data, indices, true_break_point, num_draws, bootstrap_iterations)
print(likelihood_distributions)

[[-120.9598053  -111.97681321 -141.45069697 ... -100.54227656
   -92.83441123 -129.09379577]
 [-123.66112699 -111.28942829 -106.55331004 ... -141.89234256
  -109.35662158 -118.79078779]
 [-195.86511141 -193.90528485 -202.95892123 ... -192.17705114
  -200.44041782 -221.93241486]
 ...
 [-211.41032165 -168.5467214  -178.79042123 ... -178.87676637
  -203.63572674 -225.12275169]
 [-200.05933281 -186.76117052 -205.92310882 ... -196.88398081
  -221.74235396 -207.2389878 ]
 [-242.91579412 -207.08834607 -233.96480496 ... -205.42694609
  -208.61733685 -263.28000998]]
