In [94]:
# Load package
import numpy as np; from scipy import stats; import matplotlib.pyplot as plt; import pymc as pm;import arviz as az; 
import math; import pandas as pd
from scipy.optimize import minimize
from scipy import special
import pytensor.tensor as pt
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Generate Data

In [95]:
def generate_Corr_identity(p):
    Sigma = np.zeros((p-1, p-1))
    np.fill_diagonal(Sigma, 1)
    return Sigma

def generate_data(n, p, sigma_sqr, beta, nu, corr):

    beta = beta.reshape((p, 1))
    x_i = np.random.normal(0, 1, (n, p - 1))
    x_i_correlated = x_i @ corr
    ones = np.ones((n, 1))
    x_i_full =  np.concatenate((ones, x_i_correlated), axis=1)
    XB = x_i_full @ beta
    E = stats.t.rvs(df = nu, loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y, x_i_full

In [96]:
p = 5 #number of betas
beta = np.array([2, 1, 0.3, 0.9, 1])
n = 50
nu_origin = 5
sigma_sqr = 1.5
corr = generate_Corr_identity(p)  
y, X = generate_data(n, p, sigma_sqr, beta, nu_origin, corr)

# Step 0: Obtain the expression for the log-likelihood

In [97]:
def negative_log_likelihood_optimize_beta_sigma(params, nu):
    """
    Negative log likelihood that will be passed into minimize function for given nu
    The use of it is to get the numerical result of beta and sigma
    :param params: contains beta and sigma as one vector, dim = p + 1
    :param nu: Degree of freedom
    :param X: Data matrix
    :param y: Observed Value
    :return: The negative log likelihood equation (2) in Fonseca 2008
    """

    betas, sigma = params[:-1], params[-1]
    sigma = np.abs(sigma) # Archer note: I simply let sigma to be its absolute value because it is symmetric.
    p = X.shape[1]
    B = np.reshape(betas, (p, 1))
    XB = X @ B
    n = X.shape[0]
    XB = XB.reshape((n, 1))
    # loglikelihood
    equation = n * np.log(special.gamma((nu + 1)/2)) + n* nu *0.5 * np.log(nu) - n * np.log(special.gamma(nu/2)) - 0.5*n*np.log(np.pi) - n * np.log(sigma) - 0.5 *(nu + 1)*np.sum(np.log(nu + ((y - XB)/sigma)**2))
    return -equation

# Step 1: Compute the conditional MLE λ and profile likelihood value for each fixed ν

In [98]:
def initial_guess_from_lin_reg(x_without_1, y,nu_origin):
    initial_guess = []

    model = LinearRegression().fit(x_without_1, y)
    # intercept
    initial_guess.append(model.intercept_.item()) # Archer note: I modify the code to avoid py printing error.
    # coeff
    for coeff in model.coef_[0]:
        initial_guess.append(coeff)
    # sigma_sq
    y_pred = model.predict(x_without_1)
    residual_sq = (((y - y_pred)**2).sum())/(n-2)
    initial_guess.append(residual_sq)

    return initial_guess

In [99]:
def conditional_MLE(log_nu): 
    nu = np.exp(log_nu)

    p = X.shape[1]
    x_without_1 = X[:, 1:]
    initial_guess = initial_guess_from_lin_reg(x_without_1, y,nu_origin)
    result = minimize(negative_log_likelihood_optimize_beta_sigma, initial_guess, args=(nu,), method='BFGS', options={'gtol': 1e-5, 'maxiter': 10000, 'norm':np.inf}) 
    
    return result.x

In [100]:
def negative_profile_loglikelihood(log_nu): 
    nu = np.exp(log_nu)

    p = X.shape[1]
    x_without_1 = X[:, 1:]
    initial_guess = initial_guess_from_lin_reg(x_without_1, y,nu_origin)
    result = minimize(negative_log_likelihood_optimize_beta_sigma, initial_guess, args=(nu,), method='BFGS', options={'gtol': 1e-5, 'maxiter': 10000, 'norm':np.inf}) 

    if result.success == False:
        return(1e8) 
    else:
        return result.fun 

# Step 2: Compute the observed Fisher information matrix

In [101]:
# Note that these are second derivatives of NEGATIVE log likelihood, we will fix it in the determinant code
def Hessian_beta_sigma(beta, sigma, nu, X, y):
    """
    The Hessian matrix of the negative log likelihood function
    :param beta: Coefficients
    :param sigma: Scale
    :param nu: Degree of freedom
    :param X: Data Matrix
    :param y: Observed Value
    :return: Hessian Matrix of beta and sigma on the log likelihood function, equation (2) Fonseca 2008
    """
    p = X.shape[1]
    n = X.shape[0]
    beta = np.array(beta)
    sigma = np.array(sigma)
    total_b2 = 0
    total_b_sigma = 0
    total_sigma_b = 0
    total_s2 = 0
    for i in range(n):
        Xi = np.reshape(X[i,:], (p, 1))
        residual = y[i] - X[i, :] @ beta
        nu_sigma_sqr_plus_residual =  nu*(sigma**2)+ residual**2
        total_b2 = total_b2 + (- (nu_sigma_sqr_plus_residual)* np.outer(X[i,:],X[i, :]) + np.outer(X[i, :], X[i, :])*2*(residual**2))/(nu_sigma_sqr_plus_residual**2)
        #total_b_sigma = total_b_sigma + (- nu_sigma_sqr_plus_residual*2 * residual * X[i,:] + 2*(residual**3 )*X[i,:]) / (nu_sigma_sqr_plus_residual**2)
        total_sigma_b = total_sigma_b + (-2*nu*sigma*X[i,:]*residual)/(nu_sigma_sqr_plus_residual**2)
        total_s2 = total_s2+ (-(residual**2) * 2* nu ) / ( (nu_sigma_sqr_plus_residual**2)) - 2*(residual**2)/(nu_sigma_sqr_plus_residual* (sigma**2))

    db2 = total_b2 * (nu+1)
    #dbs = total_b_sigma * (nu+1)/sigma
    dsb = total_sigma_b * (nu+1)
    dbs = dsb
    ds2 = n/(sigma**2) + (nu+1)*total_s2

    hessian = np.zeros((p + 1, p + 1))
    hessian[:p, :p] = db2
    hessian[:p, p] = dbs
    hessian[p, :p] = dsb
    hessian[p, p] = ds2

    return hessian

# Step 3: Find the block of j(ν, λ) corresponding to the insurance parameters λ = (β, σ).

In [102]:
def obs_block_det(beta, sigma, nu, X, y):
    
    block = np.array(Hessian_beta_sigma(beta, sigma, nu, X, y))* -1 
    # need to multiply all entries by -1 as we were on negative scale
    
    return np.linalg.det(block)

# Step 4: Plug λˆν you found in Step 1 to the block matrix j(ν, λ) in Step 4, to get j(ν, λˆ ν).
# Step 5: Adjusted profile log-likelihood function

In [103]:
def negative_adj_profile_loglikelihood(log_nu):
    nu = np.exp(log_nu)
    
    sigma = conditional_MLE(log_nu)[-1]
    beta = conditional_MLE(log_nu)[:-1]
    p = X.shape[1]
    B = np.reshape(beta, (p, 1))
    XB = X @ B
    n = X.shape[0]
    XB = XB.reshape((n, 1))
    det = obs_block_det(beta, sigma, nu, X, y)

    negative_adj_profile_loglikelihood = negative_profile_loglikelihood(log_nu) + 0.5*np.log(abs(det))
    return negative_adj_profile_loglikelihood

In [104]:
initial_log_nu = np.log(nu_origin)
adj_profile_optim = minimize(negative_adj_profile_loglikelihood,initial_log_nu,method='Nelder-Mead', options={'maxiter': 10000})

print(np.exp(adj_profile_optim.x))
print(adj_profile_optim.success)

[5.79196731]
True
