In [1]:
# Load package
import numpy as np; from scipy import stats; import matplotlib.pyplot as plt; import pymc as pm;import arviz as az; 
import math; import pandas as pd
from scipy.optimize import minimize
from scipy.optimize import root
from scipy import special
import pytensor.tensor as pt
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.optimize import fsolve

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
def generate_Corr_identity(p):

    Sigma = np.zeros((p-1, p-1))
    np.fill_diagonal(Sigma, 1)
    return Sigma

def generate_data(n, p, sigma_sqr, beta, nu, corr):

    beta = beta.reshape((p, 1))
    x_i = np.random.normal(0, 1, (n, p - 1))
    x_i_correlated = x_i @ corr
    ones = np.ones((n, 1))
    x_i_full =  np.concatenate((ones, x_i_correlated), axis=1)
    XB = x_i_full @ beta
    E = stats.t.rvs(df = nu, loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y, x_i_full,x_i

def calculate_y_axix(nu_origin, nu_est):
    n = len(nu_est)
    if n == 0:
        return -1
    else:
        nu_origin_vec = nu_origin * np.ones((n, 1))
        mse = np.sum((nu_est - nu_origin_vec)**2) / n
        result = np.sqrt(mse)/nu_origin
        return result
    
# full likelihood
def negative_log_likelihood(params):
    betas, sigma, nu = params[:-2], params[-2], params[-1]
    p = X.shape[1]
    B = np.reshape(betas, (p, 1))
    XB = X @ B
    n = X.shape[0]
    XB = XB.reshape((n, 1))
    
    # loglikelihood
    equation = n * np.log(special.gamma((nu + 1)/2)) + n* nu *0.5 * np.log(nu) - n * np.log(special.gamma(nu/2)) - 0.5*n*np.log(np.pi) - n * np.log(sigma) - 0.5 *(nu + 1)*np.sum(np.log(nu + ((y - XB)/sigma)**2))
    return -equation

def optimizer_all_three_params_normal(eqt, method_name):
    p = X.shape[1]
    bounds = [(None, None)] * p +[(0, np.inf)]*2
    initial_guess = np.random.normal(0, 1, size = (p+2))
    initial_guess[p] = np.abs(initial_guess[p])
    result = minimize(eqt, initial_guess, method=method_name,bounds= bounds, options={'maxiter': 1000})
    return result

# Jeffrey's prior
def logJeff(x):
    return np.log((x/(x+3))**(1/2)*(special.polygamma(1,x/2) - special.polygamma(1, (x+1)/2) - 2*(x+3)/(x*(x+1)**2))**(1/2))

# full joint
def negative_joint(params):
    nu = params[-1]
    return -logJeff(nu) + negative_log_likelihood(params)

def initial_guess_from_lin_reg(x_without_1, y,nu_origin):
    initial_guess = []
    
    model = LinearRegression().fit(x_without_1, y)
    # intercept 
    initial_guess.append(float(model.intercept_))
    # coeff
    for coeff in model.coef_[0]:
        initial_guess.append(coeff)
    # sigma_sq    
    y_pred = model.predict(x_without_1)
    residual_sq = (((y - y_pred)**2).sum())/(n-2)
    initial_guess.append(residual_sq)
    
    # nu
    initial_guess.append(nu_origin) # use true nu for initial guess
    return initial_guess

def optimizer_all_three_params_least_sq(eqt, initial_guess,method_name):
    p = X.shape[1]
    bounds = [(None, None)] * p +[(0, np.inf)]*2
    result = minimize(eqt, initial_guess, method= method_name,bounds = bounds, options={'maxiter':1000})
    return result

def fix_x_generate_data(n, p, sigma_sqr, beta, nu, corr,X):

    beta = beta.reshape((p, 1))
    XB = X @ beta
    E = stats.t.rvs(df = nu, loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y

In [4]:
def score_func_nu(nu):
    betas, sigma= initial_guess[:-2], initial_guess[-2]
    p = X.shape[1]
    B = np.reshape(betas, (p, 1))
    XB = X @ B
    n = X.shape[0]
    XB = XB.reshape((n, 1))
    Z = XB/sigma
    return n/2*(special.gamma((nu + 1)/2) - special.gamma(nu/2)) + 1/2 * np.sum(-np.log((nu + (Z**2))/nu) + ((Z**2) - 1)/(nu + (Z**2)))

In [5]:
def score_func_all_param(param):
    betas, sigma, nu = param[:-2],param[-2],param[-1]
    
    p = X.shape[1]
    B = np.reshape(betas, (p, 1))
    XB = X @ B
    n = X.shape[0]
    XB = XB.reshape((n, 1))
    Z = XB/sigma
    
    beta0_eqt = (nu+1)/2 *np.sum(1/(nu + (Z**2)) * 2*Z* X[:,0])
    beta1_eqt = (nu+1)/2 *np.sum(1/(nu + (Z**2)) * 2*Z* X[:,1])
    beta2_eqt = (nu+1)/2 *np.sum(1/(nu + (Z**2)) * 2*Z* X[:,2])
    beta3_eqt = (nu+1)/2 *np.sum(1/(nu + (Z**2)) * 2*Z* X[:,3])
    beta4_eqt = (nu+1)/2 *np.sum(1/(nu + (Z**2)) * 2*Z* X[:,4])
    
    sigma_eqt = -n/2 + (nu+1)/2 * np.sum(1/(nu + (Z**2)) *2*Z * (y-XB)/(sigma**2))
    nu_eqt = n/2*(special.gamma((nu + 1)/2) - special.gamma(nu/2)) + 1/2 * np.sum(-np.log((nu + (Z**2))/nu) + ((Z**2) - 1)/(nu + (Z**2)))
    return [beta0_eqt,beta1_eqt,beta2_eqt,beta3_eqt,beta4_eqt,sigma_eqt,nu_eqt]

In [14]:
# Fix number of observations
n = 50
method_list = ['Nelder-Mead', 'L-BFGS-B']#,'BFGS']

p = 5
beta = np.array([2, 1, 0.3, 0.9, 1])
sigma_sqr = 1.5
corr = generate_Corr_identity(p)
nu_origin = 20

x_without_1 = np.random.normal(0, 1, (n, p - 1))
x_i_correlated = x_without_1 @ corr
ones = np.ones((n, 1))
X =  np.concatenate((ones, x_i_correlated), axis=1)

estimates_dict = {}

for method_name in method_list:
    estimates_dict[method_name] = {}
    estimates_dict[method_name]['normal_mle_profile'] = []
    estimates_dict[method_name]['normal_map_profile'] = []
    estimates_dict[method_name]['lse_mle_profile'] = []
    estimates_dict[method_name]['lse_map_profile'] = []
    estimates_dict[method_name]['root_mle_profile'] = []
    estimates_dict[method_name]['all_param_root_mle_profile'] = []

In [15]:
for j in range(50): # number of simulations 
    y = fix_x_generate_data(n, p, sigma_sqr, beta, nu_origin, corr, X) ## Generate data   
    
    for method_name in method_list:
        
        # Normal(0,1) as initial guess
        profile_normal_MLE_result = optimizer_all_three_params_normal(negative_log_likelihood,method_name)
        if profile_normal_MLE_result.success == True:
            estimates_dict[method_name]['normal_mle_profile'].append(profile_normal_MLE_result.x[-1])
        
        profile_normal_joint_result = optimizer_all_three_params_normal(negative_joint,method_name)
        if profile_normal_joint_result.success == True:
            estimates_dict[method_name]['normal_map_profile'].append(profile_normal_joint_result.x[-1])
        
        # Least square estimates as initial guess
        initial_guess = initial_guess_from_lin_reg(x_without_1, y,nu_origin)
        profile_lse_MLE_result = optimizer_all_three_params_least_sq(negative_log_likelihood, initial_guess, method_name)
        if profile_lse_MLE_result.success == True:
            estimates_dict[method_name]['lse_mle_profile'].append(profile_lse_MLE_result.x[-1])
        
        profile_lse_joint_result = optimizer_all_three_params_least_sq(negative_joint, initial_guess, method_name)
        if profile_lse_joint_result.success == True:
            estimates_dict[method_name]['lse_map_profile'].append(profile_lse_joint_result.x[-1])
        
        # Root solver
        profile_root_MLE_result = root(score_func_nu, x0 = initial_guess[-1], method = 'broyden1')
        if profile_root_MLE_result.success == True:
            estimates_dict[method_name]['root_mle_profile'].append(profile_root_MLE_result.x)
            
        profile_all_param_root_MLE_result = fsolve(score_func_all_param, x0 = initial_guess)
        estimates_dict[method_name]['all_param_root_mle_profile'].append(profile_all_param_root_MLE_result[-1])

In [16]:
final_dict = {}
mse_list = ['normal_mle_profile','normal_map_profile','lse_mle_profile','lse_map_profile','root_mle_profile','all_param_root_mle_profile']

for method_name in method_list:
    final_dict[method_name] = {}
    final_dict[method_name]["MSE"] = []
    final_dict[method_name]["Number of convergence"] = []
    
    for mse in mse_list:
        final_dict[method_name]["MSE"].append(calculate_y_axix(nu_origin, estimates_dict[method_name][mse]))
        final_dict[method_name]["Number of convergence"].append(len(estimates_dict[method_name][mse]))

In [17]:
final_dict

{'Nelder-Mead': {'MSE': [4.774334944338542,
   4.761360982988759,
   11.9301380394459,
   5.677647423419091,
   5.670330294619745,
   5.354400415899868],
  'Number of convergence': [25, 26, 28, 50, 50, 50]},
 'L-BFGS-B': {'MSE': [9.741406354072101,
   1.8217946529132596,
   33.961582052133444,
   3.9255166681251885,
   5.670330294619745,
   5.354400415899868],
  'Number of convergence': [7, 6, 47, 26, 50, 50]}}