In [1]:
# Load package
import numpy as np; from scipy import stats; import matplotlib.pyplot as plt; import pymc as pm;import arviz as az; 
import math; import pandas as pd
from scipy.optimize import minimize
from scipy.optimize import root
from scipy import special
import pytensor.tensor as pt
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.optimize import fsolve
from sympy import *

import warnings
warnings.filterwarnings("ignore")

import random
random.seed(10)

In [2]:
def generate_Corr_identity(p):

    Sigma = np.zeros((p-1, p-1))
    np.fill_diagonal(Sigma, 1)
    return Sigma

def generate_data(n, p, sigma_sqr, beta, nu, corr):

    beta = beta.reshape((p, 1))
    x_i = np.random.normal(0, 1, (n, p - 1))
    x_i_correlated = x_i @ corr
    ones = np.ones((n, 1))
    x_i_full =  np.concatenate((ones, x_i_correlated), axis=1)
    XB = x_i_full @ beta
    E = stats.t.rvs(df = nu, loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y, x_i_full,x_i
    
# full likelihood
def negative_log_likelihood(params):
    betas, sigma, nu = params[:-2], params[-2], params[-1]
    p = X.shape[1]
    B = np.reshape(betas, (p, 1))
    XB = X @ B
    n = X.shape[0]
    XB = XB.reshape((n, 1))
    
    # loglikelihood
    equation = n * np.log(special.gamma((nu + 1)/2)) + n* nu *0.5 * np.log(nu) - n * np.log(special.gamma(nu/2)) - 0.5*n*np.log(np.pi) - n * np.log(sigma) - 0.5 *(nu + 1)*np.sum(np.log(nu + ((y - XB)/sigma)**2))
    return -equation

# Jeffrey's prior
def logJeff(x):
    return np.log((x/(x+3))**(1/2)*(special.polygamma(1,x/2) - special.polygamma(1, (x+1)/2) - 2*(x+3)/(x*(x+1)**2))**(1/2))

# full joint with Jeff and 1/sigma priors
def negative_full_joint(params):
    nu = params[-1]
    sigma = params[-2]
    return -logJeff(nu) - np.log(1/sigma) + negative_log_likelihood(params)

def initial_guess_from_lin_reg(x_without_1, y,nu_origin):
    initial_guess = []
    
    model = LinearRegression().fit(x_without_1, y)
    # intercept 
    initial_guess.append(float(model.intercept_))
    # coeff
    for coeff in model.coef_[0]:
        initial_guess.append(coeff)
    # sigma_sq    
    y_pred = model.predict(x_without_1)
    residual_sq = (((y - y_pred)**2).sum())/(n-2)
    initial_guess.append(residual_sq)
    
    # nu
    initial_guess.append(nu_origin) # use true nu for initial guess
    return initial_guess

def optimizer_all_three_params_least_sq(eqt, initial_guess,method_name):
    p = X.shape[1]
    bounds = [(None, None)] * p +[(0, np.inf)]*2
    result = minimize(eqt, initial_guess, method= method_name,bounds = bounds, options={'maxiter':1000})
    return result

def fix_x_generate_data(n, p, sigma_sqr, beta, nu, corr,X):

    beta = beta.reshape((p, 1))
    XB = X @ beta
    E = stats.t.rvs(df = nu, loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y

def hessian_sigma_prior(sigma):
    return 1/sigma**2

def hessian_likelihood(beta,sigma, nu): 
    p = X.shape[1]
    B = np.reshape(beta, (p, 1))
    XB = X @ B
    XB = XB.reshape((n, 1))
    Z = XB/sigma_sqr

    # Adopted from W6 
    total_b2 = 0
    total_b_sigma = 0
    total_sigma_b = 0
    total_s2 = 0
    for i in range(n):
            Xi = np.reshape(X[i,:], (p, 1))
            residual = y[i] - X[i, :] @ beta
            nu_sigma_sqr_plus_residual =  nu*(sigma**2)+ residual**2
            total_b2 = total_b2 + (- (nu_sigma_sqr_plus_residual)* np.outer(X[i,:],X[i, :]) + np.outer(X[i, :], X[i, :])*2*(residual**2))/(nu_sigma_sqr_plus_residual**2)
            total_sigma_b = total_sigma_b + (-2*nu*sigma*X[i,:]*residual)/(nu_sigma_sqr_plus_residual**2)
            total_s2 = total_s2 - 3 * (residual ** 2) / nu_sigma_sqr_plus_residual + ((residual ** 4) * 2 ) / (nu_sigma_sqr_plus_residual ** 2) 

    db2 = -1 * total_b2 * (nu+1)
    dsb = -1 * total_sigma_b * (nu+1)
    dbs = dsb
    ds2 = -1 * (n / (sigma ** 2) + (nu + 1) * total_s2/(sigma**2))

    dv2 = n/4 * (special.polygamma(1, (nu+1)/2) - special.polygamma(1, nu/2)) + 1/2 * np.sum((Z**2)/(nu + (Z**2)) - (Z**2 -1)/(nu + (Z**2))**2)

    nu_sym = symbols('nu_sym')
    sigma_eqt = -n/2 + (nu_sym+1)/2 * np.sum(1/(nu_sym + (Z**2)) *2*Z * (y-XB)/(sigma**2))
    sigma_eqt= Derivative(sigma_eqt, nu_sym) 
    sigma_eqt_diff = sigma_eqt.doit()
    dsv = sigma_eqt_diff.evalf(subs={nu_sym: nu})
    dvs = dsv

    beta0_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,0])
    beta0_eqt= Derivative(beta0_eqt, nu_sym) 
    beta0_eqt_diff = beta0_eqt.doit()
    db0v = beta0_eqt_diff.evalf(subs={nu_sym: nu})

    beta1_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,1])
    beta1_eqt= Derivative(beta1_eqt, nu_sym) 
    beta1_eqt_diff = beta1_eqt.doit()
    db1v = beta1_eqt_diff.evalf(subs={nu_sym: nu})

    beta2_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,2])
    beta2_eqt= Derivative(beta2_eqt, nu_sym) 
    beta2_eqt_diff = beta2_eqt.doit()
    db2v = beta2_eqt_diff.evalf(subs={nu_sym: nu})

    beta3_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,3])
    beta3_eqt= Derivative(beta3_eqt, nu_sym) 
    beta3_eqt_diff = beta3_eqt.doit()
    db3v = beta3_eqt_diff.evalf(subs={nu_sym: nu})

    beta4_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,4])
    beta4_eqt= Derivative(beta4_eqt, nu_sym) 
    beta4_eqt_diff = beta4_eqt.doit()
    db4v = beta4_eqt_diff.evalf(subs={nu_sym: nu})

    dbv = np.array([db0v,db1v,db2v,db3v,db4v])
    dvb = dbv

    hessian_likelihood = np.zeros((p + 2, p + 2))
    hessian_likelihood[:p, :p] = db2
    hessian_likelihood[:p, p] = dbs
    hessian_likelihood[p, :p] = dsb
    hessian_likelihood[p, p] = ds2
    hessian_likelihood[-1,-1] = dv2
    hessian_likelihood[-2,-1] = dsv
    hessian_likelihood[-1,-2] = dvs
    hessian_likelihood[:p,p+1] = dbv
    hessian_likelihood[p+1,:p] = dvb
    
    return hessian_likelihood

def hessian_likelihood(beta,sigma, nu): 
    p = X.shape[1]
    B = np.reshape(beta, (p, 1))
    XB = X @ B
    XB = XB.reshape((n, 1))
    Z = XB/sigma_sqr

    # Adopted from W6 
    total_b2 = 0
    total_b_sigma = 0
    total_sigma_b = 0
    total_s2 = 0
    for i in range(n):
            Xi = np.reshape(X[i,:], (p, 1))
            residual = y[i] - X[i, :] @ beta
            nu_sigma_sqr_plus_residual =  nu*(sigma**2)+ residual**2
            total_b2 = total_b2 + (- (nu_sigma_sqr_plus_residual)* np.outer(X[i,:],X[i, :]) + np.outer(X[i, :], X[i, :])*2*(residual**2))/(nu_sigma_sqr_plus_residual**2)
            total_sigma_b = total_sigma_b + (-2*nu*sigma*X[i,:]*residual)/(nu_sigma_sqr_plus_residual**2)
            total_s2 = total_s2 - 3 * (residual ** 2) / nu_sigma_sqr_plus_residual + ((residual ** 4) * 2 ) / (nu_sigma_sqr_plus_residual ** 2) 

    db2 = -1 * total_b2 * (nu+1)
    dsb = -1 * total_sigma_b * (nu+1)
    dbs = dsb
    ds2 = -1 * (n / (sigma ** 2) + (nu + 1) * total_s2/(sigma**2))

    dv2 = n/4 * (special.polygamma(1, (nu+1)/2) - special.polygamma(1, nu/2)) + 1/2 * np.sum((Z**2)/(nu + (Z**2)) - (Z**2 -1)/(nu + (Z**2))**2)

    nu_sym = symbols('nu_sym')
    sigma_eqt = -n/2 + (nu_sym+1)/2 * np.sum(1/(nu_sym + (Z**2)) *2*Z * (y-XB)/(sigma**2))
    sigma_eqt= Derivative(sigma_eqt, nu_sym) 
    sigma_eqt_diff = sigma_eqt.doit()
    dsv = sigma_eqt_diff.evalf(subs={nu_sym: nu})
    dvs = dsv

    beta0_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,0])
    beta0_eqt= Derivative(beta0_eqt, nu_sym) 
    beta0_eqt_diff = beta0_eqt.doit()
    db0v = beta0_eqt_diff.evalf(subs={nu_sym: nu})

    beta1_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,1])
    beta1_eqt= Derivative(beta1_eqt, nu_sym) 
    beta1_eqt_diff = beta1_eqt.doit()
    db1v = beta1_eqt_diff.evalf(subs={nu_sym: nu})

    beta2_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,2])
    beta2_eqt= Derivative(beta2_eqt, nu_sym) 
    beta2_eqt_diff = beta2_eqt.doit()
    db2v = beta2_eqt_diff.evalf(subs={nu_sym: nu})

    beta3_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,3])
    beta3_eqt= Derivative(beta3_eqt, nu_sym) 
    beta3_eqt_diff = beta3_eqt.doit()
    db3v = beta3_eqt_diff.evalf(subs={nu_sym: nu})

    beta4_eqt = (nu_sym+1)/2 *np.sum(1/(nu_sym + (Z**2)) * 2*Z* X[:,4])
    beta4_eqt= Derivative(beta4_eqt, nu_sym) 
    beta4_eqt_diff = beta4_eqt.doit()
    db4v = beta4_eqt_diff.evalf(subs={nu_sym: nu})

    dbv = np.array([db0v,db1v,db2v,db3v,db4v])
    dvb = dbv

    hessian_likelihood = np.zeros((p + 2, p + 2))
    hessian_likelihood[:p, :p] = db2
    hessian_likelihood[:p, p] = dbs
    hessian_likelihood[p, :p] = dsb
    hessian_likelihood[p, p] = ds2
    hessian_likelihood[-1,-1] = dv2
    hessian_likelihood[-2,-1] = dsv
    hessian_likelihood[-1,-2] = dvs
    hessian_likelihood[:p,p+1] = dbv
    hessian_likelihood[p+1,:p] = dvb
    
    return hessian_likelihood

def hessian_nu_Jeff_prior(x):
    term1 = (x + 3)*(1.0*x/(x + 3)**3 - 1.0/(x + 3)**2)/x + (-0.5*x/(x + 3)**2 + 0.5/(x + 3))/x - (x + 3)*(-0.5*x/(x + 3)**2 + 0.5/(x + 3))/x**2
    term2 = 1/(2*(special.polygamma(1,x/2) - special.polygamma(1, (x+1)/2) - 2*(x+3)/(x*(x+1)**2)))
    term3 = 1/4*special.polygamma(3,x/2)
    term4 = 1/4*special.polygamma(3,(x+1)/2)
    term5 = -8/(x*(x + 1)**3) + 3*(4*x + 12)/(x*(x + 1)**4) - 4/(x**2*(x + 1)**2) + 2*(2*x + 6)/(x**2*(x + 1)**3) + (4*x + 12)/(x**2*(x + 1)**3) + 2*(2*x + 6)/(x**3*(x + 1)**2)
    return term1 + term2* (term3 - term4 - term5)

# Task 1: CI for betas

In [3]:
def standard_error_beta(beta,sigma,nu):
    Hessian_L = hessian_likelihood(beta,sigma,nu)
    hessian_S = hessian_sigma_prior(sigma)
    hessian_nu = hessian_nu_Jeff_prior(nu)
    Hessian_L[p][p] += hessian_S
    Hessian_L[-1][-1] += hessian_nu
    hessian_inverse = np.linalg.inv(Hessian_L)
    
    sd_beta_0 = np.sqrt(abs(hessian_inverse[0][0]))
    sd_beta_1 = np.sqrt(abs(hessian_inverse[1][1]))
    sd_beta_2 = np.sqrt(abs(hessian_inverse[2][2]))
    sd_beta_3 = np.sqrt(abs(hessian_inverse[3][3]))
    sd_beta_4 = np.sqrt(abs(hessian_inverse[4][4]))
    
    return sd_beta_0,sd_beta_1,sd_beta_2,sd_beta_3,sd_beta_4

def confidence_interval_beta(beta_MAP, beta_sd):
    upper_interval = beta_MAP + 1.96*beta_sd
    lower_interval = beta_MAP - 1.96*beta_sd
    return [lower_interval,upper_interval]

In [4]:
def beta_mse_function(true_beta_list, MAP_list):
    n = len(MAP_list)
    if n == 0:
        return -1
    else:
        MAP_list = np.array(MAP_list)
        MAP_list = MAP_list.transpose()
        
        beta_mse = []
        for j in range(len(true_beta_list)):
            b_list = MAP_list[j]
            true_beta = true_beta_list[j]
            beta_origin_vec = true_beta * np.ones((n, 1))
            mse = np.sum((b_list - beta_origin_vec)**2) / n
            result = np.sqrt(mse)/true_beta
            beta_mse.append(result)
        
        return beta_mse

## Nelder- Mead

In [20]:
# Fix number of observations
n = 50
p = 5
beta = np.array([2, 1, 0.3, 0.9, 1])
sigma_sqr = 1.5
corr = generate_Corr_identity(p)
x_without_1 = np.random.normal(0, 1, (n, p - 1))
x_i_correlated = x_without_1 @ corr
ones = np.ones((n, 1))
X =  np.concatenate((ones, x_i_correlated), axis=1)

final_dict = {}
for nu_origin in range(5,26,5):

    beta_MAP_list = []
    beta_0_count_include_in_CI = 0
    beta_1_count_include_in_CI = 0
    beta_2_count_include_in_CI = 0
    beta_3_count_include_in_CI = 0
    beta_4_count_include_in_CI = 0
    
    for j in range(50): # number of simulations 
        y = fix_x_generate_data(n, p, sigma_sqr, beta, nu_origin, corr, X) 
        
        # Maximize log joint with Nedler- Mead algorithm and LS intial guess to get MAP
        initial_guess = initial_guess_from_lin_reg(x_without_1, y,nu_origin)
        profile_lse_joint_result = optimizer_all_three_params_least_sq(negative_full_joint, initial_guess, 'Nelder-Mead')
        if profile_lse_joint_result.success == True:
            
            # MAP
            nu_MAP = profile_lse_joint_result.x[-1]
            beta_MAP = profile_lse_joint_result.x[:p]
            sigma_MAP = profile_lse_joint_result.x[-2]
            beta_MAP_list.append(beta_MAP)

            # SD
            sd_beta_0,sd_beta_1,sd_beta_2,sd_beta_3,sd_beta_4 = standard_error_beta(beta_MAP,sigma_MAP,nu_MAP)
            
            # CI
            beta_0_CI = confidence_interval_beta(beta_MAP[0], sd_beta_0)
            beta_1_CI = confidence_interval_beta(beta_MAP[1], sd_beta_1)
            beta_2_CI = confidence_interval_beta(beta_MAP[2], sd_beta_2)
            beta_3_CI = confidence_interval_beta(beta_MAP[3], sd_beta_3)
            beta_4_CI = confidence_interval_beta(beta_MAP[4], sd_beta_4)
            
            # CI capture true betas?
            beta_0_count_include_in_CI += beta_0_CI[0]<= beta[0]<= beta_0_CI[1]
            beta_1_count_include_in_CI += beta_1_CI[0]<= beta[1]<= beta_1_CI[1]
            beta_2_count_include_in_CI += beta_2_CI[0]<= beta[2]<= beta_2_CI[1]
            beta_3_count_include_in_CI += beta_3_CI[0]<= beta[3]<= beta_3_CI[1]
            beta_4_count_include_in_CI += beta_4_CI[0]<= beta[4]<= beta_4_CI[1]
    
    beta_count_include_in_CI = [beta_0_count_include_in_CI,beta_1_count_include_in_CI,beta_2_count_include_in_CI,beta_3_count_include_in_CI,beta_4_count_include_in_CI]
    
    beta_mse = beta_mse_function(beta, beta_MAP_list)
    final_dict[nu_origin] = [beta_count_include_in_CI,len(beta_MAP_list), beta_mse]
    print(nu_origin)

5
10
15
20
25


In [21]:
final_dict

{5: [[17, 44, 46, 40, 47],
  50,
  [0.7450647789422626,
   1.4368274294530023,
   4.057479761507989,
   1.6608099465905877,
   1.2737170322006035]],
 10: [[20, 45, 43, 47, 47],
  50,
  [0.8047183216168271,
   1.2896047215138904,
   5.075380204012951,
   1.4470235830130211,
   1.4654969704698055]],
 15: [[15, 41, 45, 46, 44],
  50,
  [0.7372810066585116,
   1.4085395300146322,
   4.347956155201788,
   1.4747428540286949,
   1.5935592226954602]],
 20: [[17, 46, 44, 42, 40],
  50,
  [0.8476683634921037,
   1.1819327679179599,
   3.916949518334949,
   1.6862288992211236,
   1.5541258271332417]],
 25: [[17, 46, 45, 44, 45],
  49,
  [0.6998002360621626,
   1.1631076248759216,
   3.2649124883886924,
   1.5917572356613627,
   1.4248895755038091]]}

# Task 2: Fit data with Normal error

In [5]:
def fix_x_generate_norm_data(n, p, sigma_sqr, beta, corr,X):

    beta = beta.reshape((p, 1))
    XB = X @ beta
    E = stats.norm.rvs(loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y

In [34]:
# Fix number of observations
n = 50
p = 5
sigma_sqr = 1.5
corr = generate_Corr_identity(p)
x_without_1 = np.random.normal(0, 1, (n, p - 1))
x_i_correlated = x_without_1 @ corr
ones = np.ones((n, 1))
X =  np.concatenate((ones, x_i_correlated), axis=1)

final_dict = {}

for i in range(10):
    beta = stats.norm.rvs(loc=0, scale= 1, size=(5, 1))
    
    beta_MAP_list = []
    beta_0_count_include_in_CI = 0
    beta_1_count_include_in_CI = 0
    beta_2_count_include_in_CI = 0
    beta_3_count_include_in_CI = 0
    beta_4_count_include_in_CI = 0
    
    for j in range(50): # number of simulations 
        y = fix_x_generate_norm_data(n, p, sigma_sqr, beta, corr, X) 
        
        # Maximize log joint with Nedler- Mead algorithm and LS intial guess to get MAP
        initial_guess = initial_guess_from_lin_reg(x_without_1, y, 10)
        profile_lse_joint_result = optimizer_all_three_params_least_sq(negative_full_joint, initial_guess, 'Nelder-Mead')
        if profile_lse_joint_result.success == True:
            
            # MAP
            nu_MAP = profile_lse_joint_result.x[-1]
            beta_MAP = profile_lse_joint_result.x[:p]
            sigma_MAP = profile_lse_joint_result.x[-2]
            beta_MAP_list.append(beta_MAP)

            # SD
            sd_beta_0,sd_beta_1,sd_beta_2,sd_beta_3,sd_beta_4 = standard_error_beta(beta_MAP,sigma_MAP,nu_MAP)
            
            # CI
            beta_0_CI = confidence_interval_beta(beta_MAP[0], sd_beta_0)
            beta_1_CI = confidence_interval_beta(beta_MAP[1], sd_beta_1)
            beta_2_CI = confidence_interval_beta(beta_MAP[2], sd_beta_2)
            beta_3_CI = confidence_interval_beta(beta_MAP[3], sd_beta_3)
            beta_4_CI = confidence_interval_beta(beta_MAP[4], sd_beta_4)
            
            # CI capture true betas?
            beta_0_count_include_in_CI += beta_0_CI[0]<= beta[0]<= beta_0_CI[1]
            beta_1_count_include_in_CI += beta_1_CI[0]<= beta[1]<= beta_1_CI[1]
            beta_2_count_include_in_CI += beta_2_CI[0]<= beta[2]<= beta_2_CI[1]
            beta_3_count_include_in_CI += beta_3_CI[0]<= beta[3]<= beta_3_CI[1]
            beta_4_count_include_in_CI += beta_4_CI[0]<= beta[4]<= beta_4_CI[1]
    
    beta_count_include_in_CI = [beta_0_count_include_in_CI,beta_1_count_include_in_CI,beta_2_count_include_in_CI,beta_3_count_include_in_CI,beta_4_count_include_in_CI]
    
    beta_mse = beta_mse_function(beta, beta_MAP_list)
    final_dict[i] = [beta_count_include_in_CI,len(beta_MAP_list), beta_mse]
    print(beta)

[[ 1.31780219]
 [-0.03030688]
 [ 0.26285322]
 [-1.24057971]
 [ 0.43951594]]
[[ 0.22969871]
 [-2.09178028]
 [ 0.15318667]
 [-0.47292646]
 [ 0.2248167 ]]
[[ 1.4594033 ]
 [ 0.55418151]
 [ 0.09261323]
 [ 0.01396648]
 [-0.8364693 ]]
[[-0.65508941]
 [ 0.48026789]
 [-0.55859682]
 [-0.24071861]
 [-0.24604499]]
[[-0.42075152]
 [-0.43579238]
 [ 0.18818697]
 [ 0.47646982]
 [-0.06789239]]
[[ 0.42081536]
 [-0.17084315]
 [ 0.4954618 ]
 [ 2.48472171]
 [ 0.74023399]]
[[ 0.45167746]
 [ 0.23233416]
 [-0.61989875]
 [ 0.88229321]
 [-0.54007581]]
[[-2.27037435]
 [-0.20465844]
 [ 0.6543312 ]
 [-0.78134476]
 [-1.12798419]]
[[-0.06054198]
 [ 1.41928692]
 [ 0.03931046]
 [-1.53916308]
 [-0.81462937]]
[[-1.47694556]
 [ 0.8065151 ]
 [ 0.62117603]
 [-1.19191685]
 [-0.44545092]]


In [35]:
final_dict

{0: [[array([13]), array([46]), array([49]), array([44]), array([48])],
  50,
  [array([1.0903287]),
   array([-36.76342088]),
   array([3.92673254]),
   array([-1.2137255]),
   array([2.61601536])]],
 1: [[array([22]), array([43]), array([46]), array([46]), array([46])],
  50,
  [array([6.326159]),
   array([-0.56929571]),
   array([6.96718093]),
   array([-2.75508294]),
   array([5.1392537])]],
 2: [[array([8]), array([41]), array([44]), array([44]), array([49])],
  50,
  [array([0.84436856]),
   array([2.53440385]),
   array([14.42452337]),
   array([112.73988511]),
   array([-1.17783408])]],
 3: [[array([40]), array([44]), array([41]), array([46]), array([43])],
  50,
  [array([-2.18142154]),
   array([2.64596177]),
   array([-2.2248102]),
   array([-5.69675713]),
   array([-5.31298878])]],
 4: [[array([36]), array([47]), array([45]), array([47]), array([46])],
  49,
  [array([-3.1470206]),
   array([-2.07198573]),
   array([5.54303894]),
   array([2.61104135]),
   array([-17.26762

# Task 3: Fit data with outliers

In [28]:
def fix_x_generate_norm_data(n, p, sigma_sqr, beta, corr,X):

    beta = beta.reshape((p, 1))
    XB = X @ beta
    E = stats.norm.rvs(loc=0, scale= np.sqrt(sigma_sqr), size=(n, 1))
    Y = XB + E
    return Y

In [41]:
n = 50
p = 5
sigma_sqr = 1.5
corr = generate_Corr_identity(p)
x_without_1 = np.random.normal(0, 1, (n, p - 1))
x_i_correlated = x_without_1 @ corr
ones = np.ones((n, 1))
X =  np.concatenate((ones, x_i_correlated), axis=1)

final_dict = {}
for i in range(10):
    beta = stats.norm.rvs(loc=0, scale= 1, size=(5, 1))
    
    beta_MAP_list = []
    beta_0_count_include_in_CI = 0
    beta_1_count_include_in_CI = 0
    beta_2_count_include_in_CI = 0
    beta_3_count_include_in_CI = 0
    beta_4_count_include_in_CI = 0
    
    for j in range(50): # number of simulations 
        non_outliers = fix_x_generate_norm_data(45, p, sigma_sqr, beta, corr, X[:45]) 
        outliers = np.array([[10],[15],[-13],[-19],[21]])
        y = np.concatenate((non_outliers, outliers))
        
        # Maximize log joint with Nedler- Mead algorithm and LS intial guess to get MAP
        initial_guess = initial_guess_from_lin_reg(x_without_1, y, 10)
        profile_lse_joint_result = optimizer_all_three_params_least_sq(negative_full_joint, initial_guess, 'Nelder-Mead')
        if profile_lse_joint_result.success == True:
            
            # MAP
            nu_MAP = profile_lse_joint_result.x[-1]
            beta_MAP = profile_lse_joint_result.x[:p]
            sigma_MAP = profile_lse_joint_result.x[-2]
            beta_MAP_list.append(beta_MAP)

            # SD
            sd_beta_0,sd_beta_1,sd_beta_2,sd_beta_3,sd_beta_4 = standard_error_beta(beta_MAP,sigma_MAP,nu_MAP)
            
            # CI
            beta_0_CI = confidence_interval_beta(beta_MAP[0], sd_beta_0)
            beta_1_CI = confidence_interval_beta(beta_MAP[1], sd_beta_1)
            beta_2_CI = confidence_interval_beta(beta_MAP[2], sd_beta_2)
            beta_3_CI = confidence_interval_beta(beta_MAP[3], sd_beta_3)
            beta_4_CI = confidence_interval_beta(beta_MAP[4], sd_beta_4)
            
            # CI capture true betas?
            beta_0_count_include_in_CI += beta_0_CI[0]<= beta[0]<= beta_0_CI[1]
            beta_1_count_include_in_CI += beta_1_CI[0]<= beta[1]<= beta_1_CI[1]
            beta_2_count_include_in_CI += beta_2_CI[0]<= beta[2]<= beta_2_CI[1]
            beta_3_count_include_in_CI += beta_3_CI[0]<= beta[3]<= beta_3_CI[1]
            beta_4_count_include_in_CI += beta_4_CI[0]<= beta[4]<= beta_4_CI[1]
    
    beta_count_include_in_CI = [beta_0_count_include_in_CI,beta_1_count_include_in_CI,beta_2_count_include_in_CI,beta_3_count_include_in_CI,beta_4_count_include_in_CI]
    
    beta_mse = beta_mse_function(beta, beta_MAP_list)
    final_dict[i] = [beta_count_include_in_CI,len(beta_MAP_list), beta_mse]
    print(beta)

[[-0.57248014]
 [ 1.86101118]
 [-1.05117132]
 [-0.44503235]
 [-0.80354711]]
[[-0.58202719]
 [-0.05520919]
 [ 1.14312754]
 [-0.30112363]
 [-1.02744028]]
[[ 0.49307819]
 [ 1.05909044]
 [-1.50951674]
 [ 0.1598098 ]
 [-1.63158692]]
[[-1.86463373]
 [-1.34802929]
 [ 0.11990641]
 [-0.64673708]
 [-0.72995007]]
[[-0.28379284]
 [ 2.27820937]
 [ 0.01417134]
 [-0.23225807]
 [-0.45867776]]
[[-2.72081618]
 [ 0.82657523]
 [ 0.15053318]
 [ 0.56693376]
 [ 1.063035  ]]
[[-0.09641213]
 [ 0.20173054]
 [-0.42150076]
 [-2.00697095]
 [ 1.05171477]]
[[ 0.60200351]
 [-0.36921039]
 [ 0.3453198 ]
 [ 1.56647439]
 [ 0.0422946 ]]
[[ 0.32041916]
 [ 0.58848548]
 [ 1.23940653]
 [-0.60239555]
 [-0.23367623]]
[[-0.84095685]
 [ 0.35067329]
 [-0.7940624 ]
 [ 0.20686309]
 [-0.70907126]]


In [42]:
final_dict

{0: [[array([18]), array([34]), array([36]), array([28]), array([36])],
  40,
  [array([-4.63871247]),
   array([2.22829539]),
   array([-1.35037465]),
   array([-5.89441485]),
   array([-1.67851412])]],
 1: [[array([25]), array([32]), array([32]), array([38]), array([34])],
  42,
  [array([-2.42958093]),
   array([-29.95753132]),
   array([1.17195315]),
   array([-4.90409967]),
   array([-1.29987666])]],
 2: [[array([10]), array([11]), array([8]), array([14]), array([16])],
  20,
  [array([2.84122447]),
   array([4.44743604]),
   array([-3.12974051]),
   array([13.40018412]),
   array([-0.85960506])]],
 3: [[array([11]), array([17]), array([20]), array([16]), array([21])],
  24,
  [array([-0.88460934]),
   array([-1.17877132]),
   array([9.93568414]),
   array([-3.64027344]),
   array([-1.73187989])]],
 4: [[array([25]), array([31]), array([30]), array([34]), array([33])],
  41,
  [array([-6.35725909]),
   array([0.826264]),
   array([123.8433772]),
   array([-7.91341481]),
   array([