# Test Analytic GLRT vs Optimization GLRT

In [1]:
# Comparing the analytic GLRT for the single-coefficient case with the optimization-based version
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [2]:
from uncertainty.DataGeneration import linearRegression_normal
from uncertainty.torch_linear import TorchLinear
from uncertainty.analyticLinearRegressionCIs import analyticLinearTest_GLRT, analyticLinearCis
from uncertainty.glrtTorch import glrtTorchCis
from uncertainty.torch_linear import TorchLinear

In [3]:
def compareAnalyticVsOptimization(beta, n=200, alpha=0.05):
    """Check, on one simulated data set, whether the analytic and optimization-based GLRT agree.

    Data are drawn with ``linearRegression_normal`` using coefficient vector ``beta``
    (identity matrix passed as ``cov``, ``sigma=1``, ``n`` samples).
    The analytic chi-squared test answers 'is the coefficient vector zero?' with a p-value.
    The GLRT-based method produces lower/upper confidence bounds on the coefficients.
    The two methods agree when "p-value < alpha" holds exactly when at least one
    interval excludes zero.

    Returns the tuple ``(agree, pValue, lowerBounds, upperBounds)``.
    """
    # Simulate the data set.
    dim = len(beta)
    X, y = linearRegression_normal(beta=np.array(beta), cov=np.eye(dim), sigma=1, n=n)

    # Analytic side: fit a model and obtain the p-value of the chi-squared test.
    # NOTE(review): this model keeps TorchLinear's default fit_intercept, while the GLRT
    # model below passes fit_intercept=False -- confirm the two are meant to differ.
    analyticModel = TorchLinear(lr=0.3, max_iter=30)
    analyticModel.fit(X, y)
    pValue = analyticLinearTest_GLRT(analyticModel, X, y)

    # Optimization side: GLRT confidence bounds on the coefficients (citype='coefs').
    def makeModel():
        return TorchLinear(lr=0.3, max_iter=100, fit_intercept=False)

    lowerBounds, upperBounds, _, _, _ = glrtTorchCis(
        makeModel,
        X=X,
        y=y,
        citype='coefs',
        alpha=alpha,
        search_kwargs={'lmbds': np.logspace(-10, 10, 101)},
        fit_kwargs={'lr': 0.3, 'max_iter': 30},
    )

    # Agreement criterion: the p-value is below alpha iff some level-alpha interval
    # on the coefficients does not contain zero.
    rejectsAnalytic = pValue < alpha    # The chi-square test is one-sided
    excludesZero = any(lowerBounds > 0) or any(upperBounds < 0)

    print("p value", pValue, "and confidence interval(s)", lowerBounds, "to", upperBounds)

    agree = rejectsAnalytic == excludesZero
    return (agree, pValue, lowerBounds, upperBounds)
    

## Single nonzero coefficient

In [None]:
%%time
# Run repeated simulations with a single, slightly non-zero coefficient and keep, per trial,
# the agreement flag, the p-value and the lower/upper confidence bounds.
beta = [0.1]
nTrials = 100  # alternative setting: 1000

trials = [compareAnalyticVsOptimization(beta=beta) for _ in range(nTrials)]

success = [agree for agree, _, _, _ in trials]
pVals = [pValue for _, pValue, _, _ in trials]
lcbs = [lower for _, _, lower, _ in trials]
ucbs = [upper for _, _, _, upper in trials]

p value 0.1729001695775163 and confidence interval(s) [-0.03946152] to [0.25403476]
p value 0.00405907504087788 and confidence interval(s) [0.05896423] to [0.28133342]
p value 0.7962059171500295 and confidence interval(s) [-0.09323138] to [0.1295778]
p value 0.15250508656146367 and confidence interval(s) [0.02002812] to [0.19162913]
p value 0.0192294186608426 and confidence interval(s) [0.05424307] to [0.29597059]
p value 0.059135619309627185 and confidence interval(s) [0.00398964] to [0.26626384]
p value 0.40497594396583125 and confidence interval(s) [-0.0563984] to [0.1608731]
p value 0.06685948354939908 and confidence interval(s) [0.01551689] to [0.23349136]
p value 0.00013033707512077886 and confidence interval(s) [0.15429305] to [0.30178562]


In [None]:
# Draw each trial's confidence interval as a vertical segment located at that trial's p-value.
# In the single-coefficient case every bound is handed over as a length-1 array, so flatten
# everything to plain 1-D arrays before taking minima/maxima or plotting.
pArr = np.asarray(pVals, dtype=float).ravel()
lowArr = np.asarray(lcbs, dtype=float).ravel()
highArr = np.asarray(ucbs, dtype=float).ravel()
agreeArr = np.asarray(success, dtype=bool).ravel()

minY = lowArr.min()
maxY = highArr.max()
plt.ylim(minY, maxY)

for p, lo, hi, ok in zip(pArr, lowArr, highArr, agreeArr):
    # vlines takes ymin/ymax in data coordinates, so no conversion to axes fractions
    # (and no division by maxY - minY) is needed.
    # Mismatches are drawn opaque and red so they stand out against the faint green agreements.
    plt.vlines(p, lo, hi, colors=('darkgreen' if ok else 'darkred'), alpha=(0.2 if ok else 1))

plt.axhline(0, color='k', linestyle='--')         # coefficient value zero
plt.axvline(0.05, color='gray', linestyle='--')   # default alpha of compareAnalyticVsOptimization

plt.xlabel("p-value")
plt.ylabel("Range of confidence interval")
# Fixed-precision formatting keeps floating-point noise (e.g. 56.99999999999999) out of the title.
plt.title("Succeeded in {:.1f}% of trials".format(np.mean(agreeArr) * 100))

plt.savefig("d1Beta0.1.png")
plt.show()

## Single Zero Coefficient

In [None]:
%%time
# Run repeated simulations with a single zero coefficient and keep, per trial,
# the agreement flag, the p-value and the lower/upper confidence bounds.
beta = [0.0]
nTrials = 1000  # alternative setting: 100

trials = [compareAnalyticVsOptimization(beta=beta) for _ in range(nTrials)]

success = [agree for agree, _, _, _ in trials]
pVals = [pValue for _, pValue, _, _ in trials]
lcbs = [lower for _, _, lower, _ in trials]
ucbs = [upper for _, _, _, upper in trials]

In [None]:
# Draw each trial's confidence interval as a vertical segment located at that trial's p-value.
# In the single-coefficient case every bound is handed over as a length-1 array, so flatten
# everything to plain 1-D arrays before taking minima/maxima or plotting.
pArr = np.asarray(pVals, dtype=float).ravel()
lowArr = np.asarray(lcbs, dtype=float).ravel()
highArr = np.asarray(ucbs, dtype=float).ravel()
agreeArr = np.asarray(success, dtype=bool).ravel()

minY = lowArr.min()
maxY = highArr.max()
plt.ylim(minY, maxY)

for p, lo, hi, ok in zip(pArr, lowArr, highArr, agreeArr):
    # vlines takes ymin/ymax in data coordinates, so no conversion to axes fractions
    # (and no division by maxY - minY) is needed.
    # Mismatches are drawn opaque and red so they stand out against the faint green agreements.
    plt.vlines(p, lo, hi, colors=('darkgreen' if ok else 'darkred'), alpha=(0.2 if ok else 1))

plt.axhline(0, color='k', linestyle='--')         # coefficient value zero
plt.axvline(0.05, color='gray', linestyle='--')   # default alpha of compareAnalyticVsOptimization

plt.xlabel("p-value")
plt.ylabel("Range of confidence interval")
# Fixed-precision formatting keeps floating-point noise (e.g. 56.99999999999999) out of the title.
plt.title("Succeeded in {:.1f}% of trials".format(np.mean(agreeArr) * 100))

plt.savefig("d1Beta0.png")
plt.show()

## Multi-dimensional Beta

In [None]:
%%time
# Run repeated simulations with two zero coefficients and keep, per trial,
# the agreement flag, the p-value and the lower/upper confidence bounds.
beta = [0.0, 0.0]
nTrials = 1000  # alternative setting: 100

trials = [compareAnalyticVsOptimization(beta=beta) for _ in range(nTrials)]

success = [agree for agree, _, _, _ in trials]
pVals = [pValue for _, pValue, _, _ in trials]
lcbs = [lower for _, _, lower, _ in trials]
ucbs = [upper for _, _, _, upper in trials]

In [None]:
# One panel per coefficient: each trial's confidence interval for that coefficient is drawn
# as a vertical segment located at the trial's p-value.
d = len(beta)
# squeeze=False makes subplots return a 2-D array of Axes even when d == 1; keep its only column.
fig, ax = plt.subplots(d, squeeze=False, figsize=(8, 4*d))
ax = ax[:, 0]

for i, panel in enumerate(ax):
    # Bounds of the i-th coefficient across all trials, as plain floats.
    ilcbs = [float(lower[i]) for lower in lcbs]
    iucbs = [float(upper[i]) for upper in ucbs]
    minY = min(ilcbs)
    maxY = max(iucbs)
    panel.set_ylim(minY, maxY)

    for p, lo, hi, ok in zip(pVals, ilcbs, iucbs, success):
        # vlines takes ymin/ymax in data coordinates, so no conversion to axes fractions
        # (and no division by maxY - minY) is needed.
        # Mismatches are drawn opaque and red so they stand out against the faint green agreements.
        panel.vlines(p, lo, hi, colors=('darkgreen' if ok else 'darkred'), alpha=(0.2 if ok else 1))

    panel.axhline(0, color='k', linestyle='--')         # coefficient value zero
    panel.axvline(0.05, color='gray', linestyle='--')   # default alpha of compareAnalyticVsOptimization

# Fixed-precision formatting keeps floating-point noise (e.g. 56.99999999999999) out of the title.
ax[0].set_title("Succeeded in {:.1f}% of trials".format(np.mean(success) * 100))
ax[d-1].set_xlabel("p-value")
ax[0].set_ylabel("Range of confidence interval")
plt.tight_layout()
plt.savefig("d2Beta0.png")
plt.show()

## Higher-dimensional beta: check for multiple-testing problems (I don't think we will have them, but it is good to confirm)

In [None]:
%%time
# Run repeated simulations with five zero coefficients and keep, per trial,
# the agreement flag, the p-value and the lower/upper confidence bounds.
beta = [0.0, 0.0, 0.0, 0.0, 0.0]
nTrials = 1000  # alternative setting: 100

trials = [compareAnalyticVsOptimization(beta=beta) for _ in range(nTrials)]

success = [agree for agree, _, _, _ in trials]
pVals = [pValue for _, pValue, _, _ in trials]
lcbs = [lower for _, _, lower, _ in trials]
ucbs = [upper for _, _, _, upper in trials]

In [None]:
# One panel per coefficient: each trial's confidence interval for that coefficient is drawn
# as a vertical segment located at the trial's p-value.
d = len(beta)
# squeeze=False makes subplots return a 2-D array of Axes even when d == 1; keep its only column.
fig, ax = plt.subplots(d, squeeze=False, figsize=(8, 4*d))
ax = ax[:, 0]

for i, panel in enumerate(ax):
    # Bounds of the i-th coefficient across all trials, as plain floats.
    ilcbs = [float(lower[i]) for lower in lcbs]
    iucbs = [float(upper[i]) for upper in ucbs]
    minY = min(ilcbs)
    maxY = max(iucbs)
    panel.set_ylim(minY, maxY)

    for p, lo, hi, ok in zip(pVals, ilcbs, iucbs, success):
        # vlines takes ymin/ymax in data coordinates, so no conversion to axes fractions
        # (and no division by maxY - minY) is needed.
        # Mismatches are drawn opaque and red so they stand out against the faint green agreements.
        panel.vlines(p, lo, hi, colors=('darkgreen' if ok else 'darkred'), alpha=(0.2 if ok else 1))

    panel.axhline(0, color='k', linestyle='--')         # coefficient value zero
    panel.axvline(0.05, color='gray', linestyle='--')   # default alpha of compareAnalyticVsOptimization

# Fixed-precision formatting keeps floating-point noise (e.g. 56.99999999999999) out of the title.
ax[0].set_title("Succeeded in {:.1f}% of trials".format(np.mean(success) * 100))
ax[d-1].set_xlabel("p-value")
ax[0].set_ylabel("Range of confidence interval")
plt.tight_layout()
plt.savefig("d5Beta0.png")
plt.show()