In [1]:
import numpy as np
from scipy import stats
import unittest
import sys
%run helper_fns.ipynb
%run probability_fns.ipynb
%run likelihood_fns.ipynb

...........
----------------------------------------------------------------------
Ran 11 tests in 0.009s

OK
...........
----------------------------------------------------------------------
Ran 11 tests in 0.010s

OK
......

end is lower than start; can happen and will return 0 which will cause neg infs later: [-inf, 0.0, -1.0, 1.0, inf]



----------------------------------------------------------------------
Ran 6 tests in 0.012s

OK


## Likelihood functions and checks
This notebook is to be used as a submodule that contains wrappers for all the likelihood functions used by the ordinal probit model for survey data, and Metropolis-Hastings sampler. There is also an optional testing suite.

In [3]:
def mu_accept(mu, mu_star, guessVector, sigma, thetas, mu_0, sigma_0, sigma_prop,
             printing=False, debug=False):
    """Returns the acceptance probability of a proposal for mu.
    
    Inputs
    -------------------
    mu: current latent mean; scalar value in Reals;
    mu_star: proposal for new latent mean; scalar value in Reals;
    guessVector: guesses / y's. Each should be an integer from 1:k; could be list or vector of ints;
    sigma: latent sd; scalar value in (0, inf);
    thetas: values of latent thresholds defining probit probabilities; scalars in [1.5, k-0.5];
            endpoints are -inf, inf. Length is k+2, where k is the number of possible ordinal choices.
    mu_0: prior mean; scalar value in Reals;
    sigma_0: prior standard devation; scalar value in (0, inf);
    printing: Bool; whether to print messages;
    debug: Bool; whether to run internal error checks.
    
    Outputs
    -------------------
    AP: acceptance probability (ratio); scalar in (0, inf).
    """
    
    A1 = joint_log_probability_mu(guessVector, mu_star, sigma, thetas, mu_0, sigma_0, 
                        printing, debug)
    A2 = joint_log_probability_mu(guessVector, mu, sigma, thetas, mu_0, sigma_0, 
                        printing, debug)
    
    A3 = mu_log_jump_probs(mu, mu_star, sigma_prop, printing, debug)
    
    A = A1 - A2 + A3
    if debug:
        if A <= np.log(sys.float_info.max):
            print("A will cause overflow:\
        {} (A1) {} (A2) {} (A3) {}".format(A, A1, A2, A3))

    if A > 0: # if more probably (in log space), always jump
        return 1
    else:
        return np.exp(A)

def sigma_accept(sigma, sigma_star, guessVector, mu, thetas, gamma_mean, gamma_spread,
             printing=False, debug=False):
    """Returns the acceptance probability of a proposal for sigma.
    
    Inputs
    -------------------
    sigma: current latent mean; scalar value in (0, inf);
    sigma_star: proposal for new latent mean; scalar value in (0, inf); 
    guessVector: guesses / y's. Each should be an integer from 1:k; could be list or vector of ints;
    mu: latent mean; scalar value in Reals;
    thetas: values of latent thresholds defining probit probabilities; scalars in [1.5, k-0.5];
            endpoints are -inf, inf. Length is k+2, where k is the number of possible ordinal choices.
    gamma_mean: prior mean; scalar value in Reals;
    gamma_spread: prior standard devation; scalar value in (0, inf);
    printing: Bool; whether to print messages;
    debug: Bool; whether to run internal error checks.
    
    Outputs
    -------------------
    AP: acceptance probability (ratio); scalar in (0, inf).
    """
    A1 = joint_log_probability_sigma(guessVector, mu, sigma_star, thetas, gamma_mean, gamma_spread,
                        printing, debug)
    A2 = joint_log_probability_sigma(guessVector, mu, sigma, thetas, gamma_mean, gamma_spread,
                        printing, debug)
    
    A3 = sigma_log_jump_probs(sigma, sigma_star, gamma_spread, printing, debug)
    
    A = A1 - A2 + A3
    
    if A > 0: # if more probably (in log space), always jump
        return 1
    else:
        return np.exp(A)
def theta_accept(theta, theta_star, thetas, guessMatrix, mus, sigmas, center, shift, sigma_0, sigma_prop, 
                 lower_0, upper_0, printing=False, debug=False):
    """Returns the acceptance probability of a proposal for theta. A bit inefficient. Note, have to monitor
    here for -infs, as sometimes the proposals reverse order sloppily. 
    
    Inputs
    -------------------
    theta: a current threshold; scalar value in (0, inf);
    theta_star: proposal for new latent mean; scalar value in (0, inf); 
    guessVector: guesses / y's. Each should be an integer from 1:k; could be list or vector of ints;
    mu: latent mean; scalar value in Reals;
    thetas: values of latent thresholds defining probit probabilities; scalars in [1.5, k-0.5];
            endpoints are -inf, inf. Length is k+2, where k is the number of possible ordinal choices.
    mu_0: prior mean; scalar value in Reals;
    sigma_0: prior standard devation; scalar value in (0, inf);
    printing: Bool; whether to print messages;
    debug: Bool; whether to run internal error checks.

    Outputs
    -------------------
    AP: acceptance probability (ratio); scalar in (0, inf).
    """
    
    thetasTemp = thetas.copy()
    thetasTemp[center] = theta_star
    
    A1 = joint_log_probability_thetas(guessMatrix, mus, sigmas, thetasTemp, shift, sigma_0,
                                     printing, debug)
    
    A2 = joint_log_probability_thetas(guessMatrix, mus, sigmas, thetas, shift, sigma_0,
                                     printing, debug)
    
    A3 = theta_log_jump_probs(theta, theta_star, sigma_prop, lower_0, upper_0, printing, debug)

    A = A1 - A2 + A3
    
    if debug:
        if np.isneginf(A):
            print('A is -inf: A {}; A1 {} A2 {} A3 {}'.format(A, A1, A2, A3))
    if printing: 
        print("A1 {} A2 {} A3 {}".format(A1, A2, A3))
    if np.isneginf(A):
        return 0.0 # do not move if that would swap a threshold
    else:
        if A > 0: # if more probably (in log space), always jump
            return 1
        else:
            return np.exp(A)

        
class LikelihoodFunctionsTestSuite(unittest.TestCase):
    def gauss(self, val, mu=0, s=1):
            return np.exp(- (1/2)*
                          ((val-mu)/s)**2) / np.sqrt(2*np.pi*(s**2))
    
    def gauss_trunc(self, x, mu, sigma, lower, upper):
        zeta = (x-mu)/sigma
        alpha = (lower-mu)/sigma
        beta = (upper-mu)/sigma
        Z = stats.norm.cdf(beta) - stats.norm.cdf(alpha)
        return (self.gauss(zeta)) / (sigma*Z)
    
    def gamma(self, x, gamma_mean, gamma_spread, printing=False):
        """Return probability under gamma distribution."""
        alpha = (gamma_mean**2) / (gamma_spread**2) # shape param, algebra from wikipedia
        beta = gamma_mean / (gamma_spread**2) # algebra from wikipedia
        
        term_1 = (beta**alpha)
        term_2 = 1/gamma_fn(alpha)
 
        term_3 = (x**(alpha-1)) 
        term_4 = np.exp(-beta*x)
        if printing:
            print('exterior alpha, beta', alpha, beta)
        #print_fn(["term_1", term_1, "term_2", term_2, "term_3", term_3, "term_4", term_4])
        return term_1*term_2*term_3*term_4
    
    def test_mu_accept(self):
        #             mu. mu_star  y.    sigma         thetas             mu_0 s_0  s_j
        testValues = [0.0, 1.0, [2, 1], 2.0, [-np.inf, 1.5, 2.5, np.inf], 1.0, 1.0, 2.0]
        mu, mu_star, guessVector, sigma, thetas, mu_0, s_0, s_j = testValues.copy()
        
        returned_A = mu_accept(*testValues, printing=False, debug=True)
        
        real_A1 = np.sum(np.log([max(0, stats.norm.cdf(thetas[guessVector[0]+1], mu_star, sigma) - stats.norm.cdf(thetas[guessVector[0]], mu_star, sigma)), 
                                max(0, stats.norm.cdf(thetas[guessVector[1]+1], mu_star, sigma) - stats.norm.cdf(thetas[guessVector[1]], mu_star, sigma)),
                                self.gauss(mu_star, mu_0, s_0)]))
        real_A2 = np.sum(np.log([max(0, stats.norm.cdf(thetas[guessVector[0]+1], mu, sigma) - stats.norm.cdf(thetas[guessVector[0]], mu, sigma)), 
                                max(0, stats.norm.cdf(thetas[guessVector[1]+1], mu, sigma) - stats.norm.cdf(thetas[guessVector[1]], mu, sigma)),
                                self.gauss(mu, mu_0, s_0)]))
        
        real_A3 = 0
        comb = np.min([1, np.exp(real_A1 - real_A2 + real_A3)])
        self.assertAlmostEqual(returned_A, comb)
        
        
    def test_sigma_accept(self):
        #             s.  s_star  y.    mu         thetas                gamma_mean gamma_spread
        testValues = [1.0, 2.0, [2, 1], 2.0, [-np.inf, 1.5, 2.5, np.inf], 2.0, 3.0]
        sigma, sigma_star, guessVector, mu, thetas, gamma_mean, gamma_spread = testValues.copy()
        returned_A = sigma_accept(*testValues, printing=False, debug=True)
        real_A1 = np.sum(np.log([max(0, stats.norm.cdf(thetas[guessVector[0]+1], mu, sigma_star) - stats.norm.cdf(thetas[guessVector[0]], mu, sigma_star)), 
                                max(0, stats.norm.cdf(thetas[guessVector[1]+1], mu, sigma_star) - stats.norm.cdf(thetas[guessVector[1]], mu, sigma_star)),
                                self.gamma(sigma_star, gamma_mean, gamma_spread)]))
        real_A2 = np.sum(np.log([max(0, stats.norm.cdf(thetas[guessVector[0]+1], mu, sigma) - stats.norm.cdf(thetas[guessVector[0]], mu, sigma)), 
                                max(0, stats.norm.cdf(thetas[guessVector[1]+1], mu, sigma) - stats.norm.cdf(thetas[guessVector[1]], mu, sigma)),
                                self.gamma(sigma, gamma_mean, gamma_spread)]))
        # this still needs changing
        real_A3 = np.log(self.gamma(sigma, sigma_star, gamma_spread)) - np.log(self.gamma(sigma_star, sigma, gamma_spread)) # because asymmetric ---> Metropolis-H 
        comb = np.min([1, np.exp(real_A1 - real_A2 + real_A3)])
        self.assertAlmostEqual(returned_A, comb)
        
    def test_theta_accept(self):
        # in this case proposal is in order
        testValues1 = [2.5, 2.6, [-np.inf, 1.5, 2.5, 3.5, np.inf], [[2, 1], [2, 3]], [1.0, 2.0], [1.0, 1.0], 2, 0.5, 1.0, 1.0, 0, 3]
        theta, theta_star, thetas, guessMatrix, mus, sigmas, center, shift, sigma_0, sigma_prop, lower_0, upper_0 = testValues1.copy()
        returned_A = theta_accept(*testValues1, printing=False, debug=True)
        thetasStar = thetas.copy()
        thetasStar[center] = theta_star
        
        real_LP_1 = np.sum(np.log([max(0, 
                                       stats.norm.cdf(thetasStar[guessMatrix[0][0]], mus[0], sigmas[0]) - stats.norm.cdf(thetasStar[guessMatrix[0][0]-1], mus[0], sigmas[0])), 
                                max(0, 
                                    stats.norm.cdf(thetasStar[guessMatrix[0][1]], mus[0], sigmas[0]) - stats.norm.cdf(thetasStar[guessMatrix[0][1]-1], mus[0], sigmas[0])),
                                   max(0, 
                                       stats.norm.cdf(thetasStar[guessMatrix[1][0]], mus[1], sigmas[1]) - stats.norm.cdf(thetasStar[guessMatrix[1][0]-1], mus[1], sigmas[1])), 
                                max(0, 
                                    stats.norm.cdf(thetasStar[guessMatrix[1][1]], mus[1], sigmas[1]) - stats.norm.cdf(thetasStar[guessMatrix[1][1]-1], mus[1], sigmas[1]))]))
        
        
        tempProbsStar = [self.gauss(x, i+1.5, sigma_0) for (i, x) in enumerate(thetasStar[1:-1])]
        real_LP_2 = np.sum(np.log(tempProbsStar))

        
        real_LP_3 = np.sum(np.log([max(0, 
                                       stats.norm.cdf(thetas[guessMatrix[0][0]], mus[0], sigmas[0]) - stats.norm.cdf(thetas[guessMatrix[0][0]-1], mus[0], sigmas[0])), 
                                max(0, 
                                    stats.norm.cdf(thetas[guessMatrix[0][1]], mus[0], sigmas[0]) - stats.norm.cdf(thetas[guessMatrix[0][1]-1], mus[0], sigmas[0])),
                                   max(0, 
                                       stats.norm.cdf(thetas[guessMatrix[1][0]], mus[1], sigmas[1]) - stats.norm.cdf(thetas[guessMatrix[1][0]-1], mus[1], sigmas[1])), 
                                max(0, 
                                    stats.norm.cdf(thetas[guessMatrix[1][1]], mus[1], sigmas[1]) - stats.norm.cdf(thetas[guessMatrix[1][1]-1], mus[1], sigmas[1]))]))
         
        tempProbsStar = [self.gauss(x, i+1.5, sigma_0) for (i, x) in enumerate(thetasStar[1:-1])]

        real_LP_4 = np.sum(np.log(tempProbsStar))
        
        
        real_A1 = real_LP_1 + real_LP_2
        real_A2 = real_LP_3 + real_LP_4
        real_A3 = np.log(self.gauss_trunc(theta, theta_star, sigma_prop, lower_0, upper_0)) - np.log(self.gauss_trunc(theta_star, theta, sigma_prop, lower_0, upper_0))
        comb = np.min([1, np.exp(real_A1 - real_A2 + real_A3)])
        self.assertAlmostEqual(returned_A, comb)
        
        # in this case theta proposal is out of order
        testValues2 = [2.5, 1.3, [-np.inf, 1.5, 2.5, 3.5, np.inf], [[2, 1], [2, 3]], [1.0, 2.0], [1.0, 1.0], 2, 0.5, 1.0, 1.0, 0, 3]
        theta, theta_star, thetas, guessMatrix, mus, sigmas, center, shift, sigma_0, sigma_prop, lower_0, upper_0 = testValues2.copy()
        returned_A2 = theta_accept(*testValues2, printing=False, debug=True)
        thetasStar2 = thetas.copy()
        thetasStar2[center] = theta_star
        
        real_LP_1 = np.sum(np.log([max(0, 
                                       stats.norm.cdf(thetasStar2[guessMatrix[0][0]], mus[0], sigmas[0]) - stats.norm.cdf(thetasStar2[guessMatrix[0][0]-1], mus[0], sigmas[0])), 
                                max(0, 
                                    stats.norm.cdf(thetasStar2[guessMatrix[0][1]], mus[0], sigmas[0]) - stats.norm.cdf(thetasStar2[guessMatrix[0][1]-1], mus[0], sigmas[0])),
                                   max(0, 
                                       stats.norm.cdf(thetasStar2[guessMatrix[1][0]], mus[1], sigmas[1]) - stats.norm.cdf(thetasStar2[guessMatrix[1][0]-1], mus[1], sigmas[1])), 
                                max(0, 
                                    stats.norm.cdf(thetasStar2[guessMatrix[1][1]], mus[1], sigmas[1]) - stats.norm.cdf(thetasStar2[guessMatrix[1][1]-1], mus[1], sigmas[1]))]))
        
        
        tempProbsStar = [self.gauss(x, i+1.5, sigma_0) for (i, x) in enumerate(thetasStar2[1:-1])]
        real_LP_2 = np.sum(np.log(tempProbsStar))

        
        real_LP_3 = np.sum(np.log([max(0, 
                                       stats.norm.cdf(thetas[guessMatrix[0][0]], mus[0], sigmas[0]) - stats.norm.cdf(thetas[guessMatrix[0][0]-1], mus[0], sigmas[0])), 
                                max(0, 
                                    stats.norm.cdf(thetas[guessMatrix[0][1]], mus[0], sigmas[0]) - stats.norm.cdf(thetas[guessMatrix[0][1]-1], mus[0], sigmas[0])),
                                   max(0, 
                                       stats.norm.cdf(thetas[guessMatrix[1][0]], mus[1], sigmas[1]) - stats.norm.cdf(thetas[guessMatrix[1][0]-1], mus[1], sigmas[1])), 
                                max(0, 
                                    stats.norm.cdf(thetas[guessMatrix[1][1]], mus[1], sigmas[1]) - stats.norm.cdf(thetas[guessMatrix[1][1]-1], mus[1], sigmas[1]))]))
         
        tempProbs = [self.gauss(x, i+1.5, sigma_0) for (i, x) in enumerate(thetas[1:-1])]

        real_LP_4 = np.sum(np.log(tempProbs))
        
        real_A1 = real_LP_1 + real_LP_2
        real_A2 = real_LP_3 + real_LP_4
        real_A3 = np.log(self.gauss_trunc(theta, theta_star, sigma_prop, lower_0, upper_0)) - np.log(self.gauss_trunc(theta_star, theta, sigma_prop, lower_0, upper_0))
        comb2 = np.min([1, np.exp(real_A1 - real_A2 + real_A3)])
        self.assertAlmostEqual(returned_A2, comb2)
    
runner = unittest.TextTestRunner(failfast=True)
runner.run(initialize_suite(LikelihoodFunctionsTestSuite))  

E
ERROR: test_mu_accept (__main__.LikelihoodFunctionsTestSuite)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/var/folders/z3/65sxpv5n3csdhfzm0ysx30940000gp/T/ipykernel_73506/2875689163.py", line 150, in test_mu_accept
    returned_A = mu_accept(*testValues, printing=False, debug=True)
  File "/var/folders/z3/65sxpv5n3csdhfzm0ysx30940000gp/T/ipykernel_73506/2875689163.py", line 32, in mu_accept
    if A <= np.log(sys.float_info.max):
NameError: name 'sys' is not defined

----------------------------------------------------------------------
Ran 1 test in 0.002s

FAILED (errors=1)


<unittest.runner.TextTestResult run=1 errors=1 failures=0>