In [2]:
import numpy as np
from scipy import stats
import unittest
import sys
%run helper_fns.ipynb
%run probability_fns.ipynb

...........
----------------------------------------------------------------------
Ran 11 tests in 0.015s

OK


## Likelihood functions and checks
This notebook is meant to be used as a submodule: it contains wrappers for all the likelihood functions used by the ordinal probit model for survey data and by the Metropolis-Hastings sampler. It also includes an optional test suite.

In [8]:
def LOP(y, mu, sigma, thetas, printing=False, debug=False):
    """Return the ordinal probit likelihood of a single response category.

    The likelihood is the normal probability mass (mean `mu`, sd `sigma`)
    falling between the thresholds `thetas[y-1]` and `thetas[y]`.

    Inputs
    -------------------
    y: response category; a single integer in 1..len(thetas)-1 (1-indexed);
    mu: latent mean; scalar value in Reals;
    sigma: latent sd; scalar value in (0, inf);
    thetas: sequence of latent thresholds, including the two outer endpoints
            (-inf and inf in the notebook's tests). With k response
            categories it has k+1 entries.
    printing: Bool; accepted for signature parity with the other wrappers,
              not used here;
    debug: Bool; whether to run internal error checks.

    Outputs
    -------------------
    LOP: likelihood; scalar in [0, 1]. It is 0 when the two thresholds are out
         of order (upper below lower), which yields -inf once logged.

    Raises
    -------------------
    AssertionError: only when `debug` is True and an argument is invalid.
    """
    if debug:
        # Validate *before* indexing so that a bad y is reported as such,
        # instead of raising IndexError or silently wrapping around (y == 0
        # would otherwise read thetas[-1]).
        assert isinstance(y, (int, np.integer)), "y is not int: {}".format(y)
        assert isinstance(mu, float), "mu is not scalar: {}".format(mu)
        assert isinstance(sigma, float), "sigma is not scalar: {}".format(sigma)
        # The last valid category is len(thetas)-1, because it uses
        # thetas[len(thetas)-2] and thetas[len(thetas)-1].
        assert 1 <= y <= len(thetas)-1, "y is out of range: {}; thetas: {}".format(y, thetas)

    start = stats.norm.cdf(thetas[y-1], mu, sigma)
    end = stats.norm.cdf(thetas[y], mu, sigma)

    if debug:
        assert 0.0 <= start <= 1.0, "start not in right range: {} vs [0, 1]".format(start)
        assert 0.0 <= end <= 1.0, "end not in right range: {} vs [0, 1]".format(end)
        if end-start < 0:
            print("end is lower than start; can happen and will return 0 which will cause neg infs later: {}".format(thetas))

    # Clamp at zero: out-of-order thresholds give a negative difference.
    return max(0, end-start)
    
def LLQ(guessVector, mu, sigma, thetas, printing=False, debug=False):
    """Return the log likelihood of one question / survey item under the ordinal probit model.

    Inputs
    -------------------
    guessVector: counts per response category; entry i is the number of
                 responses in category i+1 (categories are 1-indexed). List or
                 vector of non-negative ints;
    mu: latent mean; scalar value in Reals;
    sigma: latent sd; scalar value in (0, inf);
    thetas: values of latent thresholds defining probit probabilities,
            including the two outer endpoints;
    printing: Bool; accepted for signature parity, not used here;
    debug: Bool; accepted for signature parity, not forwarded to LOP.

    Outputs
    -------------------
    LLQ: log likelihood; float in [-inf, 0]. It is -inf when a category that
         was actually observed has probability 0.
    """
    total = 0.0  # float so that empty / all-zero input still returns a float
    for ans, counts in enumerate(guessVector):
        if counts == 0:
            # An unobserved category contributes nothing. Skipping it avoids
            # 0 * log(0) = nan when that category also has zero probability.
            continue
        # guesses are 1-indexed
        prob = LOP(ans+1, mu, sigma, thetas)
        # Weight the log probability by the count rather than evaluating the
        # probability once per individual response.
        total += counts * np.log(prob)

    return total
        
def LL(guessMatrix, mus, sigmas, thetas, printing=False, debug=False):
    """Return the log likelihood summed over every question / survey item.

    Inputs
    -------------------
    guessMatrix: one row of response counts per question; list of lists or matrix of ints;
    mus: latent means, one per question; vector in Reals;
    sigmas: latent sds, one per question; vector in (0, inf);
    thetas: latent thresholds defining the probit probabilities; shared by all
        questions (k is assumed invariant between questions);
    printing: Bool; forwarded to LLQ;
    debug: Bool; forwarded to LLQ and enables the checks on the total.

    Outputs
    -------------------
    LL: log likelihood; scalar in [-inf, 0]."""
    # Row idx of the matrix is paired with mus[idx] and sigmas[idx].
    total = sum(
        LLQ(row, mus[idx], sigmas[idx], thetas, printing, debug)
        for idx, row in enumerate(guessMatrix)
    )

    if debug:
        assert isinstance(total, float), "LL not scalar: {}".format(total)
        assert -np.inf <= total <= 0, "LL out of range: {}".format(total)

    return total


        
def joint_log_probability_mu(guessVector, mu, sigma, thetas, mu_0, sigma_0, 
                        printing=False, debug=False):
    
    """Returns log joint probability for a single mu (question) under ordinal probit model. 
    
    The result is the question's log likelihood (from LLQ) plus the log of the
    prior on mu (from mu_prior).
    
    Inputs
    -------------------
    guessVector: response counts per category for the question, as consumed by LLQ;
                 list or vector of ints;
    mu: latent mean; scalar value in Reals;
    sigma: latent sd; scalar value in (0, inf);
    thetas: values of latent thresholds defining probit probabilities, including the outer endpoints;
    mu_0: prior mean; scalar value in Reals;
    sigma_0: prior standard deviation; scalar value in (0, inf);
    printing: Bool; whether to print messages (forwarded to LLQ and mu_prior);
    debug: Bool; whether to run internal error checks.
    
    Outputs
    -------------------
    LJ: log joint probability; scalar. It is -inf when the likelihood or the prior is 0.
        It may be positive, because the prior term is a log density.
   
    """
    LL = LLQ(guessVector, mu, sigma, thetas, printing, debug)
    LP = np.log(mu_prior(mu, mu_0, sigma_0, printing, debug))
    LJ = LL + LP
    
    if debug:
        assert isinstance(LL, float), "LL not scalar: {}".format(LL)
        assert isinstance(LP, float), "LP not scalar: {}".format(LP)
        assert -np.inf <= LL <= 0, "LL out of range: {}".format(LL)
        # mu_prior is defined outside this cell; the notebook's test suite
        # compares it against a Gaussian density. A density can exceed 1 (for
        # small sigma_0), so its log is only bounded above by +inf. The
        # comparison still rejects nan.
        assert -np.inf <= LP < np.inf, "LP out of range: {}".format(LP)
        if LJ == -np.inf:
            print("np.inf returned; should only really happen with thetas out of order:{}".format(thetas))
            print("LL: {}".format(LL))
            print("LP: {}".format(LP))
            
    return LJ

def joint_log_probability_sigma(guessVector, mu, sigma, thetas, gamma_mean, gamma_spread, 
                        printing=False, debug=False):
    
    """Returns log joint probability for a single sigma (question) under ordinal probit model. 
    
    The result is the question's log likelihood (from LLQ) plus the log of the
    prior on sigma (from sigma_prior).
    
    Inputs
    -------------------
    guessVector: response counts per category for the question, as consumed by LLQ;
                 list or vector of ints;
    mu: latent mean; scalar value in Reals;
    sigma: latent sd; scalar value in (0, inf);
    thetas: values of latent thresholds defining probit probabilities, including the outer endpoints;
    gamma_mean: prior mean; scalar value in Reals;
    gamma_spread: prior standard deviation; scalar value in (0, inf);
    printing: Bool; whether to print messages (forwarded to LLQ and sigma_prior);
    debug: Bool; whether to run internal error checks.
    
    Outputs
    -------------------
    LJ: log joint probability; scalar. It is -inf when the likelihood or the prior is 0.
        It may be positive, because the prior term is a log density.
    
    """
    LL = LLQ(guessVector, mu, sigma, thetas, printing, debug)
    LP = np.log(sigma_prior(sigma, gamma_mean, gamma_spread, printing, debug))
    LJ = LL + LP
    
    if debug:
        assert isinstance(LL, float), "LL not scalar: {}".format(LL)
        assert isinstance(LP, float), "LP not scalar: {}".format(LP)
        assert -np.inf <= LL <= 0, "LL out of range: {}".format(LL)
        # sigma_prior is defined outside this cell; the notebook's test suite
        # compares it against a gamma density. A density can exceed 1, so its
        # log is only bounded above by +inf. The comparison still rejects nan.
        assert -np.inf <= LP < np.inf, "LP out of range: {}".format(LP)
        if LJ == -np.inf:
            print("np.inf returned; should only really happen with thetas out of order:{}".format(thetas))
            print("LL: {}".format(LL))
            print("LP: {}".format(LP))
    return LJ

def joint_log_probability_thetas(guessMatrix, mus, sigmas, thetas, shift, sigma_0, 
                        printing=False, debug=False):
    
    """Returns log joint probability for all thetas (all questions) under ordinal probit model. 
    
    The result is the log likelihood over all questions (from LL) plus the log
    prior on the thresholds (from thetas_log_prior).
    
    Inputs
    -------------------
    guessMatrix: one row of response counts per question, as consumed by LL;
                 list of lists or matrix of ints;
    mus: latent means for all questions; scalar vector in Reals;
    sigmas: latent sds for all questions; scalar vector in (0, inf);
    thetas: values of latent thresholds defining probit probabilities, including the outer endpoints,
        assuming k invariant between questions;
    shift: to add to prior means to help with intervals; pretty much always 0.5; scalar value in Reals;
    sigma_0: prior standard deviation; scalar value in (0, inf);
    printing: Bool; whether to print messages;
    debug: Bool; whether to run internal error checks.
    
    Outputs
    -------------------
    LJ: log joint probability; scalar. It is -inf when the likelihood or the prior is 0.
        It may be positive, because the prior term is a log density.
   
    """
    L = LL(guessMatrix, mus, sigmas, thetas, printing, debug)
    LP = thetas_log_prior(thetas, shift, sigma_0, printing, debug)
    LJ = L + LP
    
    if debug:
        assert isinstance(L, float), "LL not scalar: {}".format(L)
        assert isinstance(LP, float), "LP not scalar: {}".format(LP)
        assert -np.inf <= L <= 0, "LL out of range: {}".format(L)
        # thetas_log_prior is defined outside this cell; the notebook's test
        # suite compares it against a sum of log Gaussian densities. Such a
        # sum can be positive for small sigma_0, so it is only bounded above
        # by +inf. The comparison still rejects nan.
        assert -np.inf <= LP < np.inf, "LP out of range: {}".format(LP)
        
        if LJ == -np.inf:
            print("np.inf returned; should only really happen with thetas out of order:{}".format(thetas))
            print("LL: {}".format(L))
            print("LP: {}".format(LP))
            
    if printing:
        print_fn(["thetas", thetas, "shift", shift, "sigma_0", sigma_0, "LP", LP])
    return LJ

class LikelihoodFunctionsTestSuite(unittest.TestCase):
    def gauss(self, val, mu=0, s=1):
            return np.exp(- (1/2)*
                          ((val-mu)/s)**2) / np.sqrt(2*np.pi*(s**2))
    
    def gauss_trunc(self, x, mu, sigma, lower, upper):
        zeta = (x-mu)/sigma
        alpha = (lower-mu)/sigma
        beta = (upper-mu)/sigma
        Z = stats.norm.cdf(beta) - stats.norm.cdf(alpha)
        return (self.gauss(zeta)) / (sigma*Z)
    
    def gamma(self, x, gamma_mean, gamma_spread, printing=False):
        """Return probability under gamma distribution."""
        alpha = (gamma_mean**2) / (gamma_spread**2) # shape param, algebra from wikipedia
        beta = gamma_mean / (gamma_spread**2) # algebra from wikipedia
        
        term_1 = (beta**alpha)
        term_2 = 1/gamma_fn(alpha)
 
        term_3 = (x**(alpha-1)) 
        term_4 = np.exp(-beta*x)
        if printing:
            print('exterior alpha, beta', alpha, beta)
        #print_fn(["term_1", term_1, "term_2", term_2, "term_3", term_3, "term_4", term_4])
        return term_1*term_2*term_3*term_4
    
    def test_LOP(self):
        testValues1 = [1, 1.0, 1.0, [-np.inf, -1.0, 0.0, 1.0, np.inf]]
        returned_p = LOP(*testValues1, printing=False, debug=True)
        real_p = max(0, stats.norm.cdf(-1.0, 1, 1) - stats.norm.cdf(-np.inf, 1, 1))
        self.assertAlmostEqual(returned_p, real_p)
        
        testValues2 = [2, 1.0, 1.0, [-np.inf, 0.0, -1.0, 1.0, np.inf]]
        returned_p = LOP(*testValues2, printing=False, debug=True)
        real_p = 0
        self.assertAlmostEqual(returned_p, real_p)
        
    def test_LLQ(self):
        testValues = [[1, 1], 0.0, 2.0, [-np.inf, 1.5, 2.5, np.inf]]
        returned_LP = LLQ(*testValues, printing=False, debug=True)
        real_LP = np.sum(np.log([max(0, stats.norm.cdf(2.5, 0.0, 2.0) - stats.norm.cdf(1.5, 0.0, 2.0)), 
                                max(0, stats.norm.cdf(1.5, 0.0, 2.0) - stats.norm.cdf(-np.inf, 0.0, 2.0))]))
        
        self.assertAlmostEqual(returned_LP, real_LP)
        
    def test_LL(self):
        testValues = [[[1, 1], [1, 1]], [0.0, 1.0], [2.0, 1.0], [-np.inf, 1.5, 2.5, np.inf]]
        returned_LP = LL(*testValues, printing=False, debug=True)
        real_LP = np.sum(np.log([max(0, stats.norm.cdf(2.5, 0.0, 2.0) - stats.norm.cdf(1.5, 0.0, 2.0)), 
                                max(0, stats.norm.cdf(1.5, 0.0, 2.0) - stats.norm.cdf(-np.inf, 0.0, 2.0)),
                               max(0, stats.norm.cdf(1.5, 1.0, 1.0) - stats.norm.cdf(-np.inf, 1.0, 1.0)),
                               max(0, stats.norm.cdf(2.5, 1.0, 1.0) - stats.norm.cdf(1.5, 1.0, 1.0))]))
        self.assertAlmostEqual(returned_LP, real_LP)
        
    def test_joint_log_probability_mu(self):
        testValues = [[1, 1], 0.0, 2.0, [-np.inf, 1.5, 2.5, np.inf], 1.0, 1.0] 
        returned_LP = joint_log_probability_mu(*testValues, 
                        printing=False, debug=False)
        real_LP = np.sum(np.log([max(0, stats.norm.cdf(2.5, 0.0, 2.0) - stats.norm.cdf(1.5, 0.0, 2.0)), 
                                max(0, stats.norm.cdf(1.5, 0.0, 2.0) - stats.norm.cdf(-np.inf, 0.0, 2.0)),
                                self.gauss(0.0, 1.0, 1.0)]))
        
        self.assertAlmostEqual(returned_LP, real_LP)
        
    def test_joint_log_probability_sigma(self):
      
        testValues = [[1, 1], 0.0, 2.0, [-np.inf, 1.5, 2.5, np.inf], 1.0, 2.0] 
        guessVector, mean, sigma, thetas, gamma_mean, gamma_spread = testValues.copy()
        returned_LP = joint_log_probability_sigma(*testValues, 
                        printing=False, debug=False)
        real_LP = np.sum(np.log([max(0, stats.norm.cdf(2.5, mean, sigma) - stats.norm.cdf(1.5, mean, sigma)), 
                                max(0, stats.norm.cdf(1.5, mean, sigma) - stats.norm.cdf(-np.inf, mean, sigma)),
                                self.gamma(sigma, gamma_mean, gamma_spread)])) # last argument is summe log prior?
        self.assertAlmostEqual(returned_LP, real_LP)
        
    def test_joint_log_probability_thetas(self):
        testValues = [[[1, 1], [1, 1]], [0.0, 1.0], [2.0, 1.0], [-np.inf, 1.5, 2.5, np.inf], 0.5, 1.0]
        returned_LP = joint_log_probability_thetas(*testValues, printing=False, debug=True)
        real_LP_1 = np.sum(np.log([max(0, stats.norm.cdf(2.5, 0.0, 2.0) - stats.norm.cdf(1.5, 0.0, 2.0)), 
                                max(0, stats.norm.cdf(1.5, 0.0, 2.0) - stats.norm.cdf(-np.inf, 0.0, 2.0)),
                               max(0, stats.norm.cdf(1.5, 1.0, 1.0) - stats.norm.cdf(-np.inf, 1.0, 1.0)),
                               max(0, stats.norm.cdf(2.5, 1.0, 1.0) - stats.norm.cdf(1.5, 1.0, 1.0))]))
        real_LP_2 = np.sum(np.log([self.gauss(x, i+1.5, 1) for (i, x) in enumerate([1.5, 2.5])]))
        self.assertAlmostEqual(returned_LP, real_LP_1 + real_LP_2)
# Run the suite as soon as this cell executes; failfast stops at the first failing test.
# NOTE(review): initialize_suite is not defined in this cell -- presumably it comes from
# helper_fns.ipynb and builds a TestSuite from the given TestCase class; confirm.
runner = unittest.TextTestRunner(failfast=True)
runner.run(initialize_suite(LikelihoodFunctionsTestSuite))  

......

end is lower than start; can happen and will return 0 which will cause neg infs later: [-inf, 0.0, -1.0, 1.0, inf]



----------------------------------------------------------------------
Ran 6 tests in 0.014s

OK


<unittest.runner.TextTestResult run=6 errors=0 failures=0>