In [1]:
import numpy as np
from scipy import stats
import unittest
%run helper_fns.ipynb
%run probability_fns.ipynb
%run likelihood_fns.ipynb
%run sampling_fns.ipynb

...........
----------------------------------------------------------------------
Ran 11 tests in 0.010s

OK
...........
----------------------------------------------------------------------
Ran 11 tests in 0.011s

OK
......
----------------------------------------------------------------------
Ran 6 tests in 0.012s

OK
...........
----------------------------------------------------------------------
Ran 11 tests in 0.013s

OK
...........
----------------------------------------------------------------------
Ran 11 tests in 0.012s

OK
......
----------------------------------------------------------------------
Ran 6 tests in 0.013s

OK
  LLQ += np.log(LOP(y, mu, sigma, thetas))
  real_LP_1 = np.sum(np.log([max(0,
.

end is lower than start; can happen and will return 0 which will cause neg infs later: [-inf, 0.0, -1.0, 1.0, inf]
end is lower than start; can happen and will return 0 which will cause neg infs later: [-inf, 0.0, -1.0, 1.0, inf]
some thetas out of order: [-inf, 1.5, 1.3, 3.5, inf]; test: [False  True False False  True]
np.inf returned; should only really happen with thetas out of order:[-inf, 1.5, 1.3, 3.5, inf]
LL: -inf
LP: -3.4768155996140178
A is -inf: A -inf; A1 -inf A2 -6.92554331291155 A3 -0.2255550003176261



----------------------------------------------------------------------
Ran 3 tests in 0.020s

OK


## Likelihood functions and checks
This notebook is to be used as a submodule that contains wrappers for all the likelihood functions used by the ordinal probit model for survey data, and Metropolis-Hastings sampler. There is also an optional testing suite.

In [2]:
class OrdinalProbitSurveyData():
    """A class to fit ordinal probit models to survey data using MCMC. Meant to implement the model in 
    "Analyzing ordinal data with metric models: What could possibly go wrong?""
    Torrin M.Liddell and John K.Kruschke (2018)
    https://www.sciencedirect.com/science/article/abs/pii/S0022103117307746
    
    Initial implementation: Ruairidh McLennan Battleday
    
    Inputs
    ---------
    
    surveyResponses: matrix of ints or list or list of ints. Each row or list is a set of responses
                     to a particular question, where the answers form an ordinal list (ordered sequence)
                     and each entry is the counts for each answers.
                     All rows or questions must have the same meaning and permissible responses
                     (e.g., similarity, 1=highly similar, 9=not at all similar, etc).
                     
                     
    k: int; the number of applicable responses. This will be used to define a range of possible responses
                    from 0:k-1.
                    
                    
    num_samples: number of MCMC samples after discarding burn in samples;
    burn_in: number of initial MCMC samples to discard;
    slice_frequency: how frequently to retain samples after burn in samples;
    print_frequency: how frequently to print.
    
    printing: whether to call subfunctions and methods with printing on (prints all intermediate values)
    debug: whether to run assertion statements within subfunctions.
    
    Structures
    --------------
    paramDict: a dictionary of variables and arguments for each parameter, which are used to conduct MCMC. 
                Should contain the parameter name as key, which returns a sub dictionary.
                Each subdictionary has the following key:value pairs
                "value" key that stores its value
                "proposal_function" key, which contains the proposal function
                "proposal" key, which stores a list of proposal function arguments
                "acceptance_function" key, which stores the acceptance function
                "acceptance" key, which stores the acceptance function arguments.
                
    Methods
    -------------
    Cycle function, which updates a single parameter, and then updates dependencies.
                    
    Outputs
    -----------
    Each of the below will be embedded into a matrix of K x S, where K is the size of the paramter vector in question,
    and S is the number of retained MCMC samples.
    
    mus: vector of floats. These are the mean parameters for the latent Gaussians underlying each question;
    sigmas: vector of floats from positive reals. These are the sd parameters for the latent Gaussians underlying each question;
    thetas: vector of floats from positive reals; ascending sequence. These define the quantiles used to calculate
            the mean response for a question / the response thresholds determining the probability of any given answer.
    """
    
    def __init__(surveyResponses, k, num_samples, burn_in, slice_frequency, print_frequency,
                printing=False, debug=False):
        
        self.surveyResponses = surveyResponses
        self.Q = None
        self.k = k
        self.num_samples = num_samples
        self.burn_in = burn_in
        self.slice_frequency = slice_frequency
        self.print_frequency = print_frequency
        self.printing = printing
        self.debug = debug
        self.paramDict = {}
        self.paramNames = []
        
        self.sigma_prop = 1.0 # proposal function SD
        
    self.initialize
        
    def initialize(self):
        try:
            self.surveyResponses = np.int_(np.array(surveyResponses))
        except Exception as E:
            print(E)
            print("Survey responses are the wrong format: should be List of Lists of ints" + \
            "with each subList the same length; or, matrix of ints.")
        assert self.surveyResponses.shape[-1] == k, "mismatch between number of columns in surveyResponses and k {} vs {}".format(self.surveyResponses[-1],
                                                                                                                                 k)
        # first establish global and prior parameters; taken from original paper
        self.Q = self.surveyResponses.shape[0]
        self.mu_0 = (self.k+1)/2
        self.sigma_0 = self.k
        self.gamma_mean = 3.0 # mean of gamma prior on sigmas; note, original paper used 3.0 as mode, 
        # but I've changed this to mean for ease of conversion (was it a typo in original paper?)
        self.gamma_spread = 3.0 # sd of gamma prior on sigmas
        
        
        for q in np.arange(self.Q):
            paramDict["mu_{}".format(q)] = {"value": self.mu_0,
                                           "proposal_function": mu_proposal,
                                            "proposal": None,
                                            "acceptance_function": mu_accept,
                                            "acceptance": None}
            
            paramDict["sigma_{}".format(q)] = {"value": None,
                                           "proposal_function": sigma_proposal,
                                            "proposal": None,
                                            "acceptance_function": sigma_accept,
                                            "acceptance": None}
            
            
    def update_all_params(paramList, paramDict):
        """ParamDict is the key data structure here.

        Returns a copy of dict for safety: might need to rethink this later."""

        for param_name in sorted(paramList):
            # here we need to refresh values of dependencies somehow
            
            
            paramDict["Value"] = update_parameter(param_name, paramDict[param_name]["proposal_fn"], 
                                                  paramDict[param_name]["proposal"],
                                                  paramDict[param_name]["acceptance_fn"]
                                                 paramDict[param_name]["acceptance"])
            
            

        return paramDict.copy()
        
    

SyntaxError: invalid syntax (772172027.py, line 120)