In [2]:
from time import time

import numpy as np
import scipy as sp

from scipy import integrate
from scipy.optimize import fmin_powell, fminbound, brentq

from bokeh.plotting import figure, show, output_notebook
output_notebook()

# Functions to Synthesize Data

In [78]:
# Functions to create synthetic data
def irt_evaluation(difficulty, discrimination, thetas):
    """
        Evaluates the logistic IRT model and returns the exact
        probability of a correct response for every item / person pair
        
        Args:
            difficulty: [array] of difficulty parameters
            discrimination:  [array | number] of discrimination parameters
            thetas: [array] of person abilities
            
        Returns:
            float matrix of [difficulty.size x thetas.size] holding
            1 / (1 + exp(discrimination * (difficulty - theta))); the
            values are probabilities, not yet dichotomous responses
    """
    # Work in floating point: with integer inputs the old in-place
    # multiply by a float discrimination raised a casting error
    difficulty = np.asarray(difficulty, dtype=float)
    thetas = np.asarray(thetas, dtype=float)

    # If discrimination is a scalar, make it an array
    if not np.ndim(discrimination):
        discrimination = np.ones_like(difficulty) * discrimination
    discrimination = np.asarray(discrimination, dtype=float)

    # One row per item, one column per person
    kernel = (difficulty[:, None] - thetas) * discrimination[:, None]
    return 1.0 / (1 + np.exp(kernel))



def create_synthetic_irt_dichotomous(difficulty, discrimination, thetas, seed=None):
    """
        Creates synthetic IRT data to test parameters estimation
        functions.  Only for use with dichotomous outputs
        
        Args:
            difficulty: [array] of difficulty parameters
            discrimination:  [array | number] of discrimination parameters
            thetas: [array] of person abilities
            seed: [optional] seed for a private random generator.  When
                  omitted the draws come from NumPy's global generator,
                  exactly as before, so np.random.seed still applies
            
        Returns:
            boolean matrix of [difficulty.size x thetas.size] representing
            synthetic data (True where the uniform draw does not exceed
            the model probability)
    """
    continuous_output = irt_evaluation(difficulty, discrimination, thetas)

    # convert to binary based on probability
    if seed is None:
        random_compare = np.random.rand(*continuous_output.shape)
    else:
        random_compare = np.random.default_rng(seed).random(continuous_output.shape)

    return random_compare <= continuous_output


def compute_rmse(vector1, vector2):
    """Root mean square error between two arrays.
    
    Args:
        vector1:  Input 1d array of first comparison
        vector2:  Input 1d array of second comparison
        
    Returns:
        RMSE
    """
    residual = vector1 - vector2
    mean_squared_error = np.square(residual).mean()
    return np.sqrt(mean_squared_error)


# Utility functions

In [4]:
from scipy.special import roots_legendre


def _get_quadrature_points(n, a, b):
    """
        Utility function to get the legendre points, 
        shifted from [-1, 1] to [a, b]
        
        Args:
            n: number of quadrature_points
            a: lower bound of integration
            b: upper bound of integration 
            
        A local function of the based fixed_quad found in scipy
    """
    x, w = roots_legendre(n)
    x = np.real(x)
    
    return (b - a) * (x + 1) * 0.5 + a
    

def _compute_partial_integral(theta, difficulty, discrimination, the_sign):
    """
        To be added
    """
    if np.ndim(discrimination) < 1:
        discrimination = np.full(the_sign.shape[0], discrimination)
        
    kernel = the_sign[:, :, None] * np.ones((1, 1, theta.size))
    kernel *= discrimination[:, None, None]   
    kernel *= (theta[None, None, :] - difficulty[:, None, None])
    
    # Distribution
    gauss = 1.0 / np.sqrt(2 * np.pi) * np.exp(-np.square(theta) / 2)

    return  gauss[None, :] * (1.0 / (1.0 + np.exp(kernel))).prod(axis=0).squeeze()    

# Joint Maximum Likelihood

In [73]:
def rauch_jml(dataset, discrimination=1, max_iter=25):
    """
        Estimates difficulty parameters in an IRT model with a fixed,
        common discrimination using joint maximum likelihood
        
        Alternates between estimating one ability per unique response
        pattern (given the difficulties) and one difficulty per item
        (given the abilities) until the difficulties stop moving
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            discrimination: scalar of discrimination used in model (default to 1)
            max_iter: maximum number of iterations to run
            
        Returns:
            array of difficulty estimates
    """
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    n_items, _ = unique_sets.shape
    
    # Use easy model to seed guess
    betas = rauch_estimate(dataset, discrimination)
    
    # Remove the zero and full count values (np.unique sorts the columns,
    # so these patterns can only be first / last)
    if unique_sets[:, 0].sum() == 0:
        unique_sets = np.delete(unique_sets, 0, axis=1)
        counts = np.delete(counts, 0)

    if unique_sets[:, -1].sum() == n_items:
        unique_sets = np.delete(unique_sets, -1, axis=1)
        counts = np.delete(counts, -1)

    n_takers = unique_sets.shape[1]
    the_sign = discrimination * (-1)**unique_sets
    thetas = np.zeros((n_takers,))

    for iteration in range(max_iter):
        previous_betas = betas.copy()
        
        # Estimate theta, given betas
        for ndx in range(n_takers):
            # Pair each item's sign with its own difficulty.  The previous
            # outer product built an items x items matrix; for a common
            # discrimination it has the same minimiser at n_items times
            # the cost
            scalar = the_sign[:, ndx]

            def _theta_min(theta):
                # logaddexp(0, x) == -log(1 / (1 + exp(x))), overflow safe
                return np.logaddexp(0, scalar * (theta - betas)).sum()

            thetas[ndx] = fminbound(_theta_min, -6, 6)

        # Recenter theta to identify model
        thetas -= thetas.mean()
        thetas /= thetas.std(ddof=1)
            
        # Estimate betas, given_theta
        for ndx in range(n_items):
            def _beta_min(beta):
                # Each pattern is weighted by how often it was observed
                return np.logaddexp(0, (thetas - beta) * the_sign[ndx, :]).dot(counts)
            
            betas[ndx] = fminbound(_beta_min, -6, 6)
            
        if np.abs(previous_betas - betas).max() < 1e-3:
            print(f"Stopped at Iteration {iteration}")
            break
    
    return betas
        

def onepl_jml(dataset, max_iter=25):
    """
        Estimates a single shared discrimination and the item
        difficulties in an IRT model using joint maximum likelihood
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            max_iter: maximum number of iterations to run
            
        Returns:
            discrimination, array of difficulty estimates
    """
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    n_items, _ = unique_sets.shape
    
    # Use easy model to seed guess
    discrimination = 1.0
    betas = rauch_estimate(dataset, discrimination)
    
    # Remove the zero and full count values (np.unique sorts the columns,
    # so these patterns can only be first / last)
    if unique_sets[:, 0].sum() == 0:
        unique_sets = np.delete(unique_sets, 0, axis=1)
        counts = np.delete(counts, 0)

    if unique_sets[:, -1].sum() == n_items:
        unique_sets = np.delete(unique_sets, -1, axis=1)
        counts = np.delete(counts, -1)

    n_takers = unique_sets.shape[1]
    the_sign = (-1)**unique_sets
    thetas = np.zeros((n_takers,))

    for iteration in range(max_iter):
        previous_discrimination = discrimination * 1.0
        
        # Estimate theta, given betas
        for ndx in range(n_takers):
            # Pair each item's sign with its own difficulty.  The previous
            # outer product built an items x items matrix; for a shared
            # discrimination it has the same minimiser at n_items times
            # the cost
            scalar = the_sign[:, ndx] * discrimination

            def _theta_min(theta):
                # logaddexp(0, x) == -log(1 / (1 + exp(x))), overflow safe
                return np.logaddexp(0, scalar * (theta - betas)).sum()

            thetas[ndx] = fminbound(_theta_min, -6, 6)

        # Recenter theta to identify model
        thetas -= thetas.mean()
        thetas /= thetas.std(ddof=1)
            
        # Estimate discrimination and betas, given theta.  Every trial
        # discrimination refits all difficulties (written into betas)
        def _alpha_min(estimate):
            cost = 0
            for ndx in range(n_items):
                def _beta_min(beta):
                    return np.logaddexp(
                        0, (thetas - beta) * the_sign[ndx, :] * estimate).dot(counts)

                betas[ndx] = fminbound(_beta_min, -6, 6)
                cost += _beta_min(betas[ndx])
            return cost
        
        discrimination = fminbound(_alpha_min, 0.25, 5)

        # fminbound returns its best point, which need not be the last one
        # it tried; refit so betas match the returned discrimination
        _alpha_min(discrimination)
            
        if np.abs(previous_discrimination - discrimination) < 1e-3:
            break
    
    return discrimination, betas
    

def twopl_jml(dataset, max_iter=25):
    """
        Estimates per-item discrimination and difficulty in an IRT
        model using joint maximum likelihood
        
        Alternates between estimating one ability per unique response
        pattern and a (discrimination, difficulty) pair per item until
        the difficulties stop moving
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            max_iter: maximum number of iterations to run
            
        Returns:
            array of discriminations, array of difficulty estimates
    """
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    n_items, _ = unique_sets.shape
    
    # Start from unit discrimination and zero difficulty.  (The earlier
    # seed multiplied the approximate estimate by zero, which turns into
    # NaN for an item answered all right or all wrong.)
    discrimination = np.ones((n_items,))
    betas = np.zeros((n_items,))
    
    # Remove the zero and full count values (np.unique sorts the columns,
    # so these patterns can only be first / last)
    if unique_sets[:, 0].sum() == 0:
        unique_sets = np.delete(unique_sets, 0, axis=1)
        counts = np.delete(counts, 0)

    if unique_sets[:, -1].sum() == n_items:
        unique_sets = np.delete(unique_sets, -1, axis=1)
        counts = np.delete(counts, -1)

    n_takers = unique_sets.shape[1]
    the_sign = (-1)**unique_sets
    thetas = np.zeros((n_takers,))

    for iteration in range(max_iter):
        previous_betas = betas.copy()
        
        # Estimate theta, given betas
        for ndx in range(n_takers):
            # Each item's response sign must multiply that same item's
            # discrimination and difficulty.  An outer product pairs every
            # sign with every item, which is not the 2PL likelihood once
            # discriminations differ
            scalar = the_sign[:, ndx] * discrimination

            def _theta_min(theta):
                # logaddexp(0, x) == -log(1 / (1 + exp(x))), overflow safe
                return np.logaddexp(0, scalar * (theta - betas)).sum()

            thetas[ndx] = fminbound(_theta_min, -6, 6)

        # Recenter theta to identify model
        thetas -= thetas.mean()
        thetas /= thetas.std(ddof=1)
            
        # Estimate alpha, betas, given_theta
        for ndx in range(n_items):
            def _alpha_beta_min(estimates):
                # estimates = (discrimination, difficulty) for this item
                return np.logaddexp(
                    0, (thetas - estimates[1]) * the_sign[ndx, :] * estimates[0]).dot(counts)

            otpt = fmin_powell(_alpha_beta_min, (discrimination[ndx], betas[ndx]), disp=False)
            discrimination[ndx] = otpt[0]
            betas[ndx] = otpt[1]
            
        if np.abs(previous_betas - betas).max() < 1e-3:
            print(f'Done at iteration {iteration}')
            break
    
    return discrimination, betas
    

## Conditional Maximum Likelihood (Rasch)

In [74]:
def conditional_probability(dataset, discrimination=1, max_iter=25):
    """
        Estimates difficulty parameters by maximising the likelihood
        of each response pattern conditioned on its total score
        
        The conditional likelihood of a pattern is
        exp(-sum of difficulties of the correct items) divided by the
        elementary symmetric function of exp(-difficulty) at the
        pattern's score.  Items are updated one at a time and the
        estimates are recentred to mean zero after every update
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            discrimination: scalar of discrimination used in model (default to 1)
            max_iter: maximum number of iterations to run
            
        Returns:
            array of difficulty estimates, divided by discrimination
    """
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)

    # Start from zero difficulties; their (zero) mean identifies the
    # solution.  (The earlier seed multiplied the approximate estimate by
    # zero, which turns into NaN for an item answered all right or all
    # wrong.)
    betas = np.zeros((n_items,))
    identifying_mean = betas.mean()

    # Remove the zero and full count values (np.unique sorts the columns,
    # so these patterns can only be first / last)
    if unique_sets[:, 0].sum() == 0:
        unique_sets = np.delete(unique_sets, 0, axis=1)
        counts = np.delete(counts, 0)

    if unique_sets[:, -1].sum() == n_items:
        unique_sets = np.delete(unique_sets, -1, axis=1)
        counts = np.delete(counts, -1)

    # Total score of every remaining pattern
    response_set_sums = unique_sets.sum(axis=0)

    def _denominator(betas):
        """Computes the symmetric functions based on the betas
        
         Indexes by score, left to right
        
        """
        # One polynomial (1 + exp(-beta) * x) per item; their product's
        # coefficients are the elementary symmetric functions
        polynomials = np.c_[np.ones_like(betas), np.exp(-betas)]

        otpt = 1
        for polynomial in polynomials:
            otpt = np.convolve(otpt, polynomial)
        return otpt
 
    for iteration in range(max_iter):
        previous_betas = betas.copy()
        
        for ndx in range(n_items):
            # Contribution of every other item stays fixed during this
            # item's line search
            partial_conv = _denominator(np.delete(betas, ndx))
            
            def min_func(estimate):
                # Writes the trial value into betas so the numerator sees it
                betas[ndx] = estimate
                full_convolution = np.convolve([1, np.exp(-estimate)], partial_conv)
                
                numerator = np.exp(-np.sum(unique_sets * betas[:, None], axis=0))
                denominator = full_convolution[response_set_sums]
                
                return -np.log(numerator / denominator).dot(counts)
            
            betas[ndx] = fminbound(min_func, -5, 5)

            # recenter
            betas += (identifying_mean - betas.mean())
        
        if np.abs(betas - previous_betas).max() < 1e-3:
            print(f'Ended in {iteration} iterations')
            break
            
    return betas / discrimination


# Estimate functions based on approximation

In [75]:
def rauch_estimate(dataset, discrimination=1):
    """
        Estimates the difficulty parameters via the closed-form
        approximation
        
            sqrt(1 + discrimination**2 / 3) * log(n_no / n_yes) / discrimination
    
        Args:
            dataset: [items x participants] matrix of True/False Values
                     (0/1 integers are accepted as well)
            discrimination: scalar of discrimination used in model (default to 1)
            
        Returns:
            array of difficulty estimates, one per item.  An item with
            no correct (or no incorrect) responses yields +/- infinity
    """
    dataset = np.asarray(dataset)
    n_yes = np.count_nonzero(dataset, axis=1)

    # Count incorrect answers from the total.  `~dataset` is a bitwise NOT
    # on integer data, which made every entry count as incorrect
    n_no = dataset.shape[1] - n_yes

    return (np.sqrt(1 + discrimination**2 / 3) * 
            np.log(n_no / n_yes) / discrimination)


def onepl_estimate(dataset):
    """
        Estimates a single shared discrimination, with the item
        difficulties tied to it through the closed-form approximation
        
            sqrt(1 + discrimination**2 / 3) * log(n_no / n_yes) / discrimination
        
        The discrimination minimises the negative marginal
        log-likelihood, integrated over a standard normal ability
        with 61-point fixed quadrature on [-5, 5]
    
        Args:
            dataset: [items x participants] matrix of True/False Values
                     (0/1 integers are accepted as well)
            
        Returns:
            discrimination, array of difficulty estimates
    """
    dataset = np.asarray(dataset)
    n_yes = np.count_nonzero(dataset, axis=1)

    # Count incorrect answers from the total.  `~dataset` is a bitwise NOT
    # on integer data, which made every entry count as incorrect
    n_no = dataset.shape[1] - n_yes
    scalar = np.log(n_no / n_yes)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = (-1)**unique_sets

    # Difficulties implied by a given discrimination
    def _difficulty_from(discrimination):
        return np.sqrt(1 + discrimination**2 / 3) * scalar / discrimination

    # Inline definition of quadrature function: [patterns x theta] values
    # of density times the product of item terms
    def quadrature_function(theta, difficulty, discrimination):
        gauss = 1.0 / np.sqrt(2 * np.pi) * np.exp(-np.square(theta) / 2)
        kernel = the_sign[:, :, None] * np.ones((1, 1, theta.size))
        kernel *= discrimination   
        kernel *= (theta[None, None, :] - difficulty[:, None, None])
        
        return gauss[None, :] * (1.0 / (1.0 + np.exp(kernel))).prod(axis=0)

    # Inline definition of cost function to minimize
    def min_func(estimate):
        otpt = integrate.fixed_quad(quadrature_function, -5, 5, 
                                    (_difficulty_from(estimate), estimate), n=61)[0]
        return -np.log(otpt).dot(counts)
       
    # Perform the minimization
    discrimination = fminbound(min_func, 0.25, 10)
    
    return discrimination, _difficulty_from(discrimination)


def twopl_estimate(dataset, max_iter=25):
    """
        Estimates per-item discrimination, with each difficulty tied
        to its discrimination through the closed-form approximation
        in rauch_estimate
        
        Items are updated one at a time.  The integrand for all items
        is cached at the quadrature nodes and, after each update, the
        updated item's old factor is swapped for its new one
    
        Args:
            dataset: [items x participants] matrix of True/False Values
            max_iter:  maximum number of iterations to run
            
        Returns:
            array of discrimination, array of difficulty estimates
    """
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = (-1)**unique_sets
    
    # The cached integrand is only valid at these nodes, so the same
    # node count and bounds are handed to fixed_quad below
    n_quad, lower, upper = 61, -5, 5
    theta = _get_quadrature_points(n_quad, lower, upper)

    # Inline definition of quadrature function: replaces one item's old
    # logistic factor in partial_int with its new one
    def quadrature_function(theta, discrimination, old_discrimination, 
                            difficulty, old_difficulty,
                            partial_int, the_sign):
        kernel1 = the_sign[:, None] * (theta[None, :] - difficulty)
        kernel1 *= discrimination

        kernel2 = the_sign[:, None] * (theta[None, :] - old_difficulty)
        kernel2 *= old_discrimination

        return partial_int * (1 + np.exp(kernel2)) / (1 + np.exp(kernel1))
    
    # Inline definition of cost function to minimize
    def min_func(estimate, dataset, old_estimate, old_difficulty,
                 partial_int, the_sign):
        new_difficulty = rauch_estimate(dataset, estimate)
        otpt = integrate.fixed_quad(quadrature_function, lower, upper, 
                                    (estimate, old_estimate, 
                                     new_difficulty, old_difficulty,
                                     partial_int, the_sign), n=n_quad)[0]
        return -np.log(otpt).dot(counts)
       
    # Perform the minimization
    initial_guess = np.ones((n_items,))
    difficulties = rauch_estimate(dataset)
    
    for iteration in range(max_iter):
        previous_guess = initial_guess.copy()
        previous_difficulty = difficulties.copy()

        #Quadrature evaluation for values that do not change
        partial_int = _compute_partial_integral(theta, difficulties,
                          initial_guess, the_sign)
        
        for ndx in range(n_items):
            # Single-item view, kept 2d for rauch_estimate
            item_responses = dataset[ndx].reshape(1, -1)

            def min_func_local(estimate):
                return min_func(estimate, item_responses,  
                                previous_guess[ndx], 
                                previous_difficulty[ndx],
                                partial_int, the_sign[ndx])

            initial_guess[ndx] = fminbound(min_func_local, 0.25, 6, xtol=1e-3)

            # rauch_estimate returns a length-1 array for one row; take the
            # element explicitly (implicit array-to-scalar assignment is
            # deprecated in NumPy)
            difficulties[ndx] = rauch_estimate(item_responses, 
                                               initial_guess[ndx])[0]
            
            # Fold the accepted update into the cache before the next item
            partial_int = quadrature_function(theta, initial_guess[ndx], 
                                              previous_guess[ndx], difficulties[ndx],
                                              previous_difficulty[ndx],
                                              partial_int, the_sign[ndx])            

        if np.abs(initial_guess - previous_guess).max() < 1e-3:
            break
            
    return initial_guess, difficulties


# Functions based on full integral

In [76]:
def rauch_estimate_int(dataset, discrimination=1, max_iter=25):
    """
        Estimates each item's difficulty separately by matching its
        observed proportion correct to the model's expected proportion
        under a standard normal ability distribution
        
        The expectation is evaluated with 101-point fixed quadrature
        on [-10, 10] and the difficulty is the root found by brentq
        on [-6, 6]
        
        Args:
            dataset: [items x participants] matrix of True/False Values
                     (0/1 integers are accepted as well)
            discrimination: [array | number] of discrimination used in
                            model (default to 1)
            max_iter: not used by this function
            
        Returns:
            array of difficulty estimates
            
        Raises:
            ValueError: from brentq when no root lies in [-6, 6], e.g. an
                        item answered all right or all wrong
    """
    dataset = np.asarray(dataset)
    n_items = dataset.shape[0]

    # Observed proportion correct per item.  Dividing by the participant
    # count avoids `~dataset`, which is a bitwise NOT on integer data
    n_yes = np.count_nonzero(dataset, axis=1)
    scalar = n_yes / dataset.shape[1]
    
    if np.ndim(discrimination) < 1:
        discrimination = np.full(n_items, discrimination)
   
    # Inline definition of quadrature function.  Stays 1d so that
    # fixed_quad returns a true scalar for brentq (a length-1 array
    # relied on a deprecated implicit conversion)
    def quadrature_function(theta, difficulty, discrimination):
        gauss = 1.0 / np.sqrt(2 * np.pi) * np.exp(-np.square(theta) / 2)
        return gauss / (1.0 + np.exp(discrimination * (difficulty - theta)))

    the_parameters = np.zeros((n_items,))

    # Perform the minimization
    for ndx in range(n_items):
        
        # Solve each item separately: observed minus expected proportion
        def min_zero_local(estimate):
            return (scalar[ndx] - 
                    integrate.fixed_quad(quadrature_function, -10, 10, 
                    (estimate, discrimination[ndx]), n=101)[0])
        
        the_parameters[ndx] = brentq(min_zero_local, -6, 6)
            
    return the_parameters


def onepl_estimate_int(dataset):
    """
        Estimates a single shared discrimination, with the item
        difficulties supplied by rauch_estimate_int for each trial
        discrimination
    
        Args:
            dataset: [items x participants] matrix of True/False Values
            
        Returns:
            discrimination, array of difficulty estimates
    """
    patterns, pattern_counts = np.unique(dataset, axis=1, return_counts=True)
    signs = (-1)**patterns

    # Integrand: normal density times the product of item terms for
    # every pattern; the trailing argument is accepted but not used
    def _integrand(nodes, item_difficulty, slope, _responses):
        density = 1.0 / np.sqrt(2 * np.pi) * np.exp(-np.square(nodes) / 2)

        exponent = signs[:, :, None] * np.ones((1, 1, nodes.size))
        exponent *= slope
        exponent *= (nodes[None, None, :] - item_difficulty[:, None, None])

        pattern_product = (1.0 / (1.0 + np.exp(exponent))).prod(axis=0).squeeze()
        return density[None, :] * pattern_product

    # Negative marginal log-likelihood for a trial discrimination
    def _negative_log_likelihood(slope):
        item_difficulty = rauch_estimate_int(dataset, slope)
        marginal, _ = integrate.fixed_quad(_integrand, -5, 5,
                                           (item_difficulty, slope, patterns), n=61)
        return -np.log(marginal).dot(pattern_counts)

    # Perform the minimization, then refit difficulties at the optimum
    discrimination = fminbound(_negative_log_likelihood, 0.25, 10)
    difficulty = rauch_estimate_int(dataset, discrimination)

    return discrimination, difficulty


def twopl_estimate_int(dataset, max_iter=25):
    """
        Estimates per-item discrimination, with each difficulty
        supplied by rauch_estimate_int for the trial discrimination
        
        Items are updated one at a time.  The integrand for all items
        is cached at the quadrature nodes and, after each update, the
        updated item's old factor is swapped for its new one
    
        Args:
            dataset: [items x participants] matrix of True/False Values
            max_iter:  maximum number of iterations to run
            
        Returns:
            array of discrimination, array of difficulty estimates
    """
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = (-1)**unique_sets
    
    # The cached integrand is only valid at these nodes, so the same
    # node count and bounds are handed to fixed_quad below
    n_quad, lower, upper = 61, -5, 5
    theta = _get_quadrature_points(n_quad, lower, upper)

    # Inline definition of quadrature function: replaces one item's old
    # logistic factor in partial_int with its new one
    def quadrature_function(theta, discrimination, old_discrimination, 
                            difficulty, old_difficulty,
                            partial_int, the_sign):
        kernel1 = the_sign[:, None] * (theta[None, :] - difficulty)
        kernel1 *= discrimination

        kernel2 = the_sign[:, None] * (theta[None, :] - old_difficulty)
        kernel2 *= old_discrimination

        return partial_int * (1 + np.exp(kernel2)) / (1 + np.exp(kernel1))
    
    # Inline definition of cost function to minimize
    def min_func(estimate, dataset, old_estimate, old_difficulty,
                 partial_int, the_sign):
        new_difficulty = rauch_estimate_int(dataset, estimate)
        otpt = integrate.fixed_quad(quadrature_function, lower, upper, 
                                    (estimate, old_estimate, 
                                     new_difficulty, old_difficulty,
                                     partial_int, the_sign), n=n_quad)[0]
        return -np.log(otpt).dot(counts)
       
    # Perform the minimization; the starting difficulties come from the
    # closed-form approximation
    initial_guess = np.ones((n_items,))
    difficulties = rauch_estimate(dataset)
    
    for iteration in range(max_iter):
        previous_guess = initial_guess.copy()
        previous_difficulty = difficulties.copy()

        #Quadrature evaluation for values that do not change
        partial_int = _compute_partial_integral(theta, difficulties,
                          initial_guess, the_sign)
        
        for ndx in range(n_items):
            # Single-item view, kept 2d for rauch_estimate_int
            item_responses = dataset[ndx].reshape(1, -1)

            def min_func_local(estimate):
                return min_func(estimate, item_responses,  
                                previous_guess[ndx], 
                                previous_difficulty[ndx],
                                partial_int, the_sign[ndx])

            initial_guess[ndx] = fminbound(min_func_local, 0.25, 6, xtol=1e-3)

            # rauch_estimate_int returns a length-1 array for one row; take
            # the element explicitly (implicit array-to-scalar assignment
            # is deprecated in NumPy)
            difficulties[ndx] = rauch_estimate_int(item_responses, 
                                                   initial_guess[ndx])[0]
            
            # Fold the accepted update into the cache before the next item
            partial_int = quadrature_function(theta, initial_guess[ndx], 
                                              previous_guess[ndx], difficulties[ndx],
                                              previous_difficulty[ndx],
                                              partial_int, the_sign[ndx])            

        if np.abs(initial_guess - previous_guess).max() < 1e-3:
            break
            
    return initial_guess, difficulties


## Marginal Maximum Likelihood (full quadrature)

In [77]:
def _rauch_estimate_full_abstract(dataset, discrimination=1, max_iter=25):
    """
        Estimates difficulty parameters in an IRT model with full        
        gaussian quadrature and also reports the final cost
        
        Items are updated one at a time by a bounded line search.  The
        integrand for all items is cached at the quadrature nodes and,
        after each update, the updated item's old factor is swapped
        for its new one
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            discrimination: scalar of discrimination used in model (default to 1)
            max_iter: maximum number of iterations to run
            
        Returns:
            array of difficulty estimates, and the cost at those
            estimates: minus the count-weighted sum of the log of the
            integrated cached integrand
    """
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = (-1)**unique_sets

    # Nodes at which the cached integrand is evaluated; the 61 / -5 / 5
    # here must match every fixed_quad call below
    theta = _get_quadrature_points(61, -5, 5)
    
    # Inline definition of quadrature function: multiplying by
    # (1 + exp(kernel2)) removes the item's old factor from partial_int
    # and dividing by (1 + exp(kernel1)) inserts the new one
    def quadrature_function(theta, difficulty, old_difficulty, partial_int, the_sign):
        kernel1 = the_sign[:, None] * (theta[None, :] - difficulty)
        kernel1 *= discrimination

        kernel2 = the_sign[:, None] * (theta[None, :] - old_difficulty)
        kernel2 *= discrimination

        return partial_int * (1 + np.exp(kernel2)) / (1 + np.exp(kernel1))
    
    # Inline definition of cost function to minimize
    def min_func(difficulty, old_difficulty, partial_int, the_sign):
        # The 1e-23 offset keeps the logarithm finite if the integral is 0
        otpt = integrate.fixed_quad(quadrature_function, -5, 5, 
                (difficulty, old_difficulty, partial_int, the_sign), n=61)[0] + 1e-23
        return -np.log(otpt).dot(counts)

    # Get approximate guess to begin with
    initial_guess = rauch_estimate(dataset, discrimination=discrimination)

    for iteration in range(max_iter):
        previous_guess = initial_guess.copy()

        #Quadrature evaluation for values that do not change
        partial_int = _compute_partial_integral(theta, initial_guess,
                          discrimination, the_sign)
                
        for ndx in range(n_items):
            # Minimize each one separately
            value = initial_guess[ndx] * 1.0
            
            def min_func_local(estimate):
                return min_func(estimate, previous_guess[ndx], 
                                partial_int, the_sign[ndx])
            
            # Search only within +/- 0.75 of the current estimate
            initial_guess[ndx] = fminbound(min_func_local, 
                                           value-0.75,
                                           value+0.75)
            
            # Fold the accepted update into the cache before the next item
            partial_int = quadrature_function(theta, initial_guess[ndx], 
                                              previous_guess[ndx], partial_int, the_sign[ndx])

        if(np.abs(initial_guess - previous_guess).max() < 0.001):
            break
            
    # Get the value of the cost function.  The lambda ignores its argument
    # and returns the cached values, so this assumes fixed_quad evaluates
    # at the same 61 nodes as theta.  partial_int only exists once the
    # loop has run at least once (max_iter >= 1)
    cost = integrate.fixed_quad(lambda x: partial_int, -5, 5, n=61)[0]
    
    return initial_guess, -np.log(cost).dot(counts)


def rauch_estimate_full(dataset, discrimination=1, max_iter=25):
    """
        Estimates difficulty parameters in an IRT model with full        
        gaussian quadrature
        
        Thin wrapper that keeps the estimates from
        _rauch_estimate_full_abstract and drops the cost
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            discrimination: scalar of discrimination used in model (default to 1)
            max_iter: maximum number of iterations to run
            
        Returns:
            array of difficulty estimates
    """
    difficulty, _cost = _rauch_estimate_full_abstract(dataset, discrimination, max_iter)
    return difficulty


def onepl_estimate_full(dataset, max_iter=25):
    """
        Estimates a single shared discrimination and the item
        difficulties in an IRT model with full gaussian quadrature
        
        The discrimination is found by a bounded search on [0.5, 4]
        over the cost reported by _rauch_estimate_full_abstract
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            max_iter: maximum number of iterations for each difficulty fit
            
        Returns:
            discrimination, array of difficulty estimates
    """
    def min_func_local(estimate):
        _, cost = _rauch_estimate_full_abstract(dataset, estimate, max_iter)
        return cost
    
    discrimination = fminbound(min_func_local, 0.5, 4)
    
    # Forward max_iter so the final fit uses the same iteration budget as
    # the fits made during the search (it was silently left at its default)
    return discrimination, rauch_estimate_full(dataset, discrimination, max_iter)


def twopl_estimate_full(dataset, max_iter=25):
    """
        Estimates per-item discrimination and difficulty in an IRT
        model with full gaussian quadrature
        
        Starts from the twopl_estimate solution and refines each item's
        (discrimination, difficulty) pair in turn with Powell's method.
        The integrand for all items is cached at the quadrature nodes
        and, after each update, the updated item's old factor is
        swapped for its new one
        
        Args:
            dataset: [items x participants] matrix of True/False Values
            max_iter: maximum number of iterations to run
            
        Returns:
            array of discrimination, array of difficulty estimates
    """
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = (-1)**unique_sets

    # Nodes at which the cached integrand is evaluated; the 61 / -5 / 5
    # here must match the fixed_quad call below
    theta = _get_quadrature_points(61, -5, 5)
    
    # Inline definition of quadrature function.  estimates and
    # old_estimates are (discrimination, difficulty) pairs; the item's old
    # factor is removed from partial_int and the new one inserted
    def quadrature_function(theta, estimates, old_estimates, partial_int, the_sign):
        kernel1 = the_sign[:, None] * (theta[None, :] - estimates[1])
        kernel1 *= estimates[0]

        kernel2 = the_sign[:, None] * (theta[None, :] - old_estimates[1])
        kernel2 *= old_estimates[0]

        return partial_int * (1 + np.exp(kernel2)) / (1 + np.exp(kernel1))
    
    # Inline definition of cost function to minimize
    def min_func(estimates, old_estimates, partial_int, the_sign):
        # The 1e-23 offset keeps the logarithm finite if the integral is 0
        otpt = integrate.fixed_quad(quadrature_function, -5, 5, 
                (estimates, old_estimates, partial_int, the_sign), n=61)[0] + 1e-23
        return -np.log(otpt).dot(counts)

    # Get approximate guess to begin with from twopl_estimate; column 0
    # holds discrimination, column 1 holds difficulty
    a1, b1 = twopl_estimate(dataset)
    initial_guess = np.c_[a1, b1]

    for iteration in range(max_iter):
        previous_guess = initial_guess.copy()

        #Quadrature evaluation for values that do not change
        partial_int = _compute_partial_integral(theta, initial_guess[:, 1],
                          initial_guess[:, 0], the_sign)
                
        for ndx in range(n_items):
            # Minimize each one separately
            value = initial_guess[ndx] * 1.0
            
            def min_func_local(estimate):
                return min_func(estimate, previous_guess[ndx], 
                                partial_int, the_sign[ndx])

            initial_guess[ndx] = fmin_powell(min_func_local, value, xtol=1e-3, disp=0)
            # Fold the accepted update into the cache before the next item
            partial_int = quadrature_function(theta, initial_guess[ndx], 
                                              previous_guess[ndx], partial_int, the_sign[ndx])

        if(np.abs(initial_guess - previous_guess).max() < 0.001):
            break
                
    return initial_guess[:, 0], initial_guess[:, 1]



# Create a set of synthetic data

In [89]:
# Seed NumPy's global generator so the synthetic data set, and every
# estimate derived from it, is reproducible on Restart & Run All
np.random.seed(0)

n_items, n_participants = 30, 1000
diffc = np.linspace(-2.5, 2.5, n_items)
discr = 1.0 + np.random.rand(n_items,)
thetas = np.random.randn(n_participants)

syn_data = create_synthetic_irt_dichotomous(diffc, discr, thetas)

In [80]:
# Time the three difficulty-only estimators on the same data with the
# discrimination fixed at 1.0; timestamps bracket each call
t1 = time()
b_est = rauch_estimate(syn_data, 1.0)
t2 = time()
b_int = rauch_estimate_int(syn_data, 1.0)
t3 = time()
b_full = rauch_estimate_full(syn_data, 1.0)
t4 = time()

# Elapsed wall-clock seconds, rounded to two decimals
print(f'Approximation Time = {np.round(t2 - t1, 2)} seconds')
print(f'Separable Time = {np.round(t3 - t2, 2)} seconds')
print(f'Full Time = {np.round(t4 - t3, 2)} seconds')

Approximation Time = 0.0 seconds
Separable Time = 0.01 seconds
Full Time = 0.57 seconds


In [81]:
# Time the three shared-discrimination estimators on the same data; each
# result is a (discrimination, difficulties) pair
t1 = time()
a_est = onepl_estimate(syn_data)
t2 = time()
a_int = onepl_estimate_int(syn_data)
t3 = time()
a_full = onepl_estimate_full(syn_data)
t4 = time()

# Elapsed wall-clock seconds, rounded to two decimals
print(f'Approximation Time = {np.round(t2 - t1, 2)} seconds')
print(f'Separable Time = {np.round(t3 - t2, 2)} seconds')
print(f'Full Time = {np.round(t4 - t3, 2)} seconds')

Approximation Time = 0.15 seconds
Separable Time = 0.16 seconds
Full Time = 7.18 seconds


In [90]:
# Time the three per-item-discrimination estimators on the same data; each
# result is a (discriminations, difficulties) pair
t1 = time()
c_est = twopl_estimate(syn_data)
t2 = time()
c_int = twopl_estimate_int(syn_data)
t3 = time()
c_full = twopl_estimate_full(syn_data)
t4 = time()

# Elapsed wall-clock seconds, rounded to two decimals
print(f'Approximation Time = {np.round(t2 - t1, 2)} seconds')
print(f'Separable Time = {np.round(t3 - t2, 2)} seconds')
print(f'Full Time = {np.round(t4 - t3, 2)} seconds')

Approximation Time = 1.05 seconds
Separable Time = 1.65 seconds
Full Time = 8.67 seconds


## Dependency Plots