## Theory

Suppose that a group of $P$ students is given a test with $I$ multiple choice questions. Let $Y_{pi}=1$ mean that student $p \in \{1, ..., P\}$ has the correct answer on item $i \in \{1, ..., I\}$. Assuming that all responses are conditionally independent given the parameters $a, b, \theta$, we model the outcomes with:

$$Y_{pi} | a,b,\theta \sim Ber(\frac{e^{a_i\theta_p - b_i}}{1 + e^{a_i\theta_p - b_i}})$$

where $\theta_p$ measures the student's learning ability, $a_i$ measures the items discriminatory power, and $b_i$ measures the item's difficulty. Take the following prior assumptions on the distribution of parameters $\theta, a, b$, where all the priors are assumed independent: $a_i \sim \mathcal{N}(0, \sigma_a^2), b_i \sim \mathcal{N}(0, \sigma_b^2), \theta_p \sim \mathcal{N}(0,1)$. Then the posterior distribution of $(a, b, \theta)$ is given by

$$\pi(a,b,\theta | y) = exp\bigr\{ \frac{1}{2\sigma_a^2}||a||^2 - \frac{1}{2\sigma_b^2}||b||^2 + \frac{1}{2}||\theta||^2 + \sum_{p,i} y_{pi}(a_i\theta_p - b_i) - log(1 + e^{a_i\theta_p - b_i}) \bigl\}$$

and the full conditionals (the conditional distribution of one variable given all others) are given by

$$\pi(a_i | b,\theta,y) = exp\bigl\{ \frac{a_i^2}{2\sigma_a^2} + \sum_{p=1}^P a_i y_{pi} \theta_p - log(1+e^{a_i\theta_p - b_i}) \bigr\}$$


$$\pi(b_i | a, \theta, y) = exp\bigl\{ \frac{b_i^2}{2\sigma_b^2} + \sum_{p=1}^P y_{pi}b_i - log(1+e^{a_i\theta_p - b_i}) \bigr \}$$

$$\pi(\theta_p | a, b, y) = exp\bigl\{ \frac{\theta_p^2}{2} + \sum_{i=1}^I a_iy_{pi}\theta_p - log(1+e^{a_i\theta_p - b_i}) \bigr\}$$

Since these full conditionals are not easy to sample from, below is an implementation of the Metropolis-within-Gibbs sampler to make sampling easier.

In [1]:
import torch
from tqdm import trange

In [52]:
def loga_conditional(a, b, theta, y, sigma):
    """Function to compute the (log) conditional probability of an observed value of 
    variable a at a given index i. To get the actual conditional probability, raise
    Euler's constant to the power of the returned value.
    
    Parameters
    ----------
    a : torch.tensor
        The observed value of a at index i
    b : torch.tensor
        The observed value of b at index i
    theta : torch.tensor
        A vector of observed theta values
    y : torch.tensor
        The ith column vector of the data Y
    sigma : float
        The variance of variable a
        
    Returns
    -------
    torch.tensor
        The (log) conditional probability
    """
    
    assert theta.shape[0] == y.shape[0]
    
    logsum = 0
    for p in range(y.shape[0]):
        logsum += a * y[p] * theta[p] - torch.log(1 + torch.exp(a*theta[p]-b))
    
    return (torch.pow(a, 2)/(2*sigma**2)) + logsum

def logb_conditional(a, b, theta, y, sigma):
    """Function to compute the (log) conditional probability of an observed value of 
    variable b at a given index i. To get the actual conditional probability, raise
    Euler's constant to the power of the returned value.
    
    Parameters
    ----------
    a : torch.tensor
        The observed value of a at index i
    b : torch.tensor
        The observed value of b at index i
    theta : torch.tensor
        A vector of observed theta values
    y : torch.tensor
        The ith column vector of the data Y
    sigma : float
        The variance of variable a
        
    Returns
    -------
    torch.tensor
        The (log) conditional probability
    """
    
    assert theta.shape[0] == y.shape[0]
    
    logsum = 0
    for p in range(y.shape[0]):
        logsum += b * y[p] - torch.log(1 + torch.exp(a*theta[p]-b))
    
    return (torch.pow(b, 2)/(2*sigma**2)) + logsum

def logtheta_conditional(a, b, theta, y):
    """Function to compute the (log) conditional probability of an observed value of 
    variable theta at a given index p. To get the actual conditional probability, raise
    Euler's constant to the power of the returned value.
    
    Parameters
    ----------
    a : torch.tensor
        A vector of observed a values
    b : torch.tensor
        A vector of observed b values
    theta : torch.tensor
        The observed value of theta at index p
    y : torch.tensor
        The pth row vector of the data Y
        
    Returns
    -------
    torch.tensor
        The (log) conditional probability
    """
    
    assert a.shape == b.shape
    
    logsum = 0
    for i in range(a.shape[0]):
        logsum += a[i] * y[i] * theta - torch.log(1 + torch.exp(a[i]*theta - b[i]))
    
    return (torch.pow(theta, 2)/2) + logsum

In [3]:
def metropolis(prev_sample, sigma, full_cond, *full_cond_args):
    
    prop = prev_sample + torch.distributions.MultivariateNormal(torch.zeros(len(prev_sample)), torch.eye(sigma))
        
    logr = full_cond(prop, *full_cond_args) - full_cond(prev_sample, *full_cond_args)  # TODO: *?
    A = torch.min(1, torch.exp(logr))
    U = torch.distributions.Uniform(torch.tensor([0.0]), torch.tensor([1.0]))
        
    if U <= A:
        samples.append(prop)
    else:
        samples.append(prev_sample)
              
    print(f"Done sampling from {full_cond}")
    return

In [4]:
def gibbs(init_a, init_b, init_theta, y, sigma_a, sigma_b, sigma_t, niter=10000):
    
    assert init_a.shape == init_b.shape
    assert init_theta.shape[1] == y.shape[0]
    
    I = len(init_a)
    P = len(init_theta)
    
    A = torch.empty(size=(niter, I))
    B = torch.empty(size=(niter, I))
    THETA = torch.empty(size=(niter, P))
    
    A[0] = init_a
    B[0] = init_b
    THETA[0] = init_theta
    
    print(f"Starting Gibbs sampler... \n--------------------------------------------\n")
    for s in trange(1, niter):
        
        A[s] = metropolis(A[s-1], sigma_a, loga_conditional, A[s-1], B[s-1], THETA[s-1], y, sigma_a)
        B[s] = metropolis(B[s-1], sigma_b, logb_conditional, B[s-1], A[s], THETA[s-1], y, sigma_b)
        THETA[s] = metropolis(THETA[s-1], sigma_t, logtheta_conditional, THETA[s], A[s], B[s], y, sigma_t)
    
        if s % 100 == 0:
            print(f"Current samples at iteration {s}: \nA\n{A[s]} \nB\n{B[s]} \nTHETA\n{THETA[s]}")
            print("\n--------------------------------------------\n")
    
    print("Done sampling.")
    return A, B, THETA

In [60]:
# TEST

I = 10  # exam items
P = 10  # pupils
sigma_a, sigma_b, sigma_t = 100.0, 100.0, 1.0

init_a = torch.distributions.MultivariateNormal(torch.zeros(I), sigma_a*torch.eye(I)).sample()
init_b = torch.distributions.MultivariateNormal(torch.zeros(I), sigma_b*torch.eye(I)).sample()
init_t = torch.distributions.MultivariateNormal(torch.zeros(P), sigma_t*torch.eye(P)).sample()

true_a = torch.tensor([1, 0.9, 0.01, 0.5, 0.7, 0.4, 0.03, 0.9, 0.8, 1])  # items' discriminatory power
true_b = torch.tensor([0.01, 0.9, 1, 0.8, 0.2, 0.3, 0.88, 1, 0.3, 0.5])  # items' difficulty
true_theta = torch.tensor([1, 1, 0, 0.5, 0.7, 0.1, 0.3, 0.9, 0.6, 0.7])  # students' skills

# exam data:
Y = torch.empty(size=(P, I))
for i in range(P):
    for j in range(I):
        p = torch.exp(true_a[i]*true_theta[j] - true_b[i]) / (1 + torch.exp(true_a[i]*true_theta[j] - true_b[i]))
        B = torch.distributions.Bernoulli(p)
        Y[i, j] = B.sample()
        
print(Y)

tensor([[1., 0., 1., 0., 1., 0., 0., 1., 0., 1.],
        [1., 1., 1., 1., 0., 0., 1., 1., 0., 0.],
        [0., 0., 1., 0., 1., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 1., 0.],
        [0., 0., 1., 1., 0., 1., 0., 1., 0., 0.],
        [1., 1., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 1., 0., 0., 1.],
        [0., 0., 0., 0., 1., 1., 0., 1., 1., 0.],
        [0., 0., 1., 1., 1., 0., 1., 0., 0., 0.]])


In [61]:
print(init_a[0])
print(init_b[0])
print(init_t)
print(Y[:, 0])

tensor(-11.5201)
tensor(14.4100)
tensor([-1.8374, -1.3133,  1.3199, -0.7984,  0.4708, -1.8918,  0.2370,  0.7092,
        -1.1899, -0.2501])
tensor([1., 1., 0., 0., 0., 1., 0., 0., 0., 0.])
<class 'float'>


In [25]:
print(init_t.shape[0])
print(Y.shape[0])

10
10


In [56]:
print(loga_conditional(a=init_a[0], b=init_b[0], theta=init_t, y=Y[:, 0], sigma=sigma_a))
print(logb_conditional(a=init_a[0], b=init_b[0], theta=init_t, y=Y[:, 0], sigma=sigma_b))
print(logtheta_conditional(a=init_a, b=init_b, theta=theta[0], y=Y[0,]))

tensor(-19.6841)
tensor(-20.2758)
tensor(-48.0334)


In [None]:
# TODO:
#  1. Implement Gibbs as if full-conditionals could be sampled from -> test w/ normal FCs
#  2. Sub-implement MH to sample from FCs -> test w/ normal FCs
#  3. Test MH-within-Gibbs sampler with actual FCs
#  4. Check q,p proposal and target distributions from notes