# Sanity Check For Click-based Lambdas

In [1]:
import os
os.system("export LC_ALL=en_US.UTF-8")
os.system("export LANG=en_US.UTF-8")

%matplotlib inline

import numpy as np
np.set_printoptions(precision=2, linewidth=np.inf, suppress=True)
np.seterr(invalid='ignore', divide='ignore')

import cPickle as pickle
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb

from matplotlib.ticker import MultipleLocator

from ipywidgets import interact, interactive, Dropdown, FloatText, VBox, HBox

from IPython.display import display

# Load the click models for the queries of interest.
# with open('./data/model_query_uniform_lambdas_v1_collection_c2.pkl') as ifile:
# with open('./data/model_query_softmax_lambdas_v1_collection_c2.pkl') as ifile:
# with open('./data/model_query_uniform_lambdas_v2_collection_c2.pkl') as ifile:
with open('./data/model_query_softmax_lambdas_v2_collection_c2.pkl') as ifile:
# with open('./data/model_query_uniform_lambdas_v3_collection_c2.pkl') as ifile:
# with open('./data/model_query_softmax_lambdas_v3_collection_c2.pkl') as ifile:
    MQD = pickle.load(ifile)

# For reproducibility -- re-seed the click models' RNGs.
for click_model_type in MQD:
    for query in MQD[click_model_type]:
        MQD[click_model_type][query]['model'].seed = 42

# Available Click Models

In [2]:
MQD.keys()

['DCM', 'CM', 'DBN', 'CCM', 'PBM', 'UBM']

# MSE Error In Delta Estimation

In [3]:
query_model_losses = {}
for query in MQD['CM']:
    model_losses = {}
    for click_model_name in MQD:
        model = MQD[click_model_name][query]['model']
        relevances = MQD[click_model_name][query]['relevances']
        n_documents = len(relevances)

        test_lambdas = np.zeros((n_documents, n_documents), dtype='float64')

        if click_model_name == 'CM':
            p_attract = model.click_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

        elif click_model_name == 'PBM':
            p_attract = model.click_proba
            p_exam = model.exam_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_exam[0] * p_attract[j]) * p_exam[1] * p_attract[i]

        elif click_model_name == 'CCM':
            p_attract = model.p_attraction
            tau1 = 1.0 - model.p_stop_noclick
            tau2 = 1.0 - model.p_stop_click_norel
            tau3 = 1.0 - model.p_stop_click_rel

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attract[j]) * tau1 * p_attract[i]

        elif click_model_name == 'DBN':
            p_attract = model.click_proba
            p_cont = 1.0 - model.stop_proba
            p_abandon = model.abandon_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

        elif click_model_name == 'DCM':
            p_attraction = model.click_proba
            p_cont = 1.0 - model.stop_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_attraction[i]

        elif click_model_name == 'UBM':
            p_attraction = model.p_attraction
            p_examination = model.p_examination

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_examination[1, -1] * p_attraction[i]

        else:
            raise ValueError('unknow click model: %s' % click_model_name)

        # This is what we want to estimate with click lambdas?
        test_lambdas = test_lambdas - test_lambdas.T

        viewed_loss, total_loss = np.empty(10), np.empty(10)
        for i in range(10):
            viewed_lambdas = MQD[click_model_name][query]['stats'][1000000]['viewed_lambdas'][i]
            total_lambdas = MQD[click_model_name][query]['stats'][1000000]['total_lambdas'][i]

            viewed_loss[i] = np.mean((test_lambdas - viewed_lambdas)**2)
            total_loss[i] = np.mean((test_lambdas - total_lambdas)**2)
        
        model_losses[click_model_name] = (np.mean(viewed_loss), np.std(viewed_loss),
                                          np.mean(total_loss), np.std(total_loss))
    query_model_losses[query] = model_losses

print '   QUERY     MODEL     VL/MSE       VL/STD       TL/MSE       TL/STD'
print '  -------   -------   --------     --------     --------     --------'
for click_model_name in query_model_losses['2548']:
    for query in query_model_losses:
        params = (query, click_model_name) + query_model_losses[query][click_model_name]
        print '{0:>8s} {1:>8s} {2:>12.6f} {3:>12.6f} {4:>12.6f} {5:>12.6f}'.format(*params)

   QUERY     MODEL     VL/MSE       VL/STD       TL/MSE       TL/STD
  -------   -------   --------     --------     --------     --------
    2548      DCM     0.009095     0.000404     0.000015     0.000005
   77932      DCM     0.008267     0.000459     0.000018     0.000003
    1153      DCM     0.003229     0.000130     0.000025     0.000005
   42696      DCM     0.000683     0.000076     0.000009     0.000002
   49111      DCM     0.004645     0.000176     0.000023     0.000004
  297115      DCM     0.002914     0.000228     0.000010     0.000002
   39492      DCM     0.003121     0.000184     0.000021     0.000003
   59560      DCM     0.002473     0.000158     0.000009     0.000002
   28658      DCM     0.006831     0.000392     0.000027     0.000006
  486195      DCM     0.004756     0.000160     0.000027     0.000004
    2548       CM     0.056033     0.000000     0.000008     0.000002
   77932       CM     0.055497     0.000000     0.000012     0.000003
    1153       CM    

# Click Model

In [4]:
click_model_name = 'UBM'

printed_model_characteristics = False

for query in MQD[click_model_name]:
    model = MQD[click_model_name][query]['model']
    relevances = MQD[click_model_name][query]['relevances']
    n_documents = len(relevances)
    
    deltas = relevances[:, None] - relevances[None, :]
    test_lambdas = np.zeros((n_documents, n_documents), dtype='float64')

    if click_model_name == 'CM':
        p_attract = model.click_proba    
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'CM model'
            print '--------'
            print 'Attractiveness:', p_attract
            print
            
        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

    elif click_model_name == 'PBM':
        p_attract = model.click_proba
        p_exam = model.exam_proba
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'PBM model'
            print '---------'
            print 'Attractiveness:', p_attract
            print 'Examination:   ', p_exam
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_exam[0] * p_attract[j]) * p_exam[1] * p_attract[i]

    elif click_model_name == 'CCM':
        p_attract = model.p_attraction
        tau1 = 1.0 - model.p_stop_noclick
        tau2 = 1.0 - model.p_stop_click_norel
        tau3 = 1.0 - model.p_stop_click_rel
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'PBM model'
            print '---------'
            print 'Attractiveness:', p_attract
            print 'Tau1:          ', tau1
            print 'Tau2:          ', tau2
            print 'Tau3:          ', tau3
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attract[j]) * tau1 * p_attract[i]

    elif click_model_name == 'DBN':
        p_attract = model.click_proba
        p_cont = 1.0 - model.stop_proba
        p_abandon = model.abandon_proba
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'DBN model'
            print '---------'
            print 'Attractiveness:', p_attract
            print 'Continuation:  ', p_cont
            print 'Abandonment:   ', p_abandon

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

    elif click_model_name == 'DCM':
        p_attraction = model.click_proba
        p_cont = 1.0 - model.stop_proba
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'DCM model'
            print '---------'
            print 'Attractiveness:', p_attraction
            print 'Continuation:  ', p_cont
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_attraction[i]
        
    elif click_model_name == 'UBM':
        p_attraction = model.p_attraction
        p_examination = model.p_examination
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'UBM model'
            print '---------'
            print 'Attractiveness:', p_attraction
            print 'Examination:  '
            print p_examination[:,[9] + range(9)].T
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_examination[1, -1] * p_attraction[i]

    else:
        raise ValueError('unknow click model: %s' % click_model_name)

    # This is what we want to estimate with click lambdas?
    test_lambdas = test_lambdas - test_lambdas.T

    viewed_loss, total_loss = np.empty(10), np.empty(10)
    for i in range(10):
        viewed_lambdas = MQD[click_model_name][query]['stats'][1000000]['viewed_lambdas'][i]
        total_lambdas = MQD[click_model_name][query]['stats'][1000000]['total_lambdas'][i]
    
        viewed_loss[i] = np.mean((test_lambdas - viewed_lambdas)**2)
        total_loss[i] = np.mean((test_lambdas - total_lambdas)**2)        
    
    lambdas = MQD[click_model_name][query]['stats'][1000000]['lambdas'][0]
    
    viewed_lambdas = MQD[click_model_name][query]['stats'][1000000]['viewed_lambdas'][0]
    viewed_counts = MQD[click_model_name][query]['stats'][1000000]['viewed_counts'][0]
    
    total_lambdas = MQD[click_model_name][query]['stats'][1000000]['total_lambdas'][0]
    total_counts = MQD[click_model_name][query]['stats'][1000000]['total_counts'][0]
    
    print 'deltas:'
    print '-------'
    print deltas
    print
    print 'test lambdas:'
    print '-------------'
    print test_lambdas
    print 
    print 'viewed lambdas'
    print '--------------'
    print 'MSE: %.6f (+/- %.6f SE)' % (np.mean(viewed_loss), np.std(viewed_loss))
    print viewed_lambdas
    print
    print 'total lambdas'
    print '--------------'
    print 'MSE: %.6f (+/- %.6f SE)' % (np.mean(total_loss), np.std(total_loss))
    print total_lambdas
    print
    print 'test lambdas / deltas:'
    print '----------------------'
    print np.nan_to_num(test_lambdas / deltas)
    print
    print 'viewed lambdas / deltas:'
    print '----------------------'
    print np.nan_to_num(viewed_lambdas / deltas)
    print
    print 'total lambdas / deltas:'
    print '----------------------'
    print np.nan_to_num(total_lambdas / deltas)
    print
    print 'lambdas:'
    print lambdas
    print
    print 'total counts:'
    print '-------------'
    print total_counts
    print
    print 'viewed counts:'
    print '--------------'
    print viewed_counts
    print
    print 'viewed / total counts:'
    print '----------------------'
    print np.nan_to_num(1.0 * viewed_counts / total_counts)
    break
            

UBM model
---------
Attractiveness: [ 0.59  0.09  0.11  0.21  0.14  0.1   0.05  0.23  0.08  0.11]
Examination:  
[[ 1.    0.89  0.52  0.35  0.24  0.18  0.14  0.11  0.1   0.09]
 [ 0.2   0.99  0.47  0.26  0.15  0.1   0.07  0.05  0.04  0.03]
 [ 0.2   0.2   1.    0.68  0.39  0.26  0.18  0.12  0.09  0.08]
 [ 0.2   0.2   0.2   1.    0.73  0.47  0.3   0.21  0.16  0.13]
 [ 0.2   0.2   0.2   0.2   1.    0.77  0.5   0.32  0.25  0.2 ]
 [ 0.2   0.2   0.2   0.2   0.2   1.    0.81  0.53  0.38  0.3 ]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   1.    0.81  0.58  0.46]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   0.2   1.    0.9   0.69]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   1.    0.97]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   1.  ]]

deltas:
-------
[[ 0.    0.5   0.48  0.38  0.45  0.49  0.54  0.36  0.51  0.48]
 [-0.5   0.   -0.02 -0.12 -0.04 -0.    0.05 -0.14  0.01 -0.02]
 [-0.48  0.02  0.   -0.1  -0.03  0.01  0.06 -0.12  0.03 -0.01]
 [-0.38  0.12  0.1   0.    0.07  0.11  0.16 -0.02 

# Probabilities of Ranking-Click Patterns for UBM model

## Helper Code Generating Ranking-Click Paterns and Their Probabilities

In [5]:
import numpy as np
from itertools import permutations

def generate_clicks(n):
    i = 0
    while i < (1 << n):
        cs = []
        for j in range(n):
            cs.append(1 if (i & (1 << (n - 1 - j))) > 0 else 0)
        yield cs
        i += 1

def generate_latex_table_header():
    return r'\begin{array}{|c|c|l|}' + '\n' + r'\hline\hline'

def generate_latex_table_body(ranking, clicks):
    last_c_r = 0

    if ranking.index('y') < ranking.index('x'):
        return None

    result = ''.join(ranking) + ' & ' + ''.join(map(str, clicks)) + ' & '

    for r, c_r in enumerate(clicks):
        if c_r == 1:
            result += r'\gamma_{%d,%d}\alpha_{%c}' % (last_c_r, r + 1, ranking[r])
            last_c_r = r + 1
        else:
            result += r'(1 - \gamma_{%d,%d}\alpha_{%c})' % (last_c_r, r + 1, ranking[r])
        if r + 1 < len(clicks):
            result += r' \cdot '
    result += r'\\'
    return result

def generate_latex_table_footer():
    return r'\hline\hline' + '\n' + r'\end{array}'

rankings = list(permutations('xyz'))
clicks = list(generate_clicks(3))

print generate_latex_table_header()

for i, r in enumerate(rankings):
    for c in clicks:
        s = generate_latex_table_body(r, c)
        if s is not None:
            print s
    if s is not None and i + 1 < len(rankings):
        print '\hline'

print generate_latex_table_footer()

\begin{array}{|c|c|l|}
\hline\hline
xyz & 000 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{y}) \cdot (1 - \gamma_{0,3}\alpha_{z})\\
xyz & 001 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{y}) \cdot \gamma_{0,3}\alpha_{z}\\
xyz & 010 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{y} \cdot (1 - \gamma_{2,3}\alpha_{z})\\
xyz & 011 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{y} \cdot \gamma_{2,3}\alpha_{z}\\
xyz & 100 & \gamma_{0,1}\alpha_{x} \cdot (1 - \gamma_{1,2}\alpha_{y}) \cdot (1 - \gamma_{1,3}\alpha_{z})\\
xyz & 101 & \gamma_{0,1}\alpha_{x} \cdot (1 - \gamma_{1,2}\alpha_{y}) \cdot \gamma_{1,3}\alpha_{z}\\
xyz & 110 & \gamma_{0,1}\alpha_{x} \cdot \gamma_{1,2}\alpha_{y} \cdot (1 - \gamma_{2,3}\alpha_{z})\\
xyz & 111 & \gamma_{0,1}\alpha_{x} \cdot \gamma_{1,2}\alpha_{y} \cdot \gamma_{2,3}\alpha_{z}\\
\hline
xzy & 000 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{z}) \cdot (1 - \gamma_{0,3}\alpha_{y})\\
xzy & 001 & 

$$
\begin{array}{|c|c|l|}
\hline\hline
xyz & 000 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{y}) \cdot (1 - \gamma_{0,3}\alpha_{z})\\
xyz & 001 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{y}) \cdot \gamma_{0,3}\alpha_{z}\\
xyz & 010 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{y} \cdot (1 - \gamma_{2,3}\alpha_{z})\\
xyz & 011 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{y} \cdot \gamma_{2,3}\alpha_{z}\\
xyz & 100 & \gamma_{0,1}\alpha_{x} \cdot (1 - \gamma_{1,2}\alpha_{y}) \cdot (1 - \gamma_{1,3}\alpha_{z})\\
xyz & 101 & \gamma_{0,1}\alpha_{x} \cdot (1 - \gamma_{1,2}\alpha_{y}) \cdot \gamma_{1,3}\alpha_{z}\\
xyz & 110 & \gamma_{0,1}\alpha_{x} \cdot \gamma_{1,2}\alpha_{y} \cdot (1 - \gamma_{2,3}\alpha_{z})\\
xyz & 111 & \gamma_{0,1}\alpha_{x} \cdot \gamma_{1,2}\alpha_{y} \cdot \gamma_{2,3}\alpha_{z}\\
\hline
xzy & 000 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{z}) \cdot (1 - \gamma_{0,3}\alpha_{y})\\
xzy & 001 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{z}) \cdot \gamma_{0,3}\alpha_{y}\\
xzy & 010 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{z} \cdot (1 - \gamma_{2,3}\alpha_{y})\\
xzy & 011 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{z} \cdot \gamma_{2,3}\alpha_{y}\\
xzy & 100 & \gamma_{0,1}\alpha_{x} \cdot (1 - \gamma_{1,2}\alpha_{z}) \cdot (1 - \gamma_{1,3}\alpha_{y})\\
xzy & 101 & \gamma_{0,1}\alpha_{x} \cdot (1 - \gamma_{1,2}\alpha_{z}) \cdot \gamma_{1,3}\alpha_{y}\\
xzy & 110 & \gamma_{0,1}\alpha_{x} \cdot \gamma_{1,2}\alpha_{z} \cdot (1 - \gamma_{2,3}\alpha_{y})\\
xzy & 111 & \gamma_{0,1}\alpha_{x} \cdot \gamma_{1,2}\alpha_{z} \cdot \gamma_{2,3}\alpha_{y}\\
\hline
zxy & 000 & (1 - \gamma_{0,1}\alpha_{z}) \cdot (1 - \gamma_{0,2}\alpha_{x}) \cdot (1 - \gamma_{0,3}\alpha_{y})\\
zxy & 001 & (1 - \gamma_{0,1}\alpha_{z}) \cdot (1 - \gamma_{0,2}\alpha_{x}) \cdot \gamma_{0,3}\alpha_{y}\\
zxy & 010 & (1 - \gamma_{0,1}\alpha_{z}) \cdot \gamma_{0,2}\alpha_{x} \cdot (1 - \gamma_{2,3}\alpha_{y})\\
zxy & 011 & (1 - \gamma_{0,1}\alpha_{z}) \cdot \gamma_{0,2}\alpha_{x} \cdot \gamma_{2,3}\alpha_{y}\\
zxy & 100 & \gamma_{0,1}\alpha_{z} \cdot (1 - \gamma_{1,2}\alpha_{x}) \cdot (1 - \gamma_{1,3}\alpha_{y})\\
zxy & 101 & \gamma_{0,1}\alpha_{z} \cdot (1 - \gamma_{1,2}\alpha_{x}) \cdot \gamma_{1,3}\alpha_{y}\\
zxy & 110 & \gamma_{0,1}\alpha_{z} \cdot \gamma_{1,2}\alpha_{x} \cdot (1 - \gamma_{2,3}\alpha_{y})\\
zxy & 111 & \gamma_{0,1}\alpha_{z} \cdot \gamma_{1,2}\alpha_{x} \cdot \gamma_{2,3}\alpha_{y}\\
\hline\hline
\end{array}
$$

## X skipped and Y clicked
Binary random variable $\lambda_{y,x}$ is 1 only if the document $x$ was presented above document $y$ and the user clicked on document $y$. The following table show ranking-click paterns consistent with $\lambda_{y,x} = 1$:

$$
\begin{array}{|c|c|l|}
\hline\hline
xyz & 010 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{y} \cdot (1 - \gamma_{2,3}\alpha_{z})\\
xyz & 011 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{y} \cdot \gamma_{2,3}\alpha_{z}\\
\hline
xzy & 001 & (1 - \gamma_{0,1}\alpha_{x}) \cdot (1 - \gamma_{0,2}\alpha_{z}) \cdot \gamma_{0,3}\alpha_{y}\\
xzy & 011 & (1 - \gamma_{0,1}\alpha_{x}) \cdot \gamma_{0,2}\alpha_{z} \cdot \gamma_{2,3}\alpha_{y}\\
\hline
zxy & 001 & (1 - \gamma_{0,1}\alpha_{z}) \cdot (1 - \gamma_{0,2}\alpha_{x}) \cdot \gamma_{0,3}\alpha_{y}\\
zxy & 101 & \gamma_{0,1}\alpha_{z} \cdot (1 - \gamma_{1,2}\alpha_{x}) \cdot \gamma_{1,3}\alpha_{y}\\
\hline\hline
\end{array}
$$

Cosidering that each ranking (permutation) has the same probability of being shown, we get the following expression for $\lambda_{y,x}$:

\begin{array}{}
P(\lambda_{y,x} = 1\vert\{x,y,z\}) = &\gamma_{0,2}\alpha_{y} - \gamma_{0,1}\gamma_{0,2}\alpha_{x}\alpha_{y} + \gamma_{0,3}\alpha_{y} - \gamma_{0,2}\gamma_{0,3}\alpha_{y}\alpha_{z} - \gamma_{0,1}\gamma_{0,3}\alpha_{x}\alpha_{y} + \\
&\gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z} + \gamma_{0,2}\gamma_{2,3}\alpha_{y}\alpha_{z} - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3}\alpha_{x}\alpha_{y}\alpha_{z} + \gamma_{0,3}\alpha_{y} - \gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y} - \\
&\gamma_{0,1}\gamma_{0,3}\alpha_{y}\alpha_{z} + \gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z} + \gamma_{0,1}\gamma_{1,3}\alpha_{y}\alpha_{z} - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3}\alpha_{x}\alpha_{y}\alpha_{z}
\end{array}

by summing up a few terms in above expression we get

\begin{array}{}
P(\lambda_{y,x} = 1\vert\{x,y,z\}) = &(\gamma_{0,2} + 2\gamma_{0,3})\alpha_{y} - (\gamma_{0,1}\gamma_{0,2} + \gamma_{0,1}\gamma_{0,3} + \gamma_{0,2}\gamma_{0,3})\alpha_{x}\alpha_{y} - (\gamma_{0,1}\gamma_{0,3} + \gamma_{0,2}\gamma_{0,3} - \gamma_{0,2}\gamma_{2,3} - \gamma_{0,1}\gamma_{1,3})\alpha_{y}\alpha_{z} + \\
&(\gamma_{0,1}\gamma_{0,2}\gamma_{0,3} - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3} + \gamma_{0,1}\gamma_{0,2}\gamma_{0,3} - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3})\alpha_{x}\alpha_{y}\alpha_{z}
\end{array}

In more generality we may consider the case in which the rankings are sampled from a non-uniform distribution $P(xyz)$ which denotes the probability of seeing the permutation $xyz$.

\begin{array}{}
P(\lambda_{y,x} = 1\vert\{x,y,z\}) = &\gamma_{0,2}\alpha_{y}P(xyz) - \gamma_{0,1}\gamma_{0,2}\alpha_{x}\alpha_{y}P(xyz) + \gamma_{0,3}\alpha_{y}P(xzy) - \gamma_{0,2}\gamma_{0,3}\alpha_{y}\alpha_{z}P(xzy) - \gamma_{0,1}\gamma_{0,3}\alpha_{x}\alpha_{y}P(xzy) + \\
&\gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z}P(xzy) + \gamma_{0,2}\gamma_{2,3}\alpha_{y}\alpha_{z}P(xzy) - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3}\alpha_{x}\alpha_{y}\alpha_{z}P(xzy) + \gamma_{0,3}\alpha_{y}P(zxy) - \gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}P(zxy) - \\
&\gamma_{0,1}\gamma_{0,3}\alpha_{y}\alpha_{z}P(zxy) + \gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z}P(zxy) + \gamma_{0,1}\gamma_{1,3}\alpha_{y}\alpha_{z}P(zxy) - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3}\alpha_{x}\alpha_{y}\alpha_{z}P(zxy)
\end{array}

summing again a few terms we end up with

\begin{array}{}
P(\lambda_{y,x} = 1\vert\{x,y,z\}) = &(\gamma_{0,2}P(xyz) + \gamma_{0,3}(P(xzy) + P(zxy))\alpha_{y} - (\gamma_{0,1}\gamma_{0,2}P(xyz) + \gamma_{0,1}\gamma_{0,3}P(xzy) + \gamma_{0,2}\gamma_{0,3}P(zxy))\alpha_{x}\alpha_{y} - \\
&((\gamma_{0,1}\gamma_{0,3} - \gamma_{0,1}\gamma_{1,3})P(zxy) + \gamma_{0,2}\gamma_{0,3}P(xzy) - \gamma_{0,2}\gamma_{2,3}P(xyz))\alpha_{y}\alpha_{z} + \\
&(\gamma_{0,1}\gamma_{0,2}\gamma_{0,3}P(xzy) - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3}P(xyz) + (\gamma_{0,1}\gamma_{0,2}\gamma_{0,3} - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3})P(zxy))\alpha_{x}\alpha_{y}\alpha_{z}
\end{array}

## Y skipped and X clicked

Binary random variable $\lambda_{x,y}$ is 1 only if the document $y$ was presented above document $x$ and the user clicked on document $x$. The following table show ranking-click paterns consistent with $\lambda_{x,y} = 1$:

$$
\begin{array}{|c|c|l|}
\hline\hline
yxz & 010 & (1 - \gamma_{0,1}\alpha_{y}) \cdot \gamma_{0,2}\alpha_{x} \cdot (1 - \gamma_{2,3}\alpha_{z})\\
yxz & 011 & (1 - \gamma_{0,1}\alpha_{y}) \cdot \gamma_{0,2}\alpha_{x} \cdot \gamma_{2,3}\alpha_{z}\\
\hline
yzx & 001 & (1 - \gamma_{0,1}\alpha_{y}) \cdot (1 - \gamma_{0,2}\alpha_{z}) \cdot \gamma_{0,3}\alpha_{x}\\
yzx & 011 & (1 - \gamma_{0,1}\alpha_{y}) \cdot \gamma_{0,2}\alpha_{z} \cdot \gamma_{2,3}\alpha_{x}\\
\hline
zyx & 001 & (1 - \gamma_{0,1}\alpha_{z}) \cdot (1 - \gamma_{0,2}\alpha_{y}) \cdot \gamma_{0,3}\alpha_{x}\\
zyx & 101 & \gamma_{0,1}\alpha_{z} \cdot (1 - \gamma_{1,2}\alpha_{y}) \cdot \gamma_{1,3}\alpha_{x}\\
\hline\hline
\end{array}
$$

Cosidering that each ranking (permutation) has the same probability of being shown, we get the following expression for $\lambda_{x,y}$:

\begin{array}{}
P(\lambda_{x,y} = 1\vert\{x,y,z\}) = &\gamma_{0,2}\alpha_{x} - \gamma_{0,1}\gamma_{0,2}\alpha_{x}\alpha_{y} + \gamma_{0,3}\alpha_{x} - \gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{z} - \gamma_{0,1}\gamma_{0,3}\alpha_{y}\alpha_{x} + \\
&\gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z} + \gamma_{0,2}\gamma_{2,3}\alpha_{x}\alpha_{z} - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3}\alpha_{x}\alpha_{y}\alpha_{z} + \gamma_{0,3}\alpha_{x} - \gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y} - \\
&\gamma_{0,1}\gamma_{0,3}\alpha_{x}\alpha_{z} + \gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z} + \gamma_{0,1}\gamma_{1,3}\alpha_{x}\alpha_{z} - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3}\alpha_{x}\alpha_{y}\alpha_{z} 
\end{array}

by summing up a few terms in above expression we get

\begin{array}{}
P(\lambda_{x,y} = 1\vert\{x,y,z\}) = &(\gamma_{0,2} + 2\gamma_{0,3})\alpha_{x} - (\gamma_{0,1}\gamma_{0,2} + \gamma_{0,1}\gamma_{0,3} + \gamma_{0,2}\gamma_{0,3})\alpha_{x}\alpha_{y} - (\gamma_{0,1}\gamma_{0,3} + \gamma_{0,2}\gamma_{0,3} - \gamma_{0,2}\gamma_{2,3} - \gamma_{0,1}\gamma_{1,3})\alpha_{x}\alpha_{z} + \\
&(\gamma_{0,1}\gamma_{0,2}\gamma_{0,3} - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3} + \gamma_{0,1}\gamma_{0,2}\gamma_{0,3} - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3})\alpha_{x}\alpha_{y}\alpha_{z}
\end{array}

In more generality we may consider the case in which the rankings are sampled from a non-uniform distribution $P(xyz)$ which denotes the probability of seeing the permutation $xyz$.

\begin{array}{}
P(\lambda_{x,y} = 1\vert\{x,y,z\}) = &\gamma_{0,2}\alpha_{x}P(yxz) - \gamma_{0,1}\gamma_{0,2}\alpha_{x}\alpha_{y}P(yxz) + \gamma_{0,3}\alpha_{x}P(yzx) - \gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{z}P(yzx) - \gamma_{0,1}\gamma_{0,3}\alpha_{x}\alpha_{y}P(yzx) + \\
&\gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z}P(yzx) + \gamma_{0,2}\gamma_{2,3}\alpha_{x}\alpha_{z}P(yzx) - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3}\alpha_{x}\alpha_{y}\alpha_{z}P(yzx) + \gamma_{0,3}\alpha_{x}P(zyx) - \gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}P(zyx) - \\
&\gamma_{0,1}\gamma_{0,3}\alpha_{x}\alpha_{z}P(zyx) + \gamma_{0,1}\gamma_{0,2}\gamma_{0,3}\alpha_{x}\alpha_{y}\alpha_{z}P(zyx) + \gamma_{0,1}\gamma_{1,3}\alpha_{x}\alpha_{z}P(zyx) - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3}\alpha_{x}\alpha_{y}\alpha_{z}P(zyx)
\end{array}

summing again a few terms we end up with

\begin{array}{}
P(\lambda_{x,y} = 1\vert\{x,y,z\}) = &(\gamma_{0,2}P(yxz) + \gamma_{0,3}(P(yzx) + P(zyx))\alpha_{x} - (\gamma_{0,1}\gamma_{0,2}P(yxz) + \gamma_{0,1}\gamma_{0,3}P(yzx) + \gamma_{0,2}\gamma_{0,3}P(zyx))\alpha_{x}\alpha_{y} - \\
&((\gamma_{0,1}\gamma_{0,3} - \gamma_{0,1}\gamma_{1,3})P(zyx) + \gamma_{0,2}\gamma_{0,3}P(yzx) - \gamma_{0,2}\gamma_{2,3}P(yxz))\alpha_{x}\alpha_{z} + \\
&(\gamma_{0,1}\gamma_{0,2}\gamma_{0,3}P(yzx) - \gamma_{0,1}\gamma_{0,2}\gamma_{2,3}P(yxz) + (\gamma_{0,1}\gamma_{0,2}\gamma_{0,3} - \gamma_{0,1}\gamma_{1,2}\gamma_{1,3})P(zyx))\alpha_{x}\alpha_{y}\alpha_{z}
\end{array}

## Formula for Differences of Lambdas

Considering a fixed triplet of documents \{x, y, z\}, than the difference in $\lambda$s for pair $(x, y)$ is

$$
P(\lambda_{x,y} = 1\vert\{x,y,z\}) - P(\lambda_{y,x} = 1\vert\{x,y,z\}) = \left[\gamma_{0,2} + 2\gamma_{0,3} - \alpha_{z}(\gamma_{0,1}\gamma_{0,3} + \gamma_{0,2}\gamma_{0,3} - \gamma_{0,2}\gamma_{2,3} - \gamma_{0,1}\gamma_{1,3})\right](\alpha_{x} - \alpha_{y})
$$

which leads to the following definition of $\Lambda_{x,y}$

\begin{align}
\Lambda_{x,y} &= \sum_{z\in D\setminus{\{x,y}\}}\frac{\left(P(\lambda_{x,y} = 1\vert\{x,y,z\}) - P(\lambda_{y,x} = 1\vert\{x,y,z\})\right)}{\vert D\vert\choose3}\\
&= \frac{\left[\gamma_{0,2} + 2\gamma_{0,3} - \left(\sum_{d\in D\setminus{\{x,y}\}}\alpha_{d}\right)(\gamma_{0,1}\gamma_{0,3} + \gamma_{0,2}\gamma_{0,3} - \gamma_{0,2}\gamma_{2,3} - \gamma_{0,1}\gamma_{1,3})\right](\alpha_{x} - \alpha_{y})}{\vert D\vert\choose3}\\
\end{align}

**<font color='red'>BEWARE:</font>** The above result holds only for rankings sampled uniformly at random.

In [6]:
import cPickle as pickle
import contextlib

@contextlib.contextmanager
def printoptions(*args, **kwargs):
    original = np.get_printoptions()
    np.set_printoptions(*args, **kwargs)
    yield 
    np.set_printoptions(**original)

with open('./data/model_query_uniform_lambdas_v2_collection_c3.pkl') as ifile:
    MQD = pickle.load(ifile)

alphas = MQD['UBM']['2548']['model'].p_attraction
gammas = MQD['UBM']['2548']['model'].p_examination
deltas = MQD['UBM']['2548']['relevances']
deltas = deltas[:, None] - deltas[None, :]
viewed_lambdas = MQD['UBM']['2548']['stats'][1000000]['viewed_lambdas'][0]
total_lambdas = MQD['UBM']['2548']['stats'][1000000]['total_lambdas'][0]

def Lambda(i, j, alphas, gammas):
    alphas_sum = alphas.sum() - alphas[i] - alphas[j]
    return (gammas[1, -1] + 2 * gammas[2, -1] - alphas_sum * 
            (gammas[0, -1] * gammas[2, -1] + 
             gammas[1, -1] * gammas[2, -1] - 
             gammas[1, -1] * gammas[2, 1] -
             gammas[0, -1] * gammas[2, 0])) * (alphas[i] - alphas[j]) / 120.

true_lambdas = np.array([[Lambda(i, j, alphas, gammas) for j in range(10)] for i in range(10)])

In [7]:
with printoptions(precision=4, linewidth=np.inf):
    print alphas

[ 0.59    0.0928  0.1091  0.2098  0.1369  0.0955  0.0473  0.2331  0.0811  0.1149]


In [8]:
with printoptions(precision=4, linewidth=np.inf):
    print gammas

[[ 0.2     0.2     0.2     0.2     0.2     0.2     0.2     0.2     0.2     1.    ]
 [ 0.9949  0.2     0.2     0.2     0.2     0.2     0.2     0.2     0.2     0.8854]
 [ 0.472   0.9998  0.2     0.2     0.2     0.2     0.2     0.2     0.2     0.5151]
 [ 0.2552  0.6767  0.9998  0.2     0.2     0.2     0.2     0.2     0.2     0.3508]
 [ 0.1497  0.3877  0.7324  0.9997  0.2     0.2     0.2     0.2     0.2     0.2371]
 [ 0.1002  0.2572  0.4658  0.775   0.9996  0.2     0.2     0.2     0.2     0.1849]
 [ 0.0709  0.1756  0.3008  0.5044  0.8056  0.9995  0.2     0.2     0.2     0.1434]
 [ 0.0513  0.1203  0.2082  0.3236  0.5305  0.807   0.9995  0.2     0.2     0.1135]
 [ 0.0385  0.0909  0.1625  0.2481  0.38    0.5816  0.8953  0.9995  0.2     0.0959]
 [ 0.0342  0.0843  0.1329  0.203   0.3049  0.4552  0.6905  0.9743  0.9995  0.0925]]


In [9]:
with printoptions(precision=4, linewidth=np.inf):
    print deltas

[[ 0.      0.4972  0.4809  0.3802  0.4531  0.4945  0.5427  0.3569  0.5089  0.4751]
 [-0.4972  0.     -0.0164 -0.117  -0.0442 -0.0028  0.0454 -0.1404  0.0116 -0.0222]
 [-0.4809  0.0164  0.     -0.1006 -0.0278  0.0136  0.0618 -0.124   0.028  -0.0058]
 [-0.3802  0.117   0.1006  0.      0.0728  0.1143  0.1624 -0.0234  0.1286  0.0948]
 [-0.4531  0.0442  0.0278 -0.0728  0.      0.0414  0.0896 -0.0962  0.0558  0.022 ]
 [-0.4945  0.0028 -0.0136 -0.1143 -0.0414  0.      0.0482 -0.1376  0.0144 -0.0194]
 [-0.5427 -0.0454 -0.0618 -0.1624 -0.0896 -0.0482  0.     -0.1858 -0.0338 -0.0676]
 [-0.3569  0.1404  0.124   0.0234  0.0962  0.1376  0.1858  0.      0.152   0.1182]
 [-0.5089 -0.0116 -0.028  -0.1286 -0.0558 -0.0144  0.0338 -0.152   0.     -0.0338]
 [-0.4751  0.0222  0.0058 -0.0948 -0.022   0.0194  0.0676 -0.1182  0.0338  0.    ]]


In [10]:
with printoptions(precision=4, linewidth=np.inf):
    print np.nan_to_num(viewed_lambdas / deltas)

[[ 0.      1.1481  1.143   1.0473  1.1149  1.1466  1.2575  1.0461  1.1788  1.1227]
 [ 1.1481  0.      3.2433  1.9614  2.2661  2.4966  3.7685  1.8132  4.5601  2.9879]
 [ 1.143   3.2433  0.      1.993   2.145   4.3716  3.4159  1.7688  2.4193  2.0466]
 [ 1.0473  1.9614  1.993   0.      1.7276  1.918   2.5091  1.2466  1.936   1.6721]
 [ 1.1149  2.2661  2.145   1.7276  0.      2.1063  3.0238  1.5985  2.0535  1.871 ]
 [ 1.1466  2.4966  4.3716  1.918   2.1063  0.      3.6424  1.8988  5.468   2.7135]
 [ 1.2575  3.7685  3.4159  2.5091  3.0238  3.6424  0.      2.3558  4.865   3.3671]
 [ 1.0461  1.8132  1.7688  1.2466  1.5985  1.8988  2.3558  0.      1.9423  1.5667]
 [ 1.1788  4.5601  2.4193  1.936   2.0535  5.468   4.865   1.9423  0.      2.305 ]
 [ 1.1227  2.9879  2.0466  1.6721  1.871   2.7135  3.3671  1.5667  2.305   0.    ]]


In [11]:
with printoptions(precision=4, linewidth=np.inf):
    print np.nan_to_num(total_lambdas / deltas)

[[ 0.      0.6472  0.6585  0.6478  0.6494  0.6568  0.6589  0.6531  0.6531  0.6528]
 [ 0.6472  0.      0.7661  0.6711  0.709   0.2137  0.6409  0.6468  0.6937  0.8055]
 [ 0.6585  0.7661  0.      0.7168  0.7826  0.7488  0.6261  0.684   0.5917  0.4514]
 [ 0.6478  0.6711  0.7168  0.      0.675   0.6723  0.6525  0.6372  0.6721  0.6523]
 [ 0.6494  0.709   0.7826  0.675   0.      0.651   0.6781  0.6722  0.6213  0.6563]
 [ 0.6568  0.2137  0.7488  0.6723  0.651   0.      0.6114  0.6703  0.8701  0.7931]
 [ 0.6589  0.6409  0.6261  0.6525  0.6781  0.6114  0.      0.6631  0.6941  0.6275]
 [ 0.6531  0.6468  0.684   0.6372  0.6722  0.6703  0.6631  0.      0.6676  0.6292]
 [ 0.6531  0.6937  0.5917  0.6721  0.6213  0.8701  0.6941  0.6676  0.      0.6701]
 [ 0.6528  0.8055  0.4514  0.6523  0.6563  0.7931  0.6275  0.6292  0.6701  0.    ]]


In [12]:
with printoptions(precision=4, linewidth=np.inf):
    print np.nan_to_num(true_lambdas / deltas)

[[ 0.      0.0193  0.0192  0.0189  0.0191  0.0193  0.0194  0.0188  0.0193  0.0192]
 [ 0.0193  0.      0.0208  0.0205  0.0207  0.0209  0.021   0.0204  0.0209  0.0208]
 [ 0.0192  0.0208  0.      0.0204  0.0207  0.0208  0.021   0.0204  0.0209  0.0207]
 [ 0.0189  0.0205  0.0204  0.      0.0204  0.0205  0.0206  0.02    0.0205  0.0204]
 [ 0.0191  0.0207  0.0207  0.0204  0.      0.0207  0.0209  0.0203  0.0208  0.0207]
 [ 0.0193  0.0209  0.0208  0.0205  0.0207  0.      0.021   0.0204  0.0209  0.0208]
 [ 0.0194  0.021   0.021   0.0206  0.0209  0.021   0.      0.0206  0.0211  0.0209]
 [ 0.0188  0.0204  0.0204  0.02    0.0203  0.0204  0.0206  0.      0.0205  0.0203]
 [ 0.0193  0.0209  0.0209  0.0205  0.0208  0.0209  0.0211  0.0205  0.      0.0208]
 [ 0.0192  0.0208  0.0207  0.0204  0.0207  0.0208  0.0209  0.0203  0.0208  0.    ]]
