# Sanity Check For Click-based Lambdas

In [1]:
import os

# Force a UTF-8 locale. `os.system("export ...")` would only set the variable
# inside a short-lived subshell and leave this process untouched, so assign to
# os.environ instead: that is visible to this kernel and to any child process.
os.environ["LC_ALL"] = "en_US.UTF-8"
os.environ["LANG"] = "en_US.UTF-8"

%matplotlib inline

import numpy as np
np.set_printoptions(precision=2, linewidth=np.inf, suppress=True)
np.seterr(invalid='ignore', divide='ignore')

import cPickle as pickle
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb

from matplotlib.ticker import MultipleLocator

from ipywidgets import interact, interactive, Dropdown, FloatText, VBox, HBox

from IPython.display import display

# Load the click models for the queries of interest.
#
# Point MODEL_PATH at one of the available collections:
#   './data/model_query_uniform_lambdas_v1_collection_c2.pkl'
#   './data/model_query_softmax_lambdas_v1_collection_c2.pkl'
#   './data/model_query_uniform_lambdas_v2_collection_c2.pkl'
#   './data/model_query_softmax_lambdas_v2_collection_c2.pkl'
#   './data/model_query_uniform_lambdas_v3_collection_c2.pkl'
#   './data/model_query_softmax_lambdas_v3_collection_c2.pkl'
MODEL_PATH = './data/model_query_uniform_lambdas_v2_collection_c2.pkl'

# Pickles are binary data: open in 'rb' so no newline translation can corrupt
# the stream. NOTE: unpickling can execute arbitrary code -- only load files
# that come from a trusted source.
with open(MODEL_PATH, 'rb') as ifile:
    # MQD is indexed as MQD[click_model_type][query] -> dict with (at least)
    # a 'model' entry, as used by the loop below.
    MQD = pickle.load(ifile)

# For reproducibility -- set the same seed on every click model.
# NOTE(review): this only assigns the `seed` attribute; presumably the model
# class re-seeds its RNG from it -- confirm against the model implementation.
for click_model_type in MQD:
    for query in MQD[click_model_type]:
        MQD[click_model_type][query]['model'].seed = 42

# Available Click Models

In [2]:
# Names of the click models present in the loaded collection.
list(MQD)

['DCM', 'CM', 'DBN', 'CCM', 'PBM', 'UBM']

# MSE In Delta Estimation

In [3]:
query_model_losses = {}
for query in MQD['CM']:
    model_losses = {}
    for click_model_name in MQD:
        model = MQD[click_model_name][query]['model']
        relevances = MQD[click_model_name][query]['relevances']
        n_documents = len(relevances)

        test_lambdas = np.zeros((n_documents, n_documents), dtype='float64')

        if click_model_name == 'CM':
            p_attract = model.click_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

        elif click_model_name == 'PBM':
            p_attract = model.click_proba
            p_exam = model.exam_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_exam[0] * p_attract[j]) * p_exam[1] * p_attract[i]

        elif click_model_name == 'CCM':
            p_attract = model.p_attraction
            tau1 = 1.0 - model.p_stop_noclick
            tau2 = 1.0 - model.p_stop_click_norel
            tau3 = 1.0 - model.p_stop_click_rel

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attract[j]) * tau1 * p_attract[i]

        elif click_model_name == 'DBN':
            p_attract = model.click_proba
            p_cont = 1.0 - model.stop_proba
            p_abandon = model.abandon_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

        elif click_model_name == 'DCM':
            p_attraction = model.click_proba
            p_cont = 1.0 - model.stop_proba

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_attraction[i]

        elif click_model_name == 'UBM':
            p_attraction = model.p_attraction
            p_examination = model.p_examination

            for i in range(n_documents):
                for j in range(n_documents):
                    if i == j: continue
                    test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_examination[1, -1] * p_attraction[i]

        else:
            raise ValueError('unknow click model: %s' % click_model_name)

        # This is what we want to estimate with click lambdas?
        test_lambdas = test_lambdas - test_lambdas.T

        viewed_loss, total_loss = np.empty(10), np.empty(10)
        for i in range(10):
            viewed_lambdas = MQD[click_model_name][query]['stats'][1000000]['viewed_lambdas'][i]
            total_lambdas = MQD[click_model_name][query]['stats'][1000000]['total_lambdas'][i]

            viewed_loss[i] = np.mean((test_lambdas - viewed_lambdas)**2)
            total_loss[i] = np.mean((test_lambdas - total_lambdas)**2)
        
        model_losses[click_model_name] = (np.mean(viewed_loss), np.std(viewed_loss),
                                          np.mean(total_loss), np.std(total_loss))
    query_model_losses[query] = model_losses

print '   QUERY     MODEL     VL/MSE       VL/STD       TL/MSE       TL/STD'
print '  -------   -------   --------     --------     --------     --------'
for click_model_name in query_model_losses['2548']:
    for query in query_model_losses:
        params = (query, click_model_name) + query_model_losses[query][click_model_name]
        print '{0:>8s} {1:>8s} {2:>12.6f} {3:>12.6f} {4:>12.6f} {5:>12.6f}'.format(*params)

   QUERY     MODEL     VL/MSE       VL/STD       TL/MSE       TL/STD
  -------   -------   --------     --------     --------     --------
    2548      DCM     0.009168     0.000922     0.000014     0.000003
   77932      DCM     0.008062     0.000525     0.000018     0.000003
    1153      DCM     0.003359     0.000193     0.000023     0.000006
   42696      DCM     0.000691     0.000109     0.000008     0.000001
   49111      DCM     0.004737     0.000200     0.000024     0.000004
  297115      DCM     0.002876     0.000258     0.000009     0.000002
   39492      DCM     0.003089     0.000085     0.000023     0.000005
   59560      DCM     0.002538     0.000175     0.000010     0.000001
   28658      DCM     0.006880     0.000274     0.000024     0.000006
  486195      DCM     0.004837     0.000309     0.000027     0.000005
    2548       CM     0.056033     0.000000     0.000010     0.000003
   77932       CM     0.055497     0.000000     0.000013     0.000004
    1153       CM    

# Click Model

In [4]:
click_model_name = 'UBM'

printed_model_characteristics = False

for query in MQD[click_model_name]:
    model = MQD[click_model_name][query]['model']
    relevances = MQD[click_model_name][query]['relevances']
    n_documents = len(relevances)
    
    deltas = relevances[:, None] - relevances[None, :]
    test_lambdas = np.zeros((n_documents, n_documents), dtype='float64')

    if click_model_name == 'CM':
        p_attract = model.click_proba    
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'CM model'
            print '--------'
            print 'Attractiveness:', p_attract
            print
            
        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

    elif click_model_name == 'PBM':
        p_attract = model.click_proba
        p_exam = model.exam_proba
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'PBM model'
            print '---------'
            print 'Attractiveness:', p_attract
            print 'Examination:   ', p_exam
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_exam[0] * p_attract[j]) * p_exam[1] * p_attract[i]

    elif click_model_name == 'CCM':
        p_attract = model.p_attraction
        tau1 = 1.0 - model.p_stop_noclick
        tau2 = 1.0 - model.p_stop_click_norel
        tau3 = 1.0 - model.p_stop_click_rel
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'PBM model'
            print '---------'
            print 'Attractiveness:', p_attract
            print 'Tau1:          ', tau1
            print 'Tau2:          ', tau2
            print 'Tau3:          ', tau3
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attract[j]) * tau1 * p_attract[i]

    elif click_model_name == 'DBN':
        p_attract = model.click_proba
        p_cont = 1.0 - model.stop_proba
        p_abandon = model.abandon_proba
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'DBN model'
            print '---------'
            print 'Attractiveness:', p_attract
            print 'Continuation:  ', p_cont
            print 'Abandonment:   ', p_abandon

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attract[j]) * p_attract[i]

    elif click_model_name == 'DCM':
        p_attraction = model.click_proba
        p_cont = 1.0 - model.stop_proba
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'DCM model'
            print '---------'
            print 'Attractiveness:', p_attraction
            print 'Continuation:  ', p_cont
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_attraction[i]
        
    elif click_model_name == 'UBM':
        p_attraction = model.p_attraction
        p_examination = model.p_examination
        
        if not printed_model_characteristics:
            printed_model_characteristics = True
            
            print 'UBM model'
            print '---------'
            print 'Attractiveness:', p_attraction
            print 'Examination:  '
            print p_examination[:,[9] + range(9)].T
            print

        for i in range(n_documents):
            for j in range(n_documents):
                if i == j: continue
                test_lambdas[i, j] = (1.0 - p_attraction[j]) * p_examination[1, -1] * p_attraction[i]

    else:
        raise ValueError('unknow click model: %s' % click_model_name)

    # This is what we want to estimate with click lambdas?
    test_lambdas = test_lambdas - test_lambdas.T

    viewed_loss, total_loss = np.empty(10), np.empty(10)
    for i in range(10):
        viewed_lambdas = MQD[click_model_name][query]['stats'][1000000]['viewed_lambdas'][i]
        total_lambdas = MQD[click_model_name][query]['stats'][1000000]['total_lambdas'][i]
    
        viewed_loss[i] = np.mean((test_lambdas - viewed_lambdas)**2)
        total_loss[i] = np.mean((test_lambdas - total_lambdas)**2)        
    
    lambdas = MQD[click_model_name][query]['stats'][1000000]['lambdas'][0]
    
    viewed_lambdas = MQD[click_model_name][query]['stats'][1000000]['viewed_lambdas'][0]
    viewed_counts = MQD[click_model_name][query]['stats'][1000000]['viewed_counts'][0]
    
    total_lambdas = MQD[click_model_name][query]['stats'][1000000]['total_lambdas'][0]
    total_counts = MQD[click_model_name][query]['stats'][1000000]['total_counts'][0]
    
    print 'deltas:'
    print '-------'
    print deltas
    print
    print 'test lambdas:'
    print '-------------'
    print test_lambdas
    print 
    print 'viewed lambdas'
    print '--------------'
    print 'MSE: %.6f (+/- %.6f SE)' % (np.mean(viewed_loss), np.std(viewed_loss))
    print viewed_lambdas
    print
    print 'total lambdas'
    print '--------------'
    print 'MSE: %.6f (+/- %.6f SE)' % (np.mean(total_loss), np.std(total_loss))
    print total_lambdas
    print
    print 'test lambdas / deltas:'
    print '----------------------'
    print np.nan_to_num(test_lambdas / deltas)
    print
    print 'viewed lambdas / deltas:'
    print '----------------------'
    print np.nan_to_num(viewed_lambdas / deltas)
    print
    print 'total lambdas / deltas:'
    print '----------------------'
    print np.nan_to_num(total_lambdas / deltas)
    print
    print 'lambdas:'
    print lambdas
    print
    print 'total counts:'
    print '-------------'
    print total_counts
    print
    print 'viewed counts:'
    print '--------------'
    print viewed_counts
    print
    print 'viewed / total counts:'
    print '----------------------'
    print np.nan_to_num(1.0 * viewed_counts / total_counts)
    break
            

UBM model
---------
Attractiveness: [ 0.59  0.09  0.11  0.21  0.14  0.1   0.05  0.23  0.08  0.11]
Examination:  
[[ 1.    0.89  0.52  0.35  0.24  0.18  0.14  0.11  0.1   0.09]
 [ 0.2   0.99  0.47  0.26  0.15  0.1   0.07  0.05  0.04  0.03]
 [ 0.2   0.2   1.    0.68  0.39  0.26  0.18  0.12  0.09  0.08]
 [ 0.2   0.2   0.2   1.    0.73  0.47  0.3   0.21  0.16  0.13]
 [ 0.2   0.2   0.2   0.2   1.    0.77  0.5   0.32  0.25  0.2 ]
 [ 0.2   0.2   0.2   0.2   0.2   1.    0.81  0.53  0.38  0.3 ]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   1.    0.81  0.58  0.46]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   0.2   1.    0.9   0.69]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   1.    0.97]
 [ 0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   1.  ]]

deltas:
-------
[[ 0.    0.5   0.48  0.38  0.45  0.49  0.54  0.36  0.51  0.48]
 [-0.5   0.   -0.02 -0.12 -0.04 -0.    0.05 -0.14  0.01 -0.02]
 [-0.48  0.02  0.   -0.1  -0.03  0.01  0.06 -0.12  0.03 -0.01]
 [-0.38  0.12  0.1   0.    0.07  0.11  0.16 -0.02 