In [None]:
# -*- coding: utf-8 -*-

import os
import numpy as np
import cPickle as pickle

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.scale as mscale
import matplotlib.transforms as mtransforms

from matplotlib.ticker import NullFormatter, ScalarFormatter, NullLocator, AutoLocator, MultipleLocator


class SqrtScale(mscale.ScaleBase):
    '''
    Axis scale that positions values according to their square root.

    The scale is exposed under the name 'sqrt'. Negative values have no
    square root, so they are masked by the forward transform and the lower
    axis limit is clamped to 0.
    '''
    # The name under which this scale will be registered with matplotlib.
    name = 'sqrt'

    def __init__(self, axis, **kwargs):
        # `axis` and `kwargs` are accepted but not used by this scale.
        mscale.ScaleBase.__init__(self)

    def get_transform(self):
        '''Returns the forward (data -> sqrt) transform of this scale.'''
        return self.SqrtTransform()

    def set_default_locators_and_formatters(self, axis):
        '''Uses automatic major ticks with scalar labels and no minor ticks.'''
        axis.set_major_locator(AutoLocator())
        axis.set_major_formatter(ScalarFormatter())
        axis.set_minor_locator(NullLocator())
        axis.set_minor_formatter(NullFormatter())

    def limit_range_for_scale(self, vmin, vmax, minpos):
        '''Clamps the lower limit to 0; `minpos` is not used.'''
        return max(vmin, 0.0), vmax

    class SqrtTransform(mtransforms.Transform):
        '''Forward transform: element-wise square root.'''
        input_dims = 1
        output_dims = 1
        is_separable = True

        def __init__(self):
            mtransforms.Transform.__init__(self)

        def transform_non_affine(self, a):
            '''
            Returns the square root of `a`; negative entries are masked out
            instead of producing NaNs.
            '''
            masked = np.ma.masked_where((a < 0.0), a)
            if masked.mask.any():
                # The masked-array routines live in `np.ma`; no bare `ma`
                # name is imported by this notebook.
                return np.ma.sqrt(masked)
            else:
                return np.sqrt(a)

        def inverted(self):
            '''Returns the transform undoing this one (squaring).'''
            return SqrtScale.InvertedSqrtTransform()

    class InvertedSqrtTransform(mtransforms.Transform):
        '''Inverse transform: element-wise square.'''
        input_dims = 1
        output_dims = 1
        is_separable = True

        def __init__(self):
            mtransforms.Transform.__init__(self)

        def transform_non_affine(self, a):
            '''Returns `a` squared.'''
            return a**2

        def inverted(self):
            '''Returns the transform undoing this one (square root).'''
            return SqrtScale.SqrtTransform()


# Register the scale so that it can be selected by its name ('sqrt').
mscale.register_scale(SqrtScale)

# Maps the name returned by a ranking model's `getName()` to the short label
# shown in plot legends. The entries come in three groups distinguished by
# the bracketed suffix of the key ([FC], [LC], [FF]), which is mirrored by
# the lower-case suffix of the label (-fc, -lc, -ff).
ranker2legend = {'CascadeUCB1[FC]': 'CUCB1-fc',
                 'CascadeKL-UCB[FC]': 'CKL-UCB-fc',
                 'RelativeCascadeUCB1Algorithm[FC]': 'UCT-fc',
                 'QuickRank[FC]': 'QR-fc',
                 'MergeRank[FC]': 'MR-fc',
                 'MergeRankKL[FC]': 'MR-KL-fc',
                 'MergeRankZeroKL[FC]': 'MR-Z-KL-fc',
                 'ShuffleAndSplit[FC]': 'SnS-fc',
                 'RankedBanditsUCB1[FC]': 'RB-UCB1-fc',
                 'RankedBanditsExp3[FC]': 'RB-Exp3-fc',
                 'RankedBanditsKL-UCB[FC]': 'RB-KL-fc',
                 'RealMergeRankZeroKL[FC]': '#MR-ZE-KL-fc',

                 'CascadeUCB1[LC]': 'CUCB1-lc',
                 'CascadeKL-UCB[LC]': 'CKL-UCB-lc',
                 'RelativeCascadeUCB1Algorithm[LC]': 'UCT-lc',
                 'QuickRank[LC]': 'QR-lc',
                 'MergeRank[LC]': 'MR-lc',
                 'MergeRankKL[LC]': 'MR-KL-lc',
                 'MergeRankZeroKL[LC]': 'MR-Z-KL-lc',
                 'ShuffleAndSplit[LC]': 'SnS-lc',
                 'RankedBanditsUCB1[LC]': 'RB-UCB1-lc',
                 'RankedBanditsExp3[LC]': 'RB-Exp3-lc',
                 'RankedBanditsKL-UCB[LC]': 'RB-KL-lc',
                 'RealMergeRankZeroKL[LC]': '#MR-ZE-KL-lc',

                 'CascadeUCB1[FF]': 'CUCB1-ff',
                 'CascadeKL-UCB[FF]': 'CKL-UCB-ff',
                 'RelativeCascadeUCB1Algorithm[FF]': 'UCT-ff',
                 'QuickRank[FF]': 'QR-ff',
                 'MergeRank[FF]': 'MR-ff',
                 'MergeRankKL[FF]': 'MR-KL-ff',
                 'MergeRankZeroKL[FF]': 'MR-Z-KL-ff',
                 'ShuffleAndSplit[FF]': 'SnS-ff',
                 'RankedBanditsUCB1[FF]': 'RB-UCB1-ff',
                 'RankedBanditsExp3[FF]': 'RB-Exp3-ff',
                 'RankedBanditsKL-UCB[FF]': 'RB-KL-ff',
                 'RealMergeRankZeroKL[FF]': '#MR-ZE-KL-ff'}


def plot_regret_curves(ax, info, regret, xscale=None, yscale=None, cumulative=False):
    '''
    Plots the regret curve of a single experiment into `ax`.

    Parameters
    ----------
    ax: instance of matplotlib's axes
        Target axes to plot the curve into.

    info:
        The description of the experiment; read through the keys
        'ranking_model', 'click_model', 'cutoff', and 'query'.

    regret: array
        Regret vector of the experiment.

    xscale, yscale: str or None
        Names of the axis scales; None stands for 'linear'.

    cumulative: bool
        When False (the default) `regret` is accumulated with `cumsum()`
        before plotting, otherwise it is plotted as given.
    '''
    ranker_name = info['ranking_model'].getName()
    click_name = info['click_model'].getName()
    cutoff = info['cutoff']
    qid = info['query']

    curve = regret if cumulative else regret.cumsum()

    if xscale is None:
        xscale = 'linear'
    if yscale is None:
        yscale = 'linear'

    # Plot at most 10^5 evenly spaced points of the curve.
    n_points = curve.shape[0]
    if n_points > 100000:
        xs = np.linspace(0, n_points - 1, 100000).astype('int32')
        curve = curve[xs]
    else:
        xs = np.arange(n_points, dtype='int32')

    ax.set_title('%s - %s@%d - Q%s' % (ranker_name, click_name, cutoff, qid))

    ax.plot(xs, curve, ls='-', lw=1.5, color='k')

    ax.set_ylabel('Regret - (CTR@%d)' % cutoff)
    ax.set_xlabel('Impressions')

    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    # Use the same font size for the title, axis labels, and offset texts.
    texts = (ax.title,
             ax.xaxis.label, ax.yaxis.label,
             ax.xaxis.get_offset_text(), ax.yaxis.get_offset_text())
    for text in texts:
        text.set_fontsize(10)

    for ticklabels in (ax.get_xticklabels(), ax.get_yticklabels()):
        plt.setp(ticklabels, fontsize=10)


def plot_multiple_regret_curves(ax, infos, regrets, xscale=None, yscale=None, cumulative=False):
    '''
    Plots the regret curves for the experiments described in `infos` with axis
    scaled according to `xscale` and `yscale` respectively.

    Each corresponding element of `infos` and `regrets` should contain
    a specification of an experiment with a particular ranking model
    and corresponding regret, respectively.

    Parameters
    ----------
    ax: instance of matplotlib's axes
        Target axes to plot the curves into.

    infos: array
        The description of the experiments; each element is read through
        the keys 'query', 'cutoff', 'click_model', and 'ranking_model'.

    regrets: array
        Regrets of ranking models in corresponding experiments.

    xscale, yscale: str or None
        Names of the axis scales; None stands for 'linear'.

    cumulative: bool
        When False (the default) each regret vector is accumulated with
        `cumsum()` before plotting, otherwise it is plotted as given.

    Raises
    ------
    ValueError
        If the experiments do not share the same query, cutoff, or click model.
    '''
    if len(set([info['query'] for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same query.')

    if len(set([info['cutoff'] for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same cutoff.')

    if len(set([info['click_model'].getName() for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same click model.')

    qid = infos[0]['query']
    cutoff = infos[0]['cutoff']
    click_model_name = infos[0]['click_model'].getName()

    xscale = 'linear' if xscale is None else xscale
    yscale = 'linear' if yscale is None else yscale

    ax.set_title('%s@%d - Q%s' % (click_model_name, cutoff, qid))

    ax.set_ylabel('Regret - (CTR@%d)' % cutoff)
    ax.set_xlabel('Impressions')

    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    ax.title.set_fontsize(10)

    ax.xaxis.label.set_fontsize(10)
    ax.yaxis.label.set_fontsize(10)

    ax.xaxis.get_offset_text().set_fontsize(10)
    ax.yaxis.get_offset_text().set_fontsize(10)

    plt.setp(ax.get_xticklabels(), fontsize=10)
    plt.setp(ax.get_yticklabels(), fontsize=10)

    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', '#f089cf']

    for i, (info, regret) in enumerate(zip(infos, regrets)):
        full_name = info['ranking_model'].getName()
        ranker_model_name = ranker2legend[full_name]

        # Reuse the palette cyclically so that experiments beyond the number
        # of available colors are still plotted instead of being dropped.
        color = colors[i % len(colors)]

        if not cumulative:
            regret = regret.cumsum()

        # Subsample regret if there is more than 10^5 iterations.
        if regret.shape[0] > 100000:
            indices = np.linspace(0, regret.shape[0] - 1, 100000).astype('int32')
            regret = regret[indices]
        else:
            indices = np.arange(regret.shape[0], dtype='int32')

        # Pick a line style based on the feedback!
        if full_name.endswith('[FC]'):
            ls = '-.'
        elif full_name.endswith('[LC]'):
            ls = '--'
        else:
            ls = '-'

        # The selected line style is actually applied here (it used to be
        # computed and then ignored in favor of a hard-coded solid line).
        ax.plot(indices, regret, ls=ls, color=color, lw=1, label=ranker_model_name)

    ax.legend(loc='lower right')


def plot_average_regret_curves(ax, info, regret, xscale=None, yscale=None):
    '''
    Plots the average regret curve of the experiment described in `info`
    together with its spread, with axis scaled according to `xscale` and
    `yscale` respectively.

    The mean is drawn as a black line. The band within one standard deviation
    of the mean (clipped to the minima and maxima) is filled in blue, the rest
    of the range between the minima and maxima in red.

    Parameters
    ----------
    ax: instance of matplotlib's axes
        Target axes to plot the curves into.

    info:
        The description of the experiment; read through the keys
        'ranking_model', 'click_model', 'cutoff', and 'query'.

    regret: array, shape = [4, n_impressions]
        Matrix containing 4 row vectors storing mean, std, minima, and maxima
        of cumulative regret, respectively.

    xscale, yscale: str or None
        Names of the axis scales; None stands for 'linear'.
    '''
    ranker_name = info['ranking_model'].getName()
    click_name = info['click_model'].getName()
    cutoff = info['cutoff']
    qid = info['query']

    if xscale is None:
        xscale = 'linear'
    if yscale is None:
        yscale = 'linear'

    # Plot at most 10^5 evenly spaced columns of the statistics.
    n_points = regret[0].shape[0]
    if n_points > 100000:
        xs = np.linspace(0, n_points - 1, 100000).astype('int32')
        regret = regret[:, xs]
    else:
        xs = np.arange(n_points, dtype='int32')

    ax.set_title('%s - %s@%d - Q%s' % (ranker_name, click_name, cutoff, qid))

    mean, std, lowest, highest = regret[0], regret[1], regret[2], regret[3]

    # One-standard-deviation band, clipped to the observed range.
    inner_low = np.maximum(mean - std, lowest)
    inner_high = np.minimum(mean + std, highest)

    bands = [(lowest, inner_low, 'red'),
             (inner_low, inner_high, 'blue'),
             (inner_high, highest, 'red')]

    for lower, upper, facecolor in bands:
        ax.fill_between(xs, lower, upper, facecolor=facecolor, edgecolor='none',
                        alpha=0.2, interpolate=True)

    ax.plot(xs, mean, 'k-')

    ax.set_ylabel('Regret - (CTR@%d)' % cutoff)
    ax.set_xlabel('Impressions')

    ax.set_ylim(bottom=0.0)

    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    # Use the same font size for the title, axis labels, and offset texts.
    texts = (ax.title,
             ax.xaxis.label, ax.yaxis.label,
             ax.xaxis.get_offset_text(), ax.yaxis.get_offset_text())
    for text in texts:
        text.set_fontsize(10)

    for ticklabels in (ax.get_xticklabels(), ax.get_yticklabels()):
        plt.setp(ticklabels, fontsize=10)

    
def plot_multiple_average_regret_curves(ax, infos, regrets, xscale=None, yscale=None, xlim=None, reward=False):
    '''
    Plots the regret curves for the experiments described in `infos` with axis
    scaled according to `xscale` and `yscale` respectively.

    Each corresponding element of `infos` and `regrets` should contain
    a specification of repeated experiments with a particular ranking model
    and their average regret, respectively.

    Parameters
    ----------
    ax: instance of matplotlib's axes
        Target axes to plot the curves into.

    infos: array
        The description of the experiments; each element is read through
        the keys 'query', 'cutoff', 'click_model', and 'ranking_model'.

    regrets: array
        One element per experiment, each of shape = [4, n_impressions]:
        a matrix containing 4 row vectors storing mean, std, minima, and
        maxima of cumulative regret, respectively. An element with a single
        row (or a plain vector of shape [n_impressions,]) is treated as
        the mean only and is plotted without the spread bands.

    xscale, yscale: str or None
        Names of the axis scales; None stands for 'linear'.

    xlim:
        If not None, it is passed to `ax.set_xlim`.

    reward: bool
        If True, the y-axis is labeled as reward instead of regret.

    Raises
    ------
    ValueError
        If the experiments do not share the same query, cutoff, or click model.
    '''
    if len(set([info['query'] for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same query.')

    if len(set([info['cutoff'] for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same cutoff.')

    if len(set([info['click_model'].getName() for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same click model.')

    qid = infos[0]['query']
    cutoff = infos[0]['cutoff']
    click_model_name = infos[0]['click_model'].getName()

    xscale = 'linear' if xscale is None else xscale
    yscale = 'linear' if yscale is None else yscale

    ax.set_title('%s@%d - Q%s' % (click_model_name, cutoff, qid))

    if reward:
        ax.set_ylabel('Reward - (CTR@%d)' % cutoff)
    else:
        ax.set_ylabel('Regret - (CTR@%d)' % cutoff)

    ax.set_xlabel('Impressions')

    if xlim is not None:
        ax.set_xlim(xlim)

    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    ax.title.set_fontsize(10)

    ax.xaxis.label.set_fontsize(10)
    ax.yaxis.label.set_fontsize(10)

    ax.xaxis.get_offset_text().set_fontsize(10)
    ax.yaxis.get_offset_text().set_fontsize(10)

    plt.setp(ax.get_xticklabels(), fontsize=10)
    plt.setp(ax.get_yticklabels(), fontsize=10)

    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', '#f089cf']

    for i, (info, regret) in enumerate(zip(infos, regrets)):
        ranker_model_name = ranker2legend[info['ranking_model'].getName()]

        # Reuse the palette cyclically so that experiments beyond the number
        # of available colors are still plotted instead of being dropped.
        color = colors[i % len(colors)]

        # Promote a plain mean vector to a single-row matrix so that the
        # row indexing below works for it as well.
        regret = np.atleast_2d(regret)

        # Subsample regret if there is more than 10^5 iterations.
        if regret[0].shape[0] > 100000:
            indices = np.linspace(0, regret[0].shape[0] - 1, 100000).astype('int32')
            regret = regret[:, indices]
        else:
            indices = np.arange(regret[0].shape[0], dtype='int32')

        # The spread bands need the std, minima, and maxima rows.
        if regret.shape[0] > 1:
            tmp1 = np.maximum(regret[0] - regret[1], regret[2])
            tmp2 = np.minimum(regret[0] + regret[1], regret[3])

            ax.fill_between(indices, regret[2], tmp1, facecolor=color, edgecolor='none',
                            alpha=0.1, interpolate=True)

            ax.fill_between(indices, tmp1, tmp2, facecolor=color, edgecolor='none',
                            alpha=0.2, interpolate=True)

            ax.fill_between(indices, tmp2, regret[3], facecolor=color, edgecolor='none',
                            alpha=0.1, interpolate=True)

        ax.plot(indices, regret[0], ls='-', c=color, label=ranker_model_name)

    ax.legend(loc='upper left')


def plot_multiple_average_reward_curves(ax, infos, regrets, xscale=None, yscale=None, xlim=None):
    '''
    Plots the reward curves for the experiments described in `infos` with axis
    scaled according to `xscale` and `yscale` respectively.

    Each corresponding element of `infos` and `regrets` should contain
    a specification of repeated experiments with a particular ranking model
    and their average reward, respectively.

    Parameters
    ----------
    ax: instance of matplotlib's axes
        Target axes to plot the curves into.

    infos: array
        The description of the experiments; each element is read through
        the keys 'cutoff', 'click_model', and 'ranking_model'.

    regrets: array
        One element per experiment, each of shape = [4, n_impressions] or
        [n_impressions,]: a matrix containing either 4 row vectors storing
        mean, std, minima, and maxima of cumulative reward, respectively,
        or just mean of cumulative reward.

    xscale, yscale: str or None
        Names of the axis scales; None stands for 'linear'.

    xlim:
        If not None, it is passed to `ax.set_xlim`.

    Raises
    ------
    ValueError
        If the experiments do not share the same cutoff or click model.
    '''
    if len(set([info['cutoff'] for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same cutoff.')

    if len(set([info['click_model'].getName() for info in infos])) != 1:
        raise ValueError('Experiments to plot were not done with the same click model.')

    cutoff = infos[0]['cutoff']
    click_model_name = infos[0]['click_model'].getName()

    xscale = 'linear' if xscale is None else xscale
    yscale = 'linear' if yscale is None else yscale

    ax.set_title('%s@%d' % (click_model_name, cutoff))

    ax.set_ylabel('Reward - (CTR@%d)' % cutoff)
    ax.set_xlabel('Impressions')

    if xlim is not None:
        ax.set_xlim(xlim)

    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    ax.title.set_fontsize(10)

    ax.xaxis.label.set_fontsize(10)
    ax.yaxis.label.set_fontsize(10)

    ax.xaxis.get_offset_text().set_fontsize(10)
    ax.yaxis.get_offset_text().set_fontsize(10)

    plt.setp(ax.get_xticklabels(), fontsize=10)
    plt.setp(ax.get_yticklabels(), fontsize=10)

    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', '#f089cf']

    for i, (info, regret) in enumerate(zip(infos, regrets)):
        ranker_model_name = ranker2legend[info['ranking_model'].getName()]

        # Reuse the palette cyclically so that experiments beyond the number
        # of available colors are still plotted instead of being dropped.
        color = colors[i % len(colors)]

        # A plain mean vector of shape [n_impressions,] is promoted to a
        # single-row matrix; without this the row indexing below fails on it.
        regret = np.atleast_2d(regret)

        # Subsample regret if there is more than 10^5 iterations.
        if regret[0].shape[0] > 100000:
            indices = np.linspace(0, regret[0].shape[0] - 1, 100000).astype('int32')
            regret = regret[:, indices]
        else:
            indices = np.arange(regret[0].shape[0], dtype='int32')

        # The spread bands need the std, minima, and maxima rows.
        if regret.shape[0] > 1:
            tmp1 = np.maximum(regret[0] - regret[1], regret[2])
            tmp2 = np.minimum(regret[0] + regret[1], regret[3])

            ax.fill_between(indices, regret[2], tmp1, facecolor=color, edgecolor='none',
                            alpha=0.1, interpolate=True)

            ax.fill_between(indices, tmp1, tmp2, facecolor=color, edgecolor='none',
                            alpha=0.2, interpolate=True)

            ax.fill_between(indices, tmp2, regret[3], facecolor=color, edgecolor='none',
                            alpha=0.1, interpolate=True)

        ax.plot(indices, regret[0], ls='-', c=color, label=ranker_model_name)

    ax.legend(loc='lower right')


def plot_optimal_impressions(ax, info, n_impressions):
    '''
    Plots the values of `n_impressions` against their position (iteration).

    Parameters
    ----------
    ax: instance of matplotlib's axes
        Target axes to plot the curve into.

    info:
        The description of the experiment; read through the keys
        'ranking_model', 'click_model', 'cutoff', and 'query'.

    n_impressions: array
        Values to plot, one per iteration.
    '''
    ranker_name = info['ranking_model'].getName()
    click_name = info['click_model'].getName()
    cutoff = info['cutoff']
    qid = info['query']

    # Plot at most 10^5 evenly spaced points.
    n_points = n_impressions.shape[0]
    if n_points > 100000:
        xs = np.linspace(0, n_points - 1, 100000).astype('int32')
        n_impressions = n_impressions[xs]
    else:
        xs = np.arange(n_points, dtype='int32')

    ax.set_title('%s - %s@%d - Q%s' % (ranker_name, click_name, cutoff, qid))

    ax.plot(xs, n_impressions, 'k-')

    ax.set_ylabel('Impressions')
    ax.set_xlabel('Iteration')

    # Use the same font size for the title, axis labels, and offset texts.
    texts = (ax.title,
             ax.xaxis.label, ax.yaxis.label,
             ax.xaxis.get_offset_text(), ax.yaxis.get_offset_text())
    for text in texts:
        text.set_fontsize(10)

    for ticklabels in (ax.get_xticklabels(), ax.get_yticklabels()):
        plt.setp(ticklabels, fontsize=10)

# Cumulative Regret Curves for Ranking and Click Model Pairs

In [None]:
import os
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Directory that is walked recursively for '.nfo' experiment descriptions.
# Change this to force the visualization of experiments under this directory.
# DO NOT FORGET TO SET THESE TWO DIRECTORIES CONSISTENTLY!!!
# Alternative location: 'experiments/custom/mr_ucb_vs_kl'
EXPERIMENTS_DIRECTORY = 'experiments/mergerank_benchmark/RealMergeRankKLAlgorithm/run1'
# Directory into which the figures are written when plots are saved.
SAVE_PLOTS_DIRECTORY = 'figures'

def get_regret_filepath(experiment_info_filepath):
    '''
    Returns the path of the regret file that belongs to the given experiment
    description file: a trailing 'experiment.nfo' is replaced by 'regret.npy'.

    If the path does not end with 'experiment.nfo', 'regret.npy' is simply
    appended to it.
    '''
    suffix = 'experiment.nfo'
    # str.rstrip() removes a *set of characters*, not a suffix, and would
    # also eat trailing characters of whatever precedes the suffix.
    if experiment_info_filepath.endswith(suffix):
        experiment_info_filepath = experiment_info_filepath[:-len(suffix)]
    return experiment_info_filepath + 'regret.npy'

def show_regrets_widget(xscale='linear', yscale='linear', save_plots=False):
    '''
    Shows (or saves) the cumulative regret curves of the experiments found
    under EXPERIMENTS_DIRECTORY, one figure per ranking and click model pair
    with one subplot per experiment.

    Parameters
    ----------
    xscale, yscale: str
        Names of the axis scales. They are used only when `save_plots` is
        True; the interactive widget takes the scales from its own controls.

    save_plots: bool
        If True, a figure is saved into SAVE_PLOTS_DIRECTORY for every ranking
        and click model pair found and no widget is displayed. Otherwise
        selection controls are displayed and the figure is shown inline.

    Raises
    ------
    ValueError
        If no experiment with an existing regret file matches the selected
        ranking and click model.
    '''
    import sys

    experiment_filepaths = []
    experiment_specs = []

    def show_regrets(ranking_model_name, click_model_name, xscale, yscale, save_plots=False):
        # Only experiments whose regret file exists can be plotted.
        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() == click_model_name and
                        spec['ranking_model'].getName() == ranking_model_name and
                        os.path.exists(get_regret_filepath(experiment_filepaths[i]))]

        # Fail with a clear message instead of an obscure unpacking error.
        if not spec_indices:
            raise ValueError('No experiment with a stored regret was found for ranking '
                             'model %s and click model %s.'
                             % (ranking_model_name, click_model_name))

        specs, spec_indices = zip(*sorted([(experiment_specs[i], i) for i in spec_indices],
                                          key=lambda info: info[0]['query']))
        regrets = [np.load(get_regret_filepath(experiment_filepaths[i])) for i in spec_indices]

        # Two subplots per row; floor division keeps the row count an integer.
        n_rows = (len(specs) + 1) // 2
        n_cols = 2 if len(specs) > 1 else 1

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(9 * n_cols, 5 * n_rows))

        axes = axes.ravel() if len(specs) > 1 else [axes]

        for ax, info, regret in zip(axes, specs, regrets):
            plot_regret_curves(ax, info, regret, xscale=xscale, yscale=yscale)

        # Hide the unused last subplot of an odd-sized grid.
        if len(specs) > 1 and len(specs) % 2 == 1:
            axes[-1].axis('off')

        if save_plots:
            # Make sure the target directory exists before writing into it.
            if not os.path.isdir(SAVE_PLOTS_DIRECTORY):
                os.makedirs(SAVE_PLOTS_DIRECTORY)
            filename = ranking_model_name + '_' + click_model_name + '_' + xscale + '_' + yscale + '.png'
            fig.savefig(os.path.join(SAVE_PLOTS_DIRECTORY, filename), bbox_inches='tight')
        else:
            plt.tight_layout()
            plt.show()

        plt.close(fig)

    # Collect every experiment description below the experiments directory.
    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True, followlinks=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                # NOTE(review): the file is opened in text mode; this has to
                # match the way the descriptions were pickled - confirm.
                with open(fp) as ifile:
                    experiment_specs.append(pickle.load(ifile))

    ranking_model_names = set([spec['ranking_model'].getName() for spec in experiment_specs])
    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    if save_plots:
        for rm, cm in set([(spec['ranking_model'].getName(), spec['click_model'].getName())
                           for spec in experiment_specs]):
            # Progress message without a newline; 'done.' finishes the line.
            sys.stdout.write('Saving figures for %s %s models... ' % (rm, cm))
            show_regrets(rm, cm, xscale, yscale, save_plots)
            print('done.')
        return

    rmdd = Select(options=list(ranking_model_names), description='Ranking Model:', width='150px')
    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')
    xsdd = Select(options=['linear', 'sqrt', 'log'], description='X Scale:', width='100px', height='65px')
    ysdd = Select(options=['linear', 'sqrt', 'log'], description='Y Scale:', width='100px', height='65px')

    controls = HBox([rmdd, cmdd, xsdd, ysdd])
    # The container returned by interactive() is kept only in this local;
    # `controls` is what gets displayed.
    backend = interactive(show_regrets, ranking_model_name=rmdd, click_model_name=cmdd,
                          xscale=xsdd, yscale=ysdd)

    # Draw the initial figure as soon as the controls are displayed.
    controls.on_displayed(lambda _: show_regrets(rmdd.value, cmdd.value, xsdd.value, ysdd.value))

    display(controls)

# Display the interactive widget (save_plots=True would write the figures to
# files instead of displaying the controls).
show_regrets_widget(xscale='linear', yscale='linear', save_plots=False)

# Cumulative Regret Curves for Multiple Ranking Algorithms

In [None]:
import os
from itertools import groupby
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Directory that is walked recursively for '.nfo' experiment descriptions.
# Change this to force the visualization of experiments under this directory.
# DO NOT FORGET TO SET THESE TWO DIRECTORIES CONSISTENTLY!!!
EXPERIMENTS_DIRECTORY = 'experiments/60Q/yandex/MergeRankComparison'
# Directory into which the figures are written when plots are saved.
SAVE_PLOTS_DIRECTORY = 'figures/yandex'

# Create the output directory up front so that saving figures cannot fail
# because of a missing directory.
if not os.path.isdir(SAVE_PLOTS_DIRECTORY):
    os.makedirs(SAVE_PLOTS_DIRECTORY)

def get_regret_filepath(experiment_info_filepath):
    '''
    Returns the path of the regret file that belongs to the given experiment
    description file: a trailing 'experiment.nfo' is replaced by 'regret.npy'.

    If the path does not end with 'experiment.nfo', 'regret.npy' is simply
    appended to it.
    '''
    suffix = 'experiment.nfo'
    # str.rstrip() removes a *set of characters*, not a suffix, and would
    # also eat trailing characters of whatever precedes the suffix.
    if experiment_info_filepath.endswith(suffix):
        experiment_info_filepath = experiment_info_filepath[:-len(suffix)]
    return experiment_info_filepath + 'regret.npy'

def show_regrets_widget(click_models=None, xscale='log', yscale='log', save_plots=False, filename_prefix=''):
    '''
    Shows (or saves) the cumulative regret curves of the experiments found
    under EXPERIMENTS_DIRECTORY, one figure per click model with one subplot
    per query comparing the ranking models.

    Parameters
    ----------
    click_models: iterable of str or None
        Names of the click models to save figures for. Used only when
        `save_plots` is True; None stands for all click models found.

    xscale, yscale: str
        Names of the axis scales. They are used only when `save_plots` is
        True; the interactive widget takes the scales from its own controls.

    save_plots: bool
        If True, a figure is saved into SAVE_PLOTS_DIRECTORY for every click
        model and no widget is displayed. Otherwise selection controls are
        displayed and the figure is shown inline.

    filename_prefix: str
        Prefix of the names of the saved figure files.

    Raises
    ------
    ValueError
        If no experiment with an existing regret file matches the selected
        click model, or if the experiments on the same query differ in
        the number of impressions or in the cutoff.
    '''
    import sys

    experiment_filepaths = []
    experiment_specs = []

    def show_regrets(click_model_name, xscale, yscale, save_plots=False, filename_prefix=''):
        # Only experiments whose regret file exists can be plotted.
        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() == click_model_name and
                        os.path.exists(get_regret_filepath(experiment_filepaths[i]))]

        # Fail with a clear message instead of requesting an empty grid.
        if not spec_indices:
            raise ValueError('No experiment with a stored regret was found for '
                             'click model %s.' % click_model_name)

        q_specs = {}
        q_regrets = {}

        # groupby() over the list sorted by the same key yields every query
        # exactly once.
        for q, g1 in groupby(sorted([(experiment_specs[i], i) for i in spec_indices],
                                    key=lambda info: info[0]['query']),
                             key=lambda info: info[0]['query']):
            q_rm_specs, q_rm_spec_indices = zip(*g1)

            # The only thing that can go wrong in this place is that the number
            # of impressions or the cut-off were different in different experiments
            # on the same query and click model across different ranking models!
            n_impressions_check = set([q_rm_spec['n_impressions'] for q_rm_spec in q_rm_specs])
            cutoff_check = set([q_rm_spec['cutoff'] for q_rm_spec in q_rm_specs])

            if len(n_impressions_check) != 1:
                raise ValueError('Detected different number of impressions in experiments '
                                 ' with query %s, ranking model %s, and click model %s: %r'
                                 % (q, q_rm_specs[0]['ranking_model'],
                                    click_model_name, list(n_impressions_check)))

            if len(cutoff_check) != 1:
                raise ValueError('Detected different settings of cutoff in experiments '
                                 ' with query %s, ranking model %s, and click model %s: %r'
                                 % (q, q_rm_specs[0]['ranking_model'],
                                    click_model_name, list(cutoff_check)))

            q_specs[q] = list(q_rm_specs)
            q_regrets[q] = [np.load(get_regret_filepath(experiment_filepaths[i]))
                            for i in q_rm_spec_indices]

        # Two subplots per row; floor division keeps the row count an integer.
        n_rows = (len(q_specs) + 1) // 2
        n_cols = 2 if len(q_specs) > 1 else 1

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(9 * n_cols, 5 * n_rows))

        axes = axes.ravel() if len(q_specs) > 1 else [axes]

        qs = sorted(q_specs.keys())

        for ax, infos, regrets in zip(axes, [q_specs[q] for q in qs], [q_regrets[q] for q in qs]):
            plot_multiple_regret_curves(ax, infos, regrets, xscale=xscale,
                                        yscale=yscale)

        # Hide the unused last subplot of an odd-sized grid.
        if len(q_specs) > 1 and len(q_specs) % 2 == 1:
            axes[-1].axis('off')

        if save_plots:
            filename = filename_prefix + click_model_name + '_' + xscale + '_' + yscale + '.png'
            fig.savefig(os.path.join(SAVE_PLOTS_DIRECTORY, filename), bbox_inches='tight')
        else:
            plt.tight_layout()
            plt.show()

        plt.close(fig)

    # Collect every experiment description below the experiments directory.
    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True, followlinks=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                # NOTE(review): the file is opened in text mode; this has to
                # match the way the descriptions were pickled - confirm.
                with open(fp) as ifile:
                    experiment_specs.append(pickle.load(ifile))

    if save_plots:
        if click_models is None:
            click_models = set([spec['click_model'].getName() for spec in experiment_specs])

        for cm in click_models:
            # Progress message without a newline; 'done.' finishes the line.
            sys.stdout.write('Saving figures for %s model... ' % (cm,))
            show_regrets(cm, xscale, yscale, save_plots, filename_prefix)
            print('done.')
        return

    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')
    xsdd = Select(options=['linear', 'sqrt', 'log'], description='X Scale:', width='100px', height='65px')
    ysdd = Select(options=['linear', 'sqrt', 'log'], description='Y Scale:', width='100px', height='65px')

    controls = HBox([cmdd, xsdd, ysdd])
    # The container returned by interactive() is kept only in this local;
    # `controls` is what gets displayed.
    backend = interactive(show_regrets, click_model_name=cmdd, xscale=xsdd, yscale=ysdd)

    # Draw the initial figure as soon as the controls are displayed.
    controls.on_displayed(lambda _: show_regrets(cmdd.value, xsdd.value, ysdd.value))

    display(controls)

# Save one figure per click model (no widget is displayed in this mode);
# the names of the files start with the given prefix.
show_regrets_widget(xscale='linear', yscale='linear', save_plots=True, filename_prefix='MergeRankWithAndWitoutReset')

# Cumulative Regret Curves for Query and Click Model Pairs

In [None]:
import os
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Directory that is walked recursively for '.nfo' experiment descriptions.
# Change this to force the visualization of experiments under this directory.
EXPERIMENTS_DIRECTORY = 'experiments'

def get_regret_filepath(experiment_info_filepath):
    '''
    Returns the path of the regret file that belongs to the given experiment
    description file: a trailing 'experiment.nfo' is replaced by 'regret.npy'.

    If the path does not end with 'experiment.nfo', 'regret.npy' is simply
    appended to it.
    '''
    suffix = 'experiment.nfo'
    # str.rstrip() removes a *set of characters*, not a suffix, and would
    # also eat trailing characters of whatever precedes the suffix.
    if experiment_info_filepath.endswith(suffix):
        experiment_info_filepath = experiment_info_filepath[:-len(suffix)]
    return experiment_info_filepath + 'regret.npy'

def show_regrets_widget():
    '''
    Displays controls for selecting a query, a click model, and the axis
    scales, and shows the cumulative regret curves of the matching experiments
    found under EXPERIMENTS_DIRECTORY, one subplot per experiment.

    Raises
    ------
    ValueError
        If no experiment with an existing regret file matches the selected
        query and click model.
    '''
    experiment_filepaths = []
    experiment_specs = []

    def show_regrets(query, click_model_name, xscale, yscale):
        # Only experiments whose regret file exists can be plotted.
        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() == click_model_name and
                        spec['query'] == query and
                        os.path.exists(get_regret_filepath(experiment_filepaths[i]))]

        # Fail with a clear message instead of an obscure unpacking error.
        if not spec_indices:
            raise ValueError('No experiment with a stored regret was found for '
                             'query %s and click model %s.' % (query, click_model_name))

        # NOTE(review): all selected experiments share the same query, so this
        # sort keeps them in the order of discovery - confirm whether sorting
        # by the ranking model was intended.
        specs, spec_indices = zip(*sorted([(experiment_specs[i], i) for i in spec_indices],
                                          key=lambda info: info[0]['query']))
        regrets = [np.load(get_regret_filepath(experiment_filepaths[i])) for i in spec_indices]

        # Two subplots per row; floor division keeps the row count an integer.
        n_rows = (len(specs) + 1) // 2
        n_cols = 2 if len(specs) > 1 else 1

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 2.5 * n_rows))

        axes = axes.ravel() if len(specs) > 1 else [axes]

        for ax, info, regret in zip(axes, specs, regrets):
            plot_regret_curves(ax, info, regret, xscale=xscale, yscale=yscale)

        # Hide the unused last subplot of an odd-sized grid.
        if len(specs) > 1 and len(specs) % 2 == 1:
            axes[-1].axis('off')

        plt.tight_layout()
        plt.show()

        # Release the figure so that repeated selections do not accumulate
        # open figures.
        plt.close(fig)

    # Collect every experiment description below the experiments directory.
    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                # NOTE(review): the file is opened in text mode; this has to
                # match the way the descriptions were pickled - confirm.
                with open(fp) as ifile:
                    experiment_specs.append(pickle.load(ifile))

    queries = sorted(set([spec['query'] for spec in experiment_specs]))
    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    qdd = Select(options=list(queries), description='Queries:', width='150px', height='175px')
    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')
    xsdd = Select(options=['linear', 'sqrt', 'log'], description='X Scale:', width='100px', height='65px')
    ysdd = Select(options=['linear', 'sqrt', 'log'], description='Y Scale:', width='100px', height='65px')

    controls = HBox([qdd, cmdd, xsdd, ysdd])
    # The container returned by interactive() is kept only in this local;
    # `controls` is what gets displayed.
    backend = interactive(show_regrets, query=qdd, click_model_name=cmdd, xscale=xsdd, yscale=ysdd)

    # Draw the initial figure as soon as the controls are displayed.
    controls.on_displayed(lambda _: show_regrets(qdd.value, cmdd.value, xsdd.value, ysdd.value))

    display(controls)

# Display the query / click model / axis scale selection controls.
show_regrets_widget()

# Average Cumulative Regret Curves for Single Ranking Algorithm

In [None]:
import os
from itertools import groupby
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Directory that is walked recursively for '.nfo' experiment descriptions.
# Change this to force the visualization of experiments under this directory.
# DO NOT FORGET TO SET THESE TWO DIRECTORIES CONSISTENTLY!!!
EXPERIMENTS_DIRECTORY = 'experiments/mergerank_benchmark/RealMergeRankKLAlgorithm'
# Directory into which the figures are written when plots are saved.
SAVE_PLOTS_DIRECTORY = 'figures/averages'

# Create the output directory up front so that saving figures cannot fail
# because of a missing directory.
if not os.path.isdir(SAVE_PLOTS_DIRECTORY):
    os.makedirs(SAVE_PLOTS_DIRECTORY)

def get_regret_filepath(experiment_info_filepath):
    '''
    Returns the path of the regret file that belongs to the given experiment
    description file: a trailing 'experiment.nfo' is replaced by 'regret.npy'.

    If the path does not end with 'experiment.nfo', 'regret.npy' is simply
    appended to it.
    '''
    suffix = 'experiment.nfo'
    # str.rstrip() removes a *set of characters*, not a suffix, and would
    # also eat trailing characters of whatever precedes the suffix.
    if experiment_info_filepath.endswith(suffix):
        experiment_info_filepath = experiment_info_filepath[:-len(suffix)]
    return experiment_info_filepath + 'regret.npy'

def show_regrets_widget(xscale='linear', yscale='linear', save_plots=False):
    experiment_filepaths = []
    experiment_specs = []

    def show_regrets(ranking_model_name, click_model_name, xscale, yscale, save_plots=False):
        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() == click_model_name and
                        spec['ranking_model'].getName() == ranking_model_name and
                        os.path.exists(get_regret_filepath(experiment_filepaths[i]))]
        
        specs = []
        regrets = []
        
        for q, g in groupby(sorted([(experiment_specs[i], i) for i in spec_indices],
                                         key=lambda info: info[0]['query']),
                                  key=lambda info: info[0]['query']):
            q_specs, q_spec_indices = zip(*g)
            
            # The only thing that can go wrong in this place is that the number 
            # of impressions or the cut-off were different in different experiments
            # on the same query, ranking model, and click model!
            n_impressions_check = set([q_spec['n_impressions'] for q_spec in q_specs])
            cutoff_check = set([q_spec['cutoff'] for q_spec in q_specs])
            
            if len(n_impressions_check) != 1:
                raise ValueError('Detected different number of impressions in experiments '
                                 ' with query %s, ranking model %s, and click model %s: %r'
                                % (q, ranking_model_name, click_model_name, list(n_impressions_check)))
            
            if len(cutoff_check) != 1:
                raise ValueError('Detected different settings of cutoff in experiments '
                                 ' with query %s, ranking model %s, and click model %s: %r'
                                % (q, ranking_model_name, click_model_name, list(cutoff_check)))
            
            regrets_collection = np.vstack([np.load(get_regret_filepath(experiment_filepaths[i]))
                                  for i in q_spec_indices]).cumsum(axis=1)
            
            # All experiments specifications are equal (except for the ranking model,
            # that is not used) so we use just the 1st one in the list.
            specs.append(q_specs[0])
            regrets.append(np.vstack([regrets_collection.mean(axis=0),
                                      regrets_collection.std(axis=0),
                                      regrets_collection.min(axis=0),
                                      regrets_collection.max(axis=0)]))

        n_rows = (len(specs) + 1) / 2
        n_cols = 2 if len(specs) > 1 else 1

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(9 * n_cols, 5 * n_rows))

        axes = axes.ravel() if len(specs) > 1 else [axes]

        for ax, info, regret in zip(axes, specs, regrets):
            plot_average_regret_curves(ax, info, regret, xscale=xscale, yscale=yscale)

        if len(specs) > 1 and len(specs) % 2 == 1:
            axes[-1].axis('off')

        if save_plots:
            filename = ranking_model_name + '_' + click_model_name + '_' + xscale + '_' + yscale + '.png'
            fig.savefig(os.path.join(SAVE_PLOTS_DIRECTORY, filename), bbox_inches='tight')
        else:
            plt.tight_layout()
            plt.show()

        plt.close(fig)

    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                with open(fp) as ifile:
                    experiment_specs.append(pickle.load(ifile))
    
    if save_plots:
        for rm, cm in set([(spec['ranking_model'].getName(), spec['click_model'].getName())
                           for spec in experiment_specs]):
            print 'Saving figures for', rm, cm, 'models...',
            show_regrets(rm, cm, xscale, yscale, save_plots)
            print 'done.'
        return
    
    ranking_model_names = set([spec['ranking_model'].getName() for spec in experiment_specs])
    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    rmdd = Select(options=list(ranking_model_names), description='Ranking Model:', width='150px')
    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')
    xsdd = Select(options=['linear', 'sqrt', 'log'], description='X Scale:', width='100px', height='65px')
    ysdd = Select(options=['linear', 'sqrt', 'log'], description='Y Scale:', width='100px', height='65px')
    
    controls = HBox([rmdd, cmdd, xsdd, ysdd])
    backend = interactive(show_regrets, ranking_model_name=rmdd, click_model_name=cmdd,
                          xscale=xsdd, yscale=ysdd)

    controls.on_displayed(lambda _: show_regrets(rmdd.value, cmdd.value, xsdd.value, ysdd.value))

    display(controls)

# Display the interactive controls; pass `save_plots=True` to write the
# figures into `SAVE_PLOTS_DIRECTORY` instead.
show_regrets_widget(save_plots=False)

# Average Cumulative Regret Curves for Multiple Ranking Algorithms

In [None]:
import os
from itertools import groupby
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Root directory that is searched recursively for experiment `*.nfo` files.
# Change this to force the visualization of experiments under this directory.
# DO NOT FORGET TO SET THESE TWO DIRECTORIES CONSISTENTLY!!!
EXPERIMENTS_DIRECTORY = 'experiments/mergerank_benchmark/algorithms'
# Directory into which the figures are written when plots are saved.
SAVE_PLOTS_DIRECTORY = 'figures/averages'

# Make sure the output directory exists before any figure is saved into it.
if not os.path.isdir(SAVE_PLOTS_DIRECTORY):
    os.makedirs(SAVE_PLOTS_DIRECTORY)

def get_regret_filepath(experiment_info_filepath):
    """Return the path of the regret file that belongs to an experiment info file.

    The trailing ``experiment.nfo`` of `experiment_info_filepath` is replaced
    by ``regret.npy``.
    """
    suffix = 'experiment.nfo'
    if experiment_info_filepath.endswith(suffix):
        # Cut the suffix as a whole. str.rstrip() treats its argument as a set
        # of characters and would also eat trailing characters of the prefix
        # (e.g. 'algo.experiment.nfo' would end up as 'alg').
        prefix = experiment_info_filepath[:-len(suffix)]
    else:
        # Unexpected file name: keep the previous character-stripping result.
        prefix = experiment_info_filepath.rstrip(suffix)
    return prefix + 'regret.npy'

def show_regrets_widget(click_models=None, xscale='log', yscale='linear', xlim=None,
                        save_plots=False, filename_prefix='', average_only=False, reward=False):
    experiment_filepaths = []
    experiment_specs = []

    def show_regrets(click_model_name, xscale, yscale, save_plots=False, filename_prefix=''):
        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() == click_model_name and
                        os.path.exists(get_regret_filepath(experiment_filepaths[i]))]
        
        q_specs = {}
        q_regrets = {}
        
        for q, g1 in groupby(sorted([(experiment_specs[i], i) for i in spec_indices],
                                         key=lambda info: info[0]['query']),
                             key=lambda info: info[0]['query']):
        
            _, q_spec_indices = zip(*g1)

            for q_rm, g2 in groupby(sorted([(experiment_specs[i], i) for i in q_spec_indices],
                                           key=lambda info: info[0]['ranking_model'].getName()),
                                    key=lambda info: info[0]['ranking_model'].getName()):
            
                q_rm_specs, q_rm_spec_indices = zip(*g2)
            
                # The only thing that can go wrong in this place is that the number 
                # of impressions or the cut-off were different in different experiments
                # on the same query, ranking model, and click model!
                n_impressions_check = set([q_rm_spec['n_impressions'] for q_rm_spec in q_rm_specs])
                cutoff_check = set([q_rm_spec['cutoff'] for q_rm_spec in q_rm_specs])

                if len(n_impressions_check) != 1:
                    raise ValueError('Detected different number of impressions in experiments '
                                     ' with query %s, ranking model %s, and click model %s: %r'
                                    % (q, q_rm_specs[0]['ranking_model'],
                                       click_model_name, list(n_impressions_check)))

                if len(cutoff_check) != 1:
                    raise ValueError('Detected different settings of cutoff in experiments '
                                     ' with query %s, ranking model %s, and click model %s: %r'
                                    % (q, q_rm_specs[0]['ranking_model'],
                                       click_model_name, list(cutoff_check)))
                
                click_model = q_rm_specs[0]['click_model']
                n_impressions = q_rm_specs[0]['n_impressions']
                n_documents = q_rm_specs[0]['n_documents']
                cutoff = q_rm_specs[0]['cutoff']
                
                # Get the ideal top-`cutoff` ranking for the query and click model ...
                ideal_ranking = click_model.get_ideal_ranking(cutoff=cutoff)

                # ... and compute its clickthrough rate.
                ideal_ctr = click_model.get_clickthrough_rate(ideal_ranking,
                                                              np.arange(n_documents, dtype='int32'),
                                                              cutoff=cutoff)

                regrets_collection = np.vstack([np.load(get_regret_filepath(experiment_filepaths[i]))
                                      for i in q_rm_spec_indices]).cumsum(axis=1)

                if q not in q_specs:
                    q_specs[q] = []
                    q_regrets[q] = []
                    
                # Contents of all experiment specifications are equal (except for the ranking model,
                # so we use just the 1st one in the list.
                q_specs[q].append(q_rm_specs[0])
                
                if reward:
                    regrets_collection /= np.arange(1, 1 + n_impressions, dtype='f4')[None, :]
                    regrets_collection = ideal_ctr - regrets_collection
                    
                    if average_only:
                        q_regrets[q].append(regrets_collection.mean(axis=0)[None, :])
                    else:
                        q_regrets[q].append(np.vstack([regrets_collection.mean(axis=0),
                                                       regrets_collection.std(axis=0),
                                                       regrets_collection.min(axis=0),
                                                       regrets_collection.max(axis=0)]))
                else:
                    if average_only:
                        q_regrets[q].append(regrets_collection.mean(axis=0)[None, :])
                    else:
                        q_regrets[q].append(np.vstack([regrets_collection.mean(axis=0),
                                                       regrets_collection.std(axis=0),
                                                       regrets_collection.min(axis=0),
                                                       regrets_collection.max(axis=0)]))
                del regrets_collection
                
        n_rows = (len(q_specs) + 1) / 2
        n_cols = 2 if len(q_specs) > 1 else 1

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(9 * n_cols, 5 * n_rows))

        axes = axes.ravel() if len(q_specs) > 1 else [axes]

        qs = sorted(q_specs.keys())

        for ax, infos, regrets in zip(axes, [q_specs[q] for q in qs], [q_regrets[q] for q in qs]):
            plot_multiple_average_regret_curves(ax, infos, regrets, xscale=xscale,
                                                yscale=yscale, xlim=xlim, reward=reward)

        if len(q_specs) > 1 and len(q_specs) % 2 == 1:
            axes[-1].axis('off')

        if save_plots:
            filename = filename_prefix + click_model_name + '_AVG_REG_' + xscale + '_' + yscale + '.png'
            fig.savefig(os.path.join(SAVE_PLOTS_DIRECTORY, filename), bbox_inches='tight')
        else:
            plt.tight_layout()
            plt.show()

        plt.close(fig)

    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True, followlinks=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                with open(fp) as ifile:
                    experiment_specs.append(pickle.load(ifile))

    if save_plots:
        if click_models is None:
            click_models = set([spec['click_model'].getName() for spec in experiment_specs])
            
        for cm in click_models:
            print 'Saving figures for', cm, 'model...',
            show_regrets(cm, xscale, yscale, save_plots, filename_prefix)
            print 'done.'
        return

    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')
    xsdd = Select(options=['linear', 'sqrt', 'log'], description='X Scale:', width='100px', height='65px')
    ysdd = Select(options=['linear', 'sqrt', 'log'], description='Y Scale:', width='100px', height='65px')
    
    controls = HBox([cmdd, xsdd, ysdd])
    backend = interactive(show_regrets, click_model_name=cmdd, xscale=xsdd, yscale=ysdd)

    controls.on_displayed(lambda _: show_regrets(cmdd.value, xsdd.value, ysdd.value))

    display(controls)

# Alternative call that restricts the x axis to a range of impressions:
# show_regrets_widget(xlim=[1e5, 1e7], save_plots=False)

# Display the interactive controls showing only the average curves; with
# `save_plots=True` the figures are saved under names starting with
# `filename_prefix` instead.
show_regrets_widget(xscale='linear', yscale='linear', average_only=True, reward=False,
                    filename_prefix='MergeRankBenchmarkAveragesOnly_', save_plots=False)

# Average Cumulative Regret Curves over All Queries for Multiple Ranking Algorithms

In [None]:
import os
from itertools import groupby
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Root directory that is searched recursively for experiment `*.nfo` files.
# Change this to force the visualization of experiments under this directory.
# DO NOT FORGET TO SET THESE TWO DIRECTORIES CONSISTENTLY!!!
EXPERIMENTS_DIRECTORY = 'experiments/mergerank_benchmark/algorithms'
# Directory into which the figures are written when plots are saved.
SAVE_PLOTS_DIRECTORY = 'figures/averages'

# Make sure the output directory exists before any figure is saved into it.
if not os.path.isdir(SAVE_PLOTS_DIRECTORY):
    os.makedirs(SAVE_PLOTS_DIRECTORY)

def get_regret_filepath(experiment_info_filepath):
    """Return the path of the regret file that belongs to an experiment info file.

    The trailing ``experiment.nfo`` of `experiment_info_filepath` is replaced
    by ``regret.npy``.
    """
    suffix = 'experiment.nfo'
    if experiment_info_filepath.endswith(suffix):
        # Cut the suffix as a whole. str.rstrip() treats its argument as a set
        # of characters and would also eat trailing characters of the prefix
        # (e.g. 'algo.experiment.nfo' would end up as 'alg').
        prefix = experiment_info_filepath[:-len(suffix)]
    else:
        # Unexpected file name: keep the previous character-stripping result.
        prefix = experiment_info_filepath.rstrip(suffix)
    return prefix + 'regret.npy'

def show_rewards_widget(click_models=None, xscale='log', yscale='linear', xlim=None,
                        save_plots=False, filename_prefix='', average_only=False):
    experiment_filepaths = []
    experiment_specs = []

    def show_rewards(click_model_name, xscale, yscale, save_plots=False, filename_prefix=''):
        # Allowing averaging over cumulative rewards across multiple click models.
        if not isinstance(click_model_name, list):
            click_model_name = [click_model_name]

        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() in click_model_name and
                        os.path.exists(get_regret_filepath(experiment_filepaths[i]))]
        
        if len(spec_indices) == 0:
            raise ValueError('no experiment was found for with %s click model(s)'
                             % ', '.join(click_model_name))
        
        all_rm_specs = {}
        all_rm_regrets = {}
        
        for rm, g in groupby(sorted([(experiment_specs[i], i) for i in spec_indices],
                                    key=lambda info: info[0]['ranking_model'].getName()),
                             key=lambda info: info[0]['ranking_model'].getName()):

            rm_specs, rm_spec_indices = zip(*g)

            # The only thing that can go wrong in this place is that the number 
            # of impressions or the cut-off were different in different experiments
            # on the same query, ranking model, and click model!
            n_impressions_check = set([rm_spec['n_impressions'] for rm_spec in rm_specs])
            cutoff_check = set([rm_spec['cutoff'] for rm_spec in rm_specs])

            if len(n_impressions_check) != 1:
                raise ValueError('Detected different number of impressions in experiments '
                                 ' with ranking model %s, and click model %s: %r'
                                % (rm, click_model_name, list(n_impressions_check)))

            if len(cutoff_check) != 1:
                raise ValueError('Detected different settings of cutoff in experiments '
                                 ' with ranking model %s, and click model %s: %r'
                                % (rm, click_model_name, list(cutoff_check)))

            click_models = [rm_spec['click_model'] for rm_spec in rm_specs]
            n_impressions = rm_specs[0]['n_impressions']
            n_documents = rm_specs[0]['n_documents']
            cutoff = rm_specs[0]['cutoff']

            ideal_ctrs = np.array([cm.get_clickthrough_rate(cm.get_ideal_ranking(cutoff=cutoff),
                                                            np.arange(n_documents, dtype='int32'),
                                                            cutoff=cutoff)
                                   for cm in click_models])[:, None]

            regrets_collection = np.vstack([np.load(get_regret_filepath(experiment_filepaths[i]))
                                  for i in rm_spec_indices]).cumsum(axis=1)

            # Contents of all experiment specifications are equal except for the query,
            # so we use just the 1st one in the list.
            all_rm_specs[rm] = rm_specs[0]

            regrets_collection /= np.arange(1, 1 + n_impressions, dtype='f4')[None, :]
            regrets_collection = ideal_ctrs - regrets_collection

            if average_only:
                all_rm_regrets[rm] = regrets_collection.mean(axis=0)[None, :]
            else:
                all_rm_regrets[rm] = np.vstack([regrets_collection.mean(axis=0),
                                                regrets_collection.std(axis=0),
                                                regrets_collection.min(axis=0),
                                                regrets_collection.max(axis=0)])
            del regrets_collection

        fig, ax = plt.subplots(figsize=(7 * np.sqrt(2), 7))

        rms = sorted(all_rm_specs.keys())
        
        plot_multiple_average_reward_curves(ax, [all_rm_specs[rm] for rm in rms],
                                            [all_rm_regrets[rm] for rm in rms],
                                            xscale=xscale, yscale=yscale, xlim=xlim)

        if save_plots:
            filename = filename_prefix + '+'.join(click_model_name) + '_AVG_REWARD_' + xscale + '_' + yscale + '.png'
            fig.savefig(os.path.join(SAVE_PLOTS_DIRECTORY, filename), bbox_inches='tight')
        else:
            plt.tight_layout()
            plt.show()

        plt.close(fig)

    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True, followlinks=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                with open(fp) as ifile:
                    experiment_specs.append(pickle.load(ifile))

    if save_plots:
        if click_models is None:
            click_models = set([spec['click_model'].getName() for spec in experiment_specs])
            
        for cm in click_models:
            print 'Saving figures for', cm, 'model...',
            show_rewards(cm, xscale, yscale, save_plots, filename_prefix)
            print 'done.'
        return

    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')
    xsdd = Select(options=['linear', 'sqrt', 'log'], description='X Scale:', width='100px', height='65px')
    ysdd = Select(options=['linear', 'sqrt', 'log'], description='Y Scale:', width='100px', height='65px')

    # Set the initially selected axis scale.
    xsdd.value = xscale
    ysdd.value = yscale
    
    controls = HBox([cmdd, xsdd, ysdd])
    backend = interactive(show_rewards, click_model_name=cmdd, xscale=xsdd, yscale=ysdd)

    controls.on_displayed(lambda _: show_rewards(cmdd.value, xsdd.value, ysdd.value))

    display(controls)

In [None]:
# Display the interactive controls, showing only the average reward curves.
show_rewards_widget(xscale='log', yscale='linear', average_only=True,
                    filename_prefix='MergeRankBenchmarkRewardAveragesOnly_', save_plots=False)

In [None]:
# These calls save the average cumulative reward of each ranking algorithm across
# all the experiments stored under the directory `EXPERIMENTS_DIRECTORY` set in
# the cell above. The click models over which the reward is averaged are
# determined by the first parameter - `click_models`: every item produces one
# figure, and an item that is itself a list of names averages over the
# experiments of all the listed click models.

# If you want to plot 1 sigma, min/max bounds, set `average_only` to False.

# PBM experiments only.
show_rewards_widget(click_models=['PBM'], xscale='log', yscale='linear', average_only=True,
                    filename_prefix='MergeRankBenchmarkRewardAveragesOnly_', save_plots=True)

# CM experiments only.
show_rewards_widget(click_models=['CM'], xscale='log', yscale='linear', average_only=True,
                    filename_prefix='MergeRankBenchmarkRewardAveragesOnly_', save_plots=True)

# PBM and CM experiments averaged together into a single figure.
show_rewards_widget(click_models=[['PBM', 'CM']] , xscale='log', yscale='linear', average_only=True,
                    filename_prefix='MergeRankBenchmarkRewardAveragesOnly_', save_plots=True)

# Backward in Time Relative Number of Impressions with Optimal Ranking

In [None]:
import os
import cPickle as pickle
from ipywidgets import Select, Dropdown, interactive, HBox
from IPython.display import display

# Root directory that is searched recursively for experiment `*.nfo` files.
# Change this to force the visualization of experiments under this directory.
EXPERIMENTS_DIRECTORY = 'experiments'

def get_rankings_filepath(experiment_info_filepath):
    """Return the path of the rankings file that belongs to an experiment info file.

    The trailing ``experiment.nfo`` of `experiment_info_filepath` is replaced
    by ``rankings.npy``.
    """
    suffix = 'experiment.nfo'
    if experiment_info_filepath.endswith(suffix):
        # Cut the suffix as a whole. str.rstrip() treats its argument as a set
        # of characters and would also eat trailing characters of the prefix
        # (e.g. 'algo.experiment.nfo' would end up as 'alg').
        prefix = experiment_info_filepath[:-len(suffix)]
    else:
        # Unexpected file name: keep the previous character-stripping result.
        prefix = experiment_info_filepath.rstrip(suffix)
    return prefix + 'rankings.npy'

def compute_n_impressions(rankings, optimal_ranking):
    """Return, for each impression, the share of optimal rankings from it onwards.

    Row ``t`` of `rankings` counts as a hit when all of its entries equal
    `optimal_ranking`. Element ``t`` of the result is the number of hits among
    the rows ``t, t + 1, ..., last`` divided by the number of those rows.
    """
    n_rows = rankings.shape[0]
    is_optimal = (rankings == optimal_ranking).all(axis=1)
    # Accumulate the hits from the last impression backwards ...
    shares = is_optimal[::-1].cumsum() * 1.0
    shares /= np.arange(1, n_rows + 1)
    # ... and flip the result back into chronological order.
    return shares[::-1]

def show_regrets_widget(save_plots=False):
    """Show or save the backward-in-time share of impressions with the optimal ranking.

    All ``*.nfo`` experiment specifications found under ``EXPERIMENTS_DIRECTORY``
    are loaded. For a given ranking model and click model, the stored rankings
    of every experiment are compared with the ideal ranking of its click model
    (see `compute_n_impressions`) and the result is plotted in one subplot per
    experiment, ordered by query.

    Parameters
    ----------
    save_plots : bool
        If True, one PDF per (ranking model, click model) pair with data is
        written into the ``Figs`` directory and no widget is displayed.
        Otherwise interactive selection boxes are displayed.
    """
    experiment_filepaths = []
    experiment_specs = []

    def show_regrets(ranking_model_name, click_model_name, save_plots=False):
        # Experiments of the requested models that have stored rankings.
        spec_indices = [i for i, spec in enumerate(experiment_specs)
                        if spec['click_model'].getName() == click_model_name and
                        spec['ranking_model'].getName() == ranking_model_name and
                        os.path.exists(get_rankings_filepath(experiment_filepaths[i]))]

        if not spec_indices:
            # Not every (ranking model, click model) pair has experiments;
            # unpacking zip() of an empty list below would raise ValueError.
            print('No experiments with stored rankings found for ranking model %s '
                  'and click model %s.' % (ranking_model_name, click_model_name))
            return

        specs, spec_indices = zip(*sorted([(experiment_specs[i], i) for i in spec_indices],
                                          key=lambda info: info[0]['query']))
        specs_n_impressions = []

        for i in spec_indices:
            rankings = np.load(get_rankings_filepath(experiment_filepaths[i]))
            cutoff = experiment_specs[i]['cutoff']
            optimal_ranking = experiment_specs[i]['click_model'].get_ideal_ranking(cutoff, satisfied=False)
            # Only the top-`cutoff` positions are compared with the ideal ranking.
            specs_n_impressions.append(compute_n_impressions(rankings[:, :cutoff], optimal_ranking))

        # Two plots per row; floor division keeps the row count an integer.
        n_rows = (len(specs) + 1) // 2
        n_cols = 2 if len(specs) > 1 else 1

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 2.5 * n_rows))

        axes = axes.ravel() if len(specs) > 1 else [axes]

        for ax, info, n_impressions in zip(axes, specs, specs_n_impressions):
            plot_optimal_impressions(ax, info, n_impressions)

        # Hide the unused last cell of the grid when the number of plots is odd.
        if len(specs) > 1 and len(specs) % 2 == 1:
            axes[-1].axis('off')

        if save_plots:
            # savefig() does not create missing directories.
            if not os.path.isdir('Figs'):
                os.makedirs('Figs')
            fig.savefig('Figs/' + ranking_model_name + '_' + click_model_name + '_OptImpCnt.pdf',
                        bbox_inches='tight')
        else:
            plt.tight_layout()
            plt.show()

        # Release the figure; otherwise figures pile up when many are saved.
        plt.close(fig)

    # Collect every experiment specification below the experiments directory.
    for root, dirs, files in os.walk(EXPERIMENTS_DIRECTORY, topdown=True):
        for fn in files:
            if fn.endswith('.nfo'):
                fp = os.path.join(root, fn)
                experiment_filepaths.append(fp)
                # Pickles have to be read in binary mode to be portable.
                with open(fp, 'rb') as ifile:
                    experiment_specs.append(pickle.load(ifile))

    ranking_model_names = set([spec['ranking_model'].getName() for spec in experiment_specs])
    click_model_names = set([spec['click_model'].getName() for spec in experiment_specs])

    if save_plots:
        for gl_ranking_model_name in ranking_model_names:
            for gl_click_model_name in click_model_names:
                print (gl_ranking_model_name, gl_click_model_name, save_plots)
                show_regrets(gl_ranking_model_name, gl_click_model_name, save_plots)
        return

    rmdd = Select(options=list(ranking_model_names), description='Ranking Model:', width='150px')
    cmdd = Select(options=list(click_model_names), description='Click Model:', width='75px')

    controls = HBox([rmdd, cmdd])
    # The returned object is not displayed; the call is kept because it ties
    # the selection boxes to `show_regrets`.
    backend = interactive(show_regrets, ranking_model_name=rmdd, click_model_name=cmdd)

    # Draw the initial selection as soon as the controls appear.
    controls.on_displayed(lambda _: show_regrets(rmdd.value, cmdd.value))

    display(controls)

# Display the interactive controls; pass `save_plots=True` to write the
# figures into files instead.
show_regrets_widget(save_plots=False)

# Click Model Attractiveness Probabilities of Documents 

In [None]:
from ipywidgets import interact, interactive, Dropdown, FloatText, VBox, HBox, fixed

from IPython.display import display

# Directory into which the relevance figures are saved.
SAVE_PLOTS_DIRECTORY = 'figures'

# Collection indexed by click model name and then by query; every entry holds
# (among others) the 'relevances' of the query's documents. Pickles have to be
# read in binary mode to be portable.
with open('data/60Q/model_query_collection.pkl', 'rb') as ifile:
    MQD = pickle.load(ifile)

def show_all_queries_relevances(click_model_name, save_plots=fixed(False)):
    """Plot the sorted document relevances of every query of a click model.

    The queries of ``MQD[click_model_name]`` are drawn in sorted order into
    a grid with two columns, one bar chart of the relevances (in decreasing
    order) per query.

    Parameters
    ----------
    click_model_name : str
        Key of the click model in ``MQD``.
    save_plots : bool or ipywidgets.fixed
        If true, the figure is saved as ``<click_model_name>_relevances.png``
        into ``SAVE_PLOTS_DIRECTORY``, otherwise it is shown. The default is
        wrapped in `fixed` so that `interact` does not create a control for it.
    """
    # When the function is called directly without `save_plots`, the default
    # is the `fixed` wrapper itself, which is always truthy - unwrap it.
    if isinstance(save_plots, fixed):
        save_plots = save_plots.value

    queries = sorted(MQD[click_model_name].keys())
    n_queries = len(queries)

    if n_queries == 0:
        print('No queries found for click model %s.' % (click_model_name,))
        return

    # The grid is sized by the queries that are actually plotted.
    n_rows = (n_queries + 1) // 2

    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(n_rows, 2, figsize=(12, n_queries),
                             sharex=True, sharey=True, squeeze=False)

    for i, query in enumerate(queries):
        row, col = divmod(i, 2)
        ax = axes[row, col]

        relevances = sorted(MQD[click_model_name][query]['relevances'], reverse=True)

        ax.bar(np.arange(len(relevances)), relevances, 0.75,
               alpha=0.7, color='green', align='center')

        ax.set_title('Query: %s' % query)
        ax.grid(axis='y', which='major')

        # x ticks only under the bottom-most plot of each column (the one
        # with no other plot below it), y ticks only in the left column.
        is_bottom = i + 2 >= n_queries
        is_left = col == 0

        ax.tick_params(axis='both', which='major', left=is_left, direction='out',
                       top=False, right=False, bottom=is_bottom, labelleft=is_left,
                       labeltop=False, labelright=False, labelbottom=is_bottom)

    # Hide the unused last cell of the grid when the number of queries is odd.
    if n_queries % 2 == 1:
        axes[-1, -1].axis('off')

    # The x axis is shared, so setting it up on the last plot is sufficient.
    ax.xaxis.set_major_locator(MultipleLocator(1.0))
    ax.set_xlim([-0.5, len(relevances) - 0.5])

    plt.tight_layout()

    if save_plots:
        # savefig() does not create missing directories.
        if not os.path.isdir(SAVE_PLOTS_DIRECTORY):
            os.makedirs(SAVE_PLOTS_DIRECTORY)
        filename = click_model_name + '_relevances.png'
        fig.savefig(os.path.join(SAVE_PLOTS_DIRECTORY, filename), bbox_inches='tight')
    else:
        plt.show()

    plt.close(fig)

# To save the plots of all click models into `SAVE_PLOTS_DIRECTORY`, run:
# for cm in MQD:
#     show_all_queries_relevances(cm, save_plots=True)

# Interactive version: a dropdown with the click models found in `MQD`.
_ = interact(show_all_queries_relevances, click_model_name=Dropdown(options=MQD.keys(), description='Click Model:'))

# PBM Attractivenesses and Examination Probabilities

In [None]:
def show_PBM_queries_relevances(collection, cutoff=5, output_filename=None):
    """Plot PBM examination, attractiveness and click probabilities per query.

    The first row shows the examination probabilities of the PBM model of one
    query. Every following row belongs to one query (in sorted order): the
    left plot shows the largest attractiveness probabilities in decreasing
    order, the right plot their product with the examination probabilities.
    One more document than `cutoff` is shown in every plot.

    Parameters
    ----------
    collection : str
        Path of the pickled collection; it is read as
        ``MQD['PBM'][query]['model']`` and ``MQD['PBM'][query]['relevances']``.
    cutoff : int
        The last rank of interest.
    output_filename : str, optional
        If given, the figure is saved into this file instead of being shown.
    """
    # Pickles have to be read in binary mode to be portable.
    with open(collection, 'rb') as ifile:
        MQD = pickle.load(ifile)

    # We show one extra document below the last rank of interest.
    cutoff += 1

    # The grid is sized by the PBM queries, which are the ones plotted.
    n_queries = len(MQD['PBM'])

    fig, axes = plt.subplots(n_queries + 1, 2, figsize=(12, 2 * n_queries),
                             sharex=True, sharey=False)

    ax = axes[0, 0]

    exam_proba = MQD['PBM'][MQD['PBM'].keys()[0]]['model'].exam_proba[:cutoff]

    ax.bar(np.arange(len(exam_proba)), exam_proba, 0.75,
           alpha=0.7, color='green', align='center')

    ax.set_title('PBM Examination Probabilities')
    ax.grid(axis='y', which='major')

    # There is nothing to show next to the examination probabilities.
    ax = axes[0, 1]
    ax.axis('off')

    for i, query in enumerate(sorted(MQD['PBM'].keys()), 1):
        # Row 0 holds the examination probabilities, so the queries occupy
        # rows 1..n_queries and the bottom row is row `n_queries`.
        is_bottom_row = (i == n_queries)

        # Plot left column depicting the attractiveness probabilities
        # of documents sorted in decreasing order (No relationship between
        # position and document identity should be taken from this plot!).
        ax = axes[i, 0]

        relevances = sorted(MQD['PBM'][query]['relevances'], reverse=True)[:cutoff]

        ax.bar(np.arange(len(relevances)), relevances, 0.75,
               alpha=0.7, color='blue', align='center')

        ax.set_title('Query: %s (Attractivenesses)' % query)
        ax.grid(axis='y', which='major')

        ax.tick_params(axis='both', which='major', left=True, direction='out',
                       top=False, right=False, bottom=is_bottom_row, labelleft=True,
                       labeltop=False, labelright=False, labelbottom=is_bottom_row)

        ax.spines['right'].set_visible(False)

        # Plot right column depicting the product of the attractiveness and
        # examination probabilities at the same positions.
        ax = axes[i, 1]

        exam_proba = MQD['PBM'][query]['model'].exam_proba[:cutoff]
        # `relevances` is a plain list; convert it for the element-wise product.
        click_proba = np.asarray(relevances) * exam_proba

        ax.bar(np.arange(len(click_proba)), click_proba, 0.75,
               alpha=0.7, color='red', align='center')

        ax.set_title('Atractivenesses * Examination Probabilities')
        ax.grid(axis='y', which='major')

        ax.tick_params(axis='both', which='major', left=False, direction='out',
                       top=False, right=False, bottom=is_bottom_row, labelleft=False,
                       labeltop=False, labelright=False, labelbottom=is_bottom_row)

        ax.spines['left'].set_visible(False)

    # The x axis is shared, so setting it up on the last plot is sufficient.
    ax.xaxis.set_major_locator(MultipleLocator(1.0))
    ax.set_xlim([-0.5, len(relevances) - 0.5])

    plt.tight_layout()
    # Join the two plots of a row together.
    plt.subplots_adjust(wspace=0)

    if output_filename is not None:
        fig.savefig(output_filename, bbox_inches='tight')
    else:
        plt.show()

    plt.close(fig)


# To save the plots into a file, call:
# show_PBM_queries_relevances('data/60Q/model_query_collection.pkl', output_filename='figures/PBM_60Q_RELEVANCES.png')

show_PBM_queries_relevances('data/60Q/model_query_collection.pkl')

# Cumulative Regret Distribution at Specific Iteration

In [None]:
import os
import cPickle as pickle

from collections import defaultdict


def get_regret_filepath(experiment_info_filepath):
    """Return the path of the regret file that belongs to an experiment info file.

    The trailing ``experiment.nfo`` of `experiment_info_filepath` is replaced
    by ``regret.npy``.
    """
    suffix = 'experiment.nfo'
    if experiment_info_filepath.endswith(suffix):
        # Cut the suffix as a whole. str.rstrip() treats its argument as a set
        # of characters and would also eat trailing characters of the prefix
        # (e.g. 'algo.experiment.nfo' would end up as 'alg').
        prefix = experiment_info_filepath[:-len(suffix)]
    else:
        # Unexpected file name: keep the previous character-stripping result.
        prefix = experiment_info_filepath.rstrip(suffix)
    return prefix + 'regret.npy'


def show_algorithms_regret_boxplot(experiments_directory, iteration, click_model_name='PBM'):
    """Draw a box plot of the cumulative regret of each ranking model at one iteration.

    Parameters
    ----------
    experiments_directory : str
        Directory that is searched recursively (following links) for
        experiment ``*.nfo`` files.
    iteration : int
        One-based iteration at which the cumulative regret is taken.
    click_model_name : str
        Only experiments with the click model of this name are used.

    Raises
    ------
    ValueError
        If `iteration` is smaller than 1, or if no experiment with the given
        click model and a stored regret file is found.
    """
    if iteration < 1:
        # 0 would turn into index -1 below and silently pick the last iteration.
        raise ValueError('iteration must be a positive (one-based) number, got %r'
                         % (iteration,))

    # To zero-based.
    iteration -= 1

    ranking_model_regrets_at_iteration = defaultdict(list)

    # Go through the given directory and collect all
    # experiment *.nfo files beneath it.
    for root, dirs, files in os.walk(experiments_directory, topdown=True, followlinks=True):
        for fn in files:
            if not fn.endswith('.nfo'):
                continue

            fp = os.path.join(root, fn)

            # Pickles have to be read in binary mode to be portable.
            with open(fp, 'rb') as ifile:
                spec = pickle.load(ifile)

            regret_filepath = get_regret_filepath(fp)

            # Filter the desired experiment results.
            if (spec['click_model'].getName() == click_model_name and
                    os.path.exists(regret_filepath)):
                # Regret of the algorithm in the current experiment.
                regret = np.load(regret_filepath)
                # Cumulative regret at the given iteration:
                regret = regret.cumsum()[iteration]
                ranking_model_regrets_at_iteration[spec['ranking_model'].getName()].append(regret)

    if not ranking_model_regrets_at_iteration:
        raise ValueError('no experiment with click model %s and a regret file was found under %s'
                         % (click_model_name, experiments_directory))

    # Print the regrets for each ranking model in the experiments directory
    # that has NOT been filtered out.
#     for ranking_model, regrets in ranking_model_regrets_at_iteration.items():
#         print '%s: %r' % (ranking_model, regrets)

    fig, ax = plt.subplots(figsize=(12, 6))

    meanprops = dict(marker='D', markeredgecolor='black', markerfacecolor='firebrick')

    flierprops = dict(marker='o', markerfacecolor='green', markersize=6, linestyle='none')

    # Sorted by the ranking model name so that the order of the boxes does
    # not depend on the ordering of the dictionary.
    labels, regrets = zip(*sorted(ranking_model_regrets_at_iteration.items()))

    ax.boxplot(regrets, whis=[5, 95], notch=True, bootstrap=5000,
               showmeans=True, meanprops=meanprops, flierprops=flierprops)

    # You can specify a rotation for the tick labels in degrees or with keywords.
    plt.xticks(1 + np.arange(len(labels)), labels, rotation=17)
    # Pad margins so that markers don't get clipped by the axes
    plt.margins(0.2)
    # Tweak spacing to prevent clipping of tick-labels
    plt.subplots_adjust(bottom=0.15)

    ax.set_yscale('log')
    ax.set_title('Cumulative Regret Distribution at Iteration %d' % (iteration + 1))

# Box plot of the cumulative regret after 10,000,000 iterations, using the
# default click model ('PBM').
show_algorithms_regret_boxplot('experiments/mergerank_benchmark/algorithms/', 10000000)

# Attractiveness of documents under PBM and CM models and their parameters

In [None]:
import os
import numpy as np

# The collection of click models and queries used in the experiments.
# NOTE(review): `np.load` falls back to unpickling for a `.pkl` file, so only
# trusted data must be loaded here. NumPy >= 1.16.3 refuses to unpickle unless
# `allow_pickle=True` is passed -- add it when running with a recent NumPy.
MQD = np.load('data/60Q/model_query_collection.pkl')

# The PBM's intrinsic parameters (examination probability at each rank) are
# shared across all the queries in the dataset, so the model of any query
# will do. `next(iter(...))` takes the first query id without relying on
# `keys()` returning an indexable list (which holds only on Python 2).
pbm_examination_probabilities = MQD['PBM'][next(iter(MQD['PBM']))]['model'].exam_proba

# Mapping "query id" -> "attraction probabilities"
pbm_attraction_probabilities = {q: MQD['PBM'][q]['relevances'] for q in MQD['PBM']}
cm_attraction_probabilities = {q: MQD['CM'][q]['relevances'] for q in MQD['CM']}

# We use cutoff @5
cutoff = 5

# Each query has 10 documents.
n_documents = 10

# Identity array [0, 1, ..., n_documents - 1]; it is only passed through as
# the second argument of `get_clickthrough_rate` (used internally by the
# user model).
identity = np.arange(n_documents, dtype='int32')

if not os.path.isdir('parameters'):
    os.makedirs('parameters')


def _format_probabilities(probabilities):
    """Return `probabilities` as one comma-separated line of '%.6f' values."""
    return ','.join('%.6f' % p for p in probabilities) + '\n'


def _write_query_parameters(filepath, query, cutoff, identity, with_examination):
    """Write the parameters of a single query under one click model.

    The file gets, one item per line: the click-through rate of the model's
    ideal ranking, optionally the model's examination probabilities, and the
    attraction probabilities of the query's documents.

    Parameters
    ----------
    filepath : str
        Path of the output file; it is overwritten if it already exists.
    query : mapping
        An entry of `MQD[<model name>]`; its 'model' and 'relevances' items
        are read.
    cutoff : int
        Cutoff passed to `get_ideal_ranking` and `get_clickthrough_rate`.
    identity : array
        Passed unchanged as the second argument of `get_clickthrough_rate`.
    with_examination : bool
        Whether to write the `exam_proba` attribute of the click model.
    """
    with open(filepath, 'w') as ofile:
        click_model = query['model']
        ideal_ranking = click_model.get_ideal_ranking(cutoff=cutoff)
        ideal_ctr = click_model.get_clickthrough_rate(ideal_ranking, identity,
                                                      cutoff=cutoff)
        ofile.write('%.6f\n' % ideal_ctr)
        if with_examination:
            ofile.write(_format_probabilities(click_model.exam_proba))
        ofile.write(_format_probabilities(query['relevances']))


# Create output file for queries' parameters under PBM click model.
for q in MQD['PBM']:
    _write_query_parameters('parameters/%s_PBM.txt' % q, MQD['PBM'][q],
                            cutoff, identity, with_examination=True)


# Create output file for queries' parameters under CM click model.
# There are no examination probabilities under CM user model, so that line
# is left out of the file.
for q in MQD['CM']:
    _write_query_parameters('parameters/%s_CM.txt' % q, MQD['CM'][q],
                            cutoff, identity, with_examination=False)