# Imports

In [1]:
from systemtools.hayj import *
from systemtools.location import *
from systemtools.basics import *
from systemtools.file import *
from systemtools.printer import *
from twinews.utils import *
from twinews.evaluation.utils import *
from twinews.models.ranking import *
from dataviztools.bokehutils import *

In [2]:
import bokeh
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import Grid, Legend, LegendItem
from bokeh.layouts import gridplot
from bokeh.io import export_svgs, export_png
output_notebook()

In [3]:
def getModel(row):
    """Return the display name of the model stored in `row`.

    For a 'combin' row, the name is made of the first two entries of
    row['models'], each one without its last 6 characters, separated by a space.
    Any other row simply yields row['model'].
    """
    name = row['model']
    if name != 'combin':
        return name
    first, second = row['models'][0], row['models'][1]
    # The last 6 characters are dropped (presumably the '-xxxxx' id suffix):
    return " ".join((first[:-6], second[:-6]))

In [4]:
def bokehPointsSetup(fig, amount, shapeAmount=5, paletteIndex=0):
    """Return `amount` distinct (glyph method, color) pairs to draw series on `fig`.

    :param fig: the figure whose glyph methods (triangle, circle, diamond,
        square, inverted_triangle) are used
    :param amount: the maximum number of pairs to return
    :param shapeAmount: how many of the 5 shapes to use
    :param paletteIndex: index of the palette to use in the global `palettes`
    :return: a list of (bound glyph method, color) tuples, at most
        len(shapes) * len(colors) long
    """
    # We define all shapes (from the given figure, not from a global one):
    shapes = [fig.triangle, fig.circle, fig.diamond, fig.square, fig.inverted_triangle][:shapeAmount]
    # We define all colors:
    colors = palettes[paletteIndex]
    # We generate all combinations: for each color offset, each shape gets the
    # color at (shape index + offset), which never yields the same pair twice.
    dots = []
    for offset in range(len(colors)):
        for s in range(len(shapes)):
            dots.append((shapes[s], colors[(s + offset) % len(colors)]))
    return dots[:amount]

In [5]:
def isModels(row, *args):
    """Tell whether every string of `args` occurs in the textual form of row['models'].

    Returns False when `row` has no 'models' entry, and True when no `args` are given.
    """
    if 'models' in row:
        modelsRepr = str(row['models'])
        return all(arg in modelsRepr for arg in args)
    return False

In [6]:
def getColorKwargs(color):
    """Return the keyword arguments giving `color` to both the fill and the outline of a glyph."""
    return dict(fill_color=color, line_color=color)

In [7]:
# Color palettes (lists of hex colors), see:
# https://learnui.design/tools/data-color-picker.html#palette
# Note that the palette at index 2 has 6 colors while the others have 5.
palettes = [
    ['#488f31', '#a7c162', '#fff59f', '#f49e5c', '#de425b'],
    ['#003f5c', '#58508d', '#bc5090', '#ff6361', '#ffa600'],
    ['#009b95', '#007acc', '#bc5090', '#aaaaaa', '#ff8f8f', '#ef9a32'],
    ['#a31430', '#0071bd', '#02bebf', '#df7d00', '#017f01'],
]

# Getting data

In [None]:
# All rows of split version 2, with 'combin', 'worst' and 'ideal' given as blackModels:
models2 = twinewsGet(
    splitVersion=2,
    blackModels=['combin', 'worst', 'ideal'],
    noSubsampling=True,
    doNormalization=True,
)

In [None]:
# Same query as models2, restricted with onlyBestForField='ndcg':
bests2 = twinewsGet(
    splitVersion=2,
    onlyBestForField='ndcg',
    blackModels=['combin', 'worst', 'ideal'],
    noSubsampling=True,
    doNormalization=True,
)

In [None]:
# Rows of split version 2 with only 'combin' given as whiteModels:
combins2 = twinewsGet(
    splitVersion=2,
    whiteModels=['combin'],
    noSubsampling=True,
    doNormalization=True,
)

In [None]:
# Best rows (onlyBestForField='ndcg') of the models that are neither bm25, dbert-ft,
# a combination nor a baseline.
# The split version and the normalization are given explicitly, like in the
# cells above: the `splitVersion` and `doNormalization` variables are only
# defined further down in the notebook, so using them here fails on a fresh kernel.
# NOTE(review): 2 / True mirror the sibling cells — confirm this is the intended split.
alonesOthers = twinewsGet(
    splitVersion=2,
    blackModels=['bm25', 'dbert-ft', 'combin', 'random', 'ideal', 'worst'],
    noSubsampling=True,
    onlyBestForField='ndcg',
    doNormalization=True,
)

# Displaying data

In [None]:
def displayRows(rows):
    """Display `rows` through displayDicts, restricted to a few summary fields."""
    summaries = []
    for row in rows:
        summaries.append(dictSelect(row, ['id', 'ndcg', 'maxUsers', 'splitVersion', 'div@100']))
    displayDicts(summaries)

In [None]:
# Summary table of the rows fetched in models2:
displayRows(models2)

# Multi-objective of combins

In [None]:
def getFigure():
    """Build the multi-objective scatter plot of the model combinations.

    The function takes no argument, it reads its whole configuration from
    notebook globals which must be set before the call:
      - data: xaxis, yaxis (fields of the rows), splitVersion, doNormalization, blackIds
      - frame: xrange, yrange (xrange can be None to let bokeh choose both), width, height, margeRatio
      - frontier: paretoFrontier, a non-empty list of row ids; the first point is
        extended to the left, the last one is extended downward
      - styles: palette, circleSizes, triangleSizes, diamondSizes, squareSizes, lineWidth
      - toggles: addTitle, displayAlones, displayLegend, hideTickLabels, hideGrid,
        hideXLabel, hideYLabel, hideTickLines

    These globals are only read: the enlarged ranges computed for small figures
    are kept local, so calling the function twice gives the same figure.

    :return: the bokeh figure
    :raises ValueError: if paretoFrontier is empty or if its first or last id is unknown
    """
    # We get all combinations:
    rows = twinewsGet(
        splitVersion=splitVersion,
        whiteModels=['combin'],
        noSubsampling=True,
        doNormalization=doNormalization,
    )
    rows = [row for row in rows if 'rankAsScore' not in row or row['rankAsScore'] == [False, False]]
    rows = [row for row in rows if row['id'] not in blackIds]
    # We get the best bm25 and the best dbert-ft:
    alones = twinewsGet(
        splitVersion=splitVersion,
        whiteModels=['bm25', 'dbert-ft'],
        noSubsampling=True,
        onlyBestForField='ndcg',
        doNormalization=doNormalization,
    )
    # We get the best of each other lone model:
    alonesOthers = twinewsGet(
        splitVersion=splitVersion,
        blackModels=['bm25', 'dbert-ft', 'combin', 'random', 'ideal', 'worst'],
        noSubsampling=True,
        onlyBestForField='ndcg',
        doNormalization=doNormalization,
    )
    bm25 = alones[0] if 'bm25' in alones[0]['id'] else alones[1]
    dbertft = alones[0] if 'dbert-ft' in alones[0]['id'] else alones[1]
    # We split the combinations in 4 groups according to the models they contain:
    def newGroup():
        return {'x': [], 'y': [], 'id': [], 'model': []}
    havingBoth, havingDBertft, havingBm25, others = newGroup(), newGroup(), newGroup(), newGroup()
    for row in rows:
        modelsRepr = str(row['models']) if 'models' in row else ""
        hasDBertft = 'dbert-ft' in modelsRepr
        hasBm25 = 'bm25' in modelsRepr
        if hasDBertft and hasBm25:
            group = havingBoth
        elif hasDBertft:
            group = havingDBertft
        elif hasBm25:
            group = havingBm25
        else:
            group = others
        group['x'].append(row[xaxis])
        group['y'].append(row[yaxis])
        group['model'].append(getModel(row))
        group['id'].append(row['id'])
    # We index the rows by id to find the points of the pareto frontier
    # (the first row having a given id wins, like a linear search would do):
    rowsById = dict()
    for row in rows + alones:
        rowsById.setdefault(row['id'], row)
    def getRow(id):
        if id not in rowsById:
            raise ValueError("The id " + str(id) + " of paretoFrontier was not found in the rows")
        return rowsById[id]
    if len(paretoFrontier) == 0:
        raise ValueError("paretoFrontier must contain at least one row id")
    firstRow = getRow(paretoFrontier[0])
    lastRow = getRow(paretoFrontier[-1])
    # We work on local copies of the ranges so the globals are never modified:
    xRange, yRange = xrange, yrange
    if width <= 500 and xRange is not None:
        # For small figures, the top and the right of the frame are set just
        # after the ends of the pareto frontier:
        yRange = (yRange[0], firstRow[yaxis] + margeRatio * abs(yRange[0] - yRange[1]))
        xRange = (xRange[0], lastRow[xaxis] + margeRatio * abs(xRange[0] - xRange[1]))
    TOOLTIPS = [("model", "@model"), ("id", "@id")]
    rangeKwargs = {} if xRange is None else {'x_range': xRange, 'y_range': yRange}
    title = "Multi-objective (" + xaxis + " and " + yaxis + ") of combinations"
    if width <= 300:
        title = yaxis + " a.a.f.o. " + xaxis # "a.a.f.o." stands for "as a function of"
    x_axis_label = None if hideXLabel else xaxis
    y_axis_label = None if hideYLabel else yaxis
    if not addTitle:
        title = None
    p = figure(title=title, x_axis_label=x_axis_label, y_axis_label=y_axis_label, tooltips=TOOLTIPS, width=width, height=height, **rangeKwargs)
    # We define styles as (shape, color, size):
    styles = \
    {
        "combination": (p.circle, palette[3], circleSizes[2]),
        "havingBoth": (p.diamond, palette[0], diamondSizes[0]),
        "havingDBertft": (p.triangle, palette[1], triangleSizes[1]),
        "havingBm25": (p.square, palette[0], squareSizes[1]),
        "bm25": (p.square, palette[5], squareSizes[1]),
        "dbertft": (p.triangle, palette[5], triangleSizes[0]),
        "alone": (p.circle, palette[5], circleSizes[2]),
        "pareto": (p.line, palette[4], lineWidth),
    }
    for key in styles:
        styles[key] = {"shape": styles[key][0], "color": styles[key][1], "size": styles[key][2]}
    # Misc params:
    lineAlpha = 0.4
    lineDash = 'dashed' # dashed, dotted, dotdash, dashdot
    # We keep the renderer of each series to build the legend, instead of
    # relying on its position in p.renderers:
    renderers = dict()
    # We draw the pareto frontier (ids which are not found are skipped):
    frontierRows = [rowsById[current] for current in paretoFrontier if current in rowsById]
    x = [row[xaxis] for row in frontierRows]
    y = [row[yaxis] for row in frontierRows]
    drawLine = styles['pareto']['shape']
    lineKwargs = {'line_width': styles['pareto']['size'], 'line_color': styles['pareto']['color'], 'line_dash': lineDash}
    renderers['pareto'] = drawLine(x, y, **lineKwargs)
    # We draw the left extension (from 0 when no range is given):
    leftEdge = xRange[0] if xRange is not None else 0
    drawLine([leftEdge, firstRow[xaxis]], [firstRow[yaxis], firstRow[yaxis]], line_alpha=lineAlpha, **lineKwargs)
    # We draw the bottom extension:
    drawLine([lastRow[xaxis], lastRow[xaxis]], [lastRow[yaxis], 0], line_alpha=lineAlpha, **lineKwargs)
    # A function to draw a group of points with the given style:
    def scatter(styleKey, source):
        style = styles[styleKey]
        return style['shape']('x', 'y', size=style['size'], source=source, **getColorKwargs(style['color']))
    # We draw all combinations (the drawing order defines which points are on top):
    renderers['comb'] = scatter('combination', ColumnDataSource(others))
    renderers['both'] = scatter('havingBoth', ColumnDataSource(havingBoth))
    renderers['comb-dbert-ft'] = scatter('havingDBertft', ColumnDataSource(havingDBertft))
    renderers['comb-bm25'] = scatter('havingBm25', ColumnDataSource(havingBm25))
    # Adding models alones:
    bm25Source = {'x': [bm25[xaxis]], 'y': [bm25[yaxis]], 'model': [bm25['model']], 'id': [bm25['id']]}
    dbertftSource = {'x': [dbertft[xaxis]], 'y': [dbertft[yaxis]], 'model': [dbertft['model']], 'id': [dbertft['id']]}
    renderers['alone-bm25'] = scatter('bm25', bm25Source)
    renderers['alone-dbert'] = scatter('dbertft', dbertftSource)
    # Adding other models being alone:
    source = {'x': [e[xaxis] for e in alonesOthers], 'y': [e[yaxis] for e in alonesOthers], 'model': [e['model'] for e in alonesOthers], 'id': [e['id'] for e in alonesOthers]}
    if displayAlones:
        renderers['alone'] = scatter('alone', source)
    # Adding the legend:
    if displayLegend:
        legendTexts = \
        {
            'pareto': "Pareto frontier",
            'comb': "Other combinations of models",
            'both': "Combination of dbert-ft and bm25",
            'comb-dbert-ft': "Combinations having dbert-ft at least",
            'comb-bm25': "Combinations having bm25 at least",
            'alone-dbert': "dbert-ft alone",
            'alone-bm25': "bm25 alone",
            'alone': "Other lone models",
        }
        if displayAlones:
            legendsOrder = ['comb-dbert-ft', 'comb-bm25', 'both', 'comb', 'alone-dbert', 'alone-bm25', 'alone', 'pareto']
        else:
            legendsOrder = ['comb', 'comb-dbert-ft', 'comb-bm25', 'both', 'alone-dbert', 'alone-bm25', 'pareto']
        items = [LegendItem(label=legendTexts[key], renderers=[renderers[key]]) for key in legendsOrder]
        legend1 = Legend(items=items, location='bottom_right' if 'ser' in xaxis else 'top_right')
        p.add_layout(legend1)
    if hideTickLabels:
        p.xaxis.major_label_text_font_size = '0pt'
        p.yaxis.major_label_text_font_size = '0pt'
    if hideGrid:
        p.xgrid.grid_line_color = None
        p.ygrid.grid_line_color = None
    # https://docs.bokeh.org/en/latest/docs/reference/models/axes.html
    if hideTickLines:
        p.xaxis.minor_tick_line_width = 0.0
        p.xaxis.major_tick_line_width = 0.0
        p.yaxis.minor_tick_line_width = 0.0
        p.yaxis.major_tick_line_width = 0.0
    return p

In [None]:
# Registry of all generated figures, keyed by the name of their x axis:
figures = {}

In [None]:
# Misc vars:
# NOTE(review): absolute local path — the SVG/PNG exports below only work on the author's machine.
path = '/home/hayj/Dashboard/Articles/Manuscrit/MultiObjective/sources'
width = 600 # 600, 250, 300 (300 if all ticks and labels are displayed)
height = width
displayAlones = True
margeRatio = 0.06
if width == 600:
    # Large standalone figure, with its legend:
    circleSizes = [15, 12, 10]
    triangleSizes = [15, 12, 10]
    diamondSizes = [18, 15, 12]
    squareSizes = [13, 10, 8]
    lineWidth = 3
    displayLegend = True
    addTitle = False
    hideTickLabels = False
    hideXLabel = False
    hideYLabel = False
    hideTickLines = False
elif width <= 300:
    # Small figure (e.g. for a grid), smaller glyphs and no legend:
    circleSizes = [8, 8, 6]
    triangleSizes = [11, 11, 8]
    diamondSizes = [13, 10, 9]
    squareSizes = [8, 8, 5]
    lineWidth = 2
    displayLegend = False
    addTitle = False
    hideTickLabels = False
    hideXLabel = False
    hideYLabel = False
    hideTickLines = False
else:
    # No style is defined for other widths: we stop here instead of going on
    # with undefined (or stale) style variables.
    raise ValueError("Unsupported width " + str(width) + ": use 600 or a value <= 300")
hideGrid = hideTickLabels

In [None]:
# Diversity ('div@100') against nDCG:
xaxis, yaxis = 'div@100', 'ndcg'
xrange = (0.45, 0.65)
yrange = (0.45, 0.70)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-ea42e', 'combin-1ca06', 'combin-bbe73',
    'dbert-ft-7847a', 'combin-093a2', 'combin-4f0a5',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Topic diversity ('topic-div@100') against nDCG:
xaxis, yaxis = 'topic-div@100', 'ndcg'
# A less good alternative: (0.225, 0.525), (0.39, 0.69); full frame: (0, 1), (0, 1)
xrange = (0.25, 0.51)
yrange = (0.39, 0.715)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-6ecf3', 'combin-d0fd0',
    'combin-80112', 'combin-c12e9', 'dbert-ft-7847a',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Jaccard diversity ('jacc-div@100') against nDCG:
xaxis, yaxis = 'jacc-div@100', 'ndcg'
xrange = (0.855, 0.885)
yrange = (0.35, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = ['combin-8f846', 'combin-4f0a5']
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# SW Jaccard diversity ('swjacc-div@100') against nDCG:
xaxis, yaxis = 'swjacc-div@100', 'ndcg'
xrange = (0.945, 0.973)
yrange = (0.4, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = ['combin-8f846', 'combin-4f0a5']
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Style diversity ('style-div@100') against nDCG:
xaxis, yaxis = 'style-div@100', 'ndcg'
xrange = (0.1, 0.5)
yrange = (0.25, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = ['combin-8f846', 'combin-4f0a5']
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Novelty ('nov@100') against nDCG:
xaxis, yaxis = 'nov@100', 'ndcg'
xrange = (0.49, 0.62)
yrange = (0.49, 0.67)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-d0fd0', 'combin-1ca06',
    'combin-c12e9', 'dbert-ft-7847a',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Topic novelty ('topic-nov@100') against nDCG:
xaxis, yaxis = 'topic-nov@100', 'ndcg'
xrange = (0.215, 0.445)
yrange = (0.45, 0.68)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-1ca06',
    'combin-80112', 'combin-c12e9', 'dbert-ft-7847a', 'combin-8169e',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Jaccard novelty ('jacc-nov@100') against nDCG:
# NOTE(review): the ranges and the pareto frontier below are identical to the
# ones of the topic novelty figure — they look copy-pasted, confirm them for this metric.
xaxis, yaxis = 'jacc-nov@100', 'ndcg'
xrange = (0.215, 0.445)
yrange = (0.45, 0.68)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-1ca06',
    'combin-80112', 'combin-c12e9', 'dbert-ft-7847a', 'combin-8169e',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Strict novelty ('snov@100') against nDCG:
xaxis, yaxis = 'snov@100', 'ndcg'
xrange = (0.409, 0.56)
yrange = (0.47, 0.67)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-d0fd0', 'combin-ea42e', 'combin-1ca06',
    'combin-80112', 'combin-c12e9', 'dbert-ft-7847a',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Topic strict novelty ('topic-snov@100') against nDCG:
xaxis, yaxis = 'topic-snov@100', 'ndcg'
xrange = (0.25, 0.54)
yrange = (0.41, 0.68)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-1ca06', 'combin-80112',
    'combin-c12e9', 'combin-71538', 'dbert-ft-7847a', 'combin-8169e',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# swjacc-snov@100 and swjacc-nov@100 are of no interest

In [None]:
# SW Jaccard strict novelty ('swjacc-snov@100') against nDCG:
# NOTE(review): the ranges and the pareto frontier below are identical to the
# ones of the topic novelty figure — they look copy-pasted, confirm them for this metric.
xaxis, yaxis = 'swjacc-snov@100', 'ndcg'
xrange = (0.215, 0.445)
yrange = (0.45, 0.68)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-1ca06',
    'combin-80112', 'combin-c12e9', 'dbert-ft-7847a', 'combin-8169e',
]
p = getFigure()
figures[xaxis] = p
show(p)
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Serendipity ('avg-ser@100') against nDCG, exported as PNG and SVG:
xaxis, yaxis = 'avg-ser@100', 'ndcg' # other x axis candidates: {'tfidf-ser@100', 'wtfidf-ser@100'}
xrange = (0.14, 0.46)
yrange = (0.42, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = ['combin-8f846', 'combin-d0fd0', 'combin-ea42e']
p = getFigure()
figures[xaxis] = p
show(p)
export_png(p, filename="{}/{}.png".format(path, xaxis))
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Jaccard serendipity ('jacc-ser@100') against nDCG, exported as PNG and SVG:
xaxis, yaxis = 'jacc-ser@100', 'ndcg'
xrange = (0.5, 0.7)
yrange = (0.45, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = ['combin-8f846', 'combin-d0fd0', 'combin-ea42e']
p = getFigure()
figures[xaxis] = p
show(p)
export_png(p, filename="{}/{}.png".format(path, xaxis))
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# BM25 serendipity ('bm25-ser@100') against nDCG, exported as PNG and SVG:
xaxis, yaxis = 'bm25-ser@100', 'ndcg'
xrange = (0.0, 0.4)
yrange = (0.35, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = [
    'combin-8f846', 'combin-6ecf3', 'combin-d0fd0',
    'combin-ea42e', 'dbert-ft-7847a',
]
p = getFigure()
figures[xaxis] = p
show(p)
export_png(p, filename="{}/{}.png".format(path, xaxis))
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Style serendipity ('style-ser@100') against nDCG, exported as PNG and SVG:
xaxis, yaxis = 'style-ser@100', 'ndcg'
xrange = (0.0, 0.6)
yrange = (0.35, 0.65)
splitVersion, doNormalization = 1, True
palette = palettes[2]
blackIds = {'combin-d2d96', 'combin-904f0'}
paretoFrontier = ['combin-8f846', 'combin-d0fd0', 'combin-ea42e']
p = getFigure()
figures[xaxis] = p
show(p)
export_png(p, filename="{}/{}.png".format(path, xaxis))
exportAsSVG(p, "{}/{}.svg".format(path, xaxis))

In [None]:
# Grid of 6 figures: one row per kind of metric (diversity, novelty, strict
# novelty), with the base variant on the left and the topic variant on the right.
# plot_width=250 and plot_height=250 can be given to gridplot to resize the figures.
grid = gridplot([
    [figures[name] for name in ('div@100', 'topic-div@100')],
    [figures[name] for name in ('nov@100', 'topic-nov@100')],
    [figures[name] for name in ('snov@100', 'topic-snov@100')],
])
show(grid)

# History-ref

In [8]:
def removeFields(row, fields):
    """Return a deep copy of `row` without the keys listed in `fields`.

    The given `row` is left untouched and keys which are not in `row` are ignored.
    """
    pruned = copy.deepcopy(row)
    for field in fields:
        pruned.pop(field, None)
    return pruned

In [9]:
def _getBestRow(model, splitVersion, metric):
    """Fetch the best row of `model` for `metric` (shared by getBestParams and getBestId)."""
    rows = twinewsGet(
        splitVersion=splitVersion,
        whiteModels=[model],
        noSubsampling=True,
        doNormalization=False,
        onlyBestForField=metric,
        averageSerendipities=False,
    )
    if not rows:
        # Same exception type as indexing the empty result, with an explicit message:
        raise IndexError("No row found for the model " + repr(model) + " (splitVersion=" + str(splitVersion) + ", metric=" + repr(metric) + ")")
    return rows[0]
def getBestParams(model, splitVersion=2, metric='ndcg'):
    """Return the parameters of the best row of `model` according to `metric`.

    The returned dict is a copy of the best row without the metric fields
    (METRICS_ORDER) and without 'id', 'maxUsers', 'splitVersion' and 'model'.

    :raises IndexError: if no row is found for the model
    """
    toRemove = METRICS_ORDER + ['id', 'maxUsers', 'splitVersion', 'model']
    return removeFields(_getBestRow(model, splitVersion, metric), toRemove)
def getBestId(model, splitVersion=2, metric='ndcg'):
    """Return the id of the best row of `model` according to `metric`.

    :raises IndexError: if no row is found for the model
    """
    return _getBestRow(model, splitVersion, metric)['id']

In [10]:
# Settings of the historyRef study:
splitVersion = 2
# A sorted list (instead of a set) so the models are always processed and
# printed in the same order from one kernel to another:
models = ['bert', 'dbert-base', 'dbert-ft', 'doc2vec', 'infersent', 'nmf', 'sent2vec', 'tfidf', 'usent']
metric = 'ndcg'

In [11]:
# We collect, for each model, the rows which only differ from the best row by
# their historyRef:
points = []
for model in models:
    # We get the best parameters of the model, without historyRef which is the
    # parameter we want to vary:
    bestParams = getBestParams(model, splitVersion=splitVersion, metric=metric)
    bestParams.pop('historyRef', None)
    print('---------')
    bp(bestParams, 5)
    # We get all rows for the model:
    allRows = twinewsGet(
        splitVersion=splitVersion,
        whiteModels=[model],
        noSubsampling=True,
        doNormalization=False,
        averageSerendipities=False,
    )
    nRows = []
    for row in allRows:
        # The historyRef must be a float. This check is done for every row,
        # even when there is no other parameter to compare:
        if 'historyRef' not in row or not isinstance(row['historyRef'], float):
            continue
        # All other parameters must be the same as the best ones:
        if all(key in row and row[key] == bestParams[key] for key in bestParams):
            nRows.append(row)
    allRows = nRows
    allIds = [e['id'] for e in allRows]
    allHR = [e['historyRef'] for e in allRows]
    print(model)
    print(allIds)
    print(allHR)
    print('---------')
    points += allRows
bp(points, 4)

---------
{ 'distance': cosine }
dbert-ft
['dbert-ft-22089', 'dbert-ft-f3f5d', 'dbert-ft-72cac', 'dbert-ft-babc3', 'dbert-ft-f3551', 'dbert-ft-71ce1', 'dbert-ft-7f4c7', 'dbert-ft-6c7f3', 'dbert-ft-78ac9', 'dbert-ft-73917', 'dbert-ft-14cf9', 'dbert-ft-d66a1', 'dbert-ft-3a57f', 'dbert-ft-59ecb', 'dbert-ft-11978', 'dbert-ft-d1b5f', 'dbert-ft-79517', 'dbert-ft-2529b', 'dbert-ft-df583', 'dbert-ft-d6977']
[0.7, 0.95, 0.55, 0.05, 0.8, 0.3, 0.15, 0.4, 0.65, 0.85, 0.75, 0.2, 1.0, 0.5, 0.1, 0.35, 0.45, 0.6, 0.25, 0.9]
---------
---------
{ 'dimensions': None, 'distance': cosine, 'doLemmatization': False, 'lowercase': False, 'maxDF': 300, 'minDF': 0.0005, 'sublinearTF': True }
tfidf
['tfidf-9bb25', 'tfidf-7c894', 'tfidf-59307', 'tfidf-ad998', 'tfidf-01001', 'tfidf-432e9', 'tfidf-e85ec', 'tfidf-00a4c', 'tfidf-f57a2', 'tfidf-03ad6', 'tfidf-be107', 'tfidf-11f90', 'tfidf-0f4f4', 'tfidf-d38c9', 'tfidf-8c1b3', 'tfidf-a1534', 'tfidf-6f90d', 'tfidf-6c83e', 'tfidf-45e4a', 'tfidf-7e79d']
[0.6, 0.45, 0.15, 

In [12]:
# Empty figure for nDCG as a function of historyRef, the tooltip gives the model:
TOOLTIPS = [("model", "@model")]
p = figure(
    title="title",
    x_axis_label="historyRef",
    y_axis_label="nDCG",
    tooltips=TOOLTIPS,
    width=500,
    height=500,
)

In [13]:
# One circle per collected row: historyRef on x, nDCG on y:
sources = {
    'x': [point['historyRef'] for point in points],
    'y': [point['ndcg'] for point in points],
    'model': [point['model'] for point in points],
}
p.circle('x', 'y', source=sources)

In [14]:
# We display the scatter plot of all collected rows:
show(p)

In [15]:
# We group the nDCG scores by historyRef value:
hrScores = dict()
for point in points:
    hrScores.setdefault(point['historyRef'], []).append(point['ndcg'])

In [16]:
# Sanity check: number of collected scores for each historyRef value
# (expected to be the same everywhere, presumably one score per model):
print("Here the count of point per historyRef: " + str([len(v) for v in hrScores.values()]))

Here the count of point per historyRef: [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9]


In [127]:
# Colors of the historyRef curves below (palette at index 2, which has 6 colors):
colors = palettes[2]

In [192]:
# We plot the mean nDCG for each historyRef, on a new figure:
p = figure(x_axis_label="historyRef", y_axis_label="nDCG", width=600, height=410)
sources = {'x': [], 'y': []}
for hr in sorted(hrScores.keys()):
    ndcgList = hrScores[hr]
    ndcgMean = float(np.mean(ndcgList))
    sources['x'].append(hr)
    sources['y'].append(ndcgMean)
# The points first, then the line linking them:
p.circle('x', 'y', size=8, source=sources, color=colors[4])
p.line('x', 'y', source=sources, line_width=2, color=colors[4])

In [193]:
# We add the curve of dbert-ft alone. The scores are shifted down by 0.02:
# the curve is only meant to show its shape (see the legend), presumably to
# bring it close to the mean curve.
model = 'dbert-ft'
tuples = sortBy([(point['historyRef'], point['ndcg']) for point in points if point['model'] == model], index=0)
print(tuples)
sources = {
    'x': [t[0] for t in tuples],
    'y': [t[1] - 0.02 for t in tuples],
}
p.circle('x', 'y', source=sources, size=6, color=colors[0])
p.line('x', 'y', source=sources, line_dash="dashed", line_width=1.3, color=colors[0])

[(0.05, 0.45090619581776864), (0.1, 0.4636548712189237), (0.15, 0.47520528199876666), (0.2, 0.4856798601348586), (0.25, 0.4912388108442173), (0.3, 0.49234620165753606), (0.35, 0.4925415417030777), (0.4, 0.4924304829933544), (0.45, 0.49147831909034595), (0.5, 0.4898301488742038), (0.55, 0.4886495284996133), (0.6, 0.48681336413602105), (0.65, 0.4852942534174489), (0.7, 0.48350713294586994), (0.75, 0.4813740970480439), (0.8, 0.47932450810755256), (0.85, 0.4776906344947764), (0.9, 0.4755425485660477), (0.95, 0.47321318268421586), (1.0, 0.4695205361836381)]


In [194]:
# We add the curve of tfidf alone. The scores are shifted down by 0.07:
# the curve is only meant to show its shape (see the legend), presumably to
# bring it close to the mean curve.
model = 'tfidf'
tuples = sortBy([(point['historyRef'], point['ndcg']) for point in points if point['model'] == model], index=0)
print(tuples)
sources = {
    'x': [t[0] for t in tuples],
    'y': [t[1] - 0.07 for t in tuples],
}
p.circle('x', 'y', source=sources, size=6, color=colors[1])
p.line('x', 'y', source=sources, line_dash="dashed", line_width=1.3, color=colors[1])

[(0.05, 0.5164656235365549), (0.1, 0.527152813537479), (0.15, 0.5352473658891441), (0.2, 0.5429410153459369), (0.25, 0.5476228179223473), (0.3, 0.5484067541069992), (0.35, 0.5486356754369442), (0.4, 0.5486772802803085), (0.45, 0.5476996680527879), (0.5, 0.5463190308377611), (0.55, 0.5450986529120728), (0.6, 0.5438871651554802), (0.65, 0.5423130244948554), (0.7, 0.5406687407326325), (0.75, 0.5387224557460027), (0.8, 0.536746386696299), (0.85, 0.5351549152487327), (0.9, 0.5331177083657234), (0.95, 0.531258609947553), (1.0, 0.5283767640521252)]


In [195]:
# Legend of the 3 curves. Each curve was drawn as circles then a line, so the
# lines are the renderers 1, 3 and 5 of the figure (this assumes that each of
# the 3 plotting cells above was run exactly once on this figure).
legendTexts = {
    'avg': "Averaged nDCG scores (9 models)",
    'dbert-ft': "Shape of dbert-ft scores",
    'tfidf': "Shape of tfidf scores",
}
legends = [
    LegendItem(label=legendTexts[key], renderers=[p.renderers[index]])
    for key, index in (('avg', 1), ('dbert-ft', 3), ('tfidf', 5))
]
legendsOrder = list(legends)
legend1 = Legend(items=legendsOrder, location='bottom_right')
p.add_layout(legend1)

In [196]:
# We display the mean curve with the dbert-ft and tfidf curves:
show(p)

In [171]:
if False:
    # Disabled cell: it lists, for each model, the historyRef values which
    # are not computed yet.
    # We get the historyRef values found in the collected points of each model:
    computedHR = dict()
    for model in models:
        for row in points:
            if row['model'] == model:
                computedHR.setdefault(model, set()).add(row['historyRef'])
    bp(computedHR, 5)
    # All expected values, from 0.05 to 1.0 by steps of 0.05:
    allHR = set(truncateFloat(e, 2) for e in np.arange(0.05, 1.05, 0.05))
    missingHR = {model: substract(allHR, computedHR[model]) for model in computedHR}
    bp(missingHR, 5)
    # We only keep the (model, historyRef) couples of nmf, tfidf and lda:
    tuples = [
        (model, current)
        for model in missingHR
        if model in {"nmf", "tfidf", "lda"}
        for current in missingHR[model]
    ]
    print(tuples)

{
  'bert': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'dbert-base': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'dbert-ft': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'doc2vec': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'infersent': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'nmf': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'sent2vec': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0 },
  'tfidf': { 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.