# Imports

In [1]:
from systemtools.hayj import *
from systemtools.location import *
from systemtools.basics import *
from systemtools.file import *
from systemtools.printer import *
from twinews.utils import *
from twinews.evaluation.utils import *
from twinews.models.ranking import *

In [98]:
import bokeh
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import Grid, Legend, LegendItem
from bokeh.layouts import gridplot
output_notebook()

In [3]:
def getModel(row):
    """Return the display name of the model behind a result row.

    For any model other than 'combin', row['model'] is returned unchanged.
    For a 'combin' row, the first two entries of row['models'] are each cut
    of their last 6 characters (presumably the '-xxxxx' id suffix, to be
    confirmed) and joined with a single space.
    """
    name = row['model']
    if name != 'combin':
        return name
    first, second = row['models'][0], row['models'][1]
    return first[:-6] + " " + second[:-6]

In [4]:
def bokehPointsSetup(fig, amount, shapeAmount=5, colorsIndex=0):
    """Build distinct (shape, color) pairs to draw series on a figure.

    Args:
        fig: object exposing the attributes triangle, circle, diamond, square
            and inverted_triangle (a Bokeh figure in this notebook).
        amount: maximum number of pairs returned.
        shapeAmount: how many of the 5 shapes to use (the first ones).
        colorsIndex: index of the 5-color palette to use (0 or 1).

    Returns:
        A list of at most `amount` tuples (shape, color). The pairs are
        ordered so that the first ones differ both in shape and in color;
        every (shape, color) combination appears at most once.
    """
    # We define all shapes, taken from the figure given as argument
    # (the previous code read them from the global `p` and ignored `fig`):
    shapes = [fig.triangle, fig.circle, fig.diamond, fig.square, fig.inverted_triangle][:shapeAmount]
    # We define all colors:
    # https://learnui.design/tools/data-color-picker.html#palette
    colors = \
    [
        ['#488f31', '#a7c162', '#fff59f', '#f49e5c', '#de425b'],
        ['#003f5c', '#58508d', '#bc5090', '#ff6361', '#ffa600'],
    ]
    colors = colors[colorsIndex]
    # We generate all combinations: each pass shifts the colors by one, so a
    # shape gets a new color at every pass. Indexing shapes by their own count
    # (and colors modulo their own count) keeps this valid when shapeAmount < 5.
    dots = []
    for shift in range(len(colors)):
        for s in range(len(shapes)):
            dots.append((shapes[s], colors[(s + shift) % len(colors)]))
    return dots[:amount]

In [5]:
def isModels(row, *args):
    """Tell whether every given substring occurs in str(row['models']).

    Returns False when the row has no 'models' key, and True when no
    substring is given for a row that has one.
    """
    if 'models' in row:
        text = str(row['models'])
        return all(arg in text for arg in args)
    return False

In [6]:
def getColorKwargs(color):
    """Return keyword arguments giving a glyph the same fill and line color."""
    return dict.fromkeys(('fill_color', 'line_color'), color)

In [7]:
# Color palettes, each a list of 5 hex colors.
# The first two are the same as the ones hard-coded in bokehPointsSetup;
# the figure cells below select palettes[2].
palettes = \
[
    ['#488f31', '#a7c162', '#fff59f', '#f49e5c', '#de425b'],
    ['#003f5c', '#58508d', '#bc5090', '#ff6361', '#ffa600'],
    ['#009b95', '#007acc', '#bc5090', '#aaaaaa', '#ff8f8f'],
]

# Getting data

In [None]:
# Rows of split version 2, with 'combin', 'worst' and 'ideal' passed as
# blackModels (presumably excluded by twinewsGet; confirm in twinews.utils).
models2 = twinewsGet(
    splitVersion=2,
    blackModels=['combin', 'worst', 'ideal'],
    noSubsampling=True,
    doNormalization=True,
)

In [None]:
# Same query as models2 with onlyBestForField='ndcg' added (presumably keeps
# the best row per model according to ndcg; confirm in twinews.utils).
bests2 = twinewsGet(
    splitVersion=2,
    onlyBestForField='ndcg',
    blackModels=['combin', 'worst', 'ideal'],
    noSubsampling=True,
    doNormalization=True,
)

In [None]:
# Rows of split version 2 with only 'combin' passed as whiteModels
# (presumably the model combinations only; confirm in twinews.utils).
combins2 = twinewsGet(
    splitVersion=2,
    whiteModels=['combin'],
    noSubsampling=True,
    doNormalization=True,
)

# Displaying data

In [None]:
def displayRows(rows):
    """Display a summary of the given rows, restricted to a few fields.

    Each row is reduced with dictSelect to the fields 'id', 'ndcg',
    'maxUsers', 'splitVersion' and 'div@100', then the whole list is handed
    to displayDicts.
    """
    fields = ('id', 'ndcg', 'maxUsers', 'splitVersion', 'div@100')
    selected = []
    for row in rows:
        # A fresh list is passed for every row, as a list literal would be.
        selected.append(dictSelect(row, list(fields)))
    displayDicts(selected)

In [None]:
# Summary table of the rows fetched with onlyBestForField='ndcg' (bests2).
displayRows(bests2)

# Multi-objective of combins

In [531]:
# Registry of the generated figures, keyed by the x-axis metric name;
# it is read back by the grid cell at the end of the notebook.
figures = {}

In [532]:
# Layout settings shared by all figures (read as globals by getFigure).
width = 600  # sizes used so far: 600, 300, 250
height = width
# Below 600 px the markers and lines get smaller and the legend is dropped.
if width < 600:
    circleSizes, triangleSizes = [8, 8, 6], [11, 11, 8]
    lineWidth, displayLegend = 2, False
else:
    circleSizes, triangleSizes = [15, 12, 10], [15, 12, 10]
    lineWidth, displayLegend = 3, True
# Above 250 px the axes are labelled; at 250 px or less a title is used instead.
addAxis = width > 250
addTitle = not addAxis

In [533]:
# Diversity: ndcg (y) against div@100 (x).
# getFigure() takes no argument: it reads the globals assigned below, so they
# must all be set before the call. getFigure is defined in a later cell of
# this notebook; that cell has to be executed first.
xaxis = 'div@100'
yaxis = 'ndcg'
xrange, yrange = (0.45, 0.65), (0.45, 0.70)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-ea42e', 'combin-1ca06', 'combin-bbe73', 'dbert-ft-7847a', 'combin-093a2', 'combin-4f0a5']
p = getFigure()
# Kept for the final grid cell:
figures[xaxis] = p
show(p)

In [534]:
# Topic diversity: ndcg (y) against topic-div@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
xaxis = 'topic-div@100'
yaxis = 'ndcg'
xrange, yrange = (0.25, 0.51), (0.39, 0.715) # Or, not as good: (0.225, 0.525), (0.39, 0.69)
# xrange, yrange = (0, 1), (0, 1)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-80112', 'combin-c12e9', 'dbert-ft-7847a']
p = getFigure()
# Kept for the final grid cell:
figures[xaxis] = p
show(p)

In [535]:
# SW Jaccard diversity: ndcg (y) against swjacc-div@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
xaxis = 'swjacc-div@100'
yaxis = 'ndcg'
xrange, yrange = (0.945, 0.973), (0.4, 0.65)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-4f0a5']
p = getFigure()
# Stored in figures, but not part of the final grid cell:
figures[xaxis] = p
show(p)

In [536]:
# Style diversity: ndcg (y) against style-div@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
# NOTE(review): the ranges are the full (0, 1) interval and paretoFrontier is
# the same list as in the SW Jaccard cell; this looks like a placeholder that
# was never tuned for this metric. Confirm before using the figure.
xaxis = 'style-div@100'
yaxis = 'ndcg'
xrange, yrange = (0, 1), (0, 1)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-4f0a5']
p = getFigure()
# Stored in figures, but not part of the final grid cell:
figures[xaxis] = p
show(p)

In [537]:
# Novelty: ndcg (y) against nov@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
xaxis = 'nov@100'
yaxis = 'ndcg'
xrange, yrange = (0.49, 0.62), (0.49, 0.67)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-d0fd0', 'combin-1ca06', 'combin-c12e9', 'dbert-ft-7847a']
p = getFigure()
# Kept for the final grid cell:
figures[xaxis] = p
show(p)

In [538]:
# Topic novelty: ndcg (y) against topic-nov@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
xaxis = 'topic-nov@100'
yaxis = 'ndcg'
xrange, yrange = (0.215, 0.445), (0.45, 0.68)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-1ca06', 'combin-80112', 'combin-c12e9', 'dbert-ft-7847a', 'combin-8169e']
p = getFigure()
# Kept for the final grid cell:
figures[xaxis] = p
show(p)

In [539]:
# Strict novelty: ndcg (y) against snov@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
xaxis = 'snov@100'
yaxis = 'ndcg'
xrange, yrange = (0.409, 0.56), (0.47, 0.67)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-d0fd0', 'combin-ea42e', 'combin-1ca06', 'combin-80112', 'combin-c12e9', 'dbert-ft-7847a']
p = getFigure()
# Kept for the final grid cell:
figures[xaxis] = p
show(p)

In [540]:
# Topic strict novelty: ndcg (y) against topic-snov@100 (x).
# The globals below are the configuration read by getFigure() (defined in a
# later cell of this notebook, which has to be executed first).
xaxis = 'topic-snov@100'
yaxis = 'ndcg'
xrange, yrange = (0.25, 0.54), (0.41, 0.68)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-6ecf3', 'combin-d0fd0', 'combin-1ca06', 'combin-80112', 'combin-c12e9', 'combin-71538', 'dbert-ft-7847a', 'combin-8169e']
p = getFigure()
# Kept for the final grid cell:
figures[xaxis] = p
show(p)

In [548]:
# Serendipity: ndcg (y) against avg-ser@100 (x).
# The globals below are the configuration read by getFigure() (defined in the
# next cell of this notebook, which has to be executed first).
# getFigure places the legend at the bottom right when 'ser' is in xaxis.
xaxis = 'avg-ser@100'
yaxis = 'ndcg'
xrange, yrange = (0.14, 0.46), (0.42, 0.65)
splitVersion = 1
doNormalization = True
palette = palettes[2]
# Row ids removed from the plot:
blackIds = {'combin-d2d96', 'combin-904f0'}
# Row ids joined, in this order, by the dashed frontier line:
paretoFrontier = ['combin-8f846', 'combin-d0fd0', 'combin-ea42e']
p = getFigure()
# Stored in figures, but not part of the final grid cell:
figures[xaxis] = p
show(p)

In [547]:
def getFigure():
    """Build the scatter plot of `yaxis` against `xaxis` for model combinations.

    NOTE: the figure cells located above in this notebook call this function,
    so this cell has to be executed before them on a fresh kernel.

    The function takes no argument and reads its configuration from globals:
        xaxis, yaxis: row fields used as coordinates.
        xrange, yrange: (min, max) axis ranges, or None to let Bokeh decide.
        splitVersion, doNormalization: forwarded to twinewsGet.
        palette: list of 5 colors.
        blackIds: ids of rows left out of the plot.
        paretoFrontier: ordered ids of the rows joined by the dashed line.
        width, height, lineWidth, circleSizes, triangleSizes, displayLegend,
        addAxis, addTitle: layout settings.
    None of these globals is modified (the tightened ranges used when
    width <= 300 are kept local, so calling the function twice gives the same
    figure).

    Returns:
        The Bokeh figure.

    Raises:
        ValueError: if no 'bm25' or 'dbert-ft' row is returned by twinewsGet,
            or if the first or last id of paretoFrontier is not found.
    """
    # Combination rows (plus the random / ideal / worst baselines):
    rows = twinewsGet(
        splitVersion=splitVersion,
        whiteModels=['combin', 'random', 'ideal', 'worst'],
        noSubsampling=True,
        doNormalization=doNormalization,
    )
    rows = [
        row for row in rows
        if ('rankAsScore' not in row or row['rankAsScore'] == [False, False])
        and row['id'] not in blackIds
    ]
    # rows = [row for row in rows if row['model'] != 'combin' or not isModels(row, "stylo", "word2vec")]
    # Rows of bm25 and dbert-ft used alone:
    alones = twinewsGet(
        splitVersion=splitVersion,
        whiteModels=['bm25', 'dbert-ft'],
        noSubsampling=True,
        onlyBestForField='ndcg',
        doNormalization=doNormalization,
    )

    def getAlone(name):
        # First row of `alones` whose id contains the model name.
        for row in alones:
            if name in row['id']:
                return row
        raise ValueError("No row found for the model " + name)

    bm25 = getAlone('bm25')
    dbertft = getAlone('dbert-ft')

    # Index of all rows by id; the first occurrence wins, as a linear scan would do.
    rowsById = {}
    for row in rows + alones:
        rowsById.setdefault(row['id'], row)

    def getRow(rowId):
        # Explicit error instead of a TypeError on None when an id is stale.
        if rowId not in rowsById:
            raise ValueError("Row id not found: " + str(rowId))
        return rowsById[rowId]

    # We split the rows in 4 groups according to the models they combine:
    def newGroup():
        return {'x': [], 'y': [], 'id': [], 'model': []}
    havingBoth, havingDBertft, havingBm25, others = newGroup(), newGroup(), newGroup(), newGroup()
    for row in rows:
        modelsRepr = str(row['models']) if 'models' in row else ""
        hasDBertft = 'dbert-ft' in modelsRepr
        hasBm25 = 'bm25' in modelsRepr
        if hasDBertft and hasBm25:
            group = havingBoth
        elif hasDBertft:
            group = havingDBertft
        elif hasBm25:
            group = havingBm25
        else:
            group = others
        group['x'].append(row[xaxis])
        group['y'].append(row[yaxis])
        group['model'].append(getModel(row))
        group['id'].append(row['id'])

    # Local copies of the ranges: for small figures the top and the right are
    # tightened around the frontier ends (5% margin), without touching the globals.
    xRange, yRange = xrange, yrange
    if width <= 300 and paretoFrontier:
        if yRange is not None:
            newYTop = getRow(paretoFrontier[0])[yaxis] + 0.05 * abs(yRange[0] - yRange[1])
            yRange = (yRange[0], newYTop)
        if xRange is not None:
            newXRight = getRow(paretoFrontier[-1])[xaxis] + 0.05 * abs(xRange[0] - xRange[1])
            xRange = (xRange[0], newXRight)
    rangeKwargs = {}
    if xRange is not None:
        rangeKwargs['x_range'] = xRange
    if yRange is not None:
        rangeKwargs['y_range'] = yRange

    TOOLTIPS = [("model", "@model"), ("id", "@id")]
    if not addTitle:
        title = None
    elif width <= 300:
        title = yaxis + " given " + xaxis
    else:
        title = "Multi-objective (" + xaxis + " and " + yaxis + ") of combinations"
    p = figure(
        title=title,
        x_axis_label=xaxis if addAxis else None,
        y_axis_label=yaxis if addAxis else None,
        tooltips=TOOLTIPS,
        width=width,
        height=height,
        **rangeKwargs
    )

    # Misc params:
    lineAlpha = 0.4
    lineKwargs = {'line_width': lineWidth, 'line_color': palette[4], 'line_dash': 'dashed'} # dashed, dotted, dotdash, dashdot
    # We draw the pareto frontier (ids that are not found are skipped):
    frontierRows = [rowsById[current] for current in paretoFrontier if current in rowsById]
    frontierLine = p.line([row[xaxis] for row in frontierRows], [row[yaxis] for row in frontierRows], **lineKwargs)
    if paretoFrontier:
        # We draw the left extension:
        first = getRow(paretoFrontier[0])
        left = xRange[0] if xRange is not None else 0
        p.line([left, first[xaxis]], [first[yaxis], first[yaxis]], line_alpha=lineAlpha, **lineKwargs)
        # We draw the bottom extension:
        last = getRow(paretoFrontier[-1])
        p.line([last[xaxis], last[xaxis]], [last[yaxis], 0], line_alpha=lineAlpha, **lineKwargs)

    # We draw the points. `others` comes first so that it stays under the other
    # groups. Each renderer is kept in a variable: the legend below refers to
    # them directly instead of relying on their position in p.renderers.
    args = ('x', 'y')
    othersGlyph = p.circle(*args, size=circleSizes[2], source=ColumnDataSource(others), **getColorKwargs(palette[3]))
    bothGlyph = p.triangle(*args, size=triangleSizes[0], source=ColumnDataSource(havingBoth), **getColorKwargs(palette[0]))
    dbertftGlyph = p.triangle(*args, size=triangleSizes[1], source=ColumnDataSource(havingDBertft), **getColorKwargs(palette[1]))
    bm25Glyph = p.circle(*args, size=circleSizes[1], source=ColumnDataSource(havingBm25), **getColorKwargs(palette[0]))
    bm25Source = {'x': [bm25[xaxis]], 'y': [bm25[yaxis]], 'model': [bm25['model']], 'id': [bm25['id']]}
    dbertftSource = {'x': [dbertft[xaxis]], 'y': [dbertft[yaxis]], 'model': [dbertft['model']], 'id': [dbertft['id']]}
    bm25AloneGlyph = p.circle(*args, size=circleSizes[1], source=ColumnDataSource(bm25Source), **getColorKwargs(palette[4]))
    dbertftAloneGlyph = p.triangle(*args, size=triangleSizes[0], source=ColumnDataSource(dbertftSource), **getColorKwargs(palette[4]))

    if displayLegend:
        items = \
        [
            LegendItem(label='Having at least dbert-ft', renderers=[dbertftGlyph]),
            LegendItem(label='Having at least bm25', renderers=[bm25Glyph]),
            LegendItem(label='Having dbert-ft and bm25', renderers=[bothGlyph]),
            LegendItem(label='bm25 alone', renderers=[bm25AloneGlyph]),
            LegendItem(label='dbert-ft alone', renderers=[dbertftAloneGlyph]),
            LegendItem(label='Other combinations', renderers=[othersGlyph]),
            LegendItem(label='Pareto frontier', renderers=[frontierLine]),
        ]
        legend1 = Legend(items=items, location='bottom_right' if 'ser' in xaxis else 'top_right')
        p.add_layout(legend1)
    return p

In [543]:
# Grid: one line per metric family, with the plain metric on the left and
# its topic variant on the right. Every figure keeps its own size
# (no plot_width / plot_height override is given to gridplot).
grid = gridplot([
    [figures[left], figures[right]]
    for left, right in (
        ('div@100', 'topic-div@100'),
        ('nov@100', 'topic-nov@100'),
        ('snov@100', 'topic-snov@100'),
    )
])
show(grid)