This notebook allow you to print results of models in tables

In [1]:
# from twinews import config as twinewsConf
# twinewsConf.mongoLocation = "octods"

In [2]:
from systemtools.hayj import *
from systemtools.location import *
from systemtools.basics import *
from systemtools.file import *
from systemtools.printer import *
from twinews.utils import *
from twinews.evaluation.utils import *
from twinews.models.ranking import *

In [3]:
if False:
    purgeSubsampledRankings()

In [4]:
if False:
    removeRankingsAndScores('tfidf-b0e81')

In [5]:
def basicPrint\
(
    models,
    *args, w=None, b=None,
    splitVersion=2,
    doNormalization=True,
    discardAt10=True, addNdcg=True,
    returnDF=False,
    **kwargs,
):
    if w is not None and isinstance(w, str):
        w = [w]
    if b is not None and isinstance(b, str):
        b = [b]
    if isinstance(w, set):
        w = list(w)
    if isinstance(b, set):
        b = list(b)
    if len(args) > 0:
        if w is not None:
            w = list(w) + list(args)
        else:
            w = list(args)
    if discardAt10:
        if b is None:
            b = []
        b = b + ['ndcg@10', 'ndcg@100', 'p@10$']
    if addNdcg:
        if w is None:
            w = []
        w = w + ['ndcg']
    printReportKwargs = \
    {
        'whiteModels': models,
        'splitVersion': splitVersion,
        'noSubsampling': True,
        'whiteMetrics': w,
        'blackMetrics': b,
        'sortBy': 'ndcg',
        'doNormalization': doNormalization,
    }
    printReportKwargs = mergeDicts(printReportKwargs, kwargs)
    df = printReport(**printReportKwargs)
    if returnDF:
        return df.data

In [6]:
def printAll(*args, **kwargs):
    kwargs['onlyFields'] = {'id', 'model'}
    kwargs['blackModels'] = {'combin'}
    return basicPrint(None, *args, **kwargs)

In [7]:
modelMapping = \
{
    'random': 'Random', 'ideal': 'Ideal', 'worst': 'Worst',
    'bm25': 'BM25', 'tfidf': 'TFIDF', 'doc2vec': 'Doc2Vec', 'nmf': 'NMF', 'usent': 'USent', 'lda': 'LDA',
    'infersent': 'InferSent', 'bert': 'BERT', 'sent2vec': 'Sent2Vec', 'jaccard': 'Jaccard', 'stylo': 'Stylo', 'word2vec': 'Word2Vec',
    'dbert-ft': 'DBert-ft', 'dbert-base': 'DBert',
}
fieldMapping = {'model': 'Model', 'ndcg': 'nDCG', 'ndcg@100': 'nDCG@100', 'mrr': 'MRR', 'map': 'MAP', 'p@100': 'P@100',}
def dfToLatex\
(
    df, fields=None, bfColumns={0}, itColumns={0},
    lineBreak=" \\\\", # " \\\\\n"
    interScores="", # "\\hdashedline"
    interHeads="\n\\hline",
    scoreDigits=5,
):
    if fields is None:
        fields = ['model', 'dominance'] + METRICS_ORDER
    data = df.to_dict('records')
    rowFields = []
    alreadyPrintedFields = False
    for row in data:
        rowText = ""
        columnNumber = 0
        for field in fields:
            if field in row:
                if not alreadyPrintedFields:
                    rowFields.append(field)
                score = row[field]
                if isinstance(score, float) or isinstance(score, int):
                    score = padAndTruncateFloat(score, 5)
                elif isinstance(score, str):
                    if score in modelMapping:
                        score = modelMapping[score]
                if columnNumber in bfColumns:
                    score = "\\textbf{" + score + "}"
                if columnNumber in itColumns:
                    score = "\\textit{" + score + "}"
                rowText += score + " & "
                columnNumber += 1
        if not alreadyPrintedFields:
            alreadyPrintedFields = True
            fieldsText = ""
            for field in rowFields:
                if field in fieldMapping:
                    field = fieldMapping[field]
                fieldsText += "\\textbf{" + field + "} & "
            print(fieldsText[:-3] + lineBreak + interHeads)
        print(rowText[:-3] + lineBreak + interScores)
    print(interHeads)

# Misc

In [None]:
if False:
    twinewsRankings = getTwinewsRankings()
    keysToDelete = set()
    for key in twinewsRankings.keys():
        if "combin" in key:
            meta = twinewsRankings.getMeta(key)
            hasTFIDF = False
            for m in meta['models']:
                if 'tfidf' in m:
                    hasTFIDF = True
                    break
            if hasTFIDF and meta['splitVersion'] == 1:
                keysToDelete.add(key)
    print(keysToDelete)

In [None]:
if False:
    for key in keysToDelete:
        removeRankingsAndScores(key)

In [None]:
# Removing combin models with maxUsers is not None:
if False:
    twinewsRankings = getTwinewsRankings()
    ids = twinewsRankings.keys()
    bp(ids)
    for currentId in ids:
        if 'combin' in currentId:
            meta = twinewsRankings.getMeta(currentId)
            if meta['maxUsers'] is not None:
                del twinewsRankings[currentId]

# Model lists per split version

In [8]:
def printModelList(splitVersion):
    twinewsRankings = getTwinewsRankings()
    models = set()
    for key in twinewsRankings.keys():
        meta = twinewsRankings.getMeta(key)
        if meta["splitVersion"] == splitVersion:
            models.add(meta['model'])
    print(" ".join(sorted(list(models))) + " (" + str(len(models)) + ")")

In [None]:
printModelList(2)

In [None]:
printModelList(1)

# Reference models

In [None]:
args = ('div')
basicPrint('ideal', *args, splitVersion=2)
basicPrint('random', *args, splitVersion=2)
basicPrint('worst', *args, splitVersion=2)

In [None]:
basicPrint(['ideal', 'random', 'worst'], 'ndcg', 'p@', 'mrr', 'map', '^div', splitVersion=2)

# Ad-hoc models

In [None]:
basicPrint('jaccard', 'div', b=['topic', 'jacc', 'style'])

In [None]:
basicPrint('bm25', 'map', 'mrr', 'ndcg', 'p@100')

In [None]:
basicPrint('BM25_yf')

In [None]:
basicPrint('BM25_yf', noSubsampling=False)

# Models that use vector representations

In [13]:
basicPrint('lda', 'div')

These values are common to all rows (18):

	- useExtraNews: False
	- model: lda
	- splitVersion: 2
	- maxUsers: None
	- ldaLearningMethod: online
	- lowercase: True
	- maxDF: 300
	- ldaLearningDecay: 0.7


Unnamed: 0,id,distance,doLemmatization,historyRef,implementation,ldaLearningOffset,maxDocuments,maxIter,minDF,nbTopics,useTFIDF,ndcg,div@100,topic-div@100,jacc-div@100,swjacc-div@100,style-div@100
11,lda-d4f1d,cosine,False,30.0,gensim-lda,1,300000,60,0.0005,100,False,0.41897,0.62944,0.29897,0.8836,0.96685,0.44918
5,lda-b123f,cosine,False,30.0,gensim-lda,1,100000,120,0.0005,100,False,0.41415,0.62624,0.30663,0.88368,0.96666,0.44481
15,lda-d1220,cosine,False,0.3,gensim-lda,1,100000,60,0.0005,100,False,0.38648,0.63024,0.30067,0.88368,0.9673,0.44309
17,lda-af807,cosine,False,30.0,gensim-lda,1,10000,60,0.0005,100,False,0.37941,0.61098,0.31975,0.88208,0.96476,0.43527
12,lda-759de,cosine,False,0.6,gensim-lda,1,10000,60,0.0005,100,False,0.37484,0.62378,0.33044,0.88227,0.96573,0.4497
6,lda-64d0c,cosine,False,1.0,gensim-lda,1,10000,60,0.0005,100,False,0.37478,0.59564,0.30158,0.87916,0.96317,0.42985
8,lda-2fa8e,euclidean,False,0.3,gensim-lda,1,10000,60,0.0005,100,False,0.36168,0.60972,0.32202,0.88171,0.96346,0.42415
0,lda-b849e,cosine,False,0.3,gensim-lda,1,10000,60,0.0005,300,False,0.35651,0.61025,0.35456,0.88224,0.9645,0.44078
10,lda-7786b,cosine,True,0.3,gensim-lda,1,10000,60,0.0005,100,False,0.35505,0.62631,0.35744,0.88137,0.96584,0.4651
9,lda-d3b82,cosine,False,0.3,gensim-lda,1,10000,60,0.0005,100,False,0.35335,0.63907,0.35906,0.88448,0.96705,0.45477


In [14]:
basicPrint('nmf', 'div')

Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100', 'style-div@100'}
Found key difference: {'topic-div@100', 'div@100', 'jacc-div@100', 'swjacc-div@100

Unnamed: 0,id,doLemmatization,historyRef,maxDocuments,maxIter,nbTopics,nmfInit,nmfL1Ratio,ndcg,div@100,topic-div@100,jacc-div@100,swjacc-div@100,style-div@100
6,nmf-9cd4f,False,30.0,100000,200,100,nndsvd,0.0,0.49322,0.59517,0.20486,0.88125,0.9633,0.42497
34,nmf-a4000,False,0.8,100000,200,100,nndsvd,0.0,0.49122,,,,,
30,nmf-ab73b,False,0.95,100000,200,100,nndsvd,0.0,0.49064,,,,,
24,nmf-96201,False,0.85,100000,200,100,nndsvd,0.0,0.48998,,,,,
22,nmf-330c8,False,30.0,10000,200,100,nndsvd,0.0,0.48901,0.59875,0.21346,0.88119,0.96368,0.42782
9,nmf-b1354,False,30.0,100000,200,100,,0.0,0.48842,0.59543,0.20692,0.88081,0.96328,0.42732
35,nmf-cef82,False,0.9,100000,200,100,nndsvd,0.0,0.48726,,,,,
2,nmf-ee6c6,False,1.0,100000,200,100,nndsvd,0.0,0.48682,0.59351,0.20202,0.88075,0.96303,0.42721
18,nmf-ad87b,False,30.0,100000,400,100,nndsvd,0.0,0.48653,0.59549,0.20561,0.88088,0.9633,0.4288
3,nmf-6e0ff,False,0.7,100000,200,100,nndsvd,0.0,0.48606,,,,,


In [None]:
basicPrint('tfidf', 'map', 'mrr', 'p@100')

In [None]:
basicPrint('dbert-ft')

In [None]:
basicPrint('dbert-base')

In [None]:
basicPrint('stylo')

In [None]:
basicPrint('infersent')

In [None]:
basicPrint('sent2vec')

In [None]:
basicPrint('word2vec')

In [None]:
basicPrint('doc2vec')

In [None]:
basicPrint('usent')

In [None]:
basicPrint('bert')

# All models for split version 2

In [12]:
printAll('^div', splitVersion=2)

No serendipities to average in a row.
Normalized metrics: {'topic-div@100', 'style-snov@100', 'style-nov@100', 'div@100', 'topic-nov@100', 'nov@100', 'snov@100', 'topic-snov@100', 'style-div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference: {'div@100'}
Found key difference:

Unnamed: 0,id,model,ndcg,div@100
217,ideal-5ac2d,ideal,1.0,0.76969
166,bm25-1eb2a,bm25,0.55949,0.53165
234,bm25-e8059,bm25,0.5592,0.53989
286,tfidf-be107,tfidf,0.54867,0.43447
147,tfidf-6c83e,tfidf,0.54863,0.44013
105,tfidf-01001,tfidf,0.5484,0.44522
197,tfidf-7c894,tfidf,0.54769,0.43126
282,tfidf-03ad6,tfidf,0.54762,
161,tfidf-f57a2,tfidf,0.54631,0.42688
83,tfidf-11f90,tfidf,0.54509,0.42513


# Combinations of models

In [9]:
# Expected 156 rows:
basicPrint('combin')

No serendipities to average in a row.
Normalized metrics: {'nov@100', 'style-nov@100', 'style-div@100', 'topic-div@100', 'snov@100', 'div@100', 'topic-snov@100', 'topic-nov@100', 'style-snov@100'}
These values are common to all rows (161):

	- splitVersion: 2
	- model: combin
	- weights: [0.5, 0.5]
	- maxUsers: None


Unnamed: 0,id,alphas,betas,dominance,models,rankAsScore,ndcg
23,combin-cf91b,"[0.5, 0.5]","['LOG', 'LOG']",0.33,"['bm25-1eb2a', 'dbert-ft-d1b5f']","[False, False]",0.58772
58,combin-c56e2,"[0.5, 0.5]","['LOG', 'LOG']",0.66,"['dbert-ft-d1b5f', 'tfidf-be107']","[False, False]",0.58034
17,combin-f3aa7,"[0.5, 0.5]","['LOG', 'LOG']",0.32,"['bm25-1eb2a', 'doc2vec-19ca3']","[False, False]",0.57265
35,combin-e7de3,"[0.5, 0.5]","['LOG', 'LOG']",0.49,"['bm25-1eb2a', 'tfidf-be107']","[False, False]",0.56678
126,combin-84cca,"[0.5, 0.5]","['LOG', 'LOG']",0.51,"['bm25-1eb2a', 'tfidf-be107']","[True, True]",0.56655
152,combin-6b411,"[0.5, 0.5]","['LOG', 'LOG']",0.52,"['dbert-ft-d1b5f', 'doc2vec-19ca3']","[False, False]",0.56448
32,combin-a9c56,"[0.5, 0.5]","['LOG', 'LOG']",0.5,"['bm25-1eb2a', 'doc2vec-19ca3']","[True, True]",0.56408
72,combin-01199,"[0.5, 0.5]","['LOG', 'LOG']",0.65,"['doc2vec-19ca3', 'tfidf-be107']","[False, False]",0.56279
60,combin-1e53b,"[0.5, 0.5]","['LOG', 'LOG']",0.54,"['bm25-1eb2a', 'dbert-base-aafd1']","[False, False]",0.5598
64,combin-7d8fe,"[0.5, 0.5]","['LOG', 'LOG']",1.0,"['bm25-1eb2a', 'word2vec-4a2c5']","[False, False]",0.55949


In [None]:
# dbert-base-fe9f3', 'sent2vec-7e0f9 --> ok 0.401 (arrondi à 0.40) + dom=0.3 vs 0.418 (arrondi à 0.42) + dom=0.5
# lda-d4f1d', 'sent2vec-7e0f9
# bert-fae09', 'sent2vec-7e0f9

In [None]:
generalBlackIds = {'combin-efb75', 'combin-0b1b1', 'combin-8ac23', 'combin-36cf8', 'combin-e084e'}

# Deep Learning models

In [None]:
basicPrint('DSSM_yf', noSubsampling=False)

In [None]:
basicPrint('DSSM_title', noSubsampling=False)

# Reports on the test set (split version 1)

In [10]:
args = ('ndcg', 'p@', 'mrr', 'map')

In [None]:
basicPrint(['ideal', 'random', 'worst'], *args, splitVersion=1)

In [None]:
df = printAll(*args, splitVersion=1, returnDF=True)

In [None]:
print(a)

In [None]:
dfToLatex(df)

In [None]:
for current in :
    print(current)

In [11]:
basicPrint('combin', *args, splitVersion=1)

Normalized metrics: {'nov@100', 'style-nov@100', 'style-div@100', 'topic-div@100', 'snov@100', 'div@100', 'topic-snov@100', 'topic-nov@100', 'style-snov@100'}
These values are common to all rows (156):

	- splitVersion: 1
	- model: combin
	- alphas: [0.5, 0.5]
	- betas: ['LOG', 'LOG']
	- weights: [0.5, 0.5]
	- maxUsers: None


Unnamed: 0,id,dominance,models,rankAsScore,ndcg,map,mrr,p@100
128,combin-8f846,0.32,"['bm25-933f7', 'dbert-ft-7847a']","[False, False]",0.63565,0.39517,0.66123,0.07999
70,combin-6ecf3,0.66,"['dbert-ft-7847a', 'tfidf-4a1dc']","[False, False]",0.62673,0.38256,0.6522,0.07858
127,combin-6fad8,0.32,"['bm25-933f7', 'doc2vec-e013a']","[False, False]",0.62146,0.37663,0.63976,0.07817
16,combin-efad4,0.5,"['bm25-933f7', 'tfidf-4a1dc']","[False, False]",0.61633,0.37163,0.63546,0.07679
28,combin-fb58c,0.5,"['bm25-933f7', 'tfidf-4a1dc']","[True, True]",0.61588,0.37086,0.63463,0.07683
43,combin-1c964,0.49,"['bm25-933f7', 'doc2vec-e013a']","[True, True]",0.61391,0.36506,0.6335,0.07748
103,combin-35594,0.66,"['doc2vec-e013a', 'tfidf-4a1dc']","[False, False]",0.61083,0.36301,0.62302,0.07715
59,combin-d0fd0,0.52,"['dbert-ft-7847a', 'doc2vec-e013a']","[False, False]",0.61009,0.3584,0.62854,0.07831
74,combin-904f0,1.0,"['bm25-933f7', 'word2vec-f3c38']","[False, False]",0.60819,0.36316,0.61723,0.07674
110,combin-50f90,0.53,"['bm25-933f7', 'dbert-base-d092a']","[False, False]",0.60802,0.36089,0.6235,0.07555


# Multi-objective

In [13]:
args = \
(
    'ndcg',
    '^div@100$', '^topic-div@100$',
    '^nov@100$', '^topic-nov@100$',
    '^snov@100$', '^topic-snov@100$', '^avg-ser@100$',
    '^bm25-ser@100$',
)
onlyFields = {'id', 'model'}

In [18]:
models = "jaccard worst ideal random".split()
basicPrint(models, *args, onlyFields=onlyFields, splitVersion=1, sortBy="ndcg", metricsDigits=5)

Unnamed: 0,id,model,ndcg,div@100,topic-div@100,nov@100,topic-nov@100,snov@100,topic-snov@100,avg-ser@100,bm25-ser@100
1,ideal-5eb2b,ideal,1.0,0.765,0.642,0.714,0.589,0.653,0.699,0.999,0.999
2,jaccard-1d3f1,jaccard,0.303,0.392,0.404,0.556,0.48,0.532,0.601,0.071,0.118
3,random-xxxxx,random,0.24,0.767,0.651,0.741,0.639,0.695,0.751,0.044,0.045
0,worst-559a0,worst,0.205,0.778,0.657,0.752,0.65,0.705,0.762,0.0,0.0


In [None]:
# Il faut éliminer :
# tfidf-7febb
# tfidf-4b89a

In [15]:
models = "bert bm25 dbert-base dbert-ft doc2vec infersent lda nmf sent2vec stylo tfidf usent".split()
basicPrint(models, *args, onlyFields=onlyFields, splitVersion=1, sortBy="ndcg", metricsDigits=5)

Normalized metrics: {'nov@100', 'style-nov@100', 'style-div@100', 'topic-div@100', 'snov@100', 'div@100', 'topic-snov@100', 'topic-nov@100', 'style-snov@100'}


Unnamed: 0,id,model,ndcg,div@100,topic-div@100,nov@100,topic-nov@100,snov@100,topic-snov@100,avg-ser@100,bm25-ser@100
10,bm25-933f7,bm25,0.608,0.53,0.321,0.528,0.266,0.439,0.316,0.29,0.0
8,tfidf-4a1dc,tfidf,0.596,0.438,0.338,0.494,0.29,0.409,0.336,0.181,0.142
2,doc2vec-e013a,doc2vec,0.582,0.541,0.382,0.548,0.323,0.469,0.382,0.319,0.245
6,tfidf-7febb,tfidf,0.573,0.417,0.333,0.489,0.295,0.43,0.378,0.123,0.122
3,tfidf-4b89a,tfidf,0.561,0.521,0.423,0.533,0.342,0.371,0.294,0.112,0.205
12,nmf-6078e,nmf,0.54,0.591,0.213,0.562,0.185,0.498,0.264,0.35,0.253
9,dbert-ft-7847a,dbert-ft,0.536,0.607,0.477,0.602,0.422,0.541,0.506,0.394,0.354
7,usent-64ec0,usent,0.517,0.614,0.382,0.599,0.351,0.518,0.404,0.317,0.246
0,lda-82272,lda,0.474,0.618,0.282,0.591,0.275,0.517,0.323,0.293,0.215
1,dbert-base-d092a,dbert-base,0.463,0.57,0.417,0.583,0.391,0.526,0.479,0.21,0.179


In [None]:
# Il faut éliminer word2vec (parce qu'il a 0 dominance) et ces modèles :
# id	dominance	models	rankAsScore	ndcg
# combin-fb58c	0.5	['bm25-933f7', 'tfidf-4a1dc']	[True, True]	0.61588
# combin-1c964	0.49	['bm25-933f7', 'doc2vec-e013a']	[True, True]	0.61391
# combin-904f0	1	['bm25-933f7', 'word2vec-f3c38']	[False, False]	0.60819
# combin-c32f2	0.49	['bm25-933f7', 'dbert-ft-7847a']	[True, True]	0.60681

In [33]:
models = "combin"
onlyFields.add('models')
basicPrint(models, *args, onlyFields=onlyFields, splitVersion=1, sortBy="ndcg", metricsDigits=5)
del onlyFields['models']

These values are common to all rows (156):

	- model: combin


Unnamed: 0,id,models,ndcg,div@100,topic-div@100,nov@100,topic-nov@100,snov@100,topic-snov@100,avg-ser@100,bm25-ser@100
128,combin-8f846,"['bm25-933f7', 'dbert-ft-7847a']",0.635,0.544,0.376,0.545,0.313,0.467,0.375,0.395,0.285
70,combin-6ecf3,"['dbert-ft-7847a', 'tfidf-4a1dc']",0.626,0.498,0.38,0.529,0.327,0.453,0.388,0.35,0.298
127,combin-6fad8,"['bm25-933f7', 'doc2vec-e013a']",0.621,0.515,0.328,0.525,0.275,0.439,0.325,0.325,0.166
16,combin-efad4,"['bm25-933f7', 'tfidf-4a1dc']",0.616,0.473,0.317,0.503,0.267,0.415,0.313,0.245,0.092
28,combin-fb58c,"['bm25-933f7', 'tfidf-4a1dc']",0.615,0.477,0.316,0.505,0.267,0.417,0.312,0.246,0.097
43,combin-1c964,"['bm25-933f7', 'doc2vec-e013a']",0.613,0.511,0.328,0.524,0.276,0.44,0.329,0.315,0.163
103,combin-35594,"['doc2vec-e013a', 'tfidf-4a1dc']",0.61,0.472,0.337,0.509,0.287,0.425,0.336,0.275,0.201
59,combin-d0fd0,"['dbert-ft-7847a', 'doc2vec-e013a']",0.61,0.547,0.4,0.556,0.341,0.483,0.408,0.409,0.347
74,combin-904f0,"['bm25-933f7', 'word2vec-f3c38']",0.608,0.53,0.321,0.528,0.266,0.439,0.316,0.29,0.0
110,combin-50f90,"['bm25-933f7', 'dbert-base-d092a']",0.608,0.515,0.319,0.526,0.276,0.445,0.334,0.277,0.096


TypeError: 'set' object does not support item deletion

# Latex tables for combins

In [None]:
models = ['bm25', 'tfidf', 'doc2vec', 'nmf', 'dbert-ft', 'usent', 'lda', 'dbert-base', 'infersent', 'bert', 'sent2vec', 'stylo', 'word2vec']

In [None]:
data = twinewsGet\
(
    'combin',
    splitVersion=1,
)
print(len(data))

In [None]:
newData = []
for current in data:
    if current['rankAsScore'] == [False, False]:
        newData.append(current)
data = newData
print(len(data))

In [None]:
bp(data)

In [None]:
def findScore(m1, m2, data, metric):
    for row in data:
        rowModels = [row['models'][0][:-6], row['models'][1][:-6]]
        if m1 in rowModels and m2 in rowModels:
            return row[metric]
def modelsToComb(m):
    m1 = modelMapping[m[0][:-6]]
    m2 = modelMapping[m[1][:-6]]
    return m1, m2

In [None]:
def getBestScore(m, metric="ndcg", splitVersion=1):
    return twinewsGet(m, splitVersion=splitVersion, onlyBestForField=metric)[0][metric]

In [None]:
metric = 'ndcg'
head = "\\textbf{Model} & "
for m2 in models:
    head += "\\vertbox{" + modelMapping[m2] + "} & "
head = head[:-3] + " \\\\"
print(head)
print('\\hline')
i = 0
for m1 in models:
    m1Text = modelMapping[m1]
    line = "\\textbf{\\textit{" + m1Text + "}} & " + "\ctg & " * i
    u = 0
    for m2 in models:
        if u > i or m1 == m2:
            if u > i:
                score = findScore(m1, m2, data, metric)
            elif m1 == m2:
                score = getBestScore(m1, metric=metric)
            score = padAndTruncateFloat(score, 5)
            score = score[2:]
            line += str(score) + " & "
        u += 1
    line = line[:-3]
    line += " \\\\\n\\hdashedline"
    print(line)
    i += 1

# Trash

In [None]:
def basicPrint_deprecated(model, *args, b=None, w=None, splitVersion=2, **kwargs):
    if w is not None and isinstance(w, str):
        w = [w]
    if b is not None and isinstance(b, str):
        b = [b]
    if w is not None and len(args) > 0:
        args = list(args) + list(w)
    printReportKwargs = \
    {
        'model': model,
        'splitVersion': splitVersion,
        'noSubsampling': True,
        'metaFilter': \
        {
            # "implementation": "gensim-lda",
        },
        # 'allowedMetrics': {'ndcg', 'div@100', 'topic-div@100', 'swjacc-div@100', 'jacc-div@100'}, # None, {'ndcg', 'ndcg@10'}
        # 'allowedMetrics': {'ndcg', 'ndcg@100', 'p@100', 'mrr', 'map'},
        'discardedMetrics': {'map', 'mrr', 'ndcg@10', 'ndcg@100', 'p@10', 'p@100'},
        'discardedMetrics': {'ndcg@10', 'p@10'},
        'whiteMetricPatterns': None if len(args) == 0 else set(list(args) + ['ndcg']),
        'blackMetricPatterns': b,
        'sortBy': 'ndcg',
    }
    printReportKwargs = mergeDicts(printReportKwargs, kwargs)
    printReport_deprecated(**printReportKwargs)

In [None]:
def printAll_deprecated(*args, **kwargs):
    kwargs['onlyFields'] = {'id', 'model'}
    kwargs['excludedModels'] = {'combin', 'ideal', 'worst', 'random'}
    return basicPrint_deprecated(None, *args, **kwargs)