This notebook allow you to print results of models in tables

In [1]:
import os ; os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [2]:
isNotebook = '__file__' not in locals()

In [3]:
from systemtools.hayj import *
from systemtools.location import *
from systemtools.basics import *
from systemtools.file import *
from systemtools.printer import *
from twinews.utils import *

In [4]:
import pandas as pd
from IPython.display import display, HTML

In [7]:
def printResults\
(
    model=None,
    splitVersion=None,
    metaFilter={}, # A dict that map field to mandatory values
    metricsFilter=None, # A set of allowed metrics
    noSubsampling=True,
    logger=None,
):
    twinewsRankings = getTwinewsRankings(logger=logger)
    twinewsScores = getTwinewsScores(logger=logger)
    data = []
    if noSubsampling and "maxUsers" not in metaFilter:
        metaFilter = mergeDicts(metaFilter, {"maxUsers": None})
    if model is not None and "model" not in metaFilter:
        metaFilter = mergeDicts(metaFilter, {"model": model})
    if splitVersion is not None and "splitVersion" not in metaFilter:
        metaFilter = mergeDicts(metaFilter, {"splitVersion": splitVersion})
    for key in twinewsRankings.keys():
        toKeep = True
        meta = twinewsRankings.getMeta(key)
        if 'historyRef' in meta:
            meta['historyRef'] = str(meta['historyRef'])
        for filtKey in metaFilter:
            if filtKey not in meta:
                raise Exception(filtKey + "is not in " + b(meta, 5))
            if metaFilter[filtKey] != meta[filtKey]:
                toKeep = False
                break
        if toKeep:
            data.append(meta)
    if len(data) == 0:
        log("No data found", logger)
    else:
        try:
            refKeys = data[0].keys()
            for e in data:
                assert e.keys() == refKeys
        except:
            raise Exception("Some data keys doesn't match:\n" + b(data, 5))
        if len(data) > 1:
            keysHavingSameValues = set(data[0].keys())
            baseValues = data[0]
            for current in data[1:]:
                for key in baseValues.keys():
                    if key in keysHavingSameValues and baseValues[key] != current[key]:
                        keysHavingSameValues.remove(key)
            sameValues = dict()
            for key in keysHavingSameValues:
                sameValues[key] = data[0][key]
            log("These values are common to all rows:\n", logger)
            for key, value in sameValues.items():
                log("\t- " + str(key) + ": " + str(value), logger)
            for i in range(len(data)):
                for key in keysHavingSameValues:
                    del data[i][key]
        # We add scores:
        metrics = set()
        for current in data:
            key = current['id']
            scores = twinewsScores.find({'id': key})
            for score in scores:
                metrics.add(score['metric'])
                current[score['metric']] = score['score']
        df = pd.DataFrame(data)
        df = reorderDFColumns(df, start=['id'], end=sorted(list(metrics)))
        display(df)
        return df

In [8]:
df = printResults\
(
    model='lda',
    splitVersion=2,
    noSubsampling=True,
    metaFilter=\
    {
        # "implementation": "gensim-lda",
    },
    metricsFilter={'ndcg', 'ndcg@10'},
)

twinews scores initialised.
These values are common to all rows:

	- useExtraNews: False
	- ldaLearningMethod: online
	- ldaLearningDecay: 0.7
	- model: lda
	- splitVersion: 2
	- lowercase: True
	- maxUsers: None
	- maxDF: 300


Unnamed: 0,id,distance,doLemmatization,historyRef,implementation,ldaLearningOffset,maxDocuments,maxIter,minDF,nbTopics,useTFIDF,map,mrr,ndcg,ndcg@10,ndcg@100,p@10,p@100
0,lda-de387,cosine,False,0.5,sklearn-lda,1.0,500,2,0.002,30,False,,,,,,,
1,lda-b637b,cosine,False,0.5,gensim-lda,1.0,10000,30,0.0005,100,True,0.032071,0.086638,0.256728,0.035279,0.087218,0.023961,0.013872
2,lda-51ce2,cosine,False,0.3,gensim-lda,10.0,10000,60,0.0005,100,False,0.04431,0.108344,0.280176,0.046566,0.12454,0.033776,0.021236
3,lda-2fa8e,euclidean,False,0.3,gensim-lda,1.0,10000,60,0.0005,100,False,0.108196,0.254482,0.361689,0.13783,0.24575,0.091047,0.034073
4,lda-aa8d5,cosine,True,0.3,gensim-lda,1.0,1000,30,0.0005,30,True,0.033513,0.089155,0.258672,0.037511,0.090241,0.025803,0.014548
5,lda-94782,cosine,False,3.0,gensim-lda,1.0,10000,60,0.0005,100,False,,,,,,,
6,lda-71511,cosine,True,0.3,gensim-lda,1.0,10000,30,0.0005,30,True,,,,,,,
7,lda-d3b82,cosine,False,0.3,gensim-lda,1.0,10000,60,0.0005,100,False,,,,,,,
