In [1]:
import os ; os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [2]:
isNotebook = '__file__' not in locals()

In [3]:
from systemtools.hayj import *
from systemtools.location import *
from systemtools.basics import *
from systemtools.file import *
from systemtools.printer import *
from databasetools.mongo import *
from newstools.goodarticle.utils import *
from nlptools.preprocessing import *
from nlptools.news import parser as newsParser
from machinelearning.iterator import *
from twinews.utils import *
from twinews.evaluation import metrics
from twinews.evaluation.utils import *
import time
import pymongo

In [4]:
logger = Logger(tmpDir('logs') + "/evaluation.log") if isNotebook else Logger("evaluation.log")
tt = TicToc(logger=logger)
tt.tic()

--> tictoc starts...


-1

In [5]:
iterations = 1 if isNotebook else 10000000
sleep = 0 if isNotebook else 10
exceptionSleep = 10

In [6]:
metricFuncts = \
{
    'ndcg': metrics.ndcg,
    'ndcg@10': metrics.ndcgAt10,
    'ndcg@100': metrics.ndcgAt100,
    'mrr': metrics.mrr,
    'p@10': metrics.pAt10,
    'p@100': metrics.pAt100,
    'map': metrics.map,
}

In [8]:
# We get scores collection et the rankings GridFS:
twinewsScores = getTwinewsScores(logger=logger)
twinewsRankings = getTwinewsRankings(logger=logger)
# For a certain amount of iterations:
for i in range(iterations):
    # We get all
    modelsKeys = twinewsRankings.keys()
    # For all model instances:
    for modelKey in modelsKeys:
        # We init the eval data to None:
        evalData = None
        rankings = None
        # For all metrics:
        for metricKey, metricFunct in metricFuncts.items():
            # If we didn't added the score previously:
            if not twinewsScores.has({'id': modelKey, 'metric': metricKey}):
                try:
                    # We print infos:
                    log("Computing " + metricKey + " score of " + modelKey + "...", logger)
                    # We get all data:
                    meta = twinewsRankings.getMeta(modelKey)
                    splitVersion = meta['splitVersion']
                    maxUsers = meta['maxUsers']
                    modelName = meta['model']
                    # We get eval data:
                    if evalData is None:
                        log("Downloading eval data version " + str(splitVersion) + "...", logger)
                        evalData = getEvalData(splitVersion, logger=logger, maxExtraNews=0)
                    candidates = evalData['candidates']
                    # We get rankings:
                    if rankings is None:
                        log("Downloading rankings of " + modelKey + "...", logger)
                        rankings = twinewsRankings[modelKey]
                        # We check if rankings are coherent with candidates (for the right split version):
                        gotACheck = False
                        rankingsKeys = set(rankings.keys())
                        candidatesKeys = set(candidates.keys())
                        if maxUsers is None:
                            assert len(rankingsKeys) == len(candidatesKeys)
                        assert len(rankingsKeys.union(candidatesKeys)) == len(candidatesKeys)
                        for userId in rankings:
                            assert len(rankings[userId]) == len(candidates[userId])
                            for i, ranking in enumerate(rankings[userId]):
                                assert isinstance(ranking, list)
                                rankingSet = set(ranking)
                                currentCandidates = candidates[userId][i]
                                assert len(rankingSet) == len(currentCandidates)
                                assert isinstance(currentCandidates, set)
                                assert len(rankingSet.union(currentCandidates)) == len(rankingSet)
                                gotACheck = True
                        assert gotACheck
                        log("Done.", logger)
                    # We convert all in a list of rel vectors:
                    rels = []
                    for userId in rankings:
                        for ranking in rankings[userId]:
                            rel = rankingToRelevanceVector(ranking, set(evalData['testUsers'][userId].keys()))
                            rels.append(rel)
                    # We compute all scores:
                    scores = []
                    for rel in rels:
                        scores.append(metricFunct(rel))
                    # We mean all scrores:
                    score = np.mean(scores)
                    # And finally we add the score in the db:
                    addTwinewsScore(modelKey, metricKey, score, verbose=False)
                    # We print result:
                    log(metricKey + " score of " + modelKey + ": " + str(truncateFloat(score, 2)), logger)
                except Exception as e:
                    if isNotebook:
                        raise e
                    else:
                        logException(e, logger)
                        time.sleep(exceptionSleep)
    if sleep > 0:
        log("Sleeping " + str(sleep) " seconds for the iteration " + str(i) + " on " + str(iterations) + "...", logger)
    time.sleep(sleep)

twinews scores initialised.
Computing ndcg score of lda-72dfd...
Downloading eval data version 2...
--> tic: 32.9s | message: Eval data loaded
--> toc total duration: 36.36s | message: Got Twinews evaluation data
Downloading rankings of lda-72dfd...
Done.
ndcg score of lda-72dfd: 0.17
Computing ndcg@10 score of lda-72dfd...
ndcg@10 score of lda-72dfd: 0.0
Computing ndcg@100 score of lda-72dfd...
ndcg@100 score of lda-72dfd: 0.02
Computing mrr score of lda-72dfd...
mrr score of lda-72dfd: 0.0
Computing p@10 score of lda-72dfd...
p@10 score of lda-72dfd: 0.0
Computing p@100 score of lda-72dfd...
p@100 score of lda-72dfd: 0.0
Computing map score of lda-72dfd...
map score of lda-72dfd: 0.0
Computing ndcg score of lda-71511...
Downloading eval data version 2...
--> tic: 33.15s | message: Eval data loaded
--> toc total duration: 36.88s | message: Got Twinews evaluation data
Downloading rankings of lda-71511...
Done.
ndcg score of lda-71511: 0.23
Computing ndcg@10 score of lda-71511...
ndcg@1

In [10]:
bp(evalData)

{
  candidates: 
  {
    100022528: 
    [
      {
        http://a.msn.com/0A/en-us/BBI0fMo?ocid=st,
        http://abc27.com/2018/01/05/a-basketball-life-donyae-baylor-carroll/,
        ...,
        https://www.yorkshireeveningpost.co.uk/news/crime/how-many-clare-s-law-requests-are-approved-by-west,
        https://www.zerohedge.com/news/2017-12-28/global-stocks-rally-new-record-highs-dollar-tumbles
      }
    ],
    100024324: 
    [
      {
        http://a.msn.com/0A/en-us/BBI0fMo?ocid=st,
        http://a.msn.com/0B/en-us/BBIb6wL?ocid=st,
        ...,
        https://www.yahoo.com/news/fallout-louisiana-teachers-arrest-rages-035613133.html?.tsrc=fauxdal,
        https://www.yorkregion.com/whatson-story/7998261-full-circle-a-caffeine-fuelled-record-breaking-conc
      }
    ],
    100064338: 
    [
      {
        http://a.msn.com/00/en-us/BBHyFMw?ocid=st,
        http://a.msn.com/01/en-ca/BBHuE8G?ocid=st,
        ...,
        https://www.yorkregion.com/news-story/8071343-york-re

In [11]:
bp(evalData['meta'], 5)

{ 'created': 2020.03.24-14.28.06, 'endDate': 2018-01-15, 'id': 2, 'ranksLength': 1000, 'splitDate': 2017-12-25, 'startDate': 2017-10-01, 'testMaxNewsPerUser': 97, 'testMeanNewsPerUser': 7.22, 'testMinNewsPerUser': 2, 'testNewsCount': 71781, 'totalNewsAvailable': 570210, 'trainMaxNewsPerUser': 379, 'trainMeanNewsPerUser': 26.48, 'trainMinNewsPerUser': 8, 'trainNewsCount': 237150, 'usersCount': 15905 }
