#Answer Retrieval for a given question:

### This notebook gradually builds upon the QA System

Here we mount my google drive to this notebook.

In [1]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [2]:
!pip install pyserini
!pip install faiss

!apt install libomp-dev
!python -m pip install --upgrade faiss faiss-gpu
import faiss

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyserini
  Downloading pyserini-0.17.0-py3-none-any.whl (109.5 MB)
[K     |████████████████████████████████| 109.5 MB 34 kB/s 
Collecting sentencepiece>=0.1.95
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 78.9 MB/s 
Collecting nmslib>=2.1.1
  Downloading nmslib-2.1.1-cp37-cp37m-manylinux2010_x86_64.whl (13.5 MB)
[K     |████████████████████████████████| 13.5 MB 52.9 MB/s 
Collecting lightgbm>=3.3.2
  Downloading lightgbm-3.3.2-py3-none-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 64.8 MB/s 
[?25hCollecting transformers>=4.6.0
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 51.2 MB/s 
[?25hCollecting onnxruntime>=1.8.1
  Downloading onnxruntime-1.11.1-cp37-cp37m-manylinux_2

In [3]:
import pandas as pd
import regex as re
import csv
from itertools import islice
import pickle
import numpy as np
import json
import os
import sys
import argparse
from pathlib import Path
from sklearn.model_selection import train_test_split
from pathlib import Path
# os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
#from pyserini.search as pysearch

from pyserini.search import SimpleSearcher

In [4]:
!pip install pydot --quiet
!pip install gensim==3.8.3 --quiet
!pip install tensorflow-datasets --quiet
!pip install -U tensorflow-text --quiet
!pip install transformers --quiet
!pip install pydot --quiet

[K     |████████████████████████████████| 24.2 MB 1.5 MB/s 
[K     |████████████████████████████████| 4.6 MB 14.2 MB/s 
[K     |████████████████████████████████| 511.7 MB 7.0 kB/s 
[K     |████████████████████████████████| 1.6 MB 47.4 MB/s 
[K     |████████████████████████████████| 5.8 MB 54.7 MB/s 
[K     |████████████████████████████████| 438 kB 47.6 MB/s 
[?25h

In [5]:
!pip install -q tensorflow-recommenders
!pip install -q scann

[K     |████████████████████████████████| 88 kB 4.0 MB/s 
[K     |████████████████████████████████| 11.2 MB 13.7 MB/s 
[?25h

In [6]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Embedding, Input, Dense, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow_datasets as tfds
import tensorflow_text as tf_text

from transformers import BertTokenizer, TFBertModel


import sklearn as sk
import nltk
from nltk.corpus import reuters
from nltk.data import find

import matplotlib.pyplot as plt

import re

In [7]:
import os
import pprint
import tempfile

from typing import Dict, Text
import numpy as np
import tensorflow_recommenders as tfrs

### FiQA dataset

Here we load the FiQA dataset. The FiQA 2018 open challenge test collection is based on the use of unstructured text documents from different financial open data sources in English. This data comes in two flavors (a) Task 1: sentiment analysis train, and (b) Task 2:  Opinion-based QA. We are interested only in the QA dataset in task 2 collection.  The task 2 collection consists of three files:

- FiQA question.tsv
- FiQA question-doc.tsv
- FiQA doc.tsv. 

1. FiQA_train_question_final.tsv: This file contains a set of natural language questions on topics related to finance and investment. It consists ofl a .tsv file with tab-seaprated columns:
  - qid: question id number;
  - question: question text;
  - timestamp: the date and time value that represents when the question was posted.

2. FiQA_train_question_doc_final.tsv: This file contains information about the corresponding question-answer matchings. It also columns:
  - qid: question id number;
  - docid: document id number.

3. FiQA_train_doc_final.tsv: This file contains a set of answers and comments that the systems need to find the matching one.

In the following cell we change directory to FinBERT-QA and import all the code relevant to processing the data

In [8]:
%cd /gdrive/MyDrive/nlp-yuan_code/FinBERT-QA
from src.process_data import *

/gdrive/MyDrive/nlp-yuan_code/FinBERT-QA


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [9]:
data_path = '/gdrive/MyDrive/nlp-data/nlp-qa-datasets/FiQA/FiQA_train_task2/'
# Document id and Answer text
collection = load_answers_to_df(data_path+"FiQA_train_doc_final.tsv")
# Question id and Question text
queries = load_questions_to_df(data_path+"FiQA_train_question_final.tsv")
# Question id and Answer id pair
qid_docid = load_qid_docid_to_df(data_path+"FiQA_train_question_doc_final.tsv")

In the above cell we have read the three files into three pandas dataframe collection, queries, and qid_docid. 'collection' is a pair of (docid (int), and doc (str)). 'queries' is a pair of qid (int), and question (str). Similarly, 'qid_docid is a pair of qid(int) and docid(int).

In [10]:
fiqa_index = "/gdrive/MyDrive/nlp-yuan_code/FinBERT-QA/retriever/lucene-index-fiqa/"


def split_label(qid_docid):
    """
    Split question answer pairs into train, test, validation sets.

    Returns:
        train_label: Dictonary
            key - question id
            value - list of relevant docids
        test_label: Dictonary
            key - question id
            value - list of relevant docids
        valid_label: Dictonary
            key - question id
            value - list of relevant docids
    ----------
    Arguments:
        qid_docid: Dataframe containing the question id and relevant docids
    """
    # Group the answers for each question into a list
    qid_docid = qid_docid.groupby(['qid']).agg(lambda x: tuple(x)).applymap(list).reset_index()
    # Split data
    train, test_set = train_test_split(qid_docid, test_size=0.05)
    train_set, valid_set = train_test_split(train, test_size=0.1)
    # Expand the list of docids into individual rows to represent a single sample
    train_data = train_set.explode('docid')
    test_data = test_set.explode('docid')
    valid_data = valid_set.explode('docid')

    # Convert data into dictionary - key: qid, value: list of relevant docid
    train_label = label_to_dict(train_data)
    test_label = label_to_dict(test_data)
    valid_label = label_to_dict(valid_data)

    return train_label, test_label, valid_label

def split_question(train_label, test_label, valid_label, queries):
    """
    Split questions into train, test, validation sets.

    Returns:
        train_questions: Dataframe with qids
        test_questions: Dataframe with qids
        valid_questions: Dataframe with qids
    ----------
    Arguments:
        train_label: Dictionary contraining qid and list of relevant docid
        test_label: Dictionary contraining qid and list of relevant docid
        valid_label: Dictionary contraining qid and list of relevant docid
        queries: Dataframe containing the question id and question text
    """
    # Get a list of question ids
    train_q = list(train_label.keys())
    test_q = list(test_label.keys())
    valid_q = list(valid_label.keys())

    # Split question dataframe into train, test, valid set
    train_questions = queries[queries['qid'].isin(train_q)]
    test_questions = queries[queries['qid'].isin(test_q)]
    valid_questions = queries[queries['qid'].isin(valid_q)]

    return train_questions, test_questions, valid_questions

def split_label(qid_docid):
    """
    Split question answer pairs into train, test, validation sets.

    Returns:
        train_label: Dictonary
            key - question id
            value - list of relevant docids
        test_label: Dictonary
            key - question id
            value - list of relevant docids
        valid_label: Dictonary
            key - question id
            value - list of relevant docids
    ----------
    Arguments:
        qid_docid: Dataframe containing the question id and relevant docids
    """
    # Group the answers for each question into a list
    qid_docid = qid_docid.groupby(['qid']).agg(lambda x: tuple(x)).applymap(list).reset_index()
    # Split data
    train, test_set = train_test_split(qid_docid, test_size=0.05)
    train_set, valid_set = train_test_split(train, test_size=0.1)
    # Expand the list of docids into individual rows to represent a single sample
    train_data = train_set.explode('docid')
    test_data = test_set.explode('docid')
    valid_data = valid_set.explode('docid')

    # Convert data into dictionary - key: qid, value: list of relevant docid
    train_label = label_to_dict(train_data)
    test_label = label_to_dict(test_data)
    valid_label = label_to_dict(valid_data)

    return train_label, test_label, valid_label

def split_question(train_label, test_label, valid_label, queries):
    """
    Split questions into train, test, validation sets.

    Returns:
        train_questions: Dataframe with qids
        test_questions: Dataframe with qids
        valid_questions: Dataframe with qids
    ----------
    Arguments:
        train_label: Dictionary contraining qid and list of relevant docid
        test_label: Dictionary contraining qid and list of relevant docid
        valid_label: Dictionary contraining qid and list of relevant docid
        queries: Dataframe containing the question id and question text
    """
    # Get a list of question ids
    train_q = list(train_label.keys())
    test_q = list(test_label.keys())
    valid_q = list(valid_label.keys())

    # Split question dataframe into train, test, valid set
    train_questions = queries[queries['qid'].isin(train_q)]
    test_questions = queries[queries['qid'].isin(test_q)]
    valid_questions = queries[queries['qid'].isin(valid_q)]

    return train_questions, test_questions, valid_questions

def create_dataset(question_df, labels, cands_size):
    """Retrieves the top-k candidate answers for a question and
    creates a list of lists of the dataset containing the question id,
    list of relevant answer ids, and the list of answer candidates

    Returns:
        dataset: list of list in the form [qid, [pos ans], [ans candidates]]
    ----------
    Arguments:
        question_df: Dataframe containing the qid and question text
        labels: Dictonary containing the qid to text map
        cands_size: int - number of candidates to retrieve
    """
    dataset = []
    # Calls retriever
    searcher = SimpleSearcher(fiqa_index)
    # For each question
    for i, row in question_df.iterrows():
        qid = row['qid']
        tmp = []
        # Append qid
        tmp.append(qid)
        # Append list of relevant docs
        tmp.append(labels[qid])
        # Retrieves answer candidates
        cands = []
        cands_score = []
        query = row['question']
        query = re.sub('[£€§]', '', query)
        hits = searcher.search(query, k=cands_size)

        for docid in range(0, len(hits)):
            cands.append(int(hits[docid].docid))
            cands_score.append(hits[docid].score)
        # Append candidate answers
        tmp.append(cands)
        tmp.append(cands_score)
        dataset.append(tmp)

    return dataset

def get_dataset(query_path, labels_path, cands_size):
    """Splits the dataset into train, validation, and test set and creates
    the dataset form for training, validation, and testing.

    Returns:
        train_set: list of list in the form [qid, [pos ans], [ans candidates]]
        valid_set: list of list in the form [qid, [pos ans], [ans candidates]]
        test_set: list of list in the form [qid, [pos ans], [ans candidates]]
    ----------
    Arguments:
        query_path: str - path containing a list of qid and questions
        labels_path: str - path containing a list of qid and relevant docid
        cands_size: int - number of candidates to retrieve
    """
    # Question id and Question text
    queries = load_questions_to_df(query_path)
    # Question id and Answer id pair
    qid_docid = load_qid_docid_to_df(labels_path)
    # qid to docid label map
    labels = label_to_dict(qid_docid)
    train_label, test_label, valid_label = split_label(qid_docid)
    # Split Questions
    train_questions, test_questions, \
    valid_questions = split_question(train_label, test_label, valid_label, queries)

    print("\nGenerating training set...\n")
    train_set = create_dataset(train_questions, labels, cands_size)
    print("Generating validation set...\n")
    valid_set = create_dataset(valid_questions, labels, cands_size)
    print("Generating test set...\n")
    test_set = create_dataset(test_questions, labels, cands_size)

    return train_set, valid_set, test_set

In [11]:
query_path = "/gdrive/MyDrive/nlp-data/nlp-qa-datasets/FiQA/FiQA_train_task2/FiQA_train_question_final.tsv"
labels_path = "/gdrive/MyDrive/nlp-data/nlp-qa-datasets/FiQA/FiQA_train_task2/FiQA_train_question_doc_final.tsv"
train_set, valid_set, test_set = get_dataset(query_path, labels_path, 50)


Generating training set...

SimpleSearcher class has been deprecated, please use LuceneSearcher from pyserini.search.lucene instead
Generating validation set...

SimpleSearcher class has been deprecated, please use LuceneSearcher from pyserini.search.lucene instead
Generating test set...

SimpleSearcher class has been deprecated, please use LuceneSearcher from pyserini.search.lucene instead


In [12]:
print(len(train_set), len(valid_set), len(test_set))

5683 632 333


In [13]:
# Cleaning data
empty_docs, empty_id = get_empty_docs(collection)
# Remove empty answers from collection of answers
collection_cleaned = collection.drop(empty_id)
# Remove empty answers from qa pairs
qid_docid = qid_docid[~qid_docid['docid'].isin(empty_docs)]

print("Number of answers after cleaning: {}".format(len(collection_cleaned)))
print("Number of QA pairs after cleaning: {}".format(len(qid_docid)))

Number of answers after cleaning: 57600
Number of QA pairs after cleaning: 17072


In [14]:
# Write collection df to file
save_tsv("retriever/collection_cleaned.tsv", collection_cleaned)

# Convert collection df to JSON file for Anserini's document indexer
collection_to_json("retriever/collection_json/docs.json", "retriever/collection_cleaned.tsv")

In [15]:
import numpy as np
from sklearn.metrics import ndcg_score
from sklearn.metrics import average_precision_score

def run_baseline(data):
  ap = []
  rr = []
  cg = []


  #for i, seq in enumerate(tqdm(train_set)):
  for i, seq in enumerate(data):
    qid, ans_labels, cands, cands_score = seq[0], seq[1], seq[2], seq[3]
    
    # Map question id to text
    #q_text = qid_to_text[qid]

    max_width = -1
    rr_ = 0
    ap_ = 0.0
    precision_ = 0.0
    relcnt_ = 0

    top_k = 10

    # For each answer in the candidates
    for i in range(top_k):
      docid = cands[i]
      if docid in ans_labels and rr_ == 0:
        rr_ = 1/(i+1)
    
    relscores = [1 if docid in ans_labels else 0 for docid in cands[:top_k]]
    pos = [1.0/(i+1) for i in range(top_k)]
    ap_ = average_precision_score(relscores,pos) if sum(relscores) != 0 else 0 

    #print(ap_, [1 if docid in ans_labels else 0 for docid in cands[:top_k]], [1.0/(i+1) for i in range(top_k)])
    relscores = np.asarray([[np.log2(cands_score[i]) if (cands[i] in ans_labels) else 0.001 for i in range(top_k)]])
    pos = np.asarray([[np.log2(i+2) for i in range(top_k)]])

    cg_ = ndcg_score(relscores, pos)

    ap.append(ap_)
    rr.append(rr_)
    cg.append(cg_)
  return rr, ap, cg

      

In [16]:
rr, ap, cg = run_baseline(train_set)

print('Mean Reciprocal Rank (MRR):', np.mean(rr))
print('Mean average Precision (MAP)', np.mean(ap))
print('Normalized Discounted Cumulative Gain (NDCG)', np.mean(cg))

Mean Reciprocal Rank (MRR): 0.29531790720863393
Mean average Precision (MAP) 0.28095505082788585
Normalized Discounted Cumulative Gain (NDCG) 0.7160566544381635


In [17]:
print(len(train_set), len(valid_set), len(test_set))

5683 632 333


In [18]:
embedding_dimension = 128

In [19]:
def generate_ids(data):
  qids = []
  aids = []
  qa_pairs = []


  #for i, seq in enumerate(tqdm(train_set)):
  for i, seq in enumerate(data):
    qid, ans_labels, cands = seq[0], seq[1], seq[2]
    #print (qid, ans_labels, cands)
    

    # For each answer in the candidates
    for docid in ans_labels:
      qids.append(qid)
      aids.append(docid)
      qa_pairs.append((qid, docid))

  unique_qids = np.unique(qids)
  unique_aids = np.unique(aids)
  return unique_qids, unique_aids, qa_pairs

data = []
data.extend(train_set)
data.extend(valid_set)
data.extend(test_set)
      
qids, aids, qa_pairs = generate_ids(data)

In [20]:
print(len(qids), len(aids), len(qa_pairs))

6648 17110 17110


In [21]:
processed_answers = process_answers(collection_cleaned)
processed_questions = process_questions(queries)

In [22]:
word2index, word2count = create_vocab(processed_answers, processed_questions)

print("Vocab size: {}".format(len(word2index)))
print("Top {} common words: {}".format(35, Counter(word2count).most_common(35)))

Vocab size: 85034
Top 35 common words: [('the', 371203), ('to', 233559), ('a', 201620), ('you', 166702), ('and', 163066), ('of', 157574), ('is', 129894), ('in', 120019), ('that', 111416), ('for', 89366), ('it', 83822), ('i', 74100), ('your', 68153), ('are', 67255), ('if', 60689), ('be', 59266), ('on', 58382), ('have', 55754), ('as', 50088), ('this', 49868), ('not', 49227), ('or', 46080), ('with', 45894), ('they', 44485), ('but', 41690), ('can', 38863), ('will', 36865), ('at', 35548), ('an', 31392), ('money', 31003), ('so', 29980), ('$', 29096), ('would', 28750), ('from', 28582), ('more', 27378)]


In [23]:
qid_to_text, docid_to_text = id_to_text(collection, queries)
qid_to_tokenized_text, docid_to_tokenized_text = id_to_tokenized_text(processed_answers, processed_questions)

In [24]:
unique_questions = np.unique([qid_to_text[qid] for qid in qids])

question_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_questions, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(qids) + 1, embedding_dimension)
])

In [25]:
unique_answers = np.unique([docid_to_text[aid] for aid in aids])

answer_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_answers, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(aids) + 1, embedding_dimension)
])

In [26]:
questions = tf.data.Dataset.from_tensor_slices([qid_to_text[qid] if isinstance(qid_to_text.get(qid),str) else '' for qid, aid in qa_pairs])
answers = tf.data.Dataset.from_tensor_slices([docid_to_text[aid] if isinstance(docid_to_text.get(aid),str) else '' for qid, aid in qa_pairs])

metrics = tfrs.metrics.FactorizedTopK(
  candidates=answers.batch(128).map(answer_model)
)
task = tfrs.tasks.Retrieval(
  metrics=metrics
)

In [27]:
ds = tf.data.Dataset.zip((questions, answers))
ds = ds.map(lambda x, y : {"question": x, "answer": y})

tf.random.set_seed(42)
shuffled = ds.shuffle(17_110, seed=42, reshuffle_each_iteration=False)

#train = shuffled.take(16_242)


In [28]:
print(5683/(5683+632+333), 632/(5683+632+333), 333/(5683+632+333))
print((17100)*0.8548435619735258, (17100)*0.0950661853188929, (17100)*0.050090252707581225)
14617+1625

0.8548435619735258 0.0950661853188929 0.050090252707581225
14617.824909747293 1625.6317689530686 856.5433212996389


16242

In [29]:
class QAModel(tfrs.Model):

  def __init__(self, question_model, answer_model):
    super().__init__()
    self.question_model: tf.keras.Model = question_model
    self.answer_model: tf.keras.Model = answer_model
    self.task: tf.keras.layers.Layer = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    # We pick out the user features and pass them into the user model.
    q_embeddings = self.question_model(features["question"])
    # And pick out the movie features and pass them into the movie model,
    # getting embeddings back.
    a_embeddings = self.answer_model(features["answer"])

    # The task computes the loss and the metrics.
    return self.task(q_embeddings, a_embeddings)

In [30]:
cached_train = shuffled.shuffle(17_110).batch(1300)
cached_test =  shuffled.take(856).batch(150)

In [31]:
model = QAModel(question_model, answer_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

model.fit(cached_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f7338dc64d0>

In [32]:
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_100_categorical_accuracy': 1.0,
 'factorized_top_k/top_10_categorical_accuracy': 0.9719626307487488,
 'factorized_top_k/top_1_categorical_accuracy': 0.257009357213974,
 'factorized_top_k/top_50_categorical_accuracy': 1.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.8878504633903503,
 'loss': 209.40826416015625,
 'regularization_loss': 0,
 'total_loss': 209.40826416015625}

In [33]:
scann_index = tfrs.layers.factorized_top_k.ScaNN(model.question_model)
scann_index.index_from_dataset(
  tf.data.Dataset.zip((answers.batch(100), answers.batch(100).map(model.answer_model)))
)

<tensorflow_recommenders.layers.factorized_top_k.ScaNN at 0x7f733673a210>

In [34]:
# Get recommendations.
_, cands = scann_index(tf.constant([qid_to_text[5]]))

print(f'QUESTION: ', qid_to_text[5])
print(f"Recommendations for user 42: {cands[0, :10]}")

QUESTION:  Starting a new online business
Recommendations for user 42: [b"Switzerland was once known for its high regard for private property rights.  Recently it is has started to violate those rights by forcing banks to turn over the names of account holders to the US government.  Not a great trend. Another aspect that makes Switzerland an attractive place for people and businesses is the Swiss governemnt's neutral policy.  The Swiss government is not deploying the Swiss military around the globe to fight terrorism, to spread democracy, to advance its own power, or other such murderous government programs.  The Swiss people do not have to worry about the payback that arrives because of such depraved government programs. The Swiss were traditionally extreme advocates of individual gun rights which allows the people to provide protection for themselves against others and against the government.  This too is changing (read section on The Enemy Within) in a not so favorable direction. I 

In [35]:
recommendations, cands = scann_index(tf.constant([qid_to_text[5]]))

In [85]:
type(cands[0, 0].numpy().decode())
#cands[0, 0]
[type(t.numpy().decode()) for t in cands[0]]

[str, str, str, str, str, str, str, str, str, str]

In [83]:
text_to_docid = dict([ (docid_to_text[k], k) for k in docid_to_text])

def run_twotower_scores(data):
  ap = []
  rr = []
  cg = []

  #for i, seq in enumerate(tqdm(train_set)):
  for j, seq in enumerate(data):
    qid, ans_labels, _ , _ = seq[0], seq[1], seq[2], seq[3]
    cands_score, cand_ans = scann_index(tf.constant([qid_to_text[qid]]))
    cands = [t.numpy().decode() for t in cand_ans]
    #cands = [s.decode('utf-8') for s in np.ravel(cands).tolist()]
    cands_score = np.ravel(cands_score).tolist()

    
    # Map question id to text
    #q_text = qid_to_text[qid]

    max_width = -1
    rr_ = 0
    ap_ = 0.0
    precision_ = 0.0
    relcnt_ = 0

    top_k = 10

    # For each answer in the candidates
    for i in range(top_k):
      docid = cands[i]
      print(docid)
      if docid in ans_labels and rr_ == 0:
        rr_ = 1/(i+1)
    
    relscores = [1 if docid in ans_labels else 0 for docid in cands[:top_k]]
    pos = [1.0/(i+1) for i in range(top_k)]
    ap_ = average_precision_score(relscores,pos) if sum(relscores) != 0 else 0 

    #print(ap_, [1 if docid in ans_labels else 0 for docid in cands[:top_k]], [1.0/(i+1) for i in range(top_k)])
    relscores = np.asarray([[np.log2(cands_score[i]) if (cands[i] in ans_labels) else 0.001 for i in range(top_k)]])
    pos = np.asarray([[np.log2(i+2) for i in range(top_k)]])

    cg_ = ndcg_score(relscores, pos)

    print('>>>', i, ap_, rr_, cg_)

    ap.append(ap_)
    rr.append(rr_)
    cg.append(cg_)
  return rr, ap, cg

In [84]:
rr, ap, cg = run_twotower_scores(train_set)

print('Mean Reciprocal Rank (MRR):', np.mean(rr))
print('Mean average Precision (MAP)', np.mean(ap))
print('Normalized Discounted Cumulative Gain (NDCG)', np.mean(cg))

AttributeError: ignored

In [50]:
ap[:10]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]