In [1]:
# always reload an imported module before executing a particular cell
# (used to let changes in python files take effect without restarting kernel)
%load_ext autoreload
%autoreload 2


In [134]:
from sentence_transformers import SentenceTransformer, LoggingHandler
import numpy as np
import os
import pandas as pd
from gensim.test.utils import common_texts
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import nltk
from nltk.corpus import stopwords
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import PorterStemmer 
from abc import ABC, abstractmethod
from collections import Counter, defaultdict as dd, OrderedDict


In [104]:
class Ranker(ABC):
    """Common interface for every ranker in this notebook.

    Subclasses implement :meth:`rank`; :meth:`dists_to_indices` is the shared
    helper that turns a vector of distances into a ranking.
    """

    @abstractmethod
    def rank(self, query, docs):
        """
        Return sorted indices
        """
        pass

    @staticmethod
    def dists_to_indices(dists):
        """Return the positions of ``dists`` ordered from smallest to largest distance.

        Parameters
        ----------
        dists : array-like
            Distances, one per document. Row vectors ``(1, n)`` and column
            vectors ``(n, 1)`` are flattened to length ``n``.

        Returns
        -------
        list of int
            Document indices, closest first. Equal distances keep their
            original relative order; NaN distances are placed last.
        """
        # reshape(-1) instead of squeeze(): squeeze() collapses a one-element
        # array to a 0-d array, which cannot be iterated or sorted.
        flat = np.asarray(dists).reshape(-1)
        # A stable sort keeps the tie-breaking of the previous sorted()-based version.
        return np.argsort(flat, kind="stable").tolist()

In [105]:
class BM25Ranker(Ranker):
    """Rank documents for a query with a BM25-style score (no length normalisation).

    A document's score is the sum, over the distinct query terms, of
    ``idf(term) * tf * (k1 + 1) / (tf + k1)``. Document frequencies for the IDF
    are counted over the candidate documents passed to :meth:`rank`.
    """
    # see also: 
    # https://www.elastic.co/blog/practical-bm25-part-2-the-bm25-algorithm-and-its-variables
    
    # Built once at class-definition time and shared by all instances.
    stemmer = PorterStemmer() 
    stopwords = set(stopwords.words('english'))
    
    def __init__(self, stem=True, k1=1.2):
        """
        stem: if True, Porter-stem query and document tokens.
        k1:   term-frequency saturation constant used in the score.
        """
        self.stem = stem
        self.k1 = k1
        
    def _preprocess_text(self, text):
        """Tokenise ``text`` into lower-cased alphabetic, non-stopword (optionally stemmed) terms."""
        words = nltk.word_tokenize(text)
        words = [word.lower() for word in words if word.isalpha()]
        # Filter stopwords BEFORE stemming: the stopword list holds unstemmed
        # forms, so stems such as "wa" (was) or "thi" (this) would slip through
        # a filter applied afterwards.
        words = [w for w in words if w not in self.stopwords]
        if self.stem:
            words = [self.stemmer.stem(word) for word in words]
        return words
    
    def _get_idf_values(self, query_terms, query_term_to_nr_docs, nr_docs):
        """Map each distinct query term to ``log(1 + (N - n + 0.5) / (n + 0.5))``.

        ``N`` is ``nr_docs`` and ``n`` the number of documents containing the term.
        """
        query_term_to_idf = {}
        for query_term in set(query_terms):
            doc_freq = query_term_to_nr_docs[query_term]
            idf_exp = (nr_docs - doc_freq + 0.5) / (doc_freq + 0.5)
            query_term_to_idf[query_term] = np.log(1 + idf_exp)
        return query_term_to_idf
    
    def _get_tf_values(self, query_terms, docs):
        """Count query-term occurrences per document.

        Returns ``(tf_index, query_term_to_nr_docs)``: ``tf_index[i][term]`` is
        the count of ``term`` in document ``i`` and ``query_term_to_nr_docs[term]``
        the number of documents in which ``term`` occurs at least once.
        """
        unique_terms = set(query_terms)
        query_term_to_nr_docs = dd(int)
        tf_index = []
        
        for doc in docs:
            doc_terms = Counter(self._preprocess_text(doc))
            doc_tf = {term: doc_terms[term] for term in unique_terms}
            for term, tf in doc_tf.items():
                if tf > 0:
                    query_term_to_nr_docs[term] += 1
            tf_index.append(doc_tf)
        return tf_index, query_term_to_nr_docs
    
    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from highest to lowest BM25 score."""
        # Nothing to compare for zero or one candidate; same shortcut as the
        # sentence-level SBERT rankers.
        if len(docs) <= 1:
            return list(range(len(docs)))
        
        query_terms = set(self._preprocess_text(query))
                
        # Documents are preprocessed exactly once, inside _get_tf_values.
        tf_index, query_term_to_nr_docs = self._get_tf_values(query_terms, docs)
        query_term_to_idf = self._get_idf_values(query_terms, query_term_to_nr_docs, len(docs))
        
        doc_scores = []
        for doc_tf in tf_index:
            doc_score = 0
            for query_term in query_terms:
                tf = doc_tf[query_term]
                if tf == 0:
                    # Absent terms contribute nothing; skipping also avoids 0/0 when k1 == 0.
                    continue
                tf_score_part = (tf * (self.k1 + 1)) / (tf + self.k1) # no length normalization!
                doc_score += query_term_to_idf[query_term] * tf_score_part
            doc_scores.append(-doc_score) # negative, as we sort by distance
        
        return self.dists_to_indices(np.array(doc_scores))
                
            

In [106]:
# Smoke test for the BM25 ranker: rank four short documents against a
# two-sentence query. The cell output is the list of document indices,
# best match first.
bm25_ranker = BM25Ranker()

bm25_ranker.rank('i saw a cat. The cat was sitting on the mat. ', 
                     ['I like cats. Even though I wasn\'t sure. the cat was seen by me. Yeey cats, i Love mats too'
                       , 
                      'Even though I wasn\'t sure. the cat was seen by me. It was still okay for me.',
                      'Even though I wasn\'t sure. ',
                      'the cat was seen by me. '])

[0, 1, 3, 2]

In [107]:
class SBERTAverageRanker(Ranker):
    """Rank documents by cosine distance between mean-pooled sentence embeddings.

    Query and documents are split into sentences, every sentence is embedded
    with ``model.encode`` and the sentence vectors of each text are averaged
    into one vector per text.
    """

    def __init__(self, model):
        """model: object exposing ``encode(list_of_sentences)``."""
        self.model = model
        
    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from closest to farthest from ``query``."""
        # Nothing to compare for zero or one candidate; same shortcut as the
        # LCS and complex SBERT rankers.
        if len(docs) <= 1:
            return list(range(len(docs)))
        
        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]
        
        embeddings_query = self.model.encode(sents_query)
        embeddings_docs = [self.model.encode(x) for x in sents_docs]
        
        # Average over the sentence axis: one vector per text.
        embedding_query = np.mean(embeddings_query, axis=0)
        embedding_docs = [np.mean(x, axis=0) for x in embeddings_docs]

        # cdist expects 2-D inputs, so the query vector becomes a single row.
        dists = cdist(embedding_query.reshape(1, -1), embedding_docs, "cosine")[0]
        return self.dists_to_indices(dists)
    

In [108]:
# Sentence-embedding model shared by all SBERT-based rankers in this notebook.
model = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')

In [109]:
# Smoke test for the mean-pooling ranker: three candidate documents,
# output is the list of document indices, closest first.
sberta_ranker = SBERTAverageRanker(model)

sberta_ranker.rank('i saw a cat. The cat was sitting on the mat', 
                   ['the cat was seen by me', 'pineapple on pizza is bad', 'i saw a cat'])

[2, 0, 1]

In [110]:
class SBERTMaxRanker(Ranker):
    """Rank documents by cosine distance between max-pooled sentence embeddings.

    Query and documents are split into sentences, every sentence is embedded
    with ``model.encode`` and each text is reduced to the element-wise maximum
    of its sentence vectors.
    """

    def __init__(self, model):
        """model: object exposing ``encode(list_of_sentences)``."""
        self.model = model
        
    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from closest to farthest from ``query``."""
        # Nothing to compare for zero or one candidate; same shortcut as the
        # LCS and complex SBERT rankers.
        if len(docs) <= 1:
            return list(range(len(docs)))
        
        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]
        
        embeddings_query = self.model.encode(sents_query)
        embeddings_docs = [self.model.encode(x) for x in sents_docs]
        
        # Element-wise maximum over the sentence axis: one vector per text.
        embedding_query = np.max(embeddings_query, axis=0)
        embedding_docs = [np.max(x, axis=0) for x in embeddings_docs]

        # cdist expects 2-D inputs, so the query vector becomes a single row.
        dists = cdist(embedding_query.reshape(1, -1), embedding_docs, "cosine")[0]
        return self.dists_to_indices(dists)

In [111]:
# Smoke test for the max-pooling ranker. The last candidate is identical to
# the query; output is the list of document indices, closest first.
sbertmax_ranker = SBERTMaxRanker(model)

sbertmax_ranker.rank('The cat was sitting on the mat', 
                     ['the cat was seen by me', 'pineapple on pizza is bad', 'i saw a cat',
                     'The cat was sitting on the mat'])

[3, 2, 0, 1]

In [121]:
class SBERTcomplexRanker(Ranker):
    """Rank documents by the summed pairwise sentence distance to the query.

    For each document, the cosine distances between every query sentence and
    every document sentence are summed and divided by the number of document
    sentences.
    """
    
    def __init__(self, model, similarity_threshold=.3):
        """
        model: object exposing ``encode(list_of_sentences)``.
        similarity_threshold: stored as ``sim_threshold`` but not used by this
            ranker's scoring.
        """
        self.model = model
        self.sim_threshold = similarity_threshold
   
    def _complex(self, query_embeddings, document_embeddings):
        """Return sum of all query-sentence/doc-sentence cosine distances / number of doc sentences."""
        if len(query_embeddings) == 0:
            # No query sentences means no pairs to sum.
            return 0.0
        
        # One cdist call yields the full (query sentences x doc sentences)
        # distance matrix instead of one call per sentence pair.
        pairwise = cdist(np.asarray(query_embeddings),
                         np.asarray(document_embeddings), "cosine")
        return pairwise.sum() / len(document_embeddings)
                
    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from smallest to largest distance."""
        
        if len(docs) == 1:
            return [0]
        
        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]
      
        embeddings_query = self.model.encode(sents_query)
        embeddings_docs = [self.model.encode(x) for x in sents_docs]
        
        distances = [self._complex(embeddings_query, embeddings_doc)
                     for embeddings_doc in embeddings_docs]
        return self.dists_to_indices(np.array(distances))

In [122]:
# Smoke test for the pairwise-distance ranker on the shared four-document
# example; output is the list of document indices, closest first.
sbertcomplexranker = SBERTcomplexRanker(model, similarity_threshold=.3)

sbertcomplexranker.rank('i saw a cat. The cat was sitting on the mat. ', 
                     ['I like cats. Even though I wasn\'t sure. the cat was seen by me. Yeey cats, i Love mats too'
                       , 
                      'Even though I wasn\'t sure. the cat was seen by me. It was still okay for me.',
                      'Even though I wasn\'t sure. ',
                      'the cat was seen by me. '])

[3, 0, 1, 2]

In [112]:
class SBERTlcsRanker(Ranker):
    """Rank documents with a longest-common-subsequence match over sentence embeddings.

    A query sentence and a document sentence "match" when their cosine
    similarity exceeds ``similarity_threshold``. The distance of a document is
    ``1 - mean_sim * lcs_length`` (see :meth:`_lcs`).
    """
    
    def __init__(self, model, similarity_threshold=.3):
        """
        model: object exposing ``encode(list_of_sentences)``.
        similarity_threshold: minimum cosine similarity for two sentences to match.
        """
        self.model = model
        self.sim_threshold = similarity_threshold
    
    def _lcs(self, query_embeddings, document_embeddings):
        """Return the LCS-based distance between one query and one document.

        ``m[j, i]`` is the LCS length for the first ``j`` document sentences
        and the first ``i`` query sentences; ``m_sim[j, i]`` stores the
        similarity of every matching pair.
        """
        len_query = len(query_embeddings)
        len_doc = len(document_embeddings)
        
        m = np.zeros((len_doc+1, len_query+1))
        m_sim = np.zeros(m.shape)
        
        # All pairwise similarities in a single cdist call. Entries are plain
        # scalars, so nothing below assigns a 1-element array into a cell.
        similarities = None
        if len_query > 0 and len_doc > 0:
            similarities = 1 - cdist(np.asarray(query_embeddings),
                                     np.asarray(document_embeddings), "cosine")
        
        for i in range(1, len_query+1):
            for j in range(1, len_doc+1):
                similarity = similarities[i-1, j-1]
                
                if similarity > self.sim_threshold:
                    m[j, i] = m[j-1, i-1] + 1
                    m_sim[j, i] = similarity
                else:
                    m[j, i] = max(m[j-1, i], m[j, i-1])
                    
        answer = m[len_doc, len_query]
        
        # NOTE(review): the denominator is the sum of the whole LCS table, not
        # the number of matching pairs -- confirm this is the intended
        # normalisation for "mean_sim".
        table_total = m.sum()
        mean_sim = m_sim.sum() / table_total if table_total != 0 else 0
        
        return 1 - (mean_sim * answer)


    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from smallest to largest LCS distance."""
        
        # Nothing to compare for zero or one candidate.
        if len(docs) <= 1:
            return list(range(len(docs)))
        
        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]
        
        embeddings_query = self.model.encode(sents_query)
        embeddings_docs = [self.model.encode(x) for x in sents_docs]
        
        distances = [self._lcs(embeddings_query, embeddings_doc)
                     for embeddings_doc in embeddings_docs]
        
        return self.dists_to_indices(np.array(distances))

In [113]:
# Smoke test for the LCS ranker on the shared four-document example.
# Stored under its own name so it no longer overwrites the max-pooling
# ranker held in `sbertmax_ranker`.
sbertlcs_ranker = SBERTlcsRanker(model, similarity_threshold=.3)

sbertlcs_ranker.rank('i saw a cat. The cat was sitting on the mat. ', 
                     ['I like cats. Even though I wasn\'t sure. the cat was seen by me. Yeey cats, i Love mats too'
                       , 
                      'Even though I wasn\'t sure. the cat was seen by me. It was still okay for me.',
                      'Even though I wasn\'t sure. ',
                      'the cat was seen by me. '])


[3, 0, 1, 2]

In [114]:
class SBERTRanker(Ranker):
    """Rank documents by cosine distance between whole-text embeddings.

    Query and documents are each passed to ``model.encode`` as a single text,
    without sentence splitting.
    """

    def __init__(self, model):
        """model: object exposing ``encode(list_of_texts)``."""
        self.model = model
        
    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from closest to farthest from ``query``."""
        # Nothing to compare for zero or one candidate.
        if len(docs) <= 1:
            return list(range(len(docs)))
        
        # Use the model given to this instance, not a notebook-level global.
        # Query and documents are encoded in one call; row 0 is the query.
        embeddings = self.model.encode([query] + list(docs))

        # cdist expects 2-D inputs, so the query vector becomes a single row.
        dists = cdist(embeddings[0].reshape(1, -1), embeddings[1:], "cosine")[0]
        return self.dists_to_indices(dists)

In [115]:
# Whole-text SBERT ranker built on the shared sentence-embedding model.
sbert_ranker = SBERTRanker(model)

In [116]:
# Smoke test: the last candidate repeats the query text (minus the full stop);
# output is the list of document indices, closest first.
sbert_ranker.rank('i saw a cat.', ['the cat was seen by me.', 'Even though I wasn\'t sure.',
                                  'pineapple on pizza is bad', 'i saw a cat'])

[3, 0, 1, 2]

In [117]:
class Doc2vecRanker(Ranker):
    """Rank documents by cosine distance between inferred Doc2Vec vectors."""
    
    # Built once at class-definition time and shared by all instances.
    stopwords = set(stopwords.words('english'))
    
    def __init__(self, model_fn):
        """model_fn: path of a saved gensim Doc2Vec model, loaded with ``Doc2Vec.load``."""
        self.model = Doc2Vec.load(model_fn)        
    
    def _preprocess_text(self, text):
        """Tokenise ``text`` into lower-cased alphabetic tokens without stopwords."""
        words = nltk.word_tokenize(text)
        words = [word.lower() for word in words if word.isalpha()]
        return [w for w in words if w not in self.stopwords]
    
    def rank(self, query, docs):
        """Return the indices of ``docs`` ordered from closest to farthest from ``query``."""
        # Nothing to compare for zero or one candidate; same shortcut as the
        # LCS and complex SBERT rankers.
        if len(docs) <= 1:
            return list(range(len(docs)))
        
        # NOTE(review): infer_vector may not be deterministic between calls --
        # confirm against the gensim version in use if exact reproducibility matters.
        query_vector = self.model.infer_vector(self._preprocess_text(query))
        doc_vectors = [self.model.infer_vector(self._preprocess_text(d)) for d in docs]
        
        # cdist expects 2-D inputs, so the query vector becomes a single row.
        dists = cdist(query_vector.reshape(1, -1), np.array(doc_vectors), "cosine")[0]
        
        return self.dists_to_indices(dists)

In [118]:
# Path of the saved Doc2Vec model used for ranking.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
D2V_BEST_MODEL = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/train_results/doc2vec2/d2v_17'
d2v_ranker = Doc2vecRanker(D2V_BEST_MODEL)

In [120]:
# Smoke test for the Doc2Vec ranker on the shared four-document example;
# output is the list of document indices, closest first.
d2v_ranker.rank('i saw a cat. The cat was sitting on the mat. ', 
                     ['I like cats. Even though I wasn\'t sure. the cat was seen by me. Yeey cats, i Love mats too'
                       , 
                      'Even though I wasn\'t sure. the cat was seen by me. It was still okay for me.',
                      'Even though I wasn\'t sure. ',
                      'the cat was seen by me. '])

[2, 0, 1, 3]

In [139]:
def calc_rr(rels_sorted, types_sorted, ttype='original', rel_label=1):
    """Mean reciprocal rank of the documents with a given relevance label and type.

    Parameters
    ----------
    rels_sorted : array-like
        Relevance label of each document, in ranked order (best first).
    types_sorted : array-like
        Type of each document, in the same order as ``rels_sorted``.
    ttype : str
        Document type to evaluate.
    rel_label : int
        Relevance label to evaluate.

    Returns
    -------
    float
        Mean of ``1 / rank`` (ranks start at 1) over all documents whose label
        equals ``rel_label`` and whose type equals ``ttype``.

    Raises
    ------
    ValueError
        If no document has the requested label and type.
    """
    # asarray lets plain lists work too; `list == scalar` would otherwise
    # compare the whole list instead of its elements.
    rels = np.asarray(rels_sorted)
    doc_types = np.asarray(types_sorted)

    # 0-based positions in the ranking of the documents of interest.
    positions = np.flatnonzero((rels == rel_label) & (doc_types == ttype))
    if len(positions) == 0:
        raise ValueError('no relevant docs of this type')
    return np.mean(1 / (positions + 1))


In [140]:
def calculate_mrr_stats(df,
                        ranker,
                        types=['original', 'degree_2', 'degree_3', 'degree_4', 
                               'degree_4_split', 'degree_8', 'degree_8_split'],
                        limit=None
                       ):
    """Compute per-query reciprocal-rank statistics for one ranker.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (query, document) pair with columns ``qid``, ``query``,
        ``doc``, ``rel`` and ``type``.
    ranker : object
        Must provide ``rank(query, docs)`` returning document indices, best first.
    types : sequence of str
        Document types for which the reciprocal rank of relevant documents
        (``rel == 1``) is reported.
    limit : int or None
        If given, only the first ``limit`` queries are evaluated.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated query with columns ``qid``, one column per entry
        of ``types`` and ``original_irrelevant`` (reciprocal rank of the
        ``rel == 0`` documents of type ``'original'``). ``qid`` keeps the
        values found in ``df`` instead of being coerced to float.

    Notes
    -----
    Queries are processed in order of first appearance in ``df``, so the
    subset selected by ``limit`` is reproducible. Exceptions raised by
    ``calc_rr`` (a query without documents of a requested type) propagate.
    """
    types = list(types)  # accept any sequence and never touch the default list
    columns = ['qid'] + types + ['original_irrelevant']

    nr_qids = df['qid'].nunique()
    rows = []
    # groupby(sort=False) walks the frame once and keeps first-appearance
    # order, instead of re-filtering the whole frame for every qid.
    for i, (qid, query_df) in enumerate(df.groupby('qid', sort=False)):
        
        if limit is not None and i >= limit:
            break
            
        query_df = query_df.reset_index(drop=True)
        query = query_df['query'].iloc[0]
        print('query:', query)

        indices_sorted = ranker.rank(query, query_df['doc'].values.tolist())   

        docs_sorted = query_df.iloc[indices_sorted]
        rels = docs_sorted['rel'].values
        doc_types = docs_sorted['type'].values

        query_rr_row = [qid]
        query_rr_row.extend(calc_rr(rels, doc_types, ttype=ttype) for ttype in types)
        query_rr_row.append(calc_rr(rels, doc_types, ttype='original', rel_label=0))
        rows.append(query_rr_row)

        if (i+1) % 5 ==  0:
            print('processed {:d} of {:d}'.format(i+1, nr_qids))
        
    # Build the frame once: appending row by row is quadratic, and
    # DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(rows, columns=columns)


In [123]:
# Load the query/document table (one row per query-document pair).
# NOTE(review): absolute, machine-specific path.
queries_file= '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/queries3.csv'

# error_bad_lines=False skips malformed CSV rows instead of raising.
# NOTE(review): this flag was deprecated in pandas 1.3 in favour of
# on_bad_lines='skip' -- confirm the installed pandas version before upgrading.
df = pd.read_csv(queries_file, error_bad_lines=False)

df.head(3)

Unnamed: 0,qid,query,rel,type,doc
0,0,aaa a common cause of a skid is,0,original,Discounts and benefits are available at all He...
1,0,aaa a common cause of a skid is,0,original,AAA North Penn provides Authorized On - Line P...
2,0,aaa a common cause of a skid is,0,original,â¢ EAP-TLS authentication takes place between...


In [9]:
# Evaluate the whole-text SBERT ranker on at most 400 queries of the full table.
ranker = SBERTRanker(model)

rr_df = calculate_mrr_stats(df, ranker, limit=400)

query: aaa a common cause of a skid is
query: chionophobia is an abnormal fear of what
query: abcmouse price
query: abnormal condition of blood in a joint
query: about how long can a bald eagle live?
processed 5 of 20117
query: 1 cup of strawberries nutrition facts
query: sprint pay by phone telephone number
query: is lameness a type of abnormal locomotion
query: what is pica condition
query: . in what kind of government does a small group have a firm control over a country? brainly
processed 10 of 20117
query: aaa templin phone number
query: according to the amdr what percentage of your daily intake should be protein
query: how to care for goji berry plants
query: how to become a firefighter in pa
query: how costly is it to remove mold from crawl space
processed 15 of 20117
query: do raccoons eat japanese beetles
query: what is a raccoon dog
query: how to calculate growth of dividend
query: how to cancel your zipcar membership
query: what helps get rid of moisture in basement
processe

query: how mych does golds gym membership cost in el paso?
query: krewe of femme fatale membership cost
query: what qualifications do a gym general manager needs
processed 180 of 20117
query: gold gym membership cost
query: how much is a anytime fitness membership
query: monthly cost of a horse
query: adt average monthly cost
query: which cost is an example of a variable cost?
processed 185 of 20117
query: cost of planet fitness membership
query: what's the cost of a costco executive membership
query: average entertainment cost per month
query: monthly cost of avastin cancer treatment
query: monthly t1 line cost
processed 190 of 20117
query: what are david barton gyms
query: usga basic membership cost
query: monthly cost for hulu
query: types of membership models
query: orange theory fitness membership cost
processed 195 of 20117
query: how much is membership at a fitness club
query: average human stomach size
query: how much do gold's gym employees make
query: how much is a kickboxing

processed 350 of 20117
query: how to be a child psychologist
query: average salary human resources recruiter
query: average salary first year lawyer
query: what units are used to measure both velocity and speed
query: average speed on a bike
processed 355 of 20117
query: typing speed average
query: schooling you need to become a child psychologist
query: if quantity demanded goes down what happens to total revenue
query: is acceleration considered a vector or scalar
query: educational psychologist salary
processed 360 of 20117
query: average income clinical psychologist
query: most probable speed formula for gases
query: what is the average starting salary for an art therapist?
query: average salary for a school psychologist
query: average vs instantaneous velocity
processed 365 of 20117
query: average salary medical oncology
query: average salary clinical data coordinator
query: what is the relationship between displacement and the spring constant k
query: average hourly rate for phil

In [129]:
# Reload the query/document table and restrict it to the test split.
# NOTE(review): absolute, machine-specific paths.
queries_file= '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/queries3.csv'

# error_bad_lines=False skips malformed CSV rows instead of raising.
# NOTE(review): deprecated since pandas 1.3 in favour of on_bad_lines='skip'.
df = pd.read_csv(queries_file, error_bad_lines=False)
ttsplit_df = pd.read_csv('/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/train_data/queries3_split.csv')

# Query ids whose split label is 'test'.
test_qids = set(ttsplit_df[ttsplit_df['type'] == 'test']['qid'].values)

# Keep only the rows belonging to test queries.
df_test = df.loc[df['qid'].isin(test_qids)]

df_test

Unnamed: 0,qid,query,rel,type,doc
0,0,aaa a common cause of a skid is,0,original,Discounts and benefits are available at all He...
1,0,aaa a common cause of a skid is,0,original,AAA North Penn provides Authorized On - Line P...
2,0,aaa a common cause of a skid is,0,original,â¢ EAP-TLS authentication takes place between...
3,0,aaa a common cause of a skid is,0,original,This chart represents a partial representation...
4,0,aaa a common cause of a skid is,0,original,Points to Consider before Buying John Deere Sk...
...,...,...,...,...,...
205755,5143,why was the 19th amendment considered progressive,1,degree_8,the 19th amendment is a very important amendme...
205756,5143,why was the 19th amendment considered progressive,1,degree_8_split,"Best Answer: The 16th, 17th, 18th, and 19th am..."
205757,5143,why was the 19th amendment considered progressive,1,degree_8_split,"Best Answer: The 16th, 17th, 18th, and 19th am..."
205758,5143,why was the 19th amendment considered progressive,1,degree_8_split,The 18th amendment is the only amendment to be...


In [130]:
# Sanity check: number of distinct query ids in the test split.
len(test_qids)

400

In [131]:
# Sanity check: number of (query, document) rows in the test split.
len(df_test)

16000

In [None]:
# Evaluate every ranker on the test split and write one CSV per ranker.
# NOTE(review): absolute, machine-specific path.
OUTPUT_DIR = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/eval/eval_model_default'
# Create the target directory up front so a missing folder cannot abort the
# run after a (slow) ranker has already been evaluated.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Ranker name (also used as the CSV file name) -> ranker instance.
rankers = OrderedDict({
    'bm25': BM25Ranker(),    
    'bm25_nostemming': BM25Ranker(stem=False),   
    'd2v': Doc2vecRanker(D2V_BEST_MODEL),
    'sbert_lcs_st_0.3': SBERTlcsRanker(model, similarity_threshold=.3),
    'sbert_lcs_st_0.1':  SBERTlcsRanker(model, similarity_threshold=.1),
    'sbert_lcs_st_0.5': SBERTlcsRanker(model, similarity_threshold=.5),
    'sbert_basic': SBERTRanker(model),
    'sbert_complex': SBERTcomplexRanker(model),
    'sbert_avg': SBERTAverageRanker(model),
    'sbert_max': SBERTMaxRanker(model)    
})

# Per-ranker result frames, in the same order as `rankers`.
rr_dfs = []
for ranker_name, ranker in rankers.items():
    print('>>> checking ranker "{:s}"'.format(ranker_name))
    rr_df = calculate_mrr_stats(df_test, ranker)
    rr_dfs.append(rr_df)    
    rr_df.to_csv(os.path.join(OUTPUT_DIR, ranker_name + '.csv'), index=False)


>>> checking ranker "bm25"
query: aaa a common cause of a skid is
query: output devices definition
query: what is the recovery from mastectomy with reconstruction
query: meaning of the name Joan
query: what does castor oil do for your hair
processed 5 of 400
query: what is the yearly salary of a critical care rn
query: can you use coconut oil as lube
query: number of neutrons for pt
query: what problems do macro viruses cause to your computer
query: what is the universal blood type
processed 10 of 400
query: what is the term describe
query: what is the weather in englewood, co
query: amtrak phone numbers
query: what does blue cypress essential oil do for the body.
query: pandora premium how many users
processed 15 of 400
query: why do we need fish oil
query: what pro soccer players wear nike phelon
query: what language is used in haiti
query: another name for puerto rican
query: most golf courses by state
processed 20 of 400
query: what kind of degree are you supposed to have to become

query: which court is also known as a trial court?
query: what color is the ribbon for ovarian cancer
query: where is susan kelechi watson from in li
processed 180 of 400
query: what countries are in the nordic region
query: which describes the difference between a trade surplus and a trade deficit?
query: does turkey contain campylobacter
query: ffs johnny delusional lyrics
query: where is the biggest ferris wheel on earth
processed 185 of 400
query: what character did bill paxton play in the titanic
query: foods that contain triglycerides
query: who is a carrier of food poisoning
query: which planet is the hottest in the solar system
query: which of the following is a risk factor associated with cardiometabolic risk?
processed 190 of 400
query: what county is blanco, tx in?
query: function of bcl-2
query: who created the first steam engine train
query: gelatin nutritional content
query: function abap
processed 195 of 400
query: who is father of computer
query: what county is knoxvill

processed 355 of 400
query: what is passion
query: what is pixel computing
query: what is protocol is cancer found after hysterectomy
query: is the appendix part of the colon
query: what is pulse scale
processed 360 of 400
query: jiraiya is from what clan
query: itemized deduction definition
query: what is squamous cell carcinoma is malignant or benign?
query: what is technical feasibility?
query: what is silicone sealant used for
processed 365 of 400
query: left parietal lobe tumor
query: what is prescription strength zantac
query: minamata disease was caused by mercury contamination
query: what is software quality assurance testing.
query: metformin side effects long term
processed 370 of 400
query: what is the average net worth of congressmen
query: medication range orders guideline
query: meaning of name jones
query: what is the best mulch color
query: muscle cell depolarization neuromuscular junction
processed 375 of 400
query: what is the flipp app
query: what is the electronic m

processed 125 of 400
query: where do smurfs live
query: weight watchers at work gift
query: where can viruses live
query: waxing means the moon is
query: definition of anecdotal
processed 130 of 400
query: when was the first nobel prize
query: definition of a direct primary care clinic
query: define tremor
query: weight of wistar rat
query: define predicate adjective
processed 135 of 400
query: where does the fracking water comes from
query: definition of salary sacrifice
query: wex bank customer service number
query: definition of twins
query: definition of an electron
processed 140 of 400
query: definition of extrinsic motivation
query: define pop culture
query: did rocky marciano play for the cubs
query: difference between aluminum and tin
query: when was the couch invented
processed 145 of 400
query: describe dietary nutrients for water
query: did johnny cash sing long black train
query: where is cal poly slo located
query: do all babies get colic
query: what are requirements for q

query: how to diet with eating little sugar
query: how safe is nexgard
query: what is an automobile tort case
query: how to compare annual salary from hourly wage
processed 305 of 400
query: how to make spruce pitch salve
query: if a president is accused of breaking the twenty-second amendment, that means the president is trying to
query: what is an asset sale of a business
query: what is bad back pain
query: what is chuck
processed 310 of 400
query: how to prepare to work in nursing home
query: how to get cricket wireless account number
query: what is bdc reticle
query: what is bollywood movies
query: how to raise coleus
processed 315 of 400
query: what is cofactor values
query: what is btu rating
query: what is a blood clot made of vein
query: if going to Cabo San Lucas, where do you fly into
query: what is chicken laying mash
processed 320 of 400
query: what does neuroma mean
query: integration and application the definition
query: what is eschar
query: is azure ad sync installed
qu

In [10]:
# Display the current reciprocal-rank table.
# NOTE(review): which run `rr_df` holds depends on the order the cells were executed in.
rr_df

Unnamed: 0,qid,original,degree_2,degree_3,degree_4,degree_4_split,degree_8,degree_8_split,original_irrelevant
0,0.0,0.500000,0.206250,0.136905,0.296259,0.057528,0.040323,0.066947,0.046494
1,1.0,1.000000,0.213745,0.105420,0.091106,0.148611,0.055914,0.078646,0.039379
2,2.0,0.071429,0.051042,0.054717,0.078517,0.169338,0.182870,0.085419,0.191220
3,3.0,1.000000,0.263278,0.064440,0.057346,0.068040,0.148898,0.089815,0.040191
4,4.0,1.000000,0.200505,0.178030,0.049946,0.114469,0.077782,0.074963,0.038252
...,...,...,...,...,...,...,...,...,...
395,395.0,0.111111,0.110227,0.081496,0.052324,0.047498,0.418779,0.156481,0.063808
396,396.0,0.083333,0.117469,0.060886,0.429825,0.101190,0.066011,0.108352,0.058816
397,397.0,1.000000,0.138492,0.189732,0.054852,0.129167,0.046634,0.136555,0.038384
398,398.0,1.000000,0.134733,0.172393,0.100603,0.056061,0.144037,0.075956,0.044208


In [11]:
from datetime import datetime

def get_timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD_HH-MM``."""
    return datetime.now().strftime("%Y-%m-%d_%H-%M")

In [None]:
# Save the current `rr_df` under a timestamped file name.
# NOTE(review): absolute, machine-specific path.
EVAL_DIR = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/eval'

# The timestamp keeps repeated runs from overwriting each other.
fn = 'rr_queries3_od_model_roberta_400_' + get_timestamp() + '.csv'
rr_df.to_csv(os.path.join(EVAL_DIR, fn), index=None)