# Environment Setup 


In [292]:
# !pip install sentence_transformers
# !pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
# !pip3 install black[jupyter]


In [293]:
# Variables 
# model_name = "all-distilroberta-v1" # will also change the savefile name (csv.gz files)
# model_name = "all-mpnet-base-v2" # will also change the savefile name (csv.gz files)
# model_name = "multi-qa-mpnet-base-dot-v1" # will also change the savefile name (csv.gz files)
# model_name = "msmarco-distilbert-base-tas-b" # will also change the savefile name (csv.gz files)
# model_name = "multi-qa-distilbert-cos-v1" # will also change the savefile name (csv.gz files)
model_name = "msmarco-distilbert-cos-v5" # will also change the savefile name (csv.gz files)





In [294]:
import pandas as pd
import os
# import nltk
import re
from transformers import DistilBertModel
from sentence_transformers import SentenceTransformer, util
import torch
# import torchvision
import numpy as np
import scipy


In [295]:
# Modified version of Document where tokens are not included 
class Document:
  def __init__(self, doc_no, doc_text, vector):
    self.doc_no = doc_no
    self.doc_text = doc_text
    # self.tokens = tokens
    self.vector = vector

  def __str__(self):
    # return 'Document Number: ' + self.doc_no + '\nDocument Text: ' + self.doc_text + '\nTokens: ' + str(self.tokens) + '\n'
    return 'Document Number: ' + self.doc_no + '\nDocument Text: ' + self.doc_text  + '\n Vectors: ' + str(self.vector) + '\n'


  def to_dict(self):
    # return {'docno': self.doc_no, 'doctext': self.doc_text, 'tokens': self.tokens, 'text': ' '.join(self.tokens)}
    return {'docno': self.doc_no, 'doctext': self.doc_text, 'vector': self.vector.tolist()}


In [296]:
# Modified version of preprocess where tokenizing is removed 
def preprocess(file):
    with open(file, "r") as f:
        content = f.read()
    documents = re.findall(r'<DOC>(.*?)</DOC>', content, re.DOTALL)
    preprocessed_documents = []
    for document in documents:
        # Get the document number and text
        raw_no = re.search(r'<DOCNO>(.*?)</DOCNO>', document, re.DOTALL)
        doc_no = raw_no.group(1) if raw_no else ''
        raw_text = re.search(r'<TEXT>(.*?)</TEXT>', document, re.DOTALL)
        doc_text = raw_text.group(1) if raw_text else ''
        doc = Document(doc_no, doc_text, None)
        preprocessed_documents.append(doc)
    return preprocessed_documents


In [297]:
# main function to preprocess a directory of text files
def preprocess_directory(directory, num_files=-1):
  preprocessed_documents = [] 
  ctr = 0
  for filename in os.listdir(directory):
    print('Preprocessing file: ', filename)
    file = os.path.join(directory, filename)
    preprocessed_documents.extend(preprocess(file))
    ctr += 1
    if ctr == num_files and num_files != -1:
      break
  return preprocessed_documents


In [298]:

def extract_topics(file, descriptions=False):
  with open(file, "r") as f:
    topic_content = f.read()
  all_topics = []
  topics = re.findall(r'<top>(.*?)</top>', topic_content, re.DOTALL)
  for topic in topics:
    raw_title = re.search(r'<title>(.*?)\n\n', topic, re.DOTALL)
    title = raw_title.group(1) if raw_title else ''
    if descriptions:
      raw_desc = re.search(r'<desc>(.*?)\n\n', topic, re.DOTALL)
      desc = raw_desc.group(1) if raw_desc else ''
      all_topics.append({'title': title, 'description': desc})
    else:
      all_topics.append({'title': title})
  return all_topics

In [299]:
extracted_documents = []
extracted_documents = preprocess_directory('AP_collection\coll')

Preprocessing file:  AP880212
Preprocessing file:  AP880213
Preprocessing file:  AP880214
Preprocessing file:  AP880215
Preprocessing file:  AP880216
Preprocessing file:  AP880217
Preprocessing file:  AP880218
Preprocessing file:  AP880219
Preprocessing file:  AP880220
Preprocessing file:  AP880221
Preprocessing file:  AP880222
Preprocessing file:  AP880223
Preprocessing file:  AP880224
Preprocessing file:  AP880225
Preprocessing file:  AP880226
Preprocessing file:  AP880227
Preprocessing file:  AP880228
Preprocessing file:  AP880229
Preprocessing file:  AP880301
Preprocessing file:  AP880302
Preprocessing file:  AP880303
Preprocessing file:  AP880304
Preprocessing file:  AP880307
Preprocessing file:  AP880308
Preprocessing file:  AP880309
Preprocessing file:  AP880310
Preprocessing file:  AP880311
Preprocessing file:  AP880312
Preprocessing file:  AP880313
Preprocessing file:  AP880314
Preprocessing file:  AP880315
Preprocessing file:  AP880316
Preprocessing file:  AP880317
Preprocess

In [300]:
# print(extracted_documents[0].doc_no + "\n")
# print(extracted_documents[0].doc_text) 

In [301]:
topics = extract_topics("topics1-50.txt")


# DistillBERT Setup (with Cuda)


In [302]:
model = SentenceTransformer(model_name, device="cuda:0")


Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/545 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/265M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/319 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [303]:
torch.cuda.is_available()

True

In [304]:
torch.cuda.current_device()

0

In [305]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3070 Ti Laptop GPU'

In [306]:
import datetime
import pytz

# Set the timezone to Eastern Standard Time (EST)
tz = pytz.timezone('US/Eastern')

def print_time_est():
    # Get the current time in EST
    est_time = datetime.datetime.now(tz)

    # Print the current EST time
    print("Current EST time:", est_time)


In [307]:
print_time_est()

Current EST time: 2023-04-06 00:06:09.887557-04:00


In [308]:
for doc in extracted_documents: 
    doc.vector = model.encode([doc.doc_text])
    print(doc.doc_no + " is done.")

 AP880212-0001  is done.
 AP880212-0002  is done.
 AP880212-0003  is done.
 AP880212-0004  is done.
 AP880212-0005  is done.
 AP880212-0006  is done.
 AP880212-0007  is done.
 AP880212-0008  is done.
 AP880212-0009  is done.
 AP880212-0010  is done.
 AP880212-0011  is done.
 AP880212-0012  is done.
 AP880212-0013  is done.
 AP880212-0014  is done.
 AP880212-0015  is done.
 AP880212-0016  is done.
 AP880212-0017  is done.
 AP880212-0018  is done.
 AP880212-0019  is done.
 AP880212-0020  is done.
 AP880212-0021  is done.
 AP880212-0022  is done.
 AP880212-0023  is done.
 AP880212-0024  is done.
 AP880212-0025  is done.
 AP880212-0026  is done.
 AP880212-0027  is done.
 AP880212-0028  is done.
 AP880212-0029  is done.
 AP880212-0030  is done.
 AP880212-0031  is done.
 AP880212-0032  is done.
 AP880212-0033  is done.
 AP880212-0034  is done.
 AP880212-0035  is done.
 AP880212-0036  is done.
 AP880212-0037  is done.
 AP880212-0038  is done.
 AP880212-0039  is done.
 AP880212-0040  is done.


In [309]:
print_time_est()

Current EST time: 2023-04-06 00:21:54.864745-04:00


In [310]:
len(extracted_documents)

79923

In [311]:
extracted_documents[79922].vector

array([[-7.98370615e-02, -4.25998913e-03, -4.12845947e-02,
        -4.76558041e-03, -3.74700911e-02,  1.22973062e-02,
        -8.11692253e-02,  3.13121825e-02, -5.24810292e-02,
         5.46069704e-02,  7.33496249e-02,  1.08867669e-02,
         1.18617099e-02,  1.16053801e-02, -1.34231923e-02,
        -4.56524231e-02, -7.40494803e-02, -4.26333509e-02,
        -1.75567791e-02, -1.10534998e-02, -4.22026822e-03,
        -3.46932113e-02,  4.65057939e-02,  3.50126065e-02,
         5.35766222e-02, -4.13008481e-02, -3.82502787e-02,
        -1.50174117e-02, -2.10058615e-02,  3.31214420e-03,
        -2.33786497e-02,  2.09040958e-02,  4.68314206e-03,
        -2.15435624e-02, -4.25764732e-02, -2.28324383e-02,
         5.36605641e-02,  1.09828012e-02, -2.73415390e-02,
        -5.27500026e-02,  1.79763753e-02,  3.13595869e-03,
         9.97664873e-03,  7.60509372e-02, -6.64404705e-02,
        -5.39427772e-02,  1.25085087e-02,  1.19898831e-02,
         1.14828981e-02, -6.64285719e-02,  1.03463391e-0

In [312]:
from sklearn.metrics.pairwise import cosine_similarity


In [313]:
# extracted_documents[0].vector = model.encode(extracted_documents[0].doc_text)

# Output to File 


### NOT compressed (too big for Github)

In [314]:
# LEGACY CODE 
# import json 

# with open("embedding_saves/distilroberta.json", "w") as outfile:
#     # for doc in extracted_documents:
#     json.dump(extracted_documents[0].to_dict(),outfile)
#     #     json.dump(doc.to_dict(), outfile)

#### Possible csv write *(not compressed)


In [315]:
# import csv

# # assuming you have a list of Document objects called documents
# # and assuming you have already populated the vector attribute of each Document object

# # define the headers for your CSV file
# headers = ['doc_no', 'doc_text', 'vector']

# # open the CSV file in 'w' mode and write the headers
# with open("embedding_saves/{model_name}.csv", mode='w', newline='') as file:
#     writer = csv.writer(file)
#     writer.writerow(headers)

#     # loop through each Document object and write its attributes to the CSV file
#     for document in extracted_documents:
#         writer.writerow([document.doc_no, document.doc_text, document.vector.tolist() if document.vector is not None else None])


#### Possible csv read *(not compressed)


In [316]:
# # read the CSV file and create new Document objects
# extracted_documents = []
# # --------------------------------------------------------------
# # CHANGE THIS TO extracted_documents later 
# # --------------------------------------------------------------
# with open("embedding_saves/{model_name}.csv", mode='r') as file:
#     reader = csv.reader(file)
#     headers = next(reader) # skip the header row

#     for row in reader:
#         doc_no = row[0]
#         doc_text = row[1]
#         vector = np.array(row[2], dtype=float)
#         document = Document(doc_no, doc_text, vector)
#         extracted_documents.append(document)


### Compressed CSV


In [317]:
import csv
import gzip
import os

# assuming you have a list of Document objects called documents
# and assuming you have already populated the vector attribute of each Document object

# define the headers for your CSV file
headers = ['doc_no', 'doc_text', 'vector']

# open the CSV file in 'w' mode and write the headers
with open(f"embedding_saves/{model_name}.csv", mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(headers)

    # loop through each Document object and write its attributes to the CSV file
    for document in extracted_documents:
        writer.writerow([document.doc_no, document.doc_text, document.vector.tolist() if document.vector is not None else None])

# gzip the CSV file
with open(f"embedding_saves/{model_name}.csv", 'rb') as f_in, gzip.open(f"embedding_saves/{model_name}.csv.gz", 'wb') as f_out:
    f_out.writelines(f_in)

os.remove(f"embedding_saves/{model_name}.csv")


In [318]:
# import csv
# import gzip

# # read the gzip file and create new Document objects
# extracted_documents_csv = []
# with gzip.open("embedding_saves/distilroberta.csv.gz", mode='rb') as file:
#     # read the uncompressed content of the gzip file
#     uncompressed_content = file.read()

#     # parse the uncompressed content as a CSV file
#     csv_content = uncompressed_content.decode('utf-8')
#     reader = csv.reader(csv_content.splitlines())

#     # extract the header row
#     headers = next(reader)

#     # loop through each row and create a new Document object
#     for row in reader:
#         doc_no = row[0]
#         doc_text = row[1]
#         vector = np.array(row[2], dtype=float)
#         document = Document(doc_no, doc_text, vector)
#         extracted_documents_csv.append(document)
import csv
import gzip
import ast

# read the gzip file and create new Document objects
extracted_documents = []

if os.path.isfile(f"embedding_saves/{model_name}.csv.gz"):
    with gzip.open(f"embedding_saves/{model_name}.csv.gz", mode='rb') as file:
        # read the uncompressed content of the gzip file
        uncompressed_content = file.read()

        # parse the uncompressed content as a CSV file
        csv_content = uncompressed_content.decode('utf-8')
        reader = csv.reader(csv_content.splitlines())

        # extract the header row
        headers = next(reader)

        # loop through each row and create a new Document object
        for row in reader:
            doc_no = row[0]
            doc_text = row[1]
            vector = ast.literal_eval(row[2]) if row[2] else None
            if vector is not None:
                vector = np.array(vector, dtype=float)
            document = Document(doc_no, doc_text, vector)
            extracted_documents.append(document)
else:
    print("There is no embedding saves of this model")


In [319]:
# extracted_documents[0].doc_no
# extracted_documents[0].doc_text
extracted_documents[0].vector.flatten()

array([ 4.03420329e-02,  4.58810851e-02, -2.57951096e-02, -4.09202464e-02,
       -4.12085876e-02,  2.18485780e-02,  2.11492609e-02, -4.42826077e-02,
       -5.03315367e-02,  5.63661307e-02, -3.60128097e-02,  2.17052530e-02,
       -6.17159856e-03, -2.89587788e-02,  6.07071258e-02, -4.65171449e-02,
        6.84834225e-03, -5.69607466e-02,  1.71397477e-02, -1.04771731e-02,
        2.68536806e-02,  2.48616119e-03, -7.66025390e-03, -1.86163001e-02,
       -4.25916538e-02,  8.56702868e-03, -1.64654236e-02, -5.31350542e-03,
       -1.29274381e-02,  1.00452332e-02, -2.98066232e-02, -1.39593019e-03,
       -1.17566148e-02,  4.73191924e-02, -4.53620739e-02,  1.24037229e-02,
       -2.44180001e-02,  1.51078077e-03, -7.26960786e-03, -1.34375384e-02,
        5.18579148e-02, -2.00513955e-02, -4.51537073e-02,  1.34330858e-02,
        6.64672162e-03, -7.57504022e-03,  1.05154842e-01, -4.32054363e-02,
        9.49410442e-03, -1.01758670e-02, -2.30892026e-03, -2.16197292e-03,
        2.64588352e-02, -

In [320]:

def search(query, model, preprocessed_documents, doc_embeddings, top_k=20):
  query_embeddings = model.encode([query])
  # compute distances
  distances = scipy.spatial.distance.cdist(query_embeddings, doc_embeddings, "cosine")[0]
  # get the top k results
  results = zip(range(len(distances)), distances)
  results = sorted(results, key=lambda x: x[1])
  # Create a list of tuples with the document number and the distance
  results = [(preprocessed_documents[idx].doc_no, distance) for idx, distance in results[0:top_k]]
  return results

In [321]:
# Go through all the documents and search for the top 1000 results
def query_retrieve(model, preprocessed_documents, doc_embeddings, descriptions=False, runid='runid', filename='Results.txt', top_k=1000):
  # Extract the topics
  topics = extract_topics('topics1-50.txt', descriptions)
  
  file_out = open(filename, 'w')

  for i, topic in enumerate(topics):
    # Search for the documents
    results = search(topic['title'], model, preprocessed_documents, doc_embeddings, top_k)
    for j, (doc_id, distance) in enumerate(results):
      file_out.write(f'{i+1} Q0 {doc_id.strip()} {j+1} {1-distance} {runid}\n')
  file_out.close()
  print('Written results to file ', filename)

In [322]:
# doc_embeddings = model.encode([doc.doc_text for doc in preprocessed_documents])
test = np.array([])

for doc in extracted_documents:
    # test.append(doc.vector)
    np.append(test, doc.vector)


query_retrieve(model, extracted_documents, np.array([doc.vector.flatten() for doc in extracted_documents]), descriptions=False, runid='runid', filename=f'{model_name}_Results.txt', top_k=1000)


Written results to file  msmarco-distilbert-cos-v5_Results.txt


In [323]:
print(type(extracted_documents[0].vector))

type([doc.vector for doc in extracted_documents])


<class 'numpy.ndarray'>


list

# Results 

In [324]:

./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/all-distilroberta-v1_Results.txt

SyntaxError: invalid syntax (2700158069.py, line 1)

### multi-qa-mpnet-base-dot-v1_Results
- map = 0.3041

```
./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/multi-qa-mpnet-base-dot-v1_Results.txt

runid                   all     runid
num_q                   all     50
num_ret                 all     50000
num_rel                 all     2099
num_rel_ret             all     1426
map                     all     0.3041
gm_map                  all     0.1712
Rprec                   all     0.3133
bpref                   all     0.3937
recip_rank              all     0.6907
iprec_at_recall_0.00    all     0.7293
iprec_at_recall_0.10    all     0.6050
iprec_at_recall_0.20    all     0.5128
iprec_at_recall_0.30    all     0.4411
iprec_at_recall_0.40    all     0.3793
iprec_at_recall_0.50    all     0.2888
iprec_at_recall_0.60    all     0.2212
iprec_at_recall_0.70    all     0.1742
iprec_at_recall_0.80    all     0.1089
iprec_at_recall_0.90    all     0.0505
iprec_at_recall_1.00    all     0.0315
P_5                     all     0.4800
P_10                    all     0.4460
P_15                    all     0.4000
P_20                    all     0.3730
P_30                    all     0.3173
P_100                   all     0.1686
P_200                   all     0.1038
P_500                   all     0.0509
P_1000                  all     0.0285
```

### multi-qa-distilbert-cos-v1 
- map = 0.2733

```
./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/multi-qa-distilbert-cos-v1_Results.txt
runid                   all     runid
num_q                   all     50
num_ret                 all     50000
num_rel                 all     2099
num_rel_ret             all     1442
map                     all     0.2733
gm_map                  all     0.1501
Rprec                   all     0.3047
bpref                   all     0.3707
recip_rank              all     0.6945
iprec_at_recall_0.00    all     0.7310
iprec_at_recall_0.10    all     0.5705
iprec_at_recall_0.20    all     0.4815
iprec_at_recall_0.30    all     0.4024
iprec_at_recall_0.40    all     0.3367
iprec_at_recall_0.50    all     0.2626
iprec_at_recall_0.60    all     0.1827
iprec_at_recall_0.70    all     0.1248
iprec_at_recall_0.80    all     0.0858
iprec_at_recall_0.90    all     0.0518
iprec_at_recall_1.00    all     0.0269
P_5                     all     0.4480
P_10                    all     0.4000
P_15                    all     0.3720
P_20                    all     0.3430
P_30                    all     0.3000
P_100                   all     0.1670
P_200                   all     0.1054
P_500                   all     0.0508
P_1000                  all     0.0288
```

### msmarco-distilbert-base-tas-b
- map = 0.1748


```
./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/msmarco-distilbert-base-tas-b_Results.txt
runid                   all     runid
num_q                   all     50
num_ret                 all     50000
num_rel                 all     2099
num_rel_ret             all     1157
map                     all     0.1748
gm_map                  all     0.0553
Rprec                   all     0.2057
bpref                   all     0.3011
recip_rank              all     0.5585
iprec_at_recall_0.00    all     0.5814
iprec_at_recall_0.10    all     0.4165
iprec_at_recall_0.20    all     0.3066
iprec_at_recall_0.30    all     0.2441
iprec_at_recall_0.40    all     0.1892
iprec_at_recall_0.50    all     0.1495
iprec_at_recall_0.60    all     0.1076
iprec_at_recall_0.70    all     0.0696
iprec_at_recall_0.80    all     0.0436
iprec_at_recall_0.90    all     0.0271
iprec_at_recall_1.00    all     0.0162
P_5                     all     0.3960
P_10                    all     0.3260
P_15                    all     0.2693
P_20                    all     0.2480
P_30                    all     0.2067
P_100                   all     0.1074
P_200                   all     0.0694
P_500                   all     0.0379
P_1000                  all     0.0231
```

### all-distilroberta-v1
- map = 0.1800

```
./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/all-distilroberta-v1_Results.txt
runid                   all     runid
num_q                   all     50
num_ret                 all     50000
num_rel                 all     2099
num_rel_ret             all     1303
map                     all     0.1800
gm_map                  all     0.0727
Rprec                   all     0.2166
bpref                   all     0.3167
recip_rank              all     0.5108
iprec_at_recall_0.00    all     0.5577
iprec_at_recall_0.10    all     0.3966
iprec_at_recall_0.20    all     0.3339
iprec_at_recall_0.30    all     0.2623
iprec_at_recall_0.40    all     0.2090
iprec_at_recall_0.50    all     0.1560
iprec_at_recall_0.60    all     0.1166
iprec_at_recall_0.70    all     0.0853
iprec_at_recall_0.80    all     0.0468
iprec_at_recall_0.90    all     0.0171
iprec_at_recall_1.00    all     0.0134
P_5                     all     0.3320
P_10                    all     0.2940
P_15                    all     0.2760
P_20                    all     0.2510
P_30                    all     0.2193
P_100                   all     0.1276
P_200                   all     0.0816
P_500                   all     0.0440
P_1000                  all     0.0261
```

### all-mpnet-base-v2
- map = 0.3201

```
./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/all-mpnet-base-v2_Results.txt
runid                   all     runid
num_q                   all     50
num_ret                 all     50000
num_rel                 all     2099
num_rel_ret             all     1518
map                     all     0.3201
gm_map                  all     0.1956
Rprec                   all     0.3327
bpref                   all     0.4019
recip_rank              all     0.7221
iprec_at_recall_0.00    all     0.7723
iprec_at_recall_0.10    all     0.6283
iprec_at_recall_0.20    all     0.5385
iprec_at_recall_0.30    all     0.4573
iprec_at_recall_0.40    all     0.3758
iprec_at_recall_0.50    all     0.3270
iprec_at_recall_0.60    all     0.2525
iprec_at_recall_0.70    all     0.1777
iprec_at_recall_0.80    all     0.1232
iprec_at_recall_0.90    all     0.0657
iprec_at_recall_1.00    all     0.0338
P_5                     all     0.5160
P_10                    all     0.4700
P_15                    all     0.4213
P_20                    all     0.3940
P_30                    all     0.3307
P_100                   all     0.1786
P_200                   all     0.1114
P_500                   all     0.0540
P_1000                  all     0.0304
```

### msmarco-distilbert-cos-v5
- map = 0.2058

In [None]:
# ./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/msmarco-distilbert-cos-v5_Results.txt
# runid                   all     runid
# num_q                   all     50
# num_ret                 all     50000
# num_rel                 all     2099
# num_rel_ret             all     1224
# map                     all     0.2058
# gm_map                  all     0.0666
# Rprec                   all     0.2360
# bpref                   all     0.3251
# recip_rank              all     0.6229
# iprec_at_recall_0.00    all     0.6478
# iprec_at_recall_0.10    all     0.4898
# iprec_at_recall_0.20    all     0.3684
# iprec_at_recall_0.30    all     0.2985
# iprec_at_recall_0.40    all     0.2190
# iprec_at_recall_0.50    all     0.1736
# iprec_at_recall_0.60    all     0.1296
# iprec_at_recall_0.70    all     0.0787
# iprec_at_recall_0.80    all     0.0512
# iprec_at_recall_0.90    all     0.0249
# iprec_at_recall_1.00    all     0.0194
# P_5                     all     0.4160
# P_10                    all     0.3580
# P_15                    all     0.3187
# P_20                    all     0.2850
# P_30                    all     0.2413
# P_100                   all     0.1262
# P_200                   all     0.0801
# P_500                   all     0.0412
# P_1000                  all     0.0245

### 

<!-- ./trec_eval /home/shang/Info_Retrieval/qrels1-50ap.txt /home/shang/Info_Retrieval/msmarco-distilbert-cos-v5_Results.txt
runid                   all     runid
num_q                   all     50
num_ret                 all     50000
num_rel                 all     2099
num_rel_ret             all     1224
map                     all     0.2058
gm_map                  all     0.0666
Rprec                   all     0.2360
bpref                   all     0.3251
recip_rank              all     0.6229
iprec_at_recall_0.00    all     0.6478
iprec_at_recall_0.10    all     0.4898
iprec_at_recall_0.20    all     0.3684
iprec_at_recall_0.30    all     0.2985
iprec_at_recall_0.40    all     0.2190
iprec_at_recall_0.50    all     0.1736
iprec_at_recall_0.60    all     0.1296
iprec_at_recall_0.70    all     0.0787
iprec_at_recall_0.80    all     0.0512
iprec_at_recall_0.90    all     0.0249
iprec_at_recall_1.00    all     0.0194
P_5                     all     0.4160
P_10                    all     0.3580
P_15                    all     0.3187
P_20                    all     0.2850
P_30                    all     0.2413
P_100                   all     0.1262
P_200                   all     0.0801
P_500                   all     0.0412
P_1000                  all     0.0245 -->