In [11]:
from torch.utils.data import DataLoader
from torch.utils.data import IterableDataset
from sentence_transformers import SentenceTransformer, LoggingHandler, util, models, evaluation, losses, InputExample

import numpy as np
import pandas as pd

import logging
import os
import glob
import torch
import json

In [2]:
logging.basicConfig(
    format='- %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
    handlers=[LoggingHandler()]
)

class TripletsDataset(IterableDataset):
    def __init__(self, model, corpus, train_triplets):
        self.model = model
        self.corpus = corpus
        self.train_triplets = train_triplets

    def __iter__(self):
        for triplet in self.train_triplets:
            qid, pos_id, neg_id = triplet
            query_text = self.corpus[str(qid)]
            pos_text = self.corpus[str(pos_id)]
            neg_text = self.corpus[str(neg_id)]

            yield InputExample(texts=[query_text, pos_text, neg_text])

    def __len__(self):
        return len(self.train_triplets)
    
def get_triplets(Passage_dict):
    triplets = []
    for k, v in Passage_dict.items():
        for x in v[0]:
            for y in v[1]:
                # query, same_api, diff_api
                triplets.append([k, x, y])

    return triplets

# def evaluate_cossim(queries, rel_docs, corpus, model):
#     for (idx, query) in queries:
#         passage_id = list(rel_docs[idx])[0]
#         passage = corpus[passage_id]
#         print(query)
#         print(passage_id)
#         print(passage)
#         query_embedding = model.encode(query)
#         passage_embedding = model.encode(passage)

#         print("Similarity:", util.pytorch_cos_sim(query_embedding, passage_embedding))

In [3]:
# hyperparam
model_name = 'distilroberta-base'
batch_size = 256
model_save_path = 'models/bienc-exp3'
pretrained = '../CLEAR-replication/models/training_biker_bi-encoder-min_5_max_10_ir_10_distilroberta-base-full-best/'

In [4]:
with open('./data/generated/Corpus_dict.json', 'r') as f:
    corpus = json.load(f)

with open('./data/generated/Passage_dict.json', 'r') as f:
    passage = json.load(f)

df = pd.read_json('data/generated/dataset.json')
df = df.set_index('index')

# training data
with open('./data/generated/train_queries.json', 'r') as f:
    train_queries_idx = json.load(f)

# train_passage = {str(k): passage[k] for k in passage if int(k) in train_queries_idx}
# train_corpus = {str(k): corpus[k] for k in corpus if int(k) in train_queries_idx}

with open('./data/generated/train_passage.json', 'r') as f:
    train_passage = json.load(f)

with open('./data/generated/train_corpus.json', 'r') as f:
    train_corpus = json.load(f)

train_triplets = get_triplets(train_passage)

In [5]:
# evaluation data
with open('./data/generated/evaluate_queries.json', 'r') as f:
    val_queries_idx = json.load(f)

with open('./data/generated/evaluate_rel_doc.json', 'r') as f:
    val_rel_doc_raw = json.load(f)

val_queries = {str(k): df.loc[int(k)]['Question Title'] for k in passage if int(k) in val_queries_idx}
val_corpus = {}
for rel_docs in val_rel_doc_raw.values():
    # rel_docs is a single element list
    for rel_doc in rel_docs[0]:
        if not val_corpus.get(rel_doc):
            val_corpus[str(rel_doc)] = df.loc[int(rel_doc)]['Question Title']

val_rel_doc = {}
for query, rel_doc in val_rel_doc_raw.items():
    rel_doc = [str(r) for r in rel_doc[0]]
    val_rel_doc[query] = set(rel_doc)

In [6]:
if pretrained is not None:
    model = SentenceTransformer(pretrained)
else:
    word_embedding_model = models.Transformer(model_name, max_seq_length=350)
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

- Load pretrained SentenceTransformer: ../CLEAR-replication/models/training_biker_bi-encoder-min_5_max_10_ir_10_distilroberta-base-full-best/
- Use pytorch device: cuda


In [7]:
train_dataset = TripletsDataset(model=model, corpus=train_corpus, train_triplets=train_triplets)
train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
train_loss = losses.MultipleNegativesRankingLoss(model=model)
ir_evaluator = evaluation.InformationRetrievalEvaluator(val_queries, val_corpus, val_rel_doc, name='distilroberta-train_eval')

In [8]:
model.evaluate(evaluator=ir_evaluator)

- Information Retrieval Evaluation on distilroberta-train_eval dataset:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 25.48%
- Accuracy@5: 39.19%
- Accuracy@10: 57.26%
- Precision@1: 0.00%
- Precision@3: 9.46%
- Precision@5: 11.45%
- Precision@10: 12.37%
- Recall@1: 0.00%
- Recall@3: 0.73%
- Recall@5: 1.41%
- Recall@10: 3.19%
- MRR@10: 0.1662
- NDCG@10: 0.1106
- MAP@100: 0.0468
- Score-Function: dot_score
- Accuracy@1: 3.55%
- Accuracy@3: 23.06%
- Accuracy@5: 36.13%
- Accuracy@10: 57.90%
- Precision@1: 3.55%
- Precision@3: 8.98%
- Precision@5: 10.26%
- Precision@10: 11.39%
- Recall@1: 0.13%
- Recall@3: 0.88%
- Recall@5: 1.49%
- Recall@10: 3.17%
- MRR@10: 0.1740
- NDCG@10: 0.1048
- MAP@100: 0.0458


0.04677852414723046

In [9]:
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=ir_evaluator,
    epochs=5,
    warmup_steps=500,
    output_path=model_save_path,
    evaluation_steps=750,
    save_best_model=True,
    use_amp=True
)

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1561 [00:00<?, ?it/s]

- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 0 after 750 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 31.45%
- Accuracy@5: 45.48%
- Accuracy@10: 65.48%
- Precision@1: 0.00%
- Precision@3: 12.69%
- Precision@5: 15.10%
- Precision@10: 16.39%
- Recall@1: 0.00%
- Recall@3: 1.07%
- Recall@5: 1.91%
- Recall@10: 4.30%
- MRR@10: 0.1976
- NDCG@10: 0.1456
- MAP@100: 0.0774
- Score-Function: dot_score
- Accuracy@1: 10.65%
- Accuracy@3: 31.45%
- Accuracy@5: 42.26%
- Accuracy@10: 61.77%
- Precision@1: 10.65%
- Precision@3: 12.80%
- Precision@5: 13.68%
- Precision@10: 14.58%
- Recall@1: 0.33%
- Recall@3: 1.08%
- Recall@5: 1.67%
- Recall@10: 3.53%
- MRR@10: 0.2444
- NDCG@10: 0.1398
- MAP@100: 0.0744
- Save model to models/bienc-exp3
- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 0 after 1500 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accurac

Iteration:   0%|          | 0/1561 [00:00<?, ?it/s]

- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 1 after 750 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 35.16%
- Accuracy@5: 48.87%
- Accuracy@10: 66.29%
- Precision@1: 0.00%
- Precision@3: 14.35%
- Precision@5: 16.71%
- Precision@10: 18.21%
- Recall@1: 0.00%
- Recall@3: 1.26%
- Recall@5: 2.24%
- Recall@10: 4.70%
- MRR@10: 0.2133
- NDCG@10: 0.1629
- MAP@100: 0.0885
- Score-Function: dot_score
- Accuracy@1: 9.35%
- Accuracy@3: 30.48%
- Accuracy@5: 43.55%
- Accuracy@10: 63.23%
- Precision@1: 9.35%
- Precision@3: 14.95%
- Precision@5: 16.84%
- Precision@10: 17.79%
- Recall@1: 0.24%
- Recall@3: 0.95%
- Recall@5: 1.85%
- Recall@10: 4.15%
- MRR@10: 0.2427
- NDCG@10: 0.1669
- MAP@100: 0.0867
- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 1 after 1500 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 36.77%
- Accuracy@5: 50.16%
- A

Iteration:   0%|          | 0/1561 [00:00<?, ?it/s]

- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 2 after 750 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 39.52%
- Accuracy@5: 51.45%
- Accuracy@10: 67.10%
- Precision@1: 0.00%
- Precision@3: 16.51%
- Precision@5: 18.23%
- Precision@10: 19.18%
- Recall@1: 0.00%
- Recall@3: 1.47%
- Recall@5: 2.49%
- Recall@10: 5.29%
- MRR@10: 0.2216
- NDCG@10: 0.1729
- MAP@100: 0.0921
- Score-Function: dot_score
- Accuracy@1: 15.00%
- Accuracy@3: 35.65%
- Accuracy@5: 47.58%
- Accuracy@10: 62.74%
- Precision@1: 15.00%
- Precision@3: 17.15%
- Precision@5: 18.90%
- Precision@10: 18.89%
- Recall@1: 0.50%
- Recall@3: 1.21%
- Recall@5: 2.19%
- Recall@10: 4.42%
- MRR@10: 0.2892
- NDCG@10: 0.1858
- MAP@100: 0.0926
- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 2 after 1500 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 38.71%
- Accuracy@5: 54.03%
-

Iteration:   0%|          | 0/1561 [00:00<?, ?it/s]

- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 3 after 750 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 39.68%
- Accuracy@5: 54.19%
- Accuracy@10: 68.39%
- Precision@1: 0.00%
- Precision@3: 16.72%
- Precision@5: 18.90%
- Precision@10: 19.21%
- Recall@1: 0.00%
- Recall@3: 1.41%
- Recall@5: 2.66%
- Recall@10: 5.06%
- MRR@10: 0.2341
- NDCG@10: 0.1746
- MAP@100: 0.0974
- Score-Function: dot_score
- Accuracy@1: 10.00%
- Accuracy@3: 39.68%
- Accuracy@5: 50.16%
- Accuracy@10: 65.65%
- Precision@1: 10.00%
- Precision@3: 17.53%
- Precision@5: 18.68%
- Precision@10: 18.92%
- Recall@1: 0.35%
- Recall@3: 1.38%
- Recall@5: 2.38%
- Recall@10: 4.49%
- MRR@10: 0.2750
- NDCG@10: 0.1814
- MAP@100: 0.0969
- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 3 after 1500 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 35.97%
- Accuracy@5: 51.94%
-

Iteration:   0%|          | 0/1561 [00:00<?, ?it/s]

- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 4 after 750 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 42.10%
- Accuracy@5: 54.03%
- Accuracy@10: 67.58%
- Precision@1: 0.00%
- Precision@3: 17.58%
- Precision@5: 18.74%
- Precision@10: 19.63%
- Recall@1: 0.00%
- Recall@3: 1.60%
- Recall@5: 2.69%
- Recall@10: 5.41%
- MRR@10: 0.2342
- NDCG@10: 0.1781
- MAP@100: 0.1010
- Score-Function: dot_score
- Accuracy@1: 9.19%
- Accuracy@3: 37.58%
- Accuracy@5: 49.84%
- Accuracy@10: 65.81%
- Precision@1: 9.19%
- Precision@3: 16.77%
- Precision@5: 18.13%
- Precision@10: 19.02%
- Recall@1: 0.25%
- Recall@3: 1.32%
- Recall@5: 2.39%
- Recall@10: 4.95%
- MRR@10: 0.2654
- NDCG@10: 0.1802
- MAP@100: 0.0975
- Save model to models/bienc-exp3
- Information Retrieval Evaluation on distilroberta-train_eval dataset in epoch 4 after 1500 steps:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@

In [10]:
results = model.evaluate(evaluator=ir_evaluator)

- Information Retrieval Evaluation on distilroberta-train_eval dataset:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 41.61%
- Accuracy@5: 55.32%
- Accuracy@10: 70.32%
- Precision@1: 0.00%
- Precision@3: 17.69%
- Precision@5: 19.48%
- Precision@10: 20.24%
- Recall@1: 0.00%
- Recall@3: 1.78%
- Recall@5: 3.02%
- Recall@10: 5.71%
- MRR@10: 0.2356
- NDCG@10: 0.1838
- MAP@100: 0.1033
- Score-Function: dot_score
- Accuracy@1: 10.81%
- Accuracy@3: 36.61%
- Accuracy@5: 50.00%
- Accuracy@10: 65.65%
- Precision@1: 10.81%
- Precision@3: 16.83%
- Precision@5: 18.19%
- Precision@10: 19.24%
- Recall@1: 0.30%
- Recall@3: 1.40%
- Recall@5: 2.50%
- Recall@10: 4.85%
- MRR@10: 0.2733
- NDCG@10: 0.1834
- MAP@100: 0.0991


In [12]:
del model
torch.cuda.empty_cache()

In [15]:
model_paths = sorted(glob.glob('./models/*'))
for model_path in model_paths:
    experiment = os.path.basename(model_path)
    model = SentenceTransformer(model_path)
    map100 = model.evaluate(evaluator=ir_evaluator)
    print(f'Experiment {experiment}: {map100:.3f}')
    del model
    torch.cuda.empty_cache()

- Load pretrained SentenceTransformer: ./models/bienc-exp1
- Use pytorch device: cuda
- Information Retrieval Evaluation on distilroberta-train_eval dataset:
- Queries: 620
- Corpus: 620

- Score-Function: cos_sim
- Accuracy@1: 0.00%
- Accuracy@3: 56.29%
- Accuracy@5: 69.52%
- Accuracy@10: 78.87%
- Precision@1: 0.00%
- Precision@3: 28.76%
- Precision@5: 33.16%
- Precision@10: 33.52%
- Recall@1: 0.00%
- Recall@3: 3.55%
- Recall@5: 6.20%
- Recall@10: 11.05%
- MRR@10: 0.3081
- NDCG@10: 0.3065
- MAP@100: 0.1785
- Score-Function: dot_score
- Accuracy@1: 15.81%
- Accuracy@3: 34.19%
- Accuracy@5: 40.81%
- Accuracy@10: 58.39%
- Precision@1: 15.81%
- Precision@3: 15.81%
- Precision@5: 14.68%
- Precision@10: 14.60%
- Recall@1: 0.36%
- Recall@3: 1.30%
- Recall@5: 1.95%
- Recall@10: 4.21%
- MRR@10: 0.2741
- NDCG@10: 0.1515
- MAP@100: 0.0702
Experiment bienc-exp1: 0.179
- Load pretrained SentenceTransformer: ./models/bienc-exp2
- Use pytorch device: cuda
- Information Retrieval Evaluation on distil