In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell is executed, so edits to .py files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2


In [2]:
from sentence_transformers import SentenceTransformer, LoggingHandler
import numpy as np
import os
import pandas as pd
from gensim.test.utils import common_texts
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import nltk
from nltk.corpus import stopwords
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import PorterStemmer 
from abc import ABC, abstractmethod
from collections import Counter, defaultdict as dd, OrderedDict
import time


In [3]:
class Ranker(ABC):
    """Abstract base class for rankers that order documents for a query.

    Subclasses implement :meth:`rank`. The helpers compute length-ratio
    weights and convert an array of distances into a ranking.
    """

    @abstractmethod
    def rank(self, query, docs):
        """
        Return sorted indices
        """
        pass

    def _get_weight(self, query_len, doc_len):
        """Return the length ratio ``doc_len / query_len``.

        Raises ZeroDivisionError if ``query_len`` is 0.
        """
        return doc_len / query_len

    def _get_weights(self, query_parts, docs_parts):
        """Return a NumPy array with one length-ratio weight per document.

        ``query_parts`` and each entry of ``docs_parts`` only need to support
        ``len()``.
        """
        query_len = len(query_parts)
        return np.array([self._get_weight(query_len, len(doc_parts))
                         for doc_parts in docs_parts])

    @staticmethod
    def dists_to_indices(dists):
        """Return the positions of ``dists`` ordered by ascending distance.

        Ties keep their original relative order (stable sort). Singleton
        dimensions are squeezed away, so shapes ``(n,)``, ``(n, 1)`` and
        ``(1, n)`` are all accepted. A single distance yields ``[0]`` and an
        empty input yields ``[]``.

        Raises ValueError if the input is still multi-dimensional after
        squeezing.
        """
        # atleast_1d: squeezing a length-1 array gives a 0-d array, which
        # cannot be iterated or ranked.
        flat = np.atleast_1d(np.asarray(dists).squeeze())
        if flat.ndim != 1:
            raise ValueError(
                'expected a 1-D array of distances, got shape {}'.format(flat.shape))
        # tolist() yields plain Python ints.
        return np.argsort(flat, kind="stable").tolist()

In [4]:
class BM25Ranker(Ranker):
    """Rank documents against a query with a simplified BM25 score.

    Only the term-frequency saturation (``k1``) and an optional IDF factor
    are used; there is no document-length normalization term.
    """
    # see also:
    # https://www.elastic.co/blog/practical-bm25-part-2-the-bm25-algorithm-and-its-variables

    stemmer = PorterStemmer()
    stopwords = set(stopwords.words('english'))

    def __init__(self, stem=True, use_idf=True, weigh=False):
        """
        stem:    apply Porter stemming to query and document tokens.
        use_idf: multiply each term score by its IDF (otherwise IDF is 1).
        weigh:   multiply each document score by the length ratio from
                 ``Ranker._get_weight``.
        """
        self.stem = stem
        self.k1 = 1.2
        self.use_idf = use_idf
        self.weigh = weigh

    def _preprocess_text(self, text):
        """Tokenize ``text`` into lower-cased alphabetic, non-stopword terms.

        Stopwords are removed before stemming: the stopword list holds
        unstemmed words, so filtering after stemming lets stemmed stopwords
        (for example "was" -> "wa") slip through.
        """
        words = nltk.word_tokenize(text)
        words = [word.lower() for word in words if word.isalpha()]
        words = [w for w in words if w not in self.stopwords]
        if self.stem:
            words = [self.stemmer.stem(word) for word in words]
        return words

    def _get_idf_values(self, query_terms, query_term_to_nr_docs, nr_docs):
        """Return ``{term: idf}`` for every distinct query term.

        idf = ln(1 + (N - n + 0.5) / (n + 0.5)), where N is ``nr_docs`` and n
        is the number of documents containing the term. With
        ``use_idf=False`` every term gets an IDF of 1.
        """
        query_term_to_idf = {}
        for query_term in set(query_terms):
            if not self.use_idf:
                query_term_to_idf[query_term] = 1
                continue
            nr_docs_with_term = query_term_to_nr_docs[query_term]
            idf_exp = (nr_docs - nr_docs_with_term + 0.5) / (nr_docs_with_term + 0.5)
            query_term_to_idf[query_term] = np.log(1 + idf_exp)
        return query_term_to_idf

    def _get_tf_values(self, query_terms, docs, docs_terms=None):
        """Return per-document term frequencies and document frequencies.

        Returns ``(tf_index, query_term_to_nr_docs)``: ``tf_index[i][term]``
        is the count of ``term`` in document ``i``, and
        ``query_term_to_nr_docs[term]`` is the number of documents containing
        it.

        ``docs_terms`` may be passed as already-preprocessed per-document
        ``Counter`` objects to avoid tokenizing ``docs`` again; when omitted
        it is computed from ``docs``.
        """
        if docs_terms is None:
            docs_terms = [Counter(self._preprocess_text(doc)) for doc in docs]

        unique_terms = set(query_terms)
        # Counter returns 0 for missing terms without inserting them.
        tf_index = [{term: doc_terms[term] for term in unique_terms}
                    for doc_terms in docs_terms]

        query_term_to_nr_docs = dd(int)
        for doc_tf in tf_index:
            for term, tf in doc_tf.items():
                if tf > 0:
                    query_term_to_nr_docs[term] += 1
        return tf_index, query_term_to_nr_docs

    def rank(self, query, docs):
        """Return document indices ordered from highest to lowest score.

        When ``weigh`` is set, each score is multiplied by the number of
        distinct document terms divided by the number of query tokens.
        """
        # A single candidate is trivially ranked first (same shortcut as
        # SBERTcomplexRanker / SBERTlcsRanker).
        if len(docs) == 1:
            return [0]

        query_terms = self._preprocess_text(query)
        # Preprocess the documents once and share the result with the TF index.
        docs_terms = [Counter(self._preprocess_text(doc)) for doc in docs]

        tf_index, query_term_to_nr_docs = self._get_tf_values(query_terms, docs, docs_terms)
        query_term_to_idf = self._get_idf_values(query_terms, query_term_to_nr_docs, len(docs))

        unique_terms = set(query_terms)
        doc_scores = []
        for i in range(len(docs)):
            doc_score = 0
            for query_term in unique_terms:
                tf = tf_index[i][query_term]
                tf_score_part = (tf * (self.k1 + 1)) / (tf + self.k1)  # no length normalization!
                doc_score += query_term_to_idf[query_term] * tf_score_part
            # With no usable query terms every score is 0, and the length
            # ratio would divide by zero, so skip the weighting.
            if self.weigh and query_terms:
                doc_score *= self._get_weight(len(query_terms), len(docs_terms[i]))
            doc_scores.append(-doc_score)  # negative, as we sort by distance

        return self.dists_to_indices(np.array(doc_scores))
                
            

In [5]:
# Smoke test: BM25 with length weighting on a toy query and four documents.
bm25_ranker = BM25Ranker(weigh=True)

bm25_ranker.rank(
    "i saw a cat. The cat was sitting on the mat. ",
    [
        "I like cats. Even though I wasn't sure. the cat was seen by me. Yeey cats, i Love mats too",
        "Even though I wasn't sure. the cat was seen by me. It was still okay for me.",
        "Even though I wasn't sure. ",
        "the cat was seen by me. ",
    ],
)

[0, 1, 3, 2]

In [6]:
# global cache for sentence embeddings per model:
# maps hex(id(model)) -> {sentence: embedding}

SENTENCE_EMBEDDINGS_CACHE = dd(lambda: {})

def get_sentence_embedding(sentence, model):
    """Return the embedding of ``sentence`` under ``model``, using the cache.

    On a cache miss the sentence is encoded with ``model.encode([sentence])``
    and the first result is stored and returned.

    The cache is keyed by the model's ``id()``. An id can be reused once the
    model is garbage collected, so the first time a model is seen a finalizer
    is registered that drops its cache entry when the model dies. That keeps a
    later model at the same address from receiving stale embeddings and frees
    the cached arrays of discarded models.
    """
    import weakref  # local import keeps this cell self-contained

    model_address = hex(id(model))

    if model_address not in SENTENCE_EMBEDDINGS_CACHE:
        try:
            finalizer = weakref.finalize(
                model, SENTENCE_EMBEDDINGS_CACHE.pop, model_address, None)
            finalizer.atexit = False  # nothing to clean up at interpreter exit
        except TypeError:
            # Model type does not support weak references; fall back to the
            # plain id-keyed cache without eviction.
            pass

    model_cache = SENTENCE_EMBEDDINGS_CACHE[model_address]
    try:
        return model_cache[sentence]
    except KeyError:
        embedding = model.encode([sentence])[0]
        model_cache[sentence] = embedding
        return embedding

def get_sentences_embeddings(sentences, model):
    """Embed every sentence through the cached single-sentence lookup.

    Returns the embeddings stacked into one NumPy array, in input order.
    """
    return np.array([get_sentence_embedding(text, model) for text in sentences])

In [7]:
class SBERTAverageRanker(Ranker):
    """Rank documents by cosine distance between mean sentence embeddings.

    Query and documents are split into sentences, each sentence is embedded
    with ``model``, and the per-text mean embedding is compared.
    """

    def __init__(self, model, weigh=False):
        """
        model: object passed to ``get_sentences_embeddings`` for encoding.
        weigh: divide each distance by the document/query sentence-count ratio.
        """
        self.model = model
        self.weigh = weigh

    def rank(self, query, docs):
        """Return document indices ordered by ascending (weighted) distance."""
        # A single candidate is trivially ranked first (same shortcut as
        # SBERTcomplexRanker / SBERTlcsRanker).
        if len(docs) == 1:
            return [0]

        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]

        embeddings_query = get_sentences_embeddings(sents_query, self.model)
        embeddings_docs = [get_sentences_embeddings(x, self.model) for x in sents_docs]

        # Collapse each text to the mean of its sentence embeddings.
        embedding_query = np.mean(embeddings_query, axis=0)
        embedding_docs = [np.mean(x, axis=0) for x in embeddings_docs]

        # cdist expects 2-D inputs: one query row against all document rows.
        dists = cdist(embedding_query.reshape(1, -1), embedding_docs, "cosine")[0]

        if self.weigh:
            # Weight = (#doc sentences / #query sentences); dividing by it
            # shrinks the distance of documents with more sentences.
            dists *= 1 / (self._get_weights(embeddings_query, embeddings_docs))

        return self.dists_to_indices(dists)
    

In [196]:
# Load the SentenceTransformer checkpoint used by all SBERT rankers in this notebook.
# NOTE(review): hard-coded absolute path; this cell only works where that
# directory exists. Consider making it configurable.
model_path = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/train_results/test_model_5/model_lre06_not_od_checkpoint1'

model = SentenceTransformer(model_path)


In [186]:
# Show the dimensionality of the sentence embeddings produced by the loaded model.
model.get_sentence_embedding_dimension()

768

In [187]:
# Inspect which model ids (hex(id(model))) currently have cached embeddings.
SENTENCE_EMBEDDINGS_CACHE.keys()

dict_keys(['0x7f9e90083630'])

In [188]:
# Smoke test: mean-embedding ranker without length weighting.
sberta_ranker = SBERTAverageRanker(model)

sberta_ranker.rank(
    "i saw a cat. The cat was sitting on the mat",
    [
        "the cat was seen by me",
        "pineapple on pizza is bad",
        "i saw a cat",
    ],
)

[2, 0, 1]

In [189]:
# Smoke test: mean-embedding ranker with length weighting.
sberta_ranker = SBERTAverageRanker(model, weigh=True)

sberta_ranker.rank(
    "i saw a cat. The cat was sitting on the mat",
    [
        "the cat was seen by me",
        "pineapple on pizza is bad",
        "i saw a cat",
    ],
)

[2, 0, 1]

In [190]:
# Inspect which model ids (hex(id(model))) currently have cached embeddings.
SENTENCE_EMBEDDINGS_CACHE.keys()

dict_keys(['0x7f9e90083630', '0x7f9e6af497f0'])

In [191]:
class SBERTMaxRanker(Ranker):
    """Rank documents by cosine distance between max-pooled sentence embeddings.

    Query and documents are split into sentences, each sentence is embedded
    with ``model``, and the element-wise maximum over a text's sentence
    embeddings is compared.
    """

    def __init__(self, model, weigh=False):
        """
        model: object passed to ``get_sentences_embeddings`` for encoding.
        weigh: divide each distance by the document/query sentence-count ratio.
        """
        self.model = model
        self.weigh = weigh

    def rank(self, query, docs):
        """Return document indices ordered by ascending (weighted) distance."""
        # A single candidate is trivially ranked first (same shortcut as
        # SBERTcomplexRanker / SBERTlcsRanker).
        if len(docs) == 1:
            return [0]

        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]

        embeddings_query = get_sentences_embeddings(sents_query, self.model)
        embeddings_docs = [get_sentences_embeddings(x, self.model) for x in sents_docs]

        # Collapse each text to the element-wise max of its sentence embeddings.
        embedding_query = np.max(embeddings_query, axis=0)
        embedding_docs = [np.max(x, axis=0) for x in embeddings_docs]

        # cdist expects 2-D inputs: one query row against all document rows.
        dists = cdist(embedding_query.reshape(1, -1), embedding_docs, "cosine")[0]

        if self.weigh:
            # Weight = (#doc sentences / #query sentences); dividing by it
            # shrinks the distance of documents with more sentences.
            dists *= 1 / (self._get_weights(embeddings_query, embeddings_docs))

        return self.dists_to_indices(dists)

In [192]:
# Smoke test: max-pooling ranker without length weighting.
sbertmax_ranker = SBERTMaxRanker(model)

sbertmax_ranker.rank(
    "The cat was sitting on the mat",
    [
        "the cat was seen by me",
        "pineapple on pizza is bad",
        "i saw a cat",
        "The cat was sitting on the mat",
    ],
)

[3, 2, 0, 1]

In [193]:
# Smoke test: max-pooling ranker with length weighting, including
# multi-sentence documents.
sbertmax_ranker = SBERTMaxRanker(model, weigh=True)

sbertmax_ranker.rank(
    "The cat was sitting on the mat",
    [
        "the cat was seen by me",
        "pineapple on pizza is bad.",
        "i saw a cat",
        "The cat was sitting on the mat.",
        "The cat was sitting on the mat. The cat was sitting on the mat.  The cat was sitting on the mat",
        "The cat was sitting on the mat. pineapple on pizza is bad. pineapple on pizza is bad.",
    ],
)

[4, 3, 5, 2, 0, 1]

In [194]:
# Inspect which model ids (hex(id(model))) currently have cached embeddings.
SENTENCE_EMBEDDINGS_CACHE.keys()

dict_keys(['0x7f9e90083630', '0x7f9e6af497f0'])

In [19]:
class SBERTcomplexRanker(Ranker):
    """Rank documents by summed pairwise sentence distances.

    For each document, the cosine distances between every query sentence and
    every document sentence are summed and divided by the number of document
    sentences.
    """

    def __init__(self, model, weigh=False):
        """
        model: object passed to ``get_sentences_embeddings`` for encoding.
        weigh: divide each distance by the document/query sentence-count ratio.
        """
        self.model = model
        self.weigh = weigh

    def _complex(self, query_embeddings, document_embeddings):
        """Return sum(all query-doc sentence cosine distances) / #doc sentences.

        Always returns a Python float. Raises ZeroDivisionError for a document
        without sentence embeddings.
        """
        len_query = len(query_embeddings)
        len_doc = len(document_embeddings)

        if len_doc == 0:
            raise ZeroDivisionError('document has no sentence embeddings')
        if len_query == 0:
            # No pairs to compare: the sum of distances is empty.
            return 0.0

        # One vectorized call computes the full (len_query x len_doc)
        # distance matrix instead of len_query * len_doc separate 1x1 calls.
        pairwise = cdist(query_embeddings, document_embeddings, "cosine")
        return float(pairwise.sum() / len_doc)

    def rank(self, query, docs):
        """Return document indices ordered by ascending (weighted) distance."""
        if len(docs) == 1:
            return [0]

        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]

        embeddings_query = get_sentences_embeddings(sents_query, self.model)
        embeddings_docs = [get_sentences_embeddings(x, self.model) for x in sents_docs]

        distances = []
        for embeddings_doc in embeddings_docs:
            weight = 1
            if self.weigh:
                # Weight = 1 / (#doc sentences / #query sentences).
                weight *= 1 / (self._get_weight(len(embeddings_query), len(embeddings_doc)))

            distances.append(weight * self._complex(embeddings_query, embeddings_doc))

        return self.dists_to_indices(np.array(distances))

In [20]:
# Smoke test: pairwise-distance ranker without length weighting.
sbertcomplexranker = SBERTcomplexRanker(model)

sbertcomplexranker.rank(
    "i saw a cat. The cat was sitting on the mat. ",
    [
        "I like cats. Even though I wasn't sure. the cat was seen by me. Yeey cats, i Love mats too",
        "Even though I wasn't sure. the cat was seen by me. It was still okay for me.",
        "Even though I wasn't sure. ",
        "the cat was seen by me. ",
    ],
)

[3, 0, 1, 2]

In [21]:
# Smoke test: pairwise-distance ranker with length weighting.
sbertcomplexranker = SBERTcomplexRanker(model, weigh=True)

sbertcomplexranker.rank(
    "i saw a cat. The cat was sitting on the mat. ",
    [
        "I like cats. Even though I wasn't sure. the cat was seen by me. Yeey cats, i Love mats too",
        "Even though I wasn't sure. the cat was seen by me. It was still okay for me.",
        "Even though I wasn't sure. ",
        "the cat was seen by me. ",
    ],
)

[0, 1, 3, 2]

In [136]:
# Scratch check: the mean of the cosine distances from v1 to v2 and to v3 is
# compared with the cosine distance from v1 to the mean of v2 and v3.
v1 = np.array([1, 2, 3, 4, 5])
v2 = np.array([0, 3, 4, 2, 7])
v3 = np.array([8, 3, 2, 6, 6])

# cdist needs 2-D inputs, so each vector becomes a single-row matrix.
dist_v1_v2 = cdist(v1[np.newaxis, :], v2[np.newaxis, :], "cosine")[0]
dist_v1_v3 = cdist(v1[np.newaxis, :], v3[np.newaxis, :], "cosine")[0]

mean_dists = np.mean([dist_v1_v2, dist_v1_v3])
print('mean dists:', mean_dists)

mean_v2_v3 = np.mean([v2, v3], axis=0)
dist_v1_meanv2v3 = cdist(v1[np.newaxis, :], mean_v2_v3[np.newaxis, :], "cosine")[0]

print('dist_v1_meanv2v3:', dist_v1_meanv2v3)

mean dists: 0.1256163017555681
dist_v1_meanv2v3: [0.05236904]


In [181]:
class SBERTlcsRanker(Ranker):
    """Rank documents using only the sentences similar to the query.

    Despite the name, no longest-common-subsequence table is computed. A
    document is represented by the mean embedding of those of its sentences
    whose cosine similarity to some query sentence exceeds a threshold, and
    that mean is compared with the mean query embedding.
    """

    def __init__(self, model, similarity_threshold=.3, weigh=False):
        """
        model: object passed to ``get_sentences_embeddings`` for encoding.
        similarity_threshold: a document sentence is kept when its cosine
            similarity to a query sentence is strictly greater than this.
        weigh: divide each distance by the document/query sentence-count ratio.
        """
        self.model = model
        self.sim_threshold = similarity_threshold
        self.weigh = weigh

    def _lcs(self, query_embeddings, document_embeddings):
        """Return the cosine distance between query and matched-document means.

        A document sentence that matches k query sentences is counted k times
        in the document mean. Returns 1.0 when no sentence pair exceeds the
        threshold (or either side has no sentences). The result is always a
        Python float, so the caller can build a regular 1-D array from a mix
        of matching and non-matching documents.
        """
        if len(query_embeddings) == 0 or len(document_embeddings) == 0:
            return 1.0

        query_embeddings = np.asarray(query_embeddings)
        document_embeddings = np.asarray(document_embeddings)

        # (n_query x n_doc) similarity matrix in one vectorized call.
        similarities = 1 - cdist(query_embeddings, document_embeddings, "cosine")

        # np.nonzero walks the matrix row by row, i.e. query sentence first
        # and document sentence second; the column indices are the matched
        # document sentences (with repeats).
        _, matched_doc_idx = np.nonzero(similarities > self.sim_threshold)
        if matched_doc_idx.size == 0:
            return 1.0

        mean_query_embedding = query_embeddings.mean(axis=0).reshape(1, -1)
        mean_doc_embedding = document_embeddings[matched_doc_idx].mean(axis=0).reshape(1, -1)

        return float(cdist(mean_query_embedding, mean_doc_embedding, "cosine")[0, 0])

    def rank(self, query, docs):
        """Return document indices ordered by ascending (weighted) distance."""
        if len(docs) == 1:
            return [0]

        sents_query = nltk.sent_tokenize(query)
        sents_docs = [nltk.sent_tokenize(doc) for doc in docs]

        embeddings_query = get_sentences_embeddings(sents_query, self.model)
        embeddings_docs = [get_sentences_embeddings(x, self.model) for x in sents_docs]

        distances = []
        for embeddings_doc in embeddings_docs:
            weight = 1
            if self.weigh:
                # Weight = 1 / (#doc sentences / #query sentences).
                weight *= 1 / (self._get_weight(len(embeddings_query), len(embeddings_doc)))
            distances.append(weight * self._lcs(embeddings_query, embeddings_doc))

        return self.dists_to_indices(np.array(distances))

In [182]:
# Smoke test: similarity-threshold (LCS) ranker without length weighting.
# Bound to its own name so it no longer overwrites the SBERTMaxRanker
# instance held in `sbertmax_ranker`.
sbert_lcs_ranker = SBERTlcsRanker(model, similarity_threshold=.3)

sbert_lcs_ranker.rank(
    "i saw a cat. The cat was sitting on the mat. ",
    [
        "I like cats. Even though I wasn't sure. the cat was seen by me. Yeey cats, i Love mats too",
        "Even though I wasn't sure. the cat was seen by me. It was still okay for me.",
        "Even though I wasn't sure. ",
        "the cat was seen by me. ",
    ],
)


[0, 1, 3, 2]

In [183]:
# Smoke test: similarity-threshold (LCS) ranker with length weighting.
# Bound to its own name so it no longer overwrites the SBERTMaxRanker
# instance held in `sbertmax_ranker`.
sbert_lcs_ranker = SBERTlcsRanker(model, similarity_threshold=.3, weigh=True)

sbert_lcs_ranker.rank(
    "i saw a cat. The cat was sitting on the mat. ",
    [
        "I like cats. Even though I wasn't sure. the cat was seen by me. Yeey cats, i Love mats too",
        "Even though I wasn't sure. the cat was seen by me. It was still okay for me.",
        "Even though I wasn't sure. ",
        "the cat was seen by me. ",
    ],
)


[0, 1, 3, 2]

In [48]:
class SBERTRanker(Ranker):
    """Rank documents by cosine distance between whole-text embeddings.

    The query and each document are embedded as a single text (no sentence
    splitting).
    """

    def __init__(self, model):
        """model: object passed to ``get_sentences_embeddings`` for encoding."""
        self.model = model

    def rank(self, query, docs):
        """Return document indices ordered by ascending cosine distance."""
        # A single candidate is trivially ranked first (same shortcut as
        # SBERTcomplexRanker / SBERTlcsRanker).
        if len(docs) == 1:
            return [0]

        # Unpacking accepts any sequence of documents, not only lists.
        embeddings = get_sentences_embeddings([query, *docs], self.model)

        # Row 0 is the query; the remaining rows are the documents.
        dists = cdist(embeddings[0].reshape(1, -1), embeddings[1:], "cosine")[0]
        return self.dists_to_indices(dists)

In [49]:
# Whole-text SBERT ranker built on the loaded model.
sbert_ranker = SBERTRanker(model)

In [50]:
# Smoke test: whole-text SBERT ranker on single-sentence documents.
sbert_ranker.rank(
    "i saw a cat.",
    [
        "the cat was seen by me.",
        "Even though I wasn't sure.",
        "pineapple on pizza is bad",
        "i saw a cat",
    ],
)

[3, 0, 2, 1]

In [51]:
# Inspect which model ids (hex(id(model))) currently have cached embeddings.
SENTENCE_EMBEDDINGS_CACHE.keys()

dict_keys(['0x7f9e90083630'])

In [52]:
class Doc2vecRanker(Ranker):
    """Rank documents by cosine distance between inferred Doc2Vec vectors."""

    stopwords = set(stopwords.words('english'))

    def __init__(self, model_fn, weigh=False):
        """
        model_fn: path of a saved Doc2Vec model, loaded with ``Doc2Vec.load``.
        weigh:    divide each distance by the document/query sentence-count ratio.
        """
        self.model = Doc2Vec.load(model_fn)
        self.weigh = weigh

    def _preprocess_text(self, text):
        """Tokenize ``text`` into lower-cased alphabetic, non-stopword tokens."""
        words = nltk.word_tokenize(text)
        words = [word.lower() for word in words if word.isalpha()]
        words = [w for w in words if w not in self.stopwords]
        return words

    def rank(self, query, docs):
        """Return document indices ordered by ascending (weighted) distance."""
        # A single candidate is trivially ranked first (same shortcut as
        # SBERTcomplexRanker / SBERTlcsRanker).
        if len(docs) == 1:
            return [0]

        query_vector = self.model.infer_vector(self._preprocess_text(query))
        doc_vectors = [self.model.infer_vector(self._preprocess_text(d)) for d in docs]

        # cdist expects 2-D inputs: one query row against all document rows.
        dists = cdist(query_vector.reshape(1, -1), np.array(doc_vectors), "cosine")[0]

        if self.weigh:
            # The length ratio is measured in sentences, not tokens.
            sents_query = nltk.sent_tokenize(query)
            sents_docs = [nltk.sent_tokenize(doc) for doc in docs]
            dists *= 1 / (self._get_weights(sents_query, sents_docs))

        return self.dists_to_indices(dists)

In [56]:
# Build a length-weighted Doc2Vec ranker from the saved model at D2V_BEST_MODEL.
# NOTE(review): hard-coded absolute path; this cell only works where that
# file exists. Consider making it configurable.
D2V_BEST_MODEL = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/train_results/doc2vec2/d2v_17'
d2v_ranker = Doc2vecRanker(D2V_BEST_MODEL, weigh=True)

In [57]:
# Smoke test: Doc2Vec ranker on a toy query and four documents.
d2v_ranker.rank(
    "i saw a cat. The cat was sitting on the mat. ",
    [
        "I like cats. Even though I wasn't sure. the cat was seen by me. Yeey cats, i Love mats too",
        "Even though I wasn't sure. the cat was seen by me. It was still okay for me.",
        "Even though I wasn't sure. ",
        "the cat was seen by me. ",
    ],
)

[0, 1, 2, 3]

In [127]:
def calc_nr_irrelevant_above(rels_sorted, types_sorted, ttype='original', rel_label=1):
    """Average number of irrelevant other-type docs ranked above docs of ``ttype``.

    For every position holding a document of type ``ttype``, count the
    documents ranked before it that have relevance 0 and a different type.
    The counts are summed and divided by the number of ``ttype`` documents.

    rels_sorted / types_sorted: NumPy arrays in ranked order.
    rel_label: accepted for signature compatibility with ``calc_rr``; not used.

    Returns the int 0 when nothing was counted, otherwise a float.
    Raises Exception if no document has type ``ttype``.
    """
    positions = np.where(types_sorted == ttype)[0]
    if positions.size == 0:
        raise Exception('no relevant docs of this type')

    # True where a document is irrelevant and belongs to another type.
    irrelevant_other_type = (rels_sorted == 0) & (types_sorted != ttype)

    total_above = sum(int(np.count_nonzero(irrelevant_other_type[:pos]))
                      for pos in positions)

    return total_above / len(positions) if total_above else 0

In [128]:
# Worked example for type 'a' (positions 1 and 5): 1 and 2 irrelevant
# other-type docs are ranked above them, so the result is (1 + 2) / 2.
calc_nr_irrelevant_above(np.array([0,1,1,1, 0, 1]), np.array(['o', 'a',  'o',  'o', 'o', 'a']), ttype='a')

1.5

In [129]:
                     
# Worked example for type 'o': irrelevant docs of the same type are not counted.
calc_nr_irrelevant_above(np.array([0,0,1,0,1, 0, 1]), np.array(['o', 'a', 'u', 'o',  'o', 'o', 'a']), ttype='o')
#                                                                0    0    1    0     1    0    1   (relevance label under each type)

0.75

In [58]:
def calc_rr(rels_sorted, types_sorted, ttype='original', rel_label=1):
    """Mean reciprocal rank of the documents with ``rel_label`` and type ``ttype``.

    rels_sorted / types_sorted: NumPy arrays in ranked order (rank 1 first).

    Raises Exception if no document with relevance ``rel_label`` has type
    ``ttype``.
    """
    # 0-based positions of the documents carrying the requested label.
    labelled_positions = np.nonzero(rels_sorted == rel_label)[0]
    # Of those, keep the positions whose document has the requested type.
    type_mask = types_sorted[labelled_positions] == ttype
    selected_positions = labelled_positions[np.nonzero(type_mask)[0]]

    if selected_positions.size == 0:
        raise Exception('no relevant docs of this type')

    ranks = selected_positions + 1
    return np.mean(1 / ranks)


In [101]:
def calculate_stats(df,
                        ranker,
                        types=['original', 'degree_2', 'degree_3', 'degree_4', 
                               'degree_4_split', 'degree_8', 'degree_8_split'],
                        limit=None,
                        func=calc_rr                        
                       ):
    """Rank every query's documents and compute one metric row per query.

    df:     DataFrame with columns 'qid', 'query', 'doc', 'rel' and 'type'.
    ranker: object whose ``rank(query, docs)`` returns document indices in
            ranked order.
    types:  document types for which ``func`` is evaluated (one column each).
    limit:  maximum number of queries to process; ``None`` processes all.
    func:   metric called as ``func(rels, types, ttype=..., rel_label=...)``
            on the ranked relevance and type arrays.

    Returns a DataFrame with columns ``['qid'] + types +
    ['original_irrelevant']``. The last column is ``func`` evaluated for type
    'original' with ``rel_label=0``.
    """
    columns = ['qid'] + list(types) + ['original_irrelevant']

    qids = list(set(df['qid']))
    nr_qids = len(qids)

    # Collect plain rows and build the frame once at the end:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.x.
    rows = []
    for i, qid in enumerate(qids):

        if limit is not None and i >= limit:
            break

        query_df = df[df['qid'] == qid].reset_index(drop=True)
        query = query_df['query'].iloc[0]
        print('query:', query)

        indices_sorted = ranker.rank(query, query_df['doc'].values.tolist())

        # Reorder this query's documents according to the ranking.
        docs_sorted = query_df.iloc[indices_sorted]
        rels_sorted = docs_sorted['rel'].values
        types_sorted = docs_sorted['type'].values

        query_stats_row = [qid]
        for ttype in types:
            query_stats_row.append(func(rels_sorted, types_sorted, ttype=ttype))
        query_stats_row.append(func(rels_sorted, types_sorted,
                                    ttype='original', rel_label=0))
        rows.append(query_stats_row)

        if (i+1) % 5 == 0:
            print('processed {:d} of {:d}'.format(i+1, nr_qids))

    return pd.DataFrame(rows, columns=columns)


In [102]:
def calculate_mrr_stats(df,
                        ranker,
                        types=['original', 'degree_2', 'degree_3', 'degree_4', 
                               'degree_4_split', 'degree_8', 'degree_8_split'],
                        limit=None
                       ):
    """Run ``calculate_stats`` with ``calc_rr`` as the per-query metric."""
    stats_options = dict(types=types, limit=limit, func=calc_rr)
    return calculate_stats(df, ranker, **stats_options)


In [103]:
def calculate_nr_irrel_above_stats(df,
                        ranker,
                        types=['original', 'degree_2', 'degree_3', 'degree_4', 
                               'degree_4_split', 'degree_8', 'degree_8_split'],
                        limit=None
                       ):
    """Run ``calculate_stats`` with ``calc_nr_irrelevant_above`` as the per-query metric."""
    stats_options = dict(types=types, limit=limit, func=calc_nr_irrelevant_above)
    return calculate_stats(df, ranker, **stats_options)

In [108]:
# Load all query/document rows and keep those whose qid is marked 'test' in
# the train/test split file.
# NOTE(review): hard-coded absolute paths; consider making them configurable.
queries_file = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/queries3.csv'

# Malformed CSV lines are skipped with a warning. `error_bad_lines` was
# deprecated in pandas 1.3 and removed in 2.0; `on_bad_lines='warn'` is its
# replacement. Older pandas rejects the new keyword with a TypeError, so fall
# back to the legacy spelling there.
try:
    df = pd.read_csv(queries_file, on_bad_lines='warn')
except TypeError:
    df = pd.read_csv(queries_file, error_bad_lines=False)
ttsplit_df = pd.read_csv('/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/train_data/queries3_split.csv')

test_qids = set(ttsplit_df[ttsplit_df['type'] == 'test']['qid'].values)

df_test = df.loc[df['qid'].isin(test_qids)]

df_test

Unnamed: 0,qid,query,rel,type,doc
0,0,aaa a common cause of a skid is,0,original,Discounts and benefits are available at all He...
1,0,aaa a common cause of a skid is,0,original,AAA North Penn provides Authorized On - Line P...
2,0,aaa a common cause of a skid is,0,original,â¢ EAP-TLS authentication takes place between...
3,0,aaa a common cause of a skid is,0,original,This chart represents a partial representation...
4,0,aaa a common cause of a skid is,0,original,Points to Consider before Buying John Deere Sk...
...,...,...,...,...,...
205755,5143,why was the 19th amendment considered progressive,1,degree_8,the 19th amendment is a very important amendme...
205756,5143,why was the 19th amendment considered progressive,1,degree_8_split,"Best Answer: The 16th, 17th, 18th, and 19th am..."
205757,5143,why was the 19th amendment considered progressive,1,degree_8_split,"Best Answer: The 16th, 17th, 18th, and 19th am..."
205758,5143,why was the 19th amendment considered progressive,1,degree_8_split,The 18th amendment is the only amendment to be...


In [109]:
# Number of distinct query ids in the test split.
len(test_qids)

400

In [110]:
# Number of query/document rows belonging to the test queries.
len(df_test)

16000

In [195]:
# Evaluate SBERTlcsRanker over a grid of similarity thresholds, with and
# without length weighting. For every ranker the per-query stats go to
# <OUTPUT_DIR>/<ranker_name>.csv and the wall-clock time is appended to
# <OUTPUT_DIR>/time_log.txt.
# NOTE(review): hard-coded absolute path; consider making it configurable.
OUTPUT_DIR = '/run/media/root/Windows/Users/agnes/Downloads/data/msmarco/eval/eval_model_regression'
os.makedirs(OUTPUT_DIR, exist_ok=True)  # make sure the output directory exists before writing

LCS_THRESHOLDS = [.05, .1, .2, .3, .4, .5, .6, .7, .8, .9]

# Weighted variants first, then unweighted. '{:g}' renders 0.05 -> '0.05' and
# 0.1 -> '0.1', giving names such as 'sbert_lcs_st_0.05_weigh'.
rankers = OrderedDict(
    ('sbert_lcs_st_{:g}{:s}'.format(threshold, '_weigh' if weigh else ''),
     SBERTlcsRanker(model, similarity_threshold=threshold, weigh=weigh))
    for weigh in (True, False)
    for threshold in LCS_THRESHOLDS
)

stats_dfs = []
time_log_path = os.path.join(OUTPUT_DIR, 'time_log.txt')

for ranker_name, ranker in rankers.items():
    print('>>> checking ranker "{:s}"'.format(ranker_name))

    start_time = time.time()
    # Alternative metric: calculate_nr_irrel_above_stats(df_test, ranker, limit=400)
    stats_df = calculate_mrr_stats(df_test, ranker, limit=400)
    end_time = time.time()

    # The context manager closes the log even if the write fails.
    with open(time_log_path, 'a') as time_log:
        time_log.write('{:s}: {:}\n'.format(ranker_name, end_time-start_time))

    stats_dfs.append(stats_df)
    stats_df.to_csv(os.path.join(OUTPUT_DIR, ranker_name + '.csv'), index=False)


>>> checking ranker "sbert_lcs_st_0.05_weigh"
query: aaa a common cause of a skid is
query: output devices definition
query: what is the recovery from mastectomy with reconstruction
query: meaning of the name Joan
query: what does castor oil do for your hair
processed 5 of 400
query: what is the yearly salary of a critical care rn
query: can you use coconut oil as lube
query: number of neutrons for pt
query: what problems do macro viruses cause to your computer
query: what is the universal blood type
processed 10 of 400
query: what is the term describe
query: what is the weather in englewood, co
query: amtrak phone numbers
query: what does blue cypress essential oil do for the body.
query: pandora premium how many users
processed 15 of 400
query: why do we need fish oil
query: what pro soccer players wear nike phelon
query: what language is used in haiti
query: another name for puerto rican
query: most golf courses by state
processed 20 of 400
query: what kind of degree are you suppose

query: where is the gobi desert located
query: which court is also known as a trial court?
query: what color is the ribbon for ovarian cancer
query: where is susan kelechi watson from in li
processed 180 of 400
query: what countries are in the nordic region
query: which describes the difference between a trade surplus and a trade deficit?
query: does turkey contain campylobacter
query: ffs johnny delusional lyrics
query: where is the biggest ferris wheel on earth
processed 185 of 400
query: what character did bill paxton play in the titanic
query: foods that contain triglycerides
query: who is a carrier of food poisoning
query: which planet is the hottest in the solar system
query: which of the following is a risk factor associated with cardiometabolic risk?
processed 190 of 400
query: what county is blanco, tx in?
query: function of bcl-2
query: who created the first steam engine train
query: gelatin nutritional content
query: function abap
processed 195 of 400
query: who is father of

query: largest musical fountain in the world
processed 355 of 400
query: what is passion
query: what is pixel computing
query: what is protocol is cancer found after hysterectomy
query: is the appendix part of the colon
query: what is pulse scale
processed 360 of 400
query: jiraiya is from what clan
query: itemized deduction definition
query: what is squamous cell carcinoma is malignant or benign?
query: what is technical feasibility?
query: what is silicone sealant used for
processed 365 of 400
query: left parietal lobe tumor
query: what is prescription strength zantac
query: minamata disease was caused by mercury contamination
query: what is software quality assurance testing.
query: metformin side effects long term
processed 370 of 400
query: what is the average net worth of congressmen
query: medication range orders guideline
query: meaning of name jones
query: what is the best mulch color
query: muscle cell depolarization neuromuscular junction
processed 375 of 400
query: what is 

query: where can viruses live
query: waxing means the moon is
query: definition of anecdotal
processed 130 of 400
query: when was the first nobel prize
query: definition of a direct primary care clinic
query: define tremor
query: weight of wistar rat
query: define predicate adjective
processed 135 of 400
query: where does the fracking water comes from
query: definition of salary sacrifice
query: wex bank customer service number
query: definition of twins
query: definition of an electron
processed 140 of 400
query: definition of extrinsic motivation
query: define pop culture
query: did rocky marciano play for the cubs
query: difference between aluminum and tin
query: when was the couch invented
processed 145 of 400
query: describe dietary nutrients for water
query: did johnny cash sing long black train
query: where is cal poly slo located
query: do all babies get colic
query: what are requirements for qidp
processed 150 of 400
query: where is moosewood restaurant
query: where is homegro

query: how to compare annual salary from hourly wage
processed 305 of 400
query: how to make spruce pitch salve
query: if a president is accused of breaking the twenty-second amendment, that means the president is trying to
query: what is an asset sale of a business
query: what is bad back pain
query: what is chuck
processed 310 of 400
query: how to prepare to work in nursing home
query: how to get cricket wireless account number
query: what is bdc reticle
query: what is bollywood movies
query: how to raise coleus
processed 315 of 400
query: what is cofactor values
query: what is btu rating
query: what is a blood clot made of vein
query: if going to Cabo San Lucas, where do you fly into
query: what is chicken laying mash
processed 320 of 400
query: what does neuroma mean
query: integration and application the definition
query: what is eschar
query: is azure ad sync installed
query: what is crystalline
processed 325 of 400
query: indeed customer service number
query: what is discount ra

query: what years was the revolutionary war fought?
query: can i wear plaid on plaid
processed 85 of 400
query: what is the best teeth whitening product
query: what was the storm in forrest gump
query: what will a structural engineer look for
query: can you contribute to an ira after retirement
query: How much profit can be made through maui whitening
processed 90 of 400
query: The average resting heart rate for an adult male is
query: cast the guest book
query: when did women vote in the us
query: when are the little league games being broadcast
query: the y generation definition
processed 95 of 400
query: can someone live on a ventilator indefinitely
query: the result of fdr's court-packing scheme
query: cash management definition banking
query: when dissolved beryllium chloride reacts
query: what was the result of fdr's court-packing scheme
processed 100 of 400
query: when a neuron is in the resting potential state:
query: when did india gain its independence
query: can tea tree oil

query: how many calories in trigo
query: what is albuterol sulfate used to treat
processed 265 of 400
query: what is a car spindle
query: how many nfl teams are there
query: what is air molecules
query: how many people died from overdose in us
query: what is a bs in nursing
processed 270 of 400
query: how many fitbit steps equal a mile
query: how many ounces in a pint of strawberries
query: what is a patty wagon
query: what is a hackintosh computer
query: how many states do not automatically restore felons right after the sentence is completed?
processed 275 of 400
query: how many people die from diabetes
query: how many songs can an apple touch hold
query: how many players are there on roblox
query: what is trumps agenda as president
query: how many yards of concrete can a truck hold
processed 280 of 400
query: how much does a aaa membership cost?
query: how many intelligence agencies
query: what is a soccer players salary
query: how much does a microchip cost for a dog
query: how muc

query: average miles per year on vehicle
query: what medicine is in percocet
processed 45 of 400
query: average cost of toe shortening surgery uk
query: what makes hawaii unique
query: what may be the cause of hair loss
query: what must be displayed on each side of the forward half of a registered vessel
query: average life of heat pump compressor
processed 50 of 400
query: safeco number customer service
query: average cost of linoleum flooring
query: what qualifications do i need to be a psychologist
query: average income in utah per person
query: bertha wegmann artist denmark
processed 55 of 400
query: average salary for rn in arkansas
query: safe harbor hours
query: what percentage are attorney's allowed in new york workers comp cases
query: baja food definition
query: should voting be made compulsory under the law
processed 60 of 400
query: shingles symptoms in women
query: benefits of unsubsidized loans
query: what term was used to describe the power given to the male heads of fam

query: what does a hyphen look like
processed 225 of 400
query: who wrote the jungle book
query: who was the general who helped expand byzantine empire
query: why did wyoming allow women to vote first
query: what does a dna sequencer do
query: why is a blood orange called q a blood orange?
processed 230 of 400
query: what does a neutralization reaction produce?
query: how is peat formed
query: why do we celebrate st patrick and wear green
query: how is shelving, inc. founded
query: why does a nipple bleed
processed 235 of 400
query: what does it mean when your arms fall asleep when you sleep
query: what does sterile means
query: how is natural gas measured and sold
query: why did the vietnam war happen?
query: how is a shakespearean sonnet arranged
processed 240 of 400
query: how long does it take to feel better with uti
query: will ray donovan be renewed
query: what does partial diversity mean
query: what does pale stool mean
query: how long does it take to produce a banana after plan

query: output devices definition
query: what is the recovery from mastectomy with reconstruction
query: meaning of the name Joan
query: what does castor oil do for your hair
processed 5 of 400
query: what is the yearly salary of a critical care rn
query: can you use coconut oil as lube
query: number of neutrons for pt
query: what problems do macro viruses cause to your computer
query: what is the universal blood type
processed 10 of 400
query: what is the term describe
query: what is the weather in englewood, co
query: amtrak phone numbers
query: what does blue cypress essential oil do for the body.
query: pandora premium how many users
processed 15 of 400
query: why do we need fish oil
query: what pro soccer players wear nike phelon
query: what language is used in haiti
query: another name for puerto rican
query: most golf courses by state
processed 20 of 400
query: what kind of degree are you supposed to have to become a psychiatrist
query: can neem oil be used on dogs
query: where i

query: does turkey contain campylobacter
query: ffs johnny delusional lyrics
query: where is the biggest ferris wheel on earth
processed 185 of 400
query: what character did bill paxton play in the titanic
query: foods that contain triglycerides
query: who is a carrier of food poisoning
query: which planet is the hottest in the solar system
query: which of the following is a risk factor associated with cardiometabolic risk?
processed 190 of 400
query: what county is blanco, tx in?
query: function of bcl-2
query: who created the first steam engine train
query: gelatin nutritional content
query: function abap
processed 195 of 400
query: who is father of computer
query: what county is knoxville tennessee in
query: what county is burleson tx in
query: who is jacob latimore?
query: who developed association theory
processed 200 of 400
query: who is ace hood
query: what county is luckenbach tx
query: what county is manila, utah in
query: what county is township of washington nj in
query: who

query: what is technical feasibility?
query: what is silicone sealant used for
processed 365 of 400
query: left parietal lobe tumor
query: what is prescription strength zantac
query: minamata disease was caused by mercury contamination
query: what is software quality assurance testing.
query: metformin side effects long term
processed 370 of 400
query: what is the average net worth of congressmen
query: medication range orders guideline
query: meaning of name jones
query: what is the best mulch color
query: muscle cell depolarization neuromuscular junction
processed 375 of 400
query: what is the flipp app
query: what is the electronic medical record
query: narcotics are derived from what plant
query: what is the best way to take activated charcoal
query: what is the difference between healthcare management and healthcare administration
processed 380 of 400
query: hemp seeds oil health benefits
query: symptoms of a faulty ignition coil
query: causes of stinging skin
query: what does the

query: define predicate adjective
processed 135 of 400
query: where does the fracking water comes from
query: definition of salary sacrifice
query: wex bank customer service number
query: definition of twins
query: definition of an electron
processed 140 of 400
query: definition of extrinsic motivation
query: define pop culture
query: did rocky marciano play for the cubs
query: difference between aluminum and tin
query: when was the couch invented
processed 145 of 400
query: describe dietary nutrients for water
query: did johnny cash sing long black train
query: where is cal poly slo located
query: do all babies get colic
query: what are requirements for qidp
processed 150 of 400
query: where is moosewood restaurant
query: where is homegrown from?
query: where is pb and j located in albuquerque
query: where is hyperion tree located
query: waltham abbey essex
processed 155 of 400
query: does dropbox have a free plan
query: what are zappos core values
query: where is mcway falls californ

processed 310 of 400
query: how to prepare to work in nursing home
query: how to get cricket wireless account number
query: what is bdc reticle
query: what is bollywood movies
query: how to raise coleus
processed 315 of 400
query: what is cofactor values
query: what is btu rating
query: what is a blood clot made of vein
query: if going to Cabo San Lucas, where do you fly into
query: what is chicken laying mash
processed 320 of 400
query: what does neuroma mean
query: integration and application the definition
query: what is eschar
query: is azure ad sync installed
query: what is crystalline
processed 325 of 400
query: indeed customer service number
query: what is discount rate
query: incubation period for BKD
query: internet access in haiti
query: incubation time for HPV
processed 330 of 400
query: if you have a hysterectomy can you still get cervical cancer
query: is body tag beginning mandatory in html
query: is workers compensation considered a medical benefit
query: is burglary a f

processed 90 of 400
query: The average resting heart rate for an adult male is
query: cast the guest book
query: when did women vote in the us
query: when are the little league games being broadcast
query: the y generation definition
processed 95 of 400
query: can someone live on a ventilator indefinitely
query: the result of fdr's court-packing scheme
query: cash management definition banking
query: when dissolved beryllium chloride reacts
query: what was the result of fdr's court-packing scheme
processed 100 of 400
query: when a neuron is in the resting potential state:
query: when did india gain its independence
query: can tea tree oil be used vaginally
query: choice hotels reservation phone number
query: comcast internet customer service phone number
processed 105 of 400
query: thermage cost
query: when is gopher season in ca
query: trudef duration shingles cost
query: causes of swollen calves and ankles
query: causes of night sweats in women
processed 110 of 400
query: cholecystit

query: how many ounces in a pint of strawberries
query: what is a patty wagon
query: what is a hackintosh computer
query: how many states do not automatically restore felons right after the sentence is completed?
processed 275 of 400
query: how many people die from diabetes
query: how many songs can an apple touch hold
query: how many players are there on roblox
query: what is trumps agenda as president
query: how many yards of concrete can a truck hold
processed 280 of 400
query: how much does a aaa membership cost?
query: how many intelligence agencies
query: what is a soccer players salary
query: how much does a microchip cost for a dog
query: how much does a welder make in ontario canada
processed 285 of 400
query: what is bruno mars nationality
query: how much does the average american make a year
query: how much do audio-visual make
query: what is drugs addiction
query: what is true arrogance
processed 290 of 400
query: how much senior dba get paid in dubai
query: what is truth i

query: average life of heat pump compressor
processed 50 of 400
query: safeco number customer service
query: average cost of linoleum flooring
query: what qualifications do i need to be a psychologist
query: average income in utah per person
query: bertha wegmann artist denmark
processed 55 of 400
query: average salary for rn in arkansas
query: safe harbor hours
query: what percentage are attorney's allowed in new york workers comp cases
query: baja food definition
query: should voting be made compulsory under the law
processed 60 of 400
query: shingles symptoms in women
query: benefits of unsubsidized loans
query: what term was used to describe the power given to the male heads of families in ancient rome?
query: what the law say the legal process is to evict
query: what temperature should shingles be installed
processed 65 of 400
query: bosulif cost
query: what started the scientific revolution
query: what type of wire is used for telephone
query: cairo ga is what county
query: what 

query: why did wyoming allow women to vote first
query: what does a dna sequencer do
query: why is a blood orange called q a blood orange?
processed 230 of 400
query: what does a neutralization reaction produce?
query: how is peat formed
query: why do we celebrate st patrick and wear green
query: how is shelving, inc. founded
query: why does a nipple bleed
processed 235 of 400
query: what does it mean when your arms fall asleep when you sleep
query: what does sterile means
query: how is natural gas measured and sold
query: why did the vietnam war happen?
query: how is a shakespearean sonnet arranged
processed 240 of 400
query: how long does it take to feel better with uti
query: will ray donovan be renewed
query: what does partial diversity mean
query: what does pale stool mean
query: how long does it take to produce a banana after planting it
processed 245 of 400
query: what does it mean when an account is charged off
query: how long does it take for food poisoning to set in
query: wh

query: meaning of the name Joan
query: what does castor oil do for your hair
processed 5 of 400
query: what is the yearly salary of a critical care rn
query: can you use coconut oil as lube
query: number of neutrons for pt
query: what problems do macro viruses cause to your computer
query: what is the universal blood type
processed 10 of 400
query: what is the term describe
query: what is the weather in englewood, co
query: amtrak phone numbers
query: what does blue cypress essential oil do for the body.
query: pandora premium how many users
processed 15 of 400
query: why do we need fish oil
query: what pro soccer players wear nike phelon
query: what language is used in haiti
query: another name for puerto rican
query: most golf courses by state
processed 20 of 400
query: what kind of degree are you supposed to have to become a psychiatrist
query: can neem oil be used on dogs
query: where is coconut oil sourced from
query: admiral customer service phone number
query: why is my facial s

query: which describes the difference between a trade surplus and a trade deficit?
query: does turkey contain campylobacter
query: ffs johnny delusional lyrics
query: where is the biggest ferris wheel on earth
processed 185 of 400
query: what character did bill paxton play in the titanic
query: foods that contain triglycerides
query: who is a carrier of food poisoning
query: which planet is the hottest in the solar system
query: which of the following is a risk factor associated with cardiometabolic risk?
processed 190 of 400
query: what county is blanco, tx in?
query: function of bcl-2
query: who created the first steam engine train
query: gelatin nutritional content
query: function abap
processed 195 of 400
query: who is father of computer
query: what county is knoxville tennessee in
query: what county is burleson tx in
query: who is jacob latimore?
query: who developed association theory
processed 200 of 400
query: who is ace hood
query: what county is luckenbach tx
query: what coun

processed 365 of 400
query: left parietal lobe tumor
query: what is prescription strength zantac
query: minamata disease was caused by mercury contamination
query: what is software quality assurance testing.
query: metformin side effects long term
processed 370 of 400
query: what is the average net worth of congressmen
query: medication range orders guideline
query: meaning of name jones
query: what is the best mulch color
query: muscle cell depolarization neuromuscular junction
processed 375 of 400
query: what is the flipp app
query: what is the electronic medical record
query: narcotics are derived from what plant
query: what is the best way to take activated charcoal
query: what is the difference between healthcare management and healthcare administration
processed 380 of 400
query: hemp seeds oil health benefits
query: symptoms of a faulty ignition coil
query: causes of stinging skin
query: what does the name joanne mean
query: what is the function of dna polymerase quizlet
process

query: wex bank customer service number
query: definition of twins
query: definition of an electron
processed 140 of 400
query: definition of extrinsic motivation
query: define pop culture
query: did rocky marciano play for the cubs
query: difference between aluminum and tin
query: when was the couch invented
processed 145 of 400
query: describe dietary nutrients for water
query: did johnny cash sing long black train
query: where is cal poly slo located
query: do all babies get colic
query: what are requirements for qidp
processed 150 of 400
query: where is moosewood restaurant
query: where is homegrown from?
query: where is pb and j located in albuquerque
query: where is hyperion tree located
query: waltham abbey essex
processed 155 of 400
query: does dropbox have a free plan
query: what are zappos core values
query: where is mcway falls california
query: difference between a laxative and a stool softener
query: what basis are epithelial tissues classified
processed 160 of 400
query: 

query: how to raise coleus
processed 315 of 400
query: what is cofactor values
query: what is btu rating
query: what is a blood clot made of vein
query: if going to Cabo San Lucas, where do you fly into
query: what is chicken laying mash
processed 320 of 400
query: what does neuroma mean
query: integration and application the definition
query: what is eschar
query: is azure ad sync installed
query: what is crystalline
processed 325 of 400
query: indeed customer service number
query: what is discount rate
query: incubation period for BKD
query: internet access in haiti
query: incubation time for HPV
processed 330 of 400
query: if you have a hysterectomy can you still get cervical cancer
query: is body tag beginning mandatory in html
query: is workers compensation considered a medical benefit
query: is burglary a felony
query: is clinical active serum
processed 335 of 400
query: what is fraud scrubbing
query: is daniel gillies on vampire diaries
query: Is 103 Prime or Composite
query: is

query: cast the guest book
query: when did women vote in the us
query: when are the little league games being broadcast
query: the y generation definition
processed 95 of 400
query: can someone live on a ventilator indefinitely
query: the result of fdr's court-packing scheme
query: cash management definition banking
query: when dissolved beryllium chloride reacts
query: what was the result of fdr's court-packing scheme
processed 100 of 400
query: when a neuron is in the resting potential state:
query: when did india gain its independence
query: can tea tree oil be used vaginally
query: choice hotels reservation phone number
query: comcast internet customer service phone number
processed 105 of 400
query: thermage cost
query: when is gopher season in ca
query: trudef duration shingles cost
query: causes of swollen calves and ankles
query: causes of night sweats in women
processed 110 of 400
query: cholecystitis meaning
query: time warner internet prices forum
query: causes of mumps
quer

query: how many ounces in a pint of strawberries
query: what is a patty wagon
query: what is a hackintosh computer
query: how many states do not automatically restore felons right after the sentence is completed?
processed 275 of 400
query: how many people die from diabetes
query: how many songs can an apple touch hold
query: how many players are there on roblox
query: what is trumps agenda as president
query: how many yards of concrete can a truck hold
processed 280 of 400
query: how much does a aaa membership cost?
query: how many intelligence agencies
query: what is a soccer players salary
query: how much does a microchip cost for a dog
query: how much does a welder make in ontario canada
processed 285 of 400
query: what is bruno mars nationality
query: how much does the average american make a year
query: how much do audio-visual make
query: what is drugs addiction
query: what is true arrogance
processed 290 of 400
query: how much senior dba get paid in dubai
query: what is truth i

query: bertha wegmann artist denmark
processed 55 of 400
query: average salary for rn in arkansas
query: safe harbor hours
query: what percentage are attorney's allowed in new york workers comp cases
query: baja food definition
query: should voting be made compulsory under the law
processed 60 of 400
query: shingles symptoms in women
query: benefits of unsubsidized loans
query: what term was used to describe the power given to the male heads of families in ancient rome?
query: what the law say the legal process is to evict
query: what temperature should shingles be installed
processed 65 of 400
query: bosulif cost
query: what started the scientific revolution
query: what type of wire is used for telephone
query: cairo ga is what county
query: what town does roman atwood live in
processed 70 of 400
query: what the purpose of developing project network
query: standardization definition in chemistry
query: what river feeds the bear lake in utah
query: what to wear to granddaughter's gradu

query: how is peat formed
query: why do we celebrate st patrick and wear green
query: how is shelving, inc. founded
query: why does a nipple bleed
processed 235 of 400
query: what does it mean when your arms fall asleep when you sleep
query: what does sterile means
query: how is natural gas measured and sold
query: why did the vietnam war happen?
query: how is a shakespearean sonnet arranged
processed 240 of 400
query: how long does it take to feel better with uti
query: will ray donovan be renewed
query: what does partial diversity mean
query: what does pale stool mean
query: how long does it take to produce a banana after planting it
processed 245 of 400
query: what does it mean when an account is charged off
query: how long does it take for food poisoning to set in
query: what does shingles look like when it starts
query: why was the 19th amendment considered progressive
query: what do you do if a patient falls
processed 250 of 400
query: how long to cook asparagus tips
query: what 

query: what is the universal blood type
processed 10 of 400
query: what is the term describe
query: what is the weather in englewood, co
query: amtrak phone numbers
query: what does blue cypress essential oil do for the body.
query: pandora premium how many users
processed 15 of 400
query: why do we need fish oil
query: what pro soccer players wear nike phelon
query: what language is used in haiti
query: another name for puerto rican
query: most golf courses by state
processed 20 of 400
query: what kind of degree are you supposed to have to become a psychiatrist
query: can neem oil be used on dogs
query: where is coconut oil sourced from
query: admiral customer service phone number
query: why is my facial skin so dry
processed 25 of 400
query: are any sears store closing in columbus ohio
query: what to use for oily and dry facial skin
query: what is xapps in sap
query: how to incorporate coconut oil into your diet
query: what is zip code for ft lauderdale fl
processed 30 of 400
query: 

query: which of the following is a risk factor associated with cardiometabolic risk?
processed 190 of 400
query: what county is blanco, tx in?
query: function of bcl-2
query: who created the first steam engine train
query: gelatin nutritional content
query: function abap
processed 195 of 400
query: who is father of computer
query: what county is knoxville tennessee in
query: what county is burleson tx in
query: who is jacob latimore?
query: who developed association theory
processed 200 of 400
query: who is ace hood
query: what county is luckenbach tx
query: what county is manila, utah in
query: what county is township of washington nj in
query: who makes raspberry pi
processed 205 of 400
query: how can i see travel document number
query: hashtags should they be in lowercase
query: what county is milan, il in
query: who is the father of atomic theory
query: highest wage garnishment rates
processed 210 of 400
query: who makes dolphin browser
query: who is vicki li
query: how are black s

query: metformin side effects long term
processed 370 of 400
query: what is the average net worth of congressmen
query: medication range orders guideline
query: meaning of name jones
query: what is the best mulch color
query: muscle cell depolarization neuromuscular junction
processed 375 of 400
query: what is the flipp app
query: what is the electronic medical record
query: narcotics are derived from what plant
query: what is the best way to take activated charcoal
query: what is the difference between healthcare management and healthcare administration
processed 380 of 400
query: hemp seeds oil health benefits
query: symptoms of a faulty ignition coil
query: causes of stinging skin
query: what does the name joanne mean
query: what is the function of dna polymerase quizlet
processed 385 of 400
query: what is the general relationship between the vapor pressure  of a liquid and the strength of the intermolecular forces in the liquid
query: what is the emf of the battery?
query: what is 

query: did rocky marciano play for the cubs
query: difference between aluminum and tin
query: when was the couch invented
processed 145 of 400
query: describe dietary nutrients for water
query: did johnny cash sing long black train
query: where is cal poly slo located
query: do all babies get colic
query: what are requirements for qidp
processed 150 of 400
query: where is moosewood restaurant
query: where is homegrown from?
query: where is pb and j located in albuquerque
query: where is hyperion tree located
query: waltham abbey essex
processed 155 of 400
query: does dropbox have a free plan
query: what are zappos core values
query: where is mcway falls california
query: difference between a laxative and a stool softener
query: what basis are epithelial tissues classified
processed 160 of 400
query: where is pleasant view nursing home
query: where is naperville, il
query: what are the three subatomic parts of an atom
query: does animal gelatin smell
query: where is the house that ameri

query: what is btu rating
query: what is a blood clot made of vein
query: if going to Cabo San Lucas, where do you fly into
query: what is chicken laying mash
processed 320 of 400
query: what does neuroma mean
query: integration and application the definition
query: what is eschar
query: is azure ad sync installed
query: what is crystalline
processed 325 of 400
query: indeed customer service number
query: what is discount rate
query: incubation period for BKD
query: internet access in haiti
query: incubation time for HPV
processed 330 of 400
query: if you have a hysterectomy can you still get cervical cancer
query: is body tag beginning mandatory in html
query: is workers compensation considered a medical benefit
query: is burglary a felony
query: is clinical active serum
processed 335 of 400
query: what is fraud scrubbing
query: is daniel gillies on vampire diaries
query: Is 103 Prime or Composite
query: is halloumi cheese fattening
query: what is lining in a dress
processed 340 of 40

query: when did women vote in the us
query: when are the little league games being broadcast
query: the y generation definition
processed 95 of 400
query: can someone live on a ventilator indefinitely
query: the result of fdr's court-packing scheme
query: cash management definition banking
query: when dissolved beryllium chloride reacts
query: what was the result of fdr's court-packing scheme
processed 100 of 400
query: when a neuron is in the resting potential state:
query: when did india gain its independence
query: can tea tree oil be used vaginally
query: choice hotels reservation phone number
query: comcast internet customer service phone number
processed 105 of 400
query: thermage cost
query: when is gopher season in ca
query: trudef duration shingles cost
query: causes of swollen calves and ankles
query: causes of night sweats in women
processed 110 of 400
query: cholecystitis meaning
query: time warner internet prices forum
query: causes of mumps
query: when do ee bonds mature


query: how many songs can an apple touch hold
query: how many players are there on roblox
query: what is trumps agenda as president
query: how many yards of concrete can a truck hold
processed 280 of 400
query: how much does a aaa membership cost?
query: how many intelligence agencies
query: what is a soccer players salary
query: how much does a microchip cost for a dog
query: how much does a welder make in ontario canada
processed 285 of 400
query: what is bruno mars nationality
query: how much does the average american make a year
query: how much do audio-visual make
query: what is drugs addiction
query: what is true arrogance
processed 290 of 400
query: how much senior dba get paid in dubai
query: what is truth in lending (tila)
query: how much is the nose surgery in philippines
query: how much is the average retirement account
query: what is amalgam fillings
processed 295 of 400
query: how soon do i have to cash ee savings bonds
query: how to find the wavelength of a electromagneti

query: should voting be made compulsory under the law
processed 60 of 400
query: shingles symptoms in women
query: benefits of unsubsidized loans
query: what term was used to describe the power given to the male heads of families in ancient rome?
query: what the law say the legal process is to evict
query: what temperature should shingles be installed
processed 65 of 400
query: bosulif cost
query: what started the scientific revolution
query: what type of wire is used for telephone
query: cairo ga is what county
query: what town does roman atwood live in
processed 70 of 400
query: what the purpose of developing project network
query: standardization definition in chemistry
query: what river feeds the bear lake in utah
query: what to wear to granddaughter's graduation
query: business consulting services definition
processed 75 of 400
query: can myasthenia gravis affect your fingers
query: what type of element is lithium
query: what to use to clean new tile and grout before sealing
query

query: why did the vietnam war happen?
query: how is a shakespearean sonnet arranged
processed 240 of 400
query: how long does it take to feel better with uti
query: will ray donovan be renewed
query: what does partial diversity mean
query: what does pale stool mean
query: how long does it take to produce a banana after planting it
processed 245 of 400
query: what does it mean when an account is charged off
query: how long does it take for food poisoning to set in
query: what does shingles look like when it starts
query: why was the 19th amendment considered progressive
query: what do you do if a patient falls
processed 250 of 400
query: how long to cook asparagus tips
query: what does the shingles vaccine contain
query: how long is a human generation
query: how long is the typical bungee jump
query: how long it take chicken to start laying
processed 255 of 400
query: what does the average xavier student pay in tuition
query: how many calories and carbs in crab meat
query: what hotels 

processed 15 of 400
query: why do we need fish oil
query: what pro soccer players wear nike phelon
query: what language is used in haiti
query: another name for puerto rican
query: most golf courses by state
processed 20 of 400
query: what kind of degree are you supposed to have to become a psychiatrist
query: can neem oil be used on dogs
query: where is coconut oil sourced from
query: admiral customer service phone number
query: why is my facial skin so dry
processed 25 of 400
query: are any sears store closing in columbus ohio
query: what to use for oily and dry facial skin
query: what is xapps in sap
query: how to incorporate coconut oil into your diet
query: what is zip code for ft lauderdale fl
processed 30 of 400
query: are gas or electric dryers more efficient
query: what is lemon oil used for
query: asbestos removal should i remove
query: portfolio control senior associate salary
query: what medicines can lower hdl
processed 35 of 400
query: price per pound for aluminum
query: 

query: function of bcl-2
query: who created the first steam engine train
query: gelatin nutritional content
query: function abap
processed 195 of 400
query: who is father of computer
query: what county is knoxville tennessee in
query: what county is burleson tx in
query: who is jacob latimore?
query: who developed association theory
processed 200 of 400
query: who is ace hood
query: what county is luckenbach tx
query: what county is manila, utah in
query: what county is township of washington nj in
query: who makes raspberry pi
processed 205 of 400
query: how can i see travel document number
query: hashtags should they be in lowercase
query: what county is milan, il in
query: who is the father of atomic theory
query: highest wage garnishment rates
processed 210 of 400
query: who makes dolphin browser
query: who is vicki li
query: how are black sand beaches formed
query: what did rudolf virchow do for the cells
query: which u.s. state has the highest income tax?
processed 215 of 400
que

query: what is the electronic medical record
query: narcotics are derived from what plant
query: what is the best way to take activated charcoal
query: what is the difference between healthcare management and healthcare administration
processed 380 of 400
query: hemp seeds oil health benefits
query: symptoms of a faulty ignition coil
query: causes of stinging skin
query: what does the name joanne mean
query: what is the function of dna polymerase quizlet
processed 385 of 400
query: what is the general relationship between the vapor pressure  of a liquid and the strength of the intermolecular forces in the liquid
query: what is the emf of the battery?
query: what is the generic for metformin
query: can you use lavender oil on hair
query: how long for okra to come up
processed 390 of 400
query: what is the phobia of long words
query: what is the medical terminology for rba
query: what is the ph of pure life bottled water?
query: what can tea tree oil do for skin
query: what are side effe

processed 150 of 400
query: where is moosewood restaurant
query: where is homegrown from?
query: where is pb and j located in albuquerque
query: where is hyperion tree located
query: waltham abbey essex
processed 155 of 400
query: does dropbox have a free plan
query: what are zappos core values
query: where is mcway falls california
query: difference between a laxative and a stool softener
query: what basis are epithelial tissues classified
processed 160 of 400
query: where is pleasant view nursing home
query: where is naperville, il
query: what are the three subatomic parts of an atom
query: does animal gelatin smell
query: where is the house that american horror story the coven was filmed
processed 165 of 400
query: where is smokey mountain located
query: where is serbia
query: does highlighting your hair blonde make it more frizzy
query: where is the overlook hotel
query: does my purse count as a carry on united airlines
processed 170 of 400
query: which deserts do cactus grow in
qu

query: what is discount rate
query: incubation period for BKD
query: internet access in haiti
query: incubation time for HPV
processed 330 of 400
query: if you have a hysterectomy can you still get cervical cancer
query: is body tag beginning mandatory in html
query: is workers compensation considered a medical benefit
query: is burglary a felony
query: is clinical active serum
processed 335 of 400
query: what is fraud scrubbing
query: is daniel gillies on vampire diaries
query: Is 103 Prime or Composite
query: is halloumi cheese fattening
query: what is lining in a dress
processed 340 of 400
query: is lanolin safe
query: is macaulay culkin dead
query: what is loose leaf amazon
query: is sammy sosa in the hall of fame
query: is mumps a seasonal disease
processed 345 of 400
query: what is indigent status in court
query: is or tambo airport the same as johannesburg airport
query: what is mu law
query: what is my mbti animal
query: what is mass balance
processed 350 of 400
query: what is 

query: can tea tree oil be used vaginally
query: choice hotels reservation phone number
query: comcast internet customer service phone number
processed 105 of 400
query: thermage cost
query: when is gopher season in ca
query: trudef duration shingles cost
query: causes of swollen calves and ankles
query: causes of night sweats in women
processed 110 of 400
query: cholecystitis meaning
query: time warner internet prices forum
query: causes of mumps
query: when do ee bonds mature
query: town of clinton assessors
processed 115 of 400
query: when does your baby turn upside down
query: when was party train released
query: when to use a comma
query: what is the series after heroes of olympus
query: uhaul, age to rent a truck
processed 120 of 400
query: verizon phone number customer service number
query: where do you find turquoise
query: understatement literary definition
query: when was the book of philemon written
query: weather in fiji sept and oct
processed 125 of 400
query: where do smu

query: how much does a welder make in ontario canada
processed 285 of 400
query: what is bruno mars nationality
query: how much does the average american make a year
query: how much do audio-visual make
query: what is drugs addiction
query: what is true arrogance
processed 290 of 400
query: how much senior dba get paid in dubai
query: what is truth in lending (tila)
query: how much is the nose surgery in philippines
query: how much is the average retirement account
query: what is amalgam fillings
processed 295 of 400
query: how soon do i have to cash ee savings bonds
query: how to find the wavelength of a electromagnetic frequency
query: how old is jordan zimmerman
query: what is an associate of applied science
query: how much do fashion journalists make
processed 300 of 400
query: how to desolder copper pipe
query: how to diet with eating little sugar
query: how safe is nexgard
query: what is an automobile tort case
query: how to compare annual salary from hourly wage
processed 305 of

query: what started the scientific revolution
query: what type of wire is used for telephone
query: cairo ga is what county
query: what town does roman atwood live in
processed 70 of 400
query: what the purpose of developing project network
query: standardization definition in chemistry
query: what river feeds the bear lake in utah
query: what to wear to granddaughter's graduation
query: business consulting services definition
processed 75 of 400
query: can myasthenia gravis affect your fingers
query: what type of element is lithium
query: what to use to clean new tile and grout before sealing
query: average salary landscape architect northern california
query: symptoms of anemia, hair
processed 80 of 400
query: The _____ is situated behind the nasal cavity and above the oral cavity
query: synonyms for density
query: can increased wbc affect heart
query: what years was the revolutionary war fought?
query: can i wear plaid on plaid
processed 85 of 400
query: what is the best teeth white

query: how long does it take for food poisoning to set in
query: what does shingles look like when it starts
query: why was the 19th amendment considered progressive
query: what do you do if a patient falls
processed 250 of 400
query: how long to cook asparagus tips
query: what does the shingles vaccine contain
query: how long is a human generation
query: how long is the typical bungee jump
query: how long it take chicken to start laying
processed 255 of 400
query: what does the average xavier student pay in tuition
query: how many calories and carbs in crab meat
query: what hotels are on medano beach
query: how long is incubation period for gastric flu
query: what is accent furniture
processed 260 of 400
query: how many championship rings does tim duncan have
query: what industry is style crest
query: how long for body to get rid of sugar in it
query: how many calories in trigo
query: what is albuterol sulfate used to treat
processed 265 of 400
query: what is a car spindle
query: how 