In [1]:
import json
import os

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from sentence_transformers import SentenceTransformer
from tqdm import tqdm


# The model can be downloaded here: https://ask.ml.eduworks.com/#/experiments/32/runs/9dce37b22a334b40be9acb81e7cbd984
#
# Device, host and credentials are read from environment variables so that real
# secrets do not have to be committed with the notebook. Every fallback below is
# the value that used to be hard-coded, so a plain local run behaves as before.
#   MODEL_DEVICE  - device the model is moved to (set to "cpu" when no GPU is available)
#   ES_HOST       - Elasticsearch URL
#   ES_USER       - Elasticsearch user name
#   ES_PASSWORD   - Elasticsearch password
model_device = os.environ.get('MODEL_DEVICE', 'cuda')
model = SentenceTransformer('./model').to(model_device)
es_host = os.environ.get('ES_HOST', 'http://localhost:9200')
es_user = os.environ.get('ES_USER', 'elastic')
es_password = os.environ.get('ES_PASSWORD', 'changeme')
es = Elasticsearch([es_host], http_auth=(es_user, es_password))

  from .autonotebook import tqdm as notebook_tqdm
NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
def get_return_items(item):
    """Return the fields of a document without its ``'vector'`` entry.

    A new dict is built, so the mapping passed in is left untouched, and a
    document that has no ``'vector'`` key is returned unchanged instead of
    raising ``KeyError``.

    Args:
        item: mapping of field name to value (the caller passes a hit's
            ``_source``).

    Returns:
        dict with every key/value of ``item`` except ``'vector'``.
    """
    return {key: value for key, value in item.items() if key != 'vector'}


def format_hits(hits: list, correct_link: str):
    """Shape search hits into the structures used to evaluate the test data.

    Args:
        hits: list of dicts, each providing 'url', 'text' and 'score'.
        correct_link: the URL a hit must equal to count as correct.

    Returns:
        A tuple ``(top_urls, metric_logs)``: the hit URLs in ranked order, and
        one log dict per hit holding its 1-based rank, whether its URL equals
        ``correct_link``, and its url, text and score.
    """
    top_urls = list(map(lambda entry: entry['url'], hits))

    metric_logs = []
    for position, entry in enumerate(hits, start=1):
        metric_logs.append({
            "rank": position,
            "is_correct_link": entry['url'] == correct_link,
            "url": entry['url'],
            'text': entry['text'],
            "score": entry['score'],
        })

    return top_urls, metric_logs

def search_for_query(query: str, index: str = 'chatbot_data', num_returns: int = 5):
    """Search an Elasticsearch index for the documents closest to ``query``.

    The query text is encoded with the module-level ``model``. Every document
    (``match_all``) is then scored with a ``script_score`` query computing the
    cosine similarity between that embedding and the document's ``"vector"``
    field, and the best ``num_returns`` hits are requested.

    Args:
        query: text to search for.
        index: name of the index to search.
        num_returns: number of hits requested from Elasticsearch.

    Returns:
        list of dicts, one per hit in the order Elasticsearch returned them.
        Each holds ``'score'`` (the hit's ``_score``) merged with the result of
        ``get_return_items`` applied to the hit's ``_source``.
    """
    query_vector = model.encode(query, convert_to_tensor=True).tolist()

    # Kept under its own name so the ``query`` argument (the raw text) is not
    # rebound to the request body.
    es_query = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                # Cosine similarity lies in [-1, 1]; the +1.0 shifts it to
                # [0, 2] because script_score must not produce negative scores.
                "source": 'cosineSimilarity(params.query_vector, "vector") + 1.0',
                "params": {"query_vector": query_vector},
            },
        }
    }

    response = es.search(index=index, query=es_query, size=num_returns)
    return [
        {'score': hit['_score'], **get_return_items(hit['_source'])}
        for hit in response['hits']['hits']
    ]

def get_metrics(outputs: list):
    """Compute top-1, top-3 and top-5 accuracy over evaluation logs.

    An item counts as correct at cutoff ``k`` when any of its first ``k``
    entries under ``'Top 5 Predictions'`` has a truthy ``'is_correct_link'``.
    Items with fewer than ``k`` predictions (including none at all) are judged
    on the predictions they do have rather than raising ``IndexError``.

    Args:
        outputs: list of dicts, each with a ``'Top 5 Predictions'`` list of
            dicts that carry an ``'is_correct_link'`` flag, in ranked order.

    Returns:
        dict with ``'top_1_accuracy'``, ``'top_3_accuracy'`` and
        ``'top_5_accuracy'``, each the fraction of items correct at that
        cutoff. All three are ``0.0`` when ``outputs`` is empty.
    """
    cutoffs = (1, 3, 5)
    if not outputs:
        # Nothing was evaluated; avoid dividing by zero.
        return {f"top_{k}_accuracy": 0.0 for k in cutoffs}

    correct_at = dict.fromkeys(cutoffs, 0)
    for item in outputs:
        predictions = item['Top 5 Predictions']
        for k in cutoffs:
            # Slicing tolerates short prediction lists.
            if any(prediction["is_correct_link"] for prediction in predictions[:k]):
                correct_at[k] += 1

    total = len(outputs)
    return {f"top_{k}_accuracy": correct_at[k] / total for k in cutoffs}
            


def test_data(fn: str, sheet_name: str):
    """Evaluate search accuracy on a spreadsheet of question/URL pairs.

    For every row whose URL cell is a string, the question is searched with
    ``search_for_query``, the hits are compared against the expected URL with
    ``format_hits``, and a log entry is recorded. All log entries are written
    to ``outputs.json`` in the working directory, and the accuracies computed
    by ``get_metrics`` are returned.

    Args:
        fn: path of the Excel workbook.
        sheet_name: sheet to read. The URL is taken from column ``'resource'``
            if present, else ``'URL'``; the question from ``'question'`` if
            present, else ``'Question'``.

    Returns:
        The dict returned by ``get_metrics`` for the collected logs.
    """
    df = pd.read_excel(fn, sheet_name=sheet_name)

    # The column names depend only on the sheet, so resolve them once.
    url_column = 'resource' if 'resource' in df.columns else 'URL'
    question_column = 'question' if 'question' in df.columns else 'Question'

    outputs_list = []
    # ``iterrows`` is a generator with no length; passing ``total`` lets tqdm
    # show a real progress bar and ETA.
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Looping through test data"):
        original_url = row[url_column]
        if not isinstance(original_url, str):
            # Non-string URL cells (presumably empty cells read as NaN) cannot be scored.
            continue

        # Only a leading scheme counts; "http" appearing elsewhere in the URL
        # must not suppress the prefix.
        has_scheme = original_url.lower().startswith(("http://", "https://"))
        url = original_url if has_scheme else f"https://{original_url}"

        question = row[question_column]
        hits = search_for_query(question)
        _top_urls, metric_logs = format_hits(hits, url)

        # Always a real bool so the JSON field does not mix ``true`` with ``0``;
        # a query with no hits is recorded as incorrect.
        is_correct = bool(metric_logs and metric_logs[0]['is_correct_link'])
        outputs_list.append(
            {"question":  question, "is_correct": is_correct, "Top 5 Predictions": metric_logs}
        )

    with open('outputs.json', 'w', encoding='utf-8') as f:
        json.dump(outputs_list, f, ensure_ascii=False, indent=4)
    return get_metrics(outputs_list)
# Run the evaluation on one sheet of the test workbook; the returned metrics
# dict is the last expression of the cell and is therefore displayed as its output.
test_data(fn='./data/AE_test_QA_chatbot_v2.xlsx', sheet_name='made_up_OK_OR')


Looping through test data: 617it [03:06,  3.31it/s]


{'top_1_accuracy': 0.4019448946515397,
 'top_3_accuracy': 0.5883306320907618,
 'top_5_accuracy': 0.6612641815235009}