# Evaluate a ranking algorithm
Compare the results returned by querying an index against the labeled results, scored with NDCG (Normalized Discounted Cumulative Gain).

In [None]:
# Enable autoreload in mode 2 so edited modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
# Load and run the dotenv extension; the cells below read their API keys from
# os.environ (presumably populated from a local .env file -- confirm).
%load_ext dotenv
%dotenv

In [None]:
import hashlib
import json
import os
import re

import openai
import pinecone
from tqdm.autonotebook import tqdm

from models.rank_eval import get_ndcg

In [None]:
# configure
# labels_path: JSON file holding the labeled query results.
# ndcg_at: cutoff passed to get_ndcg as k.
labels_path = '../data/rank/labeled_results/2023-04-21.json'
ndcg_at = 10

# pinecone
# The API key must be present in the environment; a missing PINECONE_KEY raises KeyError here.
pinecone_key = os.environ['PINECONE_KEY']
pinecone_region = 'us-west1-gcp'
pinecone_index = 'conf-ada-002'

# index embedder
# embedding_model is the engine used to embed each query.
# NOTE(review): embedding_len and embedding_metric are not referenced by the
# evaluation cells below; presumably they record how the index was built -- confirm.
embedding_model = 'text-embedding-ada-002'
embedding_len = 1536
embedding_metric = 'cosine'

In [None]:
def get_query_embedding(query):
    """Return the embedding of *query* from the configured embedding model.

    Sends the query to ``openai.Embedding.create`` and returns the
    ``embedding`` field of the first item in the response's ``data`` list.
    """
    response = openai.Embedding.create(input=query, engine=embedding_model)
    first_item = response['data'][0]
    return first_item['embedding']

In [None]:
# Configure the pinecone client with the API key and environment from the config cell.
pinecone.init(api_key=pinecone_key, environment=pinecone_region)

# Handle to the index that the evaluation loop queries.
index = pinecone.Index(pinecone_index)

In [None]:
# init openai
# Credentials come from the environment; a missing variable raises KeyError.
openai.organization = os.environ['OPENAI_ORG']
openai.api_key = os.environ['OPENAI_KEY']
# As the last expression in the cell, the listing is displayed by the notebook --
# presumably a quick check that the credentials work (confirm).
openai.Engine.list()

## Read labels

In [None]:
# Load the labeled query results from labels_path.
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default locale encoding (which would break on non-ASCII result text).
with open(labels_path, 'r', encoding='utf-8') as f:
    labels = json.load(f)
# Number of top-level entries loaded.
print(len(labels))

### Make each result id a hash of the result's contents so that prod and dev ids can be compared

In [None]:
def hash_result(text):
    """Return a hex MD5 id for *text* that ignores whitespace and letter case.

    Every run of whitespace is removed and the remainder is lower-cased
    before hashing, so texts that differ only in spacing, line breaks or
    case map to the same id.
    """
    without_whitespace = re.sub(r"\s+", "", text)
    normalized = without_whitespace.lower().strip()
    digest = hashlib.md5(normalized.encode())
    return digest.hexdigest()

In [None]:
def _with_hashed_ids(label):
    """Rebuild one label entry, keying each result by a hash of its text.

    Only the query and each result's id, score and text are kept.
    """
    query = label['query']
    rekeyed = []
    for result in label['results']:
        rekeyed.append({
            'id': hash_result(result['text']),
            'score': result['score'],
            'text': result['text'],
        })
    return {'query': query, 'results': rekeyed}


# Replace the loaded labels with their content-hashed equivalents.
labels = [_with_hashed_ids(label) for label in labels]

In [None]:
# Show the first label entry to inspect its structure.
labels[0]

## Issue queries

In [None]:
# Run every labeled query against the index and accumulate its NDCG@ndcg_at.

# Request at least ndcg_at matches: with a fixed top_k of 20, raising ndcg_at
# above 20 would silently score a truncated result list.
top_k = max(20, ndcg_at)

total_ndcg = 0.0
for label in labels:
    query = label['query']
    query_embedding = get_query_embedding(query)
    res = index.query([query_embedding], top_k=top_k, include_metadata=True)
    # Key each match by a hash of its text so its id is comparable with the
    # content-hashed ids of the labeled results.
    results = [{
        'id': hash_result(match['metadata']['text']),
        'score': match['score'],
        'text': match['metadata']['text'],
    } for match in res['matches']]
    ndcg = get_ndcg(label['results'], results, k=ndcg_at)
    # Per-query score, printed as the evaluation progresses.
    print(query, ndcg)
    total_ndcg += ndcg

# With no labels the average is undefined; report NaN instead of raising
# ZeroDivisionError.
avg_ndcg = total_ndcg / len(labels) if labels else float('nan')

## Print Average Normalized Discounted Cumulative Gain

In [None]:
# Report the mean NDCG over all labeled queries.
print("avg_ndcg", avg_ndcg)