# Watson Attempt 2

### Imports

In [1]:
import os
import shutil
import json

from whoosh.fields import Schema, TEXT, ID
from whoosh.index import create_in, open_dir, exists_in
from whoosh.qparser import QueryParser, OrGroup
from whoosh.scoring import BM25F
from whoosh.analysis import RegexTokenizer, LowercaseFilter, NgramFilter

In [2]:
from env import env
from shared import wiki_df, questions_df, LemmatizeFilter, filter_query, transform_category # importing from shared takes a few seconds

### Get the OpenAI API key and define the ChatGPT query

In [3]:
# Read the key from the environment; otherwise prompt for it WITHOUT echoing,
# so the secret is never written into the saved cell output.
from getpass import getpass

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # strip() guards against stray whitespace picked up when pasting the key
    api_key = getpass("Enter your OpenAI API Key: ").strip()

Enter your OpenAI API Key:  [REDACTED: a live key was saved here in plain text; revoke it and do not commit cell output containing secrets]


In [None]:
# query ChatGPT using the OpenAI API
def query_ChatGPT(query, timeout=60):
    """Send `query` as a single user message and return the reply text.

    The request goes to the OpenAI chat completions endpoint with
    temperature 0 and is authorized with the module-level `api_key`.

    Args:
        query: the prompt text, sent as the content of one "user" message.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The `content` string of the first choice in the response.

    Raises:
        requests.RequestException: on network failure or timeout.
        RuntimeError: if the decoded response has no
            `choices[0].message.content` (for example an API error payload);
            the full response is included in the message.
    """
    import requests

    model = "gpt-3.5-turbo-0301"

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + api_key,
    }

    json_data = {
        "model": model,
        "temperature": 0,
        "messages": [
            {
                "role": "user",
                "content": query,
            },
        ],
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=json_data,
        timeout=timeout,
    ).json()

    # Explicit validation instead of `assert`, which is removed under `python -O`.
    try:
        return response["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(f"unexpected response from the OpenAI API: {response}") from err

### Define the query transform
NOTE: There were more transformations here at one point, but they did not improve the score and were removed.

In [5]:
# transform the query, just filters it for now
def transform_query(query):
    """Return `query` after passing it through `filter_query`.

    Filtering is the only transformation currently applied.
    """
    return filter_query(query)

### Define the reranking function
Asks ChatGPT to rerank the top k results. k is a hyperparameter that was tuned to be 15 for the best results.

In [7]:
# rerank top k results with ChatGPT
# template4 is the prompt text that gets prepended to the JSON payload of
# question + guesses; read it with an explicit encoding so the result does
# not depend on the platform's default locale.
with open("ChatGPT_template/t4.txt", "r", encoding="utf-8") as file:
    template4 = file.read()
def rerank_results(question, results, k=15):
    """Ask ChatGPT to reorder the top `k` results and return the merged ranking.

    Args:
        question: the question text placed in the prompt payload.
        results: list of `(title, rank)` tuples, best first.
        k: how many leading results to send for reranking (default 15).

    Returns:
        A list of `(title, rank)` tuples: the reranked head with ranks
        1..n, followed by the untouched remainder of `results`. If there is
        nothing to rerank, or the ChatGPT call or its reply cannot be used,
        `results` is returned unchanged (reranking is best-effort).
    """
    # Slice instead of indexing so fewer than k results is not an error.
    top = results[:max(k, 0)]
    if not top:
        return results

    payload = {"question": question, "guesses": [guess[0] for guess in top]}
    prompt = template4 + json.dumps(payload)
    try:
        reranked_guesses = json.loads(query_ChatGPT(prompt))["reranked_guesses"]
        # Indexing (rather than slicing) raises when ChatGPT returned too few
        # guesses, which triggers the fallback below.
        reranked_head = [(reranked_guesses[i], i + 1) for i in range(len(top))]
    except Exception:
        # Best-effort: any failure falls back to the original ranking, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return results

    # Continue right after the reranked head; the previous `k + 1` offset
    # dropped the result at index k.
    return reranked_head + list(results[len(top):])

### Define the Watson class

In [8]:
class Watson:
    """Whoosh-backed retrieval over the wiki pages, with ChatGPT reranking.

    On construction the questions are loaded, the text analyzer is built,
    the on-disk index at `env.index_path` is opened (or created and filled),
    and a query parser over the `content` field is prepared.
    """

    def __init__(self):
        # NOTE(review): the original called `get_questions_df()`, which is
        # not defined in this file; the frame imported from `shared` is used
        # instead. Confirm that `shared.questions_df` is the DataFrame.
        self.questions_df = questions_df
        self.Q = len(self.questions_df.index)
        self._analyzer = self._build_analyzer()
        self._index = self._build_index()
        self._parser = self._build_parser()

    # builds the analyzer
    def _build_analyzer(self):
        """Return the analysis chain: regex tokenizer, lowercasing, lemmatizing."""
        return RegexTokenizer() | LowercaseFilter() | LemmatizeFilter()

    # builds the index
    def _build_index(self):
        """Open the index at `env.index_path`, creating and filling it if absent.

        When no index exists, any leftover directory at that path is removed,
        a fresh index is created, and one document per wiki row is added with
        its `title` and `content` (the `titles` and `categories` schema
        fields are declared but not populated here).
        """
        if exists_in(env.index_path):
            return open_dir(env.index_path)

        if os.path.exists(env.index_path):
            shutil.rmtree(env.index_path)
        # makedirs also creates missing parent directories (mkdir would fail).
        os.makedirs(env.index_path)
        schema = Schema(
            title=ID(stored=True),
            titles=TEXT(analyzer=self._analyzer),
            categories=TEXT(analyzer=self._analyzer),
            content=TEXT(analyzer=self._analyzer),
        )
        ix = create_in(env.index_path, schema)
        with ix.writer() as writer:
            # NOTE(review): the original called `get_wiki_df()`, which is not
            # defined in this file; the frame imported from `shared` is used.
            for _, row in wiki_df.iterrows():
                writer.add_document(title=row.title, content=row.text)
        return ix

    # builds the parser
    def _build_parser(self):
        """Return a parser for the `content` field that ORs query terms (factor 0.9)."""
        og = OrGroup.factory(0.9)
        return QueryParser("content", schema=self._index.schema, group=og)

    # searches the index for the category and question provided
    def search(self, category, question, scorer=BM25F):
        """Search for `category` + `question` and return the reranked results.

        Args:
            category: raw category text, passed through `transform_category`.
            question: raw question text, passed through `transform_query`.
            scorer: a weighting class; it is instantiated with no arguments.

        Returns:
            A list of `(title, rank)` tuples (ranks start at 1) after
            `rerank_results`, or None when nothing matched.

        Raises:
            TypeError: re-raised from the transformation/parsing step after
                the offending inputs have been printed.
        """
        try:
            transformed_category = transform_category(category)
            transformed_question = transform_query(question)
            query = self._parser.parse(transformed_category + " " + transformed_question)
        except TypeError:
            # The original printed `query_pipeline(question)`, an undefined
            # name, so a NameError hid the real error. Print the inputs instead.
            print(f"could not build a query from category={category!r}, question={question!r}")
            raise

        with self._index.searcher(weighting=scorer()) as searcher:
            results = searcher.search(query, limit=None)
            if results.scored_length() == 0:
                return None
            # Materialize the hits while the searcher is still open.
            hits = [(r["title"], r.rank + 1) for r in results]

        # Rerank after the searcher is closed so it is not held open for the
        # duration of the ChatGPT request.
        return rerank_results(transformed_question, hits)

    # tests the system using MRR or precision at 1
    def test(self, scorer=BM25F, eval="mrr"):
        """Evaluate on every row of `self.questions_df`.

        Each question triggers one `search` call (and therefore one rerank).

        Args:
            scorer: weighting class forwarded to `search`.
            eval: "mrr", "p@1", or "both".

        Returns:
            The MRR for "mrr", the precision at 1 for "p@1", or the tuple
            `(mrr, precision_at_1)` for "both".

        Raises:
            ValueError: if `eval` is not one of the recognized values.
        """
        if eval not in ("mrr", "p@1", "both"):
            # Validate up front so no searches run for an unknown metric.
            raise ValueError(f"unrecognized evaluation type: {eval}")

        reciprocal_rank_sum = 0.0
        correct = 0
        for _, row in self.questions_df.iterrows():
            results = self.search(row.category, row.question, scorer)
            rank = Watson.get_rank(results, row.answer)
            if rank > 0:
                reciprocal_rank_sum += 1 / rank
            if Watson.is_correct(results, row.answer):
                correct += 1

        mrr = reciprocal_rank_sum / self.Q
        precision_at_1 = correct / self.Q
        if eval == "mrr":
            return mrr
        if eval == "p@1":
            return precision_at_1
        return mrr, precision_at_1

    # gets the rank of an answer
    @staticmethod
    def get_rank(results, answer):
        """Return the rank of `answer` in `results`, or 0 if it is absent.

        `answer` may hold several accepted variants separated by "|".
        Variants are tried in order and compared case-insensitively; the rank
        of the first variant found is returned. None or empty `results`
        (no hits) yields 0.
        """
        if not results:
            return 0
        for answer_variant in answer.split("|"):
            wanted = answer_variant.lower()
            for doc_title, rank in results:
                if doc_title.lower() == wanted:
                    return rank
        return 0

    # determines if an answer is correct
    @staticmethod
    def is_correct(results, answer):
        """Return True if the top result matches any "|"-separated variant of `answer`.

        Comparison is case-insensitive. None or empty `results` is never correct.
        """
        if not results:
            return False
        guess = results[0][0].lower()
        return any(variant.lower() == guess for variant in answer.split("|"))

    # gets the guess from an element in the results list
    @staticmethod
    def get_guess(results):
        """Return the title of the top result, or None when there are no results."""
        if not results:
            return None
        guess, _ = results[0]
        return guess
    

### Instantiate Watson

In [9]:
# Constructing Watson loads the questions and opens the index (or builds it if it does not exist yet).
watson = Watson()   

### Test Watson

In [10]:
# eval="both" returns the tuple (MRR, precision at 1); every question runs a search plus a rerank request.
mrr_score, pa1_score = watson.test(eval="both")
# bare expression so the notebook displays the two scores
mrr_score, pa1_score

(0.5816188877674071, 0.54)