# Watson Attempt 1

### Imports

In [1]:
import os
import sys

from whoosh.fields import Schema, TEXT, ID
from whoosh.index import create_in, open_dir, exists_in
from whoosh.qparser import QueryParser, OrGroup
from whoosh.scoring import BM25F
from whoosh.analysis import RegexTokenizer, LowercaseFilter

In [2]:
from env import env
from shared import wiki_df, questions_df, LemmatizeFilter, filter_query # importing from shared takes a few seconds

### Define the Watson class

In [5]:
class Watson:
    """Whoosh-backed question answering over ``wiki_df``, scored on ``questions_df``.

    Construction opens the index stored at ``env.index_path`` (building it
    from ``wiki_df`` first when no index exists there) and prepares a query
    parser over the ``content`` field.

    Attributes:
        Q: number of rows in ``questions_df``; the denominator of every score.
    """

    def __init__(self):
        self.Q = len(questions_df.index)
        self._analyzer = self._build_analyzer()
        self._index = self._build_index()
        self._parser = self._build_parser()

    def _build_analyzer(self):
        """Return the analysis chain: regex tokenizer, lowercasing, lemmatizing."""
        return RegexTokenizer() | LowercaseFilter() | LemmatizeFilter()

    def _build_index(self):
        """Open the index at ``env.index_path``, creating and filling it if absent.

        Returns:
            The opened (or freshly built) Whoosh index.
        """
        if exists_in(env.index_path):
            return open_dir(env.index_path)

        # exists_in() is also false for a directory that exists but holds no
        # index, so tolerate an existing directory and create missing parents.
        os.makedirs(env.index_path, exist_ok=True)
        # Only `title` and `content` are populated below; `titles` and
        # `categories` are declared in the schema but never written here.
        schema = Schema(title=ID(stored=True),
                        titles=TEXT(analyzer=self._analyzer),
                        categories=TEXT(analyzer=self._analyzer),
                        content=TEXT(analyzer=self._analyzer))
        ix = create_in(env.index_path, schema)
        with ix.writer() as writer:
            for _, row in wiki_df.iterrows():
                writer.add_document(title=row.title, content=row.text)
        return ix

    def _build_parser(self):
        """Return a parser for the ``content`` field that ORs terms (factor 0.9)."""
        og = OrGroup.factory(0.9)
        return QueryParser("content", schema=self._index.schema, group=og)

    def search(self, category, question, scorer=BM25F, limit=None):
        """Run one question against the index.

        Args:
            category: question category, prepended to the query with ``^0.5``.
            question: question text; passed through ``filter_query`` first.
            scorer: weighting class, instantiated once per call.
            limit: maximum number of hits to score and return; ``None``
                (the default) ranks every matching document.

        Returns:
            A list of ``(title, rank)`` tuples with ``rank = hit.rank + 1``,
            or ``None`` when no document was scored.
        """
        # NOTE(review): the category is not passed through filter_query, and
        # for a multi-word category the ^0.5 suffix presumably attaches to its
        # last token only -- confirm against the Whoosh query syntax.
        query = self._parser.parse(f"{category}^0.5 " + filter_query(question))
        with self._index.searcher(weighting=scorer()) as searcher:
            results = searcher.search(query, limit=limit)
            if results.scored_length() == 0:
                return None
            return [(r["title"], r.rank + 1) for r in results]

    def test(self, scorer=BM25F, eval="mrr"):
        """Score the system over every row of ``questions_df``.

        Args:
            scorer: weighting class forwarded to ``search``.
            eval: ``"mrr"`` for mean reciprocal rank or ``"p@1"`` for
                precision at rank 1. (The name shadows the builtin; it is
                kept because callers pass it by keyword.)

        Returns:
            The requested score as a float.

        Raises:
            ValueError: if ``eval`` is neither ``"mrr"`` nor ``"p@1"``.
        """
        if eval == "mrr":
            reciprocal_sum = 0.0
            for _, row in questions_df.iterrows():
                results = self.search(row.category, row.question, scorer)
                rank = Watson.get_rank(results, row.answer)
                if rank > 0:
                    reciprocal_sum += 1 / rank
            return reciprocal_sum / self.Q
        if eval == "p@1":
            correct = 0
            for _, row in questions_df.iterrows():
                # Only the top hit is inspected, so there is no need to score
                # and materialise the full ranking for every question.
                results = self.search(row.category, row.question, scorer, limit=1)
                if Watson.is_correct(results, row.answer):
                    correct += 1
            return correct / self.Q
        raise ValueError(f"unrecognized evaluation type: {eval}")

    @staticmethod
    def get_rank(results, answer):
        """Return the best rank at which any accepted answer appears.

        Args:
            results: list of ``(title, rank)`` tuples from ``search``, or ``None``.
            answer: accepted titles separated by ``|``; compared case-insensitively.

        Returns:
            The smallest matching rank, or ``0`` when nothing matches or there
            are no results.
        """
        if not results:
            return 0
        variants = {variant.lower() for variant in answer.split("|")}
        # Take the minimum over all matches so that the best-placed variant
        # wins regardless of the order the variants are listed in.
        return min(
            (rank for doc_title, rank in results if doc_title.lower() in variants),
            default=0,
        )

    @staticmethod
    def is_correct(results, answer):
        """Return True when the top hit equals an accepted answer (case-insensitive).

        An empty or ``None`` result list counts as incorrect.
        """
        if not results:
            return False
        guess = results[0][0].lower()
        return any(variant.lower() == guess for variant in answer.split("|"))

    @staticmethod
    def get_guess(results):
        """Return the title of the top hit, or ``None`` when there are no results."""
        if not results:
            return None
        return results[0][0]
    

### Instantiate Watson

In [6]:
# Instantiating Watson opens the index at env.index_path, or builds it from
# wiki_df when no index exists there yet.
watson = Watson()

### Test Watson

In [5]:
# Mean reciprocal rank over questions_df; the bare name on the last line makes
# the notebook display the value as the cell output.
mrr_score = watson.test(eval="mrr")
mrr_score

0.36053742784688664

In [None]:
# Precision at 1: the fraction of questions whose top-ranked title matches an
# accepted answer.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so
# no score was captured here.
pa1_score = watson.test(eval="p@1")
pa1_score

KeyboardInterrupt: 

### Perform Error Analysis

In [48]:
# Partition the questions by whether the top-ranked title is an accepted
# answer, keeping each question row paired with the title that was guessed.
correct, incorrect = [], []
for _, row in questions_df.iterrows():
    results = watson.search(row.category, row.question)
    guess = Watson.get_guess(results)
    bucket = correct if Watson.is_correct(results, row.answer) else incorrect
    bucket.append((row, guess))

In [49]:
# (questions answered correctly, questions evaluated)
len(correct), len(correct) + len(incorrect)

(25, 100)

In [21]:
def get_wiki_page(title):
    """Return the text of the first wiki_df row titled exactly *title*.

    The comparison is a plain equality test on the ``title`` column; ``None``
    is returned when no row matches.
    """
    matching_texts = wiki_df[wiki_df.title == title].text.to_numpy()
    return matching_texts[0] if len(matching_texts) != 0 else None

def log_result(r, file=sys.stdout):
    """Write a readable report for one evaluated question.

    Args:
        r: a ``(question_row, guess)`` pair; the row supplies ``category``,
            ``question`` and ``answer`` (accepted titles separated by ``|``),
            and ``guess`` is the guessed title or ``None``.
        file: writable text stream that receives the report.

    The report lists the question fields, the wiki text of the first answer
    variant that has a page, and -- only when the guess is not one of the
    accepted answers -- the wiki text of the guessed page.
    """
    q, g = r
    variants = q.answer.split("|")

    file.write("   CATEGORY: " + q.category + "\n")
    file.write("   QUESTION: " + q.question + "\n")
    file.write("     ANSWER: " + q.answer + "\n")
    file.write("      GUESS: " + (g or "") + "\n")
    file.write("ANSWER TEXT:\n\n")

    # Use the first answer variant that has a wiki page.
    answer_text = None
    for answer in variants:
        answer_text = get_wiki_page(answer)
        if answer_text is not None:
            break
    if answer_text is None:
        print(f"NO RESULTS FOR {q.answer}")
        answer_text = ""
    file.write(answer_text)

    # Compare against each variant exactly (case-insensitive), the same rule
    # Watson.is_correct applies; a substring test would treat a partial title
    # as a correct guess.
    guessed_right = g is not None and g.lower() in {v.lower() for v in variants}
    if not guessed_right:
        file.write("\n\n")
        file.write(" GUESS TEXT:\n\n")
        # The guessed page belongs under its header only; writing it for a
        # correct guess would repeat the page after the answer text with no
        # header. A missing page is written as empty text rather than
        # passing None to write().
        guess_text = get_wiki_page(g) if g is not None else None
        file.write(guess_text or "")
    file.write("\n\n\n")

In [14]:
# Number of correct / incorrect results to sample into the log directories.
ncorrect = 5
nincorrect = 15
# exist_ok makes this cell safe to re-run without a separate existence check.
os.makedirs("logs", exist_ok=True)

In [15]:
# Write an evenly spaced sample of at most `ncorrect` correct results, one
# file per sampled index.
os.makedirs("logs/correct", exist_ok=True)

# Ceiling division keeps the sample at or below `ncorrect` entries; the floor
# of 1 avoids range()'s zero-step ValueError when the list is empty.
correct_step = max(1, -(-len(correct) // ncorrect))
for i in range(0, len(correct), correct_step):
    # Wiki text is not guaranteed to fit the platform default encoding.
    with open(f"logs/correct/{i}.txt", "w", encoding="utf-8") as file:
        log_result(correct[i], file=file)

In [50]:
# Write an evenly spaced sample of at most `nincorrect` incorrect results, one
# file per sampled index, printing each index as it is written.
os.makedirs("logs/incorrect", exist_ok=True)

# Ceiling division keeps the sample at or below `nincorrect` entries; the
# floor of 1 avoids range()'s zero-step ValueError when the list is empty.
incorrect_step = max(1, -(-len(incorrect) // nincorrect))
for i in range(0, len(incorrect), incorrect_step):
    print(i)
    # Wiki text is not guaranteed to fit the platform default encoding.
    with open(f"logs/incorrect/{i}.txt", "w", encoding="utf-8") as file:
        log_result(incorrect[i], file=file)

0
5
10
15
20
25
30
35
40
45
50
55
60
65
70
NO RESULTS FOR Jfk|John F. Kennedy


## Ideas for increasing score (for attempt 2)
- Find a way to fit redirects into the index
- Boost terms in the query with low tf-idf values
- Ask ChatGPT which terms are more important in a query and boost those
- Ask ChatGPT to rephrase the query to try to solve some of the error classes