In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to source files on disk take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import csv
import requests
import time
import os
import json

from tqdm import tqdm

In [None]:
# Run configuration: the values a reader may want to tune before executing
# the notebook.
file_path = '../references/questions-45.txt'  # input text file, read one question per line
output_path = '../data/exports'  # directory that receives the CSV exports
delay_seconds = 1  # pause (seconds) inserted before every HTTP request
use_local_server = True  # True: local development server; False: public API
search_results_only = False  # True: skip the 'norag' query and the answers CSV

server = "http://127.0.0.1:8000" if use_local_server else "https://scripturecentralqa.org/api"

# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(output_path, exist_ok=True)

In [None]:
# Load the questions: one per line, surrounding whitespace stripped, blank
# lines skipped.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open(file_path, 'rt', encoding='utf-8') as txt_file:
    stripped_lines = (line.strip() for line in txt_file)
    list_of_questions = [text for text in stripped_lines if text]

# Displayed as the cell output: number of questions loaded.
len(list_of_questions)

In [None]:
%%time

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely on a stalled server. The read limit is deliberately generous
# because generating an answer may take a while -- tune as needed.
request_timeout = (10, 300)
# How many of the top search results are embedded in each answers row.
max_results_per_answer = 5


def _fetch_search_json(question, query_type, label, required_keys=()):
    """Query the ``/search`` endpoint once and return the decoded JSON object.

    Sleeps ``delay_seconds`` before sending so consecutive requests are
    throttled. Every failure is reported with ``print`` and signalled by a
    ``None`` return value, so the caller can skip the question instead of
    aborting the whole run.

    Args:
        question: Text sent as the ``q`` query parameter.
        query_type: Value sent as the ``query_type`` query parameter.
        label: Short name of the request used in error messages.
        required_keys: Keys that must be present in the decoded JSON object.

    Returns:
        The decoded JSON object as a dict, or ``None`` if the request failed,
        returned a non-200 status, could not be decoded/parsed, or lacks one
        of ``required_keys``.
    """
    time.sleep(delay_seconds)
    try:
        response = requests.get(
            f"{server}/search",
            params={'q': question, 'query_type': query_type},
            timeout=request_timeout,
        )
    except requests.RequestException as err:
        # Connection errors and timeouts are handled like any other
        # per-question failure: report and move on.
        print(f"ERROR on {label} {question}: request failed: {err}")
        return None

    if response.status_code != 200:
        print(f"ERROR on {label} {question}: status_code {response.status_code}")
        return None

    # Decode and parse in separate steps so the parse-error message can always
    # show the decoded text (it does not exist if decoding itself fails).
    try:
        body_text = response.content.decode('utf-8')
    except UnicodeDecodeError as err:
        print(f"ERROR on {label} {question} decoding response: {err}")
        return None
    try:
        payload = json.loads(body_text)
    except json.JSONDecodeError:
        print(f"ERROR on {label} {question} parsing response {body_text}")
        return None

    if not isinstance(payload, dict):
        print(f"ERROR on {label} {question}: response is not a JSON object")
        return None
    missing_keys = [key for key in required_keys if key not in payload]
    if missing_keys:
        print(f"ERROR on {label} {question}: response is missing {missing_keys}")
        return None
    return payload


# question -> [joined top results, 'ragonly' answer, 'norag' answer]
question_answers = {}
# question -> full list of search results from the 'ragonly' query
question_results = {}

for question in tqdm(list_of_questions):
    rag_payload = _fetch_search_json(question, 'ragonly', 'query', required_keys=('results',))
    if rag_payload is None:
        continue
    question_results[question] = rag_payload['results']

    if search_results_only:
        continue
    if 'answer' not in rag_payload:
        print(f"ERROR on query {question}: response is missing ['answer']")
        continue

    gpt_payload = _fetch_search_json(question, 'norag', 'gpt query', required_keys=('answer',))
    if gpt_payload is None:
        continue

    # Only the first few results are serialized into the answers export.
    top_results = [
        json.dumps(item)
        for item in rag_payload['results'][:max_results_per_answer]
    ]
    result_set = '\n--------\n'.join(top_results)

    question_answers[question] = [result_set, rag_payload['answer'], gpt_payload['answer']]


In [None]:
# Export one row per answered question: the joined top search results plus the
# two answers collected for it. Skipped when only search results were gathered.
if not search_results_only:
    answers_csv_path = os.path.join(output_path, 'question-answers-45.csv')
    # newline='' is required by the csv module: the Results column contains
    # embedded newlines, which would otherwise be translated on platforms that
    # use \r\n line endings. The encoding is pinned so the export does not
    # depend on the locale's default.
    with open(answers_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Question', 'Results', 'Answer', 'ChatGPT Answer'])  # header row
        for question, answers in question_answers.items():
            # answers holds exactly the three values matching the header columns
            # after 'Question'.
            writer.writerow([question, *answers])

In [None]:
# Questions that returned fewer results than this are printed for inspection.
# NOTE(review): presumably the number of results the search endpoint normally
# returns -- confirm.
expected_result_count = 20
# Keys read from each search result, in the column order of the header row
# after 'Question'.
result_fields = ["id", "score", "url", "title", "index", "text"]

results_csv_path = os.path.join(output_path, 'question-results-45.csv')
# newline='' is required by the csv module so embedded newlines in fields are
# written correctly; the encoding is pinned so the export does not depend on
# the locale's default.
with open(results_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Question', 'ResultId', 'ResultScore', 'ResultURL', 'ResultTitle', 'ResultRank', 'ResultText'])  # header row
    for question, results in question_results.items():
        if len(results) < expected_result_count:
            print(question, len(results))
        # One CSV row per (question, search result) pair.
        for result in results:
            writer.writerow([question, *(result[field] for field in result_fields)])

print('End')