In [1]:
from openai import OpenAI
from nltk.tokenize import BlanklineTokenizer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from py_files import config

In [2]:
# Shared OpenAI client used by get_embedding() and chat_with_gpt(); the API key
# is taken from the project-local config module rather than written inline.
client = OpenAI(api_key=config.API_KEY)

In [3]:
def get_embedding(text, model="text-embedding-3-large", dimensions=256):
    """Return the embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: String to embed. Newlines are replaced with single spaces
            before the request is sent.
        model: Name of the embedding model to query.
        dimensions: Requested length of the returned vector. Defaults to 256,
            the value this notebook previously hard-coded. Pass ``None`` to
            leave the parameter out of the request, e.g. for a model that
            does not accept it.

    Returns:
        The ``embedding`` field of the first item in the API response.
    """
    flattened = text.replace("\n", " ")
    request = {"input": [flattened], "model": model}
    if dimensions is not None:
        request["dimensions"] = dimensions
    response = client.embeddings.create(**request)
    return response.data[0].embedding

In [4]:
# Read the raw FAQ corpus and split it into one document per blank-line-separated
# block. The encoding is pinned so the read does not depend on the platform's
# locale default (assumes the corpus file is UTF-8 — TODO confirm).
with open('../data/raw/corpus.txt', encoding='utf-8') as f:
    corpus = f.read()
blt = BlanklineTokenizer()
documents = blt.tokenize(corpus)

In [5]:
# One row per FAQ document, plus its embedding; get_embedding() is called once
# per row, so this cell issues one API request per document.
df = pd.DataFrame(documents, columns=['faq'])
df['ada_embedding'] = df['faq'].apply(get_embedding)

In [6]:
def chat_with_gpt(t_message, messages):
    """Send one user turn to the chat model, or handle a control command.

    Args:
        t_message: The user's text, or one of two control strings:
            ``'new conversation'`` trims ``messages`` back to its first
            entry, and ``'messages'`` does nothing.
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. It is modified in place.

    Returns:
        The model's reply text for a normal turn; ``None`` for either control
        command and for an empty message.
    """
    if t_message == 'new conversation':
        # Trim in place: rebinding the local name would leave the caller's
        # list untouched, so the reset would silently do nothing.
        del messages[1:]
        return None
    if t_message == 'messages':
        return None

    message = f"{t_message}"
    if not message:
        # Nothing to send. Without this guard `chat` would be unbound below.
        return None

    messages.append({"role": "user", "content": message})
    chat = client.chat.completions.create(model="gpt-3.5-turbo",
                                          temperature=0.0,
                                          messages=messages)
    reply = chat.choices[0].message.content
    if reply is not None:
        # Record the assistant turn so later calls carry both sides of the
        # conversation, not only the user's messages.
        messages.append({"role": "assistant", "content": reply})
    return reply

In [7]:
def append_rules(idxs, t_prompts):
    """Return ``t_prompts`` extended with one numbered guideline per index.

    Each entry of ``idxs`` is used to index the module-level ``documents``
    list, and the resulting text is appended on its own line, numbered from 1.
    When ``idxs`` is empty, a single "no guidelines" line is appended instead.
    """
    if len(idxs) == 0:
        return t_prompts + '\n:There are no guidelines'
    numbered_rules = [
        f'\nGuidelines {position}: {documents[doc_idx]}'
        for position, doc_idx in enumerate(idxs, start=1)
    ]
    return t_prompts + ''.join(numbered_rules)

In [8]:
threshold = 0.5  # a guideline is offered only if its cosine similarity exceeds this
max_rules = 3    # the system prompt tells the model it receives "up to 3 rules"

def main(text):
    """Answer ``text`` using the FAQ guidelines most similar to it.

    The question is embedded with ``get_embedding`` and compared (cosine
    similarity) with every row of the module-level ``df``. Rows scoring above
    ``threshold`` are kept, at most ``max_rules`` of them, most similar first,
    and passed to the chat model together with the question.

    Side effects: overwrites ``df['similarities']`` with the scores for this
    question and prints the prompt that is sent to the model.

    Args:
        text: The user's question.

    Returns:
        The model's reply, or a fixed "please rephrase" message when no
        guideline clears the threshold.
    """
    # Adjacent string literals are used instead of backslash continuations so
    # that a missing trailing space cannot silently fuse two words together.
    system_prompt = (
        'you are a helpful question answering bot. Your job is to provide '
        'friendly advice regarding questions you are asked. You will receive a question and up to 3 rules to guide you. however, '
        'the rules may not apply to the question asked. for each rule, you are to think about the question and the rule and '
        'decide if the rule applies. if the rule does not apply, ignore the rule. otherwise think about how the rule '
        'applies to answer the prompt. i want your response to include the following information: '
        'tell the user you are a friendly bot, '
        'paraphrase the question '
        'provide the answer based on the rules, but do not quote the rules, '
        'your answer must be based on one of the rules'
    )
    messages = [{'role': 'system', 'content': system_prompt}]
    no_match_reply = "I'm sorry, please rephrase the question."

    if df.empty:
        # No documents to compare against, so there is nothing to embed or rank.
        return no_match_reply

    query_embedding = np.array(get_embedding(text)).reshape(1, -1)
    doc_embeddings = np.array(df['ada_embedding'].tolist())
    # A single vectorised call yields an (n_docs, 1) array; ravel() turns it
    # into plain floats so the threshold comparison below is a numeric one.
    df['similarities'] = cosine_similarity(doc_embeddings, query_embedding).ravel()

    scores = df['similarities']
    idxs = scores[scores > threshold].nlargest(max_rules).index
    if len(idxs) > 0:
        prompt_w_rules = append_rules(idxs, text)
        print(prompt_w_rules)
        # NOTE(review): the trailing '?' lands after the last guideline, not
        # after the question itself — confirm whether that is intended.
        return chat_with_gpt(f'Here is the question: {prompt_w_rules}?', messages)
    return no_match_reply

In [9]:
# Sample questions for exercising main() by hand.
prompts = [
    "Something is wrong with the on screen timer. Should i use my own timer",
    "I just spent 2 hours reading the instructions. Do i get paid for this",
    "can i prompt the model to generate a file",
    "do i penalize the model for not using the .head function",
    "do i rate the edited response or the original",
    "is “Sorry we couldn't finish the response at this time. Please try again later.” considered a canned response",
]

In [10]:
# Run the full pipeline on the first sample question and show the reply.
first_reply = main(prompts[0])
print(first_reply)

Something is wrong with the on screen timer. Should i use my own timer
Guidelines 1: Use your own, separate timer for tracking payment. The timer on the screen is more related to how Data Annotation assigns tasks to different users.
Hello! I'm a friendly bot here to help. It sounds like you're having trouble with the on-screen timer. In this case, it would be a good idea to use your own timer to track time accurately. This way, you can ensure that you are keeping track of time for your tasks effectively.
