In [None]:
import openai
import os
from glob import glob
from openai import OpenAI
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Credentials come from the environment so no secret lives in the notebook;
# indexing os.environ raises KeyError right here if OPENAI_API_KEY is unset.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Shared client used by the embedding and chat helpers in the cells below.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [None]:
def get_ada_embedding(text, model="text-embedding-ada-002"):
    """Embed ``text`` through the shared OpenAI client.

    Args:
        text: The string to embed. It is sent as a one-element input list.
        model: Name of the embedding model. Defaults to
            "text-embedding-ada-002", the model this helper previously
            hard-coded, so existing calls behave as before.

    Returns:
        A NumPy array reshaped to a single row, ``(1, n_dims)``, which is the
        2-D layout ``get_cosine_similarity`` hands to scikit-learn.
    """
    response = client.embeddings.create(input=[text], model=model)
    # Exactly one input was submitted, so the first result is the only one.
    return np.array(response.data[0].embedding).reshape(1, -1)

def get_cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embeddings as a scalar.

    Both arguments are passed straight to scikit-learn's pairwise
    ``cosine_similarity``; the ``[0][0]`` entry of the resulting matrix is
    returned. With the single-row arrays produced by ``get_ada_embedding``
    that entry is the similarity between the two texts.
    """
    pairwise = cosine_similarity(embedding1, embedding2)
    return pairwise[0][0]

def get_nearest_neighbor_text(text, df):
    """Return the text of the document in ``df`` most similar to ``text``.

    Args:
        text: Query string; it is embedded with ``get_ada_embedding``.
        df: DataFrame with an ``embedding`` column (one stored embedding per
            row) and a ``text`` column holding the matching document text.

    Returns:
        The ``text`` value of the row whose embedding has the highest cosine
        similarity to the query.

    Raises:
        ValueError: If ``df`` has no rows. Checked before the embedding call
            so an empty corpus does not cost an API request.
    """
    if df.empty:
        raise ValueError("df contains no documents to search")

    query_embedding = get_ada_embedding(text)
    similarities = df.embedding.apply(
        lambda doc_embedding: get_cosine_similarity(doc_embedding, query_embedding)
    )
    # idxmax locates the best match in one pass; sorting every score just to
    # keep the top one is unnecessary.
    best_label = similarities.idxmax()
    return df.loc[best_label, "text"]

def simple_chat(prompt):
    """Send ``prompt`` as a single user turn and return the reply text.

    The conversation is a fixed system message followed by the user's
    prompt; the content of the first returned choice is the result.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful Onboarding assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# (label, text-to-embed) pairs for the similarity demo. Keeping the label next
# to the exact text that is embedded makes a mismatch between the two obvious.
# The first four labels stay lowercase and unpunctuated because the comparison
# cell selects anchors by the labels "i love you" and "i hate you".
labeled_phrases = [
    ("i love you", "I love you."),
    ("i adore you", "I adore you."),
    ("i hate you", "I hate you."),
    ("i despise you", "I despise you."),
    ("peanut butter sandwich", "peanut butter sandwich"),
    ("i am ambivalent toward you", "i am ambivalent toward you"),
    (
        "The happiness of your life depends upon the quality of your thoughts.",
        "The happiness of your life depends upon the quality of your thoughts.",
    ),
    # Previously this label was paired with the gibberish string below, so the
    # printed score was for gibberish, not for the Chinese sentence.
    ("你生活的幸福取决于你思想的质量。", "你生活的幸福取决于你思想的质量。"),
    # The gibberish baseline is kept, now under a label that says what it is.
    ("(random gibberish)", "dsakljflks'ajfo[easi'urtoaeishfja'sdoilfjas]"),
]

# One embeddings request per phrase; each entry is (label, single-row array).
embeddings = [(label, get_ada_embedding(phrase)) for label, phrase in labeled_phrases]

In [None]:
# Print how the two anchor phrases score against every phrase in `embeddings`.
anchor_labels = ("i love you", "i hate you")

for name, embedding in embeddings:
    # Decide once per outer phrase. The check used to run inside the inner
    # loop, and non-anchor phrases still emitted a stray separator line.
    if name not in anchor_labels:
        continue
    for name2, embedding2 in embeddings:
        score = get_cosine_similarity(embedding, embedding2)
        print(f"{name} vs {name2}: {round(score, 3)}")
    # Separator between the two anchor groups.
    print(" ")

In [None]:
# glob returns matches in arbitrary order; sorting keeps the row order of the
# resulting table stable across runs and machines.
raw_onboarding_docs = sorted(glob("documents/*.md"))
onboarding_doc_text = []
# NOTE: this rebinds `embeddings`, replacing the (label, vector) pairs built
# for the phrase-similarity demo; re-run that cell before re-running its loop.
embeddings = []

for doc in raw_onboarding_docs:
    # Explicit encoding so reading does not depend on the platform default.
    # Assumes the markdown files are UTF-8 -- confirm for this corpus.
    with open(doc, encoding="utf-8") as f:
        document_text = f.read()
    # The file is closed before the network call to the embeddings endpoint.
    onboarding_doc_text.append(document_text)
    embeddings.append(get_ada_embedding(document_text))

In [None]:
# One row per onboarding document: its full text, its embedding, and the path
# it was read from. The three lists were filled in the same order.
onboarding_docs_df = pd.DataFrame(
    {
        "text": onboarding_doc_text,
        "embedding": embeddings,
        "filename": raw_onboarding_docs,
    }
)
# Bare last expression so the notebook renders a preview of the first rows.
onboarding_docs_df.head()

In [None]:
# Sample question used to try out the nearest-document lookup.
new_message = (
    "I just started my new job as a junior software engineer. "
    "I'm so excited to be here! "
    "What should I do first?"
)

In [None]:
# Show the onboarding document whose embedding is closest to the sample question.
nearest_doc_text = get_nearest_neighbor_text(new_message, onboarding_docs_df)
print(nearest_doc_text)

In [None]:
def help_me_onboard(question, onboarding_docs_df=None):
    """Answer an onboarding question using the closest document as context.

    Args:
        question: The user's question.
        onboarding_docs_df: DataFrame of documents to search, passed on to
            ``get_nearest_neighbor_text``. When omitted, the notebook-level
            ``onboarding_docs_df`` is looked up at call time.

    Returns:
        None. The model's reply is printed.
    """
    if onboarding_docs_df is None:
        # Resolve the default when called, not when defined: a default of
        # `=onboarding_docs_df` pins whatever frame existed when this cell ran,
        # so rebuilding the frame later would silently be ignored. The
        # parameter shadows the global name, hence the globals() lookup.
        onboarding_docs_df = globals()["onboarding_docs_df"]

    closest_document_text = get_nearest_neighbor_text(question, onboarding_docs_df)

    prompt = f"""
    I would like help answering the following question:

    {question}

    Please only answer the question using this as context:

    {closest_document_text}
    """

    print(simple_chat(prompt))

In [None]:
# First-day question from a new AE; the default document table is searched.
help_me_onboard(question="I am a new AE. What should I do first?")

In [None]:
# Follow-up question from the same new AE. The prompt typo "prodcut" is
# corrected so both the embedding lookup and the chat model see "product".
help_me_onboard("I am a new AE. I've attended orientation and product training. What should I do next?")