In [78]:
import os
import pandas as pd
import numpy as np

from dotenv import load_dotenv
from scipy import spatial
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.schema import SystemMessage, HumanMessage

def compute_similarity(x, y):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        x: First vector (any sequence that ``np.asarray`` can convert to floats).
        y: Second vector, same length as ``x``.

    Returns:
        ``1 - scipy.spatial.distance.cosine(x, y)``. When either vector is
        all zeros the cosine is undefined, so ``0.0`` is returned instead of
        letting a NaN propagate into downstream ranking.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # A zero-norm vector has no direction: treat it as "not similar" rather
    # than dividing by zero inside the distance computation.
    if not x.any() or not y.any():
        return 0.0
    return 1 - spatial.distance.cosine(x, y)

# Read the OpenAI key from the environment (populated from a .env file when present).
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Fail here with a clear message instead of deep inside the first client call.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

chat_model_name = "gpt-3.5-turbo"
embed_model_name = "text-embedding-ada-002"
# Both clients receive the same key explicitly so they cannot end up using
# different credentials.
embeddings = OpenAIEmbeddings(model=embed_model_name, api_key=api_key)
chat = ChatOpenAI(model=chat_model_name, api_key=api_key)

# Table of stored passages; the "embedding" and "text" columns are read later in this cell.
db = pd.read_parquet("data/vector_db.parquet")
# Number of best-matching passages to include in the prompt.
n_queries = 5

# System prompt: confines the assistant to the Atticus Journal, Volume 28, and
# spells out the exact refusal text for out-of-scope questions.
context = """
    You are a virtual assistant specifically designed to provide information about WPP's Atticus Journal, Volume 28. 
    You have access to a knowledge base of the specific articles within this edition of the journal. 
    Your purpose is to assist users with inquiries directly related to this journal, including discussions on marketing strategies, creative insights, and case studies presented within this volume.
    When users ask questions, your responses should be accurate and relevant to the information contained in Atticus Journal, Volume 28. If a user asks a question that falls outside the scope of this journal or relates to general topics not covered within it, you should respond with:
    'Sorry, I can't help with that. I can only answer questions relating to the Atticus Journal, Volume 28. Please feel free to ask about the content, themes, or specific articles within this edition.'
    Remember, your goal is to be helpful and informative about the Atticus Journal, Volume 28, while clearly communicating the boundaries of your expertise.
"""

# The conversation opens with the system prompt as its only message.
messages = []
messages.append(SystemMessage(content=context))

user_query = "Tell me about multicrises and advertising"
embedded_query = embeddings.embed_query(user_query)

# Score every stored embedding against the query with cosine similarity.
similarities_array = np.array([
    compute_similarity(embedded_query, embedding) for embedding in db["embedding"].values
])
# argsort places NaN last in ascending order, so after the reversal a NaN score
# would rank first; push any NaN to the bottom before ranking.
ranking_scores = np.where(np.isnan(similarities_array), -np.inf, similarities_array)
top_indices = np.argsort(ranking_scores)[::-1][:n_queries]
matches = db.iloc[top_indices]

# Build the user turn: the question, a blank line, the instruction sentence, then
# the retrieved passages. The blank line keeps the question from running straight
# into "Based on ...".
appended_user_query = (
    f"{user_query}\n\n"
    "Based on the information retrieved, consider the following text to guide your response:\n\n"
    + "\n\n".join(matches["text"].values)
)

messages.append(HumanMessage(content=appended_user_query))

# `.invoke()` replaces the deprecated direct call `chat(messages)`.
response = chat.invoke(messages)
response.content

'The article "Explicit perception versus emotion in multi-crises" by Marcel Buettner and Lukas Burs of GroupM Science delves into the impact of ongoing global crises on consumer behavior and emotions. The authors challenge the simplistic notion that crises solely lead to negative consumer sentiments and reduced spending. Instead, they suggest that multi-crises can result in the formation of \'crisis-proof\' habits that act as a conscious defense mechanism, enabling consumers to maintain stability and orientation in their lives.\n\nMoreover, the article discusses how consumers perceive global crises differently from their emotional responses to these crises. It emphasizes the need to understand the nuanced relationship between consumer perceptions, emotions, and behaviors during times of crisis. By recognizing and adapting to these complexities, brands can effectively tailor their advertising strategies to resonate with consumers amidst multi-crises.\n\nIn the context of advertising acc