# Events Data Demo

In [376]:
import os
from dotenv import load_dotenv

# Run python-dotenv first so the lookup below sees whatever it loaded.
load_dotenv()

# Subscript lookup rather than .get(): a missing key raises KeyError in this cell
# instead of surfacing later as an authentication failure.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

## Setting up the GPT-4o model

In [379]:
from langchain_openai.chat_models import ChatOpenAI

In [381]:
# Chat model reused by every chain built in this notebook.
model = ChatOpenAI(
    model="gpt-4o",
    openai_api_key=OPENAI_API_KEY,
)

## Prompt Template
Prompt templates simplify the creation and reuse of prompts by offering predefined structures and formats.

In [383]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt text with {context} and {question} placeholders. It is spelled out
# line by line so the exact whitespace sent to the model is visible.
template = (
    "\n"
    "Answer the question based on the context below. \n"
    "If you don't know the answer reply \"I am not sure about that\".\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

# Wrap the template text as a chat prompt; its variables are {context} and {question}.
prompt = ChatPromptTemplate.from_template(template)

# Final pipeline step, so chains hand back a plain string.
parser = StrOutputParser()

### Prompt Template example

In [386]:
# Question-answering pipeline: fill the prompt, call the model, parse the reply.
chain = (
    prompt
    | model
    | parser
)

In [390]:
# Ask something that the supplied context answers directly.
chain.invoke(
    dict(
        context="Event #40 has taken place in Ciudad de México, and event #12 was held in Medellín",
        question="Where was hold the event 40?",
    )
)

'Event #40 was held in Ciudad de México.'

### Adding translation to the prompt template
This feeds the answer produced by the first chain into the translation prompt.

In [392]:
# Second-stage prompt with two variables: the text to translate and the target language.
translation_prompt = ChatPromptTemplate.from_template(
    "Translate {answer} to {language}"
)

In [394]:
from operator import itemgetter

# Each key of the mapping supplies one variable of translation_prompt:
#   "answer"   - produced by running the question-answering `chain` on the input
#   "language" - read straight from the "language" key of the invoke() input
translation_chain = (
    {"answer": chain, "language": itemgetter("language")}
    | translation_prompt
    | model
    | parser
)

In [398]:
# Same kind of input as the plain QA example, plus the language to translate the answer into.
translation_chain.invoke(
    dict(
        context="Event #40 has taken place in Ciudad de México, and event #12 was held in Medellín",
        question="Where was hold the event 12?",
        language="Spanish",
    )
)

'El Evento #12 se llevó a cabo en Medellín.'

# Using a Vector Store
We need an efficient way to store chunks of data together with their embeddings and to run similarity searches over them at scale. That is the job of a vector store: a specialized database built for fast similarity search across the embeddings it holds.

In [400]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_openai.embeddings import OpenAIEmbeddings

# Small in-memory corpus: each string below is embedded and stored as its own entry.
# The API key is passed explicitly, exactly as it is for the chat model, so both
# OpenAI clients use the key read at the top of the notebook instead of one of
# them depending on the ambient environment variable.
vectorstore = DocArrayInMemorySearch.from_texts(
    [
        "Event 1 will take place in São Paulo",
        "São Paulo held 5 events",
        "There are 12 events in 4 different cities across Brasil",
        "Harley-Davidson is the organizer of events in São Paulo",
        "Other events' organizers are Harley-Davidson dealers",
        "There is one dealer event in Guadalajara"
    ],
    embedding=OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
)

In [402]:
# Three closest stored texts for the query, each paired with its score.
vectorstore.similarity_search_with_score(
    query="What event will be held in São Paulo?",
    k=3,
)

[(Document(page_content='Event 1 will take place in São Paulo'),
  0.9342975238661645),
 (Document(page_content='São Paulo held 5 events'), 0.8963294324270268),
 (Document(page_content='Harley-Davidson is the organizer of events in São Paulo'),
  0.8764587219838957)]

In [404]:
# k is larger than the six stored texts, so all of them come back, ranked by score.
vectorstore.similarity_search_with_score(
    query="Does Harley-Davidson have events in Colombia?",
    k=10,
)

[(Document(page_content='Harley-Davidson is the organizer of events in São Paulo'),
  0.8924789705758178),
 (Document(page_content="Other events' organizers are Harley-Davidson dealers"),
  0.8730111457629761),
 (Document(page_content='There is one dealer event in Guadalajara'),
  0.8262310296288617),
 (Document(page_content='There are 12 events in 4 different cities across Brasil'),
  0.7908518314387816),
 (Document(page_content='Event 1 will take place in São Paulo'),
  0.7870943457787175),
 (Document(page_content='São Paulo held 5 events'), 0.769982960666691)]

## Connecting the vector store to the chain
The vector store can be used to retrieve the pieces of data most relevant to a question, which are then passed to the model as context.

In [406]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Expose the vector store through the retriever interface so it can sit in a chain.
retriever = vectorstore.as_retriever()

# Build the two prompt variables from one input: "context" comes from the
# retriever, "question" is the input passed through unchanged.
setup = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)

In [408]:
# Retrieval-backed pipeline: it takes only the question and looks the context up itself.
# NOTE: this rebinds `chain`. `translation_chain` still holds the earlier pipeline
# object, but re-running an earlier cell that uses `chain` after this one will
# pick up this version instead.
chain = (
    setup
    | prompt
    | model
    | parser
)

In [410]:
# None of the stored texts mentions Colombia, so the prompt's fallback reply is expected.
chain.invoke(
    "Does Harley-Davidson have events in Colombia?"
)

'I am not sure about that.'

In [412]:
# The stored texts name Guadalajara but never México; the model has to make that link itself.
chain.invoke(
    "Are there events in México?"
)

'Yes, there is one dealer event in Guadalajara, which is in México.'

In [416]:
# Spanish-language question. Ciudad de México appeared only in the hand-written
# context of the first example and is not among the stored texts.
chain.invoke(
    "Hay eventos en Ciudad de México?"
)

'I am not sure about that.'