## LangChain
Esta seção mostra como usar embeddings para busca semântica e responder a perguntas sobre bases de dados.
Ela usa a biblioteca LangChain.

In [3]:
import os
import re
import sys

import requests
from dotenv import load_dotenv

# Called once, before any environment variable is read below
# (presumably populates the environment from a local .env file).
load_dotenv()

# Azure OpenAI key: surrounding whitespace is stripped and an empty value
# stops the notebook here with a readable message.
API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "").strip()
assert API_KEY, "ERROR: Azure OpenAI Key is missing"

# Azure OpenAI endpoint: must be present and mention the Azure OpenAI host.
RESOURCE_ENDPOINT = os.getenv("AZURE_OPENAI_API_BASE", "").strip()
assert RESOURCE_ENDPOINT, "ERROR: Azure OpenAI Endpoint is missing"
assert "openai.azure.com" in RESOURCE_ENDPOINT.lower(), "ERROR: Azure OpenAI Endpoint should be in the form: \n\n\t<your unique endpoint identifier>.openai.azure.com"

# Deployment name for completions; None when DEPLOYMENT_NAME is not set.
COMPLETIONS_MODEL = os.getenv("DEPLOYMENT_NAME")

# Write the validated values back to the environment. Note the endpoint is
# read from AZURE_OPENAI_API_BASE but exported as AZURE_OPENAI_ENDPOINT
# (presumably the name the langchain_openai clients look up -- confirm).
os.environ["AZURE_OPENAI_API_KEY"] = API_KEY
os.environ["AZURE_OPENAI_ENDPOINT"] = RESOURCE_ENDPOINT


In [11]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import AzureOpenAI
from langchain.chains import RetrievalQA
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

### Indexando documentos em uma base vetorial


In [6]:
from langchain.document_loaders import TextLoader

# Read the source text file into LangChain documents.
loader = TextLoader("./data/state_of_the_union.txt")
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)

# Embedding client bound to the Azure deployment named below.
# NOTE(review): chunk_size=1 presumably limits each embeddings request to a
# single text -- confirm against the AzureOpenAIEmbeddings documentation.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    openai_api_version="2023-05-15",
    chunk_size=1,
)

# Embed every chunk and build the FAISS index used by the later cells.
docsearch = FAISS.from_documents(texts, embeddings)
print("done")

done


### Buscando no documento


In [13]:
# Completion model shared by both chains below.
# NOTE(review): the deployment name is hard-coded here even though the config
# cell reads DEPLOYMENT_NAME into COMPLETIONS_MODEL -- confirm which one is
# intended.
llm = AzureOpenAI(temperature=0, deployment_name="text-davinci-003", model_name="text-davinci-003")

# Question answering backed by the FAISS index: the retriever built from
# `docsearch` supplies the context for the model.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

# Plain prompt -> LLM chain with no retriever attached: the prompt contains
# only the question, so the indexed document is never consulted.
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])

llm_chain = LLMChain(prompt=prompt, llm=llm)

query = "What did the president say about Ketanji Brown Jackson"

# `Chain.run` is deprecated in favour of `invoke`, which returns a dict; the
# generated string is read from each chain's output key so the printed and
# displayed values stay plain strings.
print(llm_chain.invoke({"question": query})["text"])

qa.invoke({"query": query})["result"]

 President Joe Biden recently nominated Ketanji Brown Jackson to serve as a judge on the U.S. Court of Appeals for the District of Columbia Circuit. In a statement, President Biden said, "Ketanji Brown Jackson is a highly respected attorney and judge with a long and distinguished record of service. She has a keen intellect, a deep respect for the law, and a commitment to justice that makes her an excellent choice for the D.C. Circuit. I am confident she will serve the American people with distinction from the bench."


" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence."

### Salvando o índice e carregando de novo


In [10]:
# Directory that holds the persisted FAISS index.
index_dir = './data/state_union/'

# Write the index built above to disk.
docsearch.save_local(index_dir)

# Read it back, reusing the same embeddings object.
# NOTE(review): FAISS.load_local reportedly unpickles part of the saved index,
# and newer LangChain releases require allow_dangerous_deserialization=True;
# only load directories this notebook wrote itself -- confirm against the
# installed version.
loaded = FAISS.load_local(index_dir, embeddings)
