# Document Vector Embeddings

In [1]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
# Find the nearest .env file and load it; the return value is not needed.
_ = load_dotenv(find_dotenv())

# Indexing os.environ (rather than .get) raises KeyError right here if the key is absent.
openai.api_key = os.environ["OPENAI_API_KEY"]



In [2]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [3]:
# Configure the splitter first; it does not depend on the loaded text.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)

# Load the speech from the working directory, then cut it into chunks.
raw_documents = TextLoader("state_of_the_union.txt").load()
documents = text_splitter.split_documents(raw_documents)


In [4]:
# Embedding model used both to index the chunks and, later, to embed queries.
embeddings = OpenAIEmbeddings()

# Build the FAISS vector store from the split documents.
db = FAISS.from_documents(documents=documents, embedding=embeddings)

In [6]:
# Look up the chunks most similar to a plain-text question.
query = "What did the president say about ukraine"
docs = db.similarity_search(query=query)

# Dump the matching Document objects as plain text.
print(docs)


[Document(page_content='For that purpose we’ve mobilized American ground forces, air squadrons, and ship deployments to protect NATO countries including Poland, Romania, Latvia, Lithuania, and Estonia. \n\nAs I have made crystal clear the United States and our Allies will defend every inch of territory of NATO countries with the full force of our collective power.  \n\nAnd we remain clear-eyed. The Ukrainians are fighting back with pure courage. But the next few days weeks, months, will be hard on them.  \n\nPutin has unleashed violence and chaos.  But while he may make gains on the battlefield – he will pay a continuing high price over the long run. \n\nAnd a proud Ukrainian people, who have known 30 years  of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards.  \n\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world.', metadata={'s

In [7]:
# Embed the current query explicitly, then search the index with the raw
# vector instead of the query string.
embedding_vector = embeddings.embed_query(text=query)
docs = db.similarity_search_by_vector(embedding=embedding_vector)

# Show the embedding itself (a list of floats).
display(embedding_vector)

[-0.028844157233834267,
 -0.013515736907720566,
 0.0046772523783147335,
 -0.0071581145748496056,
 0.001792836468666792,
 0.00016497733304277062,
 -0.0258803553879261,
 -0.021275874227285385,
 -0.01014837995171547,
 -0.003125886432826519,
 0.04792364314198494,
 -0.001940034213475883,
 -0.01565258577466011,
 -0.009605898521840572,
 -0.0062650041654706,
 -0.02193743735551834,
 0.028103206306695938,
 -0.02892354503273964,
 0.03606842830777168,
 -0.03244306147098541,
 0.0017051793402060866,
 -0.023657502606511116,
 0.009453739039599895,
 0.005606748629361391,
 -0.016869863495230675,
 0.007111805025488138,
 0.017650507390499115,
 -0.025139404460787773,
 0.010115302167832851,
 -0.01184198260307312,
 -0.0095265107229352,
 -0.03580380231142044,
 -0.015163029544055462,
 -0.006566015537828207,
 -0.021289106458425522,
 -0.02345903217792511,
 0.020693698897957802,
 -0.021222949028015137,
 0.012172764167189598,
 -0.008871563710272312,
 0.016790475696325302,
 0.0002480862312950194,
 0.012721861712634

In [8]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# Question-answering chain of type "stuff" over an OpenAI completion model
# (temperature=0); verbose=True makes the chain print its formatted prompt.
chain = load_qa_chain(
    llm=OpenAI(temperature=0),
    chain_type="stuff",
    verbose=True,
)

In [9]:
# Retrieve the chunks relevant to a multi-part question ...
query = "What are the challenges facing voting rights? And what actions are proposed deal with these challenges? And how will these actions work?"
docs = db.similarity_search(query=query)

# ... and let the QA chain answer from those chunks. Left as the last
# expression so the notebook displays the answer.
chain.run(question=query, input_documents=docs)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

And I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. 

And I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? 

Ban assault weapons and high-capacity magazines. 

Repeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. 

These laws don’t infringe on the Second Amendment. They save lives. 

The most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. 

In state a

' The challenge facing voting rights is that new laws have been passed in many states to suppress the vote and subvert entire elections. To deal with this challenge, the President has called on the Senate to pass the Freedom to Vote Act, the John Lewis Voting Rights Act, and the Disclose Act. The Freedom to Vote Act would make it easier for people to register and vote, the John Lewis Voting Rights Act would restore key provisions of the Voting Rights Act of 1965, and the Disclose Act would require organizations that fund political campaigns to disclose their donors.'

In [10]:
# Use the chat-model wrapper: the older `langchain.llms.OpenAIChat` class is
# deprecated and warns on construction, pointing to ChatOpenAI instead.
from langchain.chat_models import ChatOpenAI

from langchain.chains import RetrievalQA

# Expose the FAISS index through the retriever interface RetrievalQA takes.
retriever = db.as_retriever()

# Chat model used by the retrieval chains in the following cells.
llm = ChatOpenAI(temperature=0.0, model_name="gpt-3.5-turbo-16k-0613")

# Retrieval + "stuff" QA in one chain: the retriever fetches the context and
# the LLM answers from it. verbose=True logs chain entry/exit.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)



In [12]:
# Ask the retrieval chain directly; it fetches its own context, so no
# explicit similarity search is needed here.
query = "What did the president say about Justice Breyer?"
res = qa_chain.run(query)

# .run() hands back the answer text, printed as-is.
print(res)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
The president thanked Justice Breyer for his service and mentioned that he is a retired Justice of the United States Supreme Court.


In [46]:
from langchain.prompts import PromptTemplate
# Custom "stuff" prompt: same wording as the default QA instruction seen in the
# verbose chain output, plus a final line asking for the answer in French.
# {context} and {question} are the two placeholders filled in by the chain.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Return the answer in French:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments forwarded to the underlying chain when it is built.
chain_type_kwargs = dict(prompt=PROMPT)

In [49]:
# Rebuild the retrieval chain, this time passing the custom prompt through
# chain_type_kwargs. This rebinds `qa_chain` from the earlier cell.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    verbose=True,
)

In [50]:
query = "What did the president say about Ketanji Brown Jackson"

# Call the chain with an input dict (instead of .run(query)) so the result
# comes back as a dict holding both the query and the answer.
chain_inputs = {"query": query}
res = qa_chain(chain_inputs)

print(res)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
{'query': 'What did the president say about Ketanji Brown Jackson', 'result': "Le président a dit que Ketanji Brown Jackson est l'une des meilleures expertes juridiques de notre pays et qu'elle continuera l'excellence du juge Breyer. Elle a reçu un large soutien depuis sa nomination, y compris de la part de la Fraternal Order of Police et d'anciens juges nommés par des démocrates et des républicains."}


In [58]:
from IPython.display import HTML, display

import pandas as pd

# `res` is the mapping returned by the retrieval chain call, with string keys
# such as 'query' and 'result'. It has no integer key, so the previous
# `res[0]` lookup raised KeyError: 0.
res = dict(res)

# Wrap the mapping in a list so pandas builds a single-row frame whose columns
# are the mapping's keys. (A dict of scalar values passed on its own would
# require an explicit index.)
df = pd.DataFrame([res])