In [3]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
import tiktoken
# Get your API keys from openai, you will need to create an account. 
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Access the OPENAI_API_KEY environment variable
openai_api_key = os.environ.get("OPENAI_API_KEY")
openai_api_type = os.environ.get("OPENAI_API_TYPE")
openai_api_base = os.environ.get("OPENAI_API_BASE")
openai_api_version = os.environ.get("OPENAI_API_VERSION")



In [4]:
# location of the pdf file/files. 

def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text

raw_text=get_pdf_text(['docs/RestrictAct.pdf', 'docs/Gandhi.pdf'])
# reader = PdfReader('docs/RestrictAct.pdf')

# reader

# # read data from the file and put them into a variable called raw_text
# raw_text = ''
# for i, page in enumerate(reader.pages):
#     text = page.extract_text()
#     if text:
#         raw_text += text

# # raw_text

raw_text



'II \n118 THCONGRESS \n1STSESSION  S. 686 \nTo authorize the Secretary of Commerce to review and prohibit certain trans-\nactions between persons in the United States and foreign adversaries, and for other purposes. \nIN THE SENATE OF THE UNITED STATES \nMARCH 7, 2023 \nMr. W ARNER (for himself, Mr. T HUNE , Ms. B ALDWIN , Mrs. F ISCHER , Mr. \nMANCHIN , Mr. M ORAN , Mr. B ENNET , Mr. S ULLIVAN , Mrs. G ILLIBRAND , \nMs. C OLLINS , Mr. H EINRICH , Mr. R OMNEY , and Mrs. C APITO ) intro-\nduced the following bill; which was read twice and referred to the Com-mittee on Commerce, Science, and Transportation \nA BILL \nTo authorize the Secretary of Commerce to review and pro-\nhibit certain transactions between persons in the United States and foreign adversaries, and for other purposes. \nBe it enacted by the Senate and House of Representa- 1\ntives of the United States of America in Congress assembled, 2\nSECTION 1. SHORT TITLE. 3\nThis Act may be cited as the ‘‘Restricting the Emer- 4\n

In [5]:
# We need to split the text that we read into smaller chunks so that during information retreival we don't hit the token size limits. 

text_splitter = CharacterTextSplitter(        
    separator = "\n",
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
)
texts = text_splitter.split_text(raw_text)

len(texts)

texts[0]



'II \n118 THCONGRESS \n1STSESSION  S. 686 \nTo authorize the Secretary of Commerce to review and prohibit certain trans-\nactions between persons in the United States and foreign adversaries, and for other purposes. \nIN THE SENATE OF THE UNITED STATES \nMARCH 7, 2023 \nMr. W ARNER (for himself, Mr. T HUNE , Ms. B ALDWIN , Mrs. F ISCHER , Mr. \nMANCHIN , Mr. M ORAN , Mr. B ENNET , Mr. S ULLIVAN , Mrs. G ILLIBRAND , \nMs. C OLLINS , Mr. H EINRICH , Mr. R OMNEY , and Mrs. C APITO ) intro-\nduced the following bill; which was read twice and referred to the Com-mittee on Commerce, Science, and Transportation \nA BILL \nTo authorize the Secretary of Commerce to review and pro-\nhibit certain transactions between persons in the United States and foreign adversaries, and for other purposes. \nBe it enacted by the Senate and House of Representa- 1\ntives of the United States of America in Congress assembled, 2\nSECTION 1. SHORT TITLE. 3\nThis Act may be cited as the ‘‘Restricting the Emer- 4'

In [6]:
texts[-1]

'(not all CCs are equal!),\nattribution must be provided, and\nany if permitted, resulting work\nmust be released in the same\nmanner.   Please  reach out and\ncontact us if you want more\ninformation: \nhttps://www.freekidsbooks.org/about\n   \n This page is added for\nidentification purposes, any\ntransmittal of this eBook version\nmust leave this page intact.Looking for more books like this one?\nhttps://www.freekidsbooks.org'

In [7]:
# Download embeddings from OpenAI
embeddings_model = "CaztonEmbedAda2"
tokenizer = tiktoken.get_encoding("cl100k_base")

embeddings = OpenAIEmbeddings(deployment=embeddings_model,
                              openai_api_base=openai_api_base,
                              openai_api_version=openai_api_version,
                              openai_api_key=openai_api_key,
                              openai_api_type=openai_api_type,
                              chunk_size=1)

docsearch = FAISS.from_texts(texts, embeddings)

docsearch


<langchain.vectorstores.faiss.FAISS at 0x16a008dd0>

In [8]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

gpt3_model = "CaztonDavinci3"

chain = load_qa_chain(OpenAI(engine=gpt3_model), chain_type="stuff")

query = "who are the authors of the article?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)




                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


' The authors of the article are Rosheen Callender, Eithne Ní Chléirigh, Des Geraghty, Dr. Siby K. Joseph, Raﬁq Kathwari, Uma Magal, Yameema Mitha, and Séamas Sheils.'

In [9]:
# Start an infinite loop to continuously ask questions
while True:
    # Prompt the user to enter a question
    query = input("Enter your question (or type 'exit' to quit): ")
    
    # Check if the user wants to exit the loop
    if query.lower() == 'exit':
        break

    # Perform similarity search using the query
    docs = docsearch.similarity_search(query)
    
    # Run the question-answering chain
    response = chain.run(input_documents=docs, question=query)
    
    # Print the response
    print(response)

# Exit message
print("Exiting the question-answering loop.")

 The RESTRICT Act is a law that aims to restrict the emergence of security threats that risk information and communications technology. It outlines specific unlawful acts, including engaging in any conduct prohibited by or contrary to, or refraining from engaging in any conduct required by any regulation, order, direction, mitigation measure, prohibition, or other authorization or directive issued under the act. It also authorizes appropriations as necessary to carry out the act.
 Gandhi's main teaching was that people should live simply, in small communities, and provide for their own needs. He also believed in the importance of non-violence and self-rule.


InvalidRequestError: [''] is not valid under any of the given schemas - 'input'