In [1]:
# Standard library
import os

# Third-party packages
import dotenv
import openai
from dotenv import find_dotenv, load_dotenv

# Read the local .env file so its variables are visible through os.getenv
_ = load_dotenv(find_dotenv())

# Hand the API key found in the environment to the openai module
openai.api_key = os.getenv('OPENAI_API_KEY')

In [2]:
# Define a function for obtaining chat completions from an OpenAI chat model
def get_chat_completion(prompt, model="gpt-3.5-turbo", max_tokens=50, api_key=None):
    """Send one user prompt to an OpenAI chat model and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the number of tokens generated for the reply.
        api_key: Optional API key for this call. When ``None`` the client is
            built without an explicit key and uses its own default key lookup.

    Returns:
        The message content of the first choice in the response, or a string
        of the form ``"An error occurred: <details>"`` if the request raised.
    """
    from openai import OpenAI

    # Tolerate the legacy call style get_chat_completion(prompt, 50), where the
    # token budget lands in the `model` slot: treat an integer there as
    # max_tokens instead of sending it to the API as a model name.
    if isinstance(model, int):
        model, max_tokens = "gpt-3.5-turbo", model

    try:
        # Only pass the key when one was supplied so the default lookup still applies
        client = OpenAI(api_key=api_key) if api_key is not None else OpenAI()

        # Generate a chat completion for the user prompt with the requested model
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
            max_tokens=max_tokens,
        )

        # Return the text of the first choice
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising
        return f"An error occurred: {str(e)}"

In [3]:
# Path to the PDF file to be processed. Set the LAIKA_PDF_PATH environment
# variable to point at a different copy; otherwise the original location is used.
file_to_read = os.getenv("LAIKA_PDF_PATH", "/Users/anayanapalli/Documents/Laika.pdf")

In [4]:
# Read the PDF through langchain's PyPDFLoader and split it into documents
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path=file_to_read)
pages = loader.load_and_split()

In [5]:
# Instantiate langchain's OpenAIEmbeddings with its default settings. Nothing is
# embedded in this cell; the object is handed to the FAISS vector store below.
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [6]:
# Build a FAISS vector store from the document pages, using the embeddings object
from langchain.vectorstores import FAISS

db = FAISS.from_documents(documents=pages, embedding=embeddings)

In [7]:
# Assemble a RetrievalQA chain: langchain's OpenAI language model answers
# questions with the FAISS store acting as the retriever
from langchain import OpenAI
from langchain.chains import RetrievalQA

llm = OpenAI()
chain = RetrievalQA.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
)

In [8]:
# Questions about the document, asked in this order
questions = list(
    (
        "What breed is Laika?",
        "Does Laika have any friends?",
        "How did Laika become interested in space?",
        "Did Laika return from space?",
    )
)

In [9]:
# For every question, print the answer from the plain chat model next to the
# answer from the RetrievalQA chain so the two can be compared
for question in questions:
    print("*" * 113)
    print(f"Question: {question}")
    # Pass the token budget by keyword: the second positional parameter of
    # get_chat_completion is `model`, not `max_tokens`.
    print(f"GPT-3.5: {get_chat_completion(question, max_tokens=50)}")
    print(f"RAG : {chain(question, return_only_outputs=True)['result']}")

*****************************************************************************************************************
Question: What breed is Laika?
GPT-3.5: Laika was a mixed-breed dog, often described as a Siberian Husky or a Siberian Husky mix.
RAG :  Laika is a Shiba Inu.
*****************************************************************************************************************
Question: Does Laika have any friends?
GPT-3.5: Laika, the Soviet space dog, does not have any friends in the conventional sense as she was a dog used for scientific purposes. However, she was cared for and trained by her handlers who might have formed a bond with her during her time at
RAG :  Yes, Laika has a squirrel sidekick named Rocky and she also meets friendly alien creatures on her space missions.
*****************************************************************************************************************
Question: How did Laika become interested in space?
GPT-3.5: Laika was a stray dog who was