In [5]:
import os
import platform

import openai
import chromadb
import langchain

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import TokenTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import GutenbergLoader

# Prepend ../../libs to the module search path before importing `utils`
# (presumably located there -- confirm against the repository layout).
import os
import sys

sys.path.insert(0, '../../libs')
from utils import load_json

# Show which interpreter version executed this notebook.
print('Python: ', platform.python_version())

Python:  3.8.13


In [11]:
import ssl

# SECURITY NOTE(review): unless PYTHONHTTPSVERIFY is set to a non-empty value,
# this swaps the default HTTPS context factory for the unverified one, i.e.
# certificate checks are skipped. Set PYTHONHTTPSVERIFY to keep the default.
_https_verify_requested = bool(os.environ.get('PYTHONHTTPSVERIFY', ''))
_unverified_factory = getattr(ssl, '_create_unverified_context', None)
if not _https_verify_requested and _unverified_factory:
    ssl._create_default_https_context = _unverified_factory

#### Load OpenAI API key

In [7]:
## Load the OpenAI API key from a JSON credentials file.
## The file location can be overridden with the OPENAI_KEY_FILE environment
## variable; when it is unset, the original default path is used.
key_file = os.environ.get('OPENAI_KEY_FILE', '/home/chuang/Dev/Keys/openai_key.json')
key = load_json(key_file)
## Expose the key to the rest of the notebook through OPENAI_API_KEY.
os.environ['OPENAI_API_KEY'] = key['ChatGPT']['API_KEY']

#### Create index

In [33]:
def get_gutenberg(url):
    """Load the text at ``url`` through ``GutenbergLoader``.

    Args:
        url: Address handed to ``GutenbergLoader``.

    Returns:
        Whatever ``GutenbergLoader(url).load()`` returns; callers in this
        notebook index it and read ``page_content`` from the first item.
    """
    return GutenbergLoader(url).load()
# Fetch the play and preview the first 100 characters of the first document.
romeo_url = 'https://www.gutenberg.org/cache/epub/1513/pg1513.txt'
romeoandjuliet_data = get_gutenberg(romeo_url)
first_page = romeoandjuliet_data[0].page_content
print(first_page[:100])

# Split the loaded documents with chunk_size=1000 and chunk_overlap=0,
# then show the first resulting chunk in full.
text_splitter = TokenTextSplitter(chunk_overlap=0, chunk_size=1000)
romeoandjuliet_doc = text_splitter.split_documents(romeoandjuliet_data)
first_chunk = romeoandjuliet_doc[0]
print(first_chunk.page_content)

The Project Gutenberg eBook of Romeo and Juliet, by William Shakespeare





This eBook is for the
The Project Gutenberg eBook of Romeo and Juliet, by William Shakespeare





This eBook is for the use of anyone anywhere in the United States and


most other parts of the world at no cost and with almost no restrictions


whatsoever. You may copy it, give it away or re-use it under the terms


of the Project Gutenberg License included with this eBook or online at


www.gutenberg.org. If you are not located in the United States, you


will have to check the laws of the country where you are located before


using this eBook.





Title: Romeo and Juliet





Author: William Shakespeare





Release Date: November, 1998 [eBook #1513]


[Most recently updated: May 11, 2022]





Language: English








Produced by: the PG Shakespeare Team, a team of about twenty Project Gutenberg volunteers.





*** START OF THE PROJECT GUTENBERG EBOOK ROMEO AND JULIET ***














THE TRAGEDY OF R

In [34]:
# Directory in which the Chroma index is cached on disk.
persist_directory = "/data/chuang/QA_LangChan/chroma/test"
embeddings = OpenAIEmbeddings()

# Build the vector store from the split documents, then write it to disk.
vectordb = Chroma.from_documents(
    romeoandjuliet_doc,
    embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()

Running Chroma using direct local API.
No existing DB found in /data/chuang/QA_LangChan/chroma/test, skipping load
No existing DB found in /data/chuang/QA_LangChan/chroma/test, skipping load
Persisting DB to disk, putting it in the save folder /data/chuang/QA_LangChan/chroma/test


#### Configure LangChain QA

In [35]:
# Build the question-answering chain over the Chroma store; the source
# documents are returned alongside each answer.
qa_llm = OpenAI(temperature=0, model_name="gpt-3.5-turbo")
romeoandjuliet_qa = ChatVectorDBChain.from_llm(
    qa_llm,
    vectordb,
    return_source_documents=True,
)

In [36]:
# No earlier turns yet. The chain takes the history as a list of
# (question, answer) tuples, so start from an empty list rather than a string;
# append (query, result['answer']) to it before asking a follow-up question.
chat_history = []
query = "Have Romeo and Juliet spent the night together? Provide a verbose answer, referencing passages from the book."
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})

In [38]:
# Show the generated answer text.
answer_text = result['answer']
print(answer_text)

Yes, Romeo and Juliet have spent the night together. In Act 3, Scene 5, Romeo and Juliet say their final goodbyes before Romeo leaves for Mantua. Juliet is upset and begs Romeo to stay, but he insists that he must leave. They share a passionate kiss before Romeo departs. Later in Act 5, Scene 1, Romeo receives news that Juliet has died and decides to return to Verona to be with her. He goes to an apothecary to buy poison and says, "Well, Juliet, I will lie with thee tonight" (Act 5, Scene 1, lines 34-35). This indicates that Romeo plans to take the poison and join Juliet in death. Overall, the text suggests that Romeo and Juliet have spent the night together before their tragic end.


In [42]:
# Count how many source documents came back with the answer.
source_docs = result['source_documents']
print(len(source_docs))

4


In [47]:
# Display the first source passage, collapsing each run of three newlines into one.
first_source = result['source_documents'][0]
print(first_source.page_content.replace("\n\n\n", "\n"))



ROMEO.
Yet banished? Hang up philosophy.
Unless philosophy can make a Juliet,
Displant a town, reverse a Prince’s doom,
It helps not, it prevails not, talk no more.

FRIAR LAWRENCE.
O, then I see that mad men have no ears.

ROMEO.
How should they, when that wise men have no eyes?

FRIAR LAWRENCE.
Let me dispute with thee of thy estate.

ROMEO.
Thou canst not speak of that thou dost not feel.
Wert thou as young as I, Juliet thy love,
An hour but married, Tybalt murdered,
Doting like me, and like me banished,
Then mightst thou speak, then mightst thou tear thy hair,
And fall upon the ground as I do now,
Taking the measure of an unmade grave.

 [_Knocking within._]

FRIAR LAWRENCE.
Arise; one knocks. Good Romeo, hide thyself.

ROMEO.
Not I, unless the breath of heartsick groans
Mist-like infold me from the search of eyes.

 [_Knocking._]

FRIAR LAWRENCE.
Hark, how they knock!—Who’s there?—Romeo, arise,
Thou wilt be taken.—Stay awhile.—Stand up.

 [_Knocking._]

Run to my study.—By-and-b

#### Load data from a local directory

In [68]:
from llama_index import GPTSimpleVectorIndex, Document, SimpleDirectoryReader,GPTListIndex,  LLMPredictor, PromptHelper


In [72]:
# Locations: the folder of source documents and the file the index is saved to.
Knowledge_Base_Folder = '/data/chuang/QA_LangChan/Knowledge_Base'
Index_Save_Path = '/data/chuang/QA_LangChan/KB_Index/index.json'

# Sizing values handed to PromptHelper (and, for num_outputs, to the LLM).
max_input_size = 4096    # maximum input size
num_outputs = 512        # number of output tokens
max_chunk_overlap = 40   # maximum chunk overlap
chunk_size_limit = 600   # chunk size limit

prompt_helper = PromptHelper(
    max_input_size,
    num_outputs,
    max_chunk_overlap,
    chunk_size_limit=chunk_size_limit,
)

# Define the LLM: gpt-3.5-turbo at temperature 0, capped at num_outputs tokens.
llm_predictor = LLMPredictor(
    llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=num_outputs)
)

# Read every document in the knowledge-base folder and report the count.
documents = SimpleDirectoryReader(Knowledge_Base_Folder).load_data()
print("number of documents : {}".format(len(documents)))

number of documents : 3


In [73]:
# Build the vector index over the loaded documents with the configured
# predictor and prompt helper, then write it to Index_Save_Path.
index = GPTSimpleVectorIndex(
    documents,
    prompt_helper=prompt_helper,
    llm_predictor=llm_predictor,
)
index.save_to_disk(Index_Save_Path)
  

In [74]:
# Load the index from the saved index.json file.
# The predictor and prompt helper are passed again here so that queries on the
# reloaded index use the same gpt-3.5-turbo settings as the index that was
# built, instead of whatever the library would pick when none are supplied.
index = GPTSimpleVectorIndex.load_from_disk(
    Index_Save_Path,
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)


In [84]:
# Ask the knowledge-base index one question in "compact" response mode.
query = 'What the GFSR say about the economy this year?'
response = index.query(query, response_mode="compact")

# Echo the question, then the answer text framed by blank lines.
question_line = "Question: {}".format(query)
print(question_line)
answer_block = "".join(("\nBot says: \n\n", response.response, "\n\n\n"))
print(answer_block)

Question: What the GFSR say about the economy this year?

Bot says: 


The GFSR states that the global economy is slowly recovering from the COVID-19 pandemic, but that there are rising risks to the inflation outlook and rapidly changing views about the likely pace of monetary policy tightening. It also notes that the balance of risks to growth has tilted more firmly to the downside, and that the sharp rise in commodity prices and more prolonged supply disruptions have exacerbated preexisting inflation pressures and led to a significant rise in inflation expectations.



