In [1]:

!python3 -m pip install --upgrade langchain deeplake openai tiktoken



In [3]:
import os
import getpass
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores import DeepLake
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI

In [4]:
import openai
import os
import IPython
from dotenv import load_dotenv
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [11]:
ACTIVELOOP_TOKEN = os.getenv('ACTIVELOOP_TOKEN')
org = os.getenv('ACTIVELOOP_ORG')

embeddings = OpenAIEmbeddings()

dataset_path = 'hub://' + org + '/data'
print(dataset_path)

hub://will-nova/data


In [5]:
my_contract = "../sample_pennstate_lease.txt"

In [25]:
with open(my_contract) as f:
    contract = f.read()
# text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
pages = text_splitter.split_text(contract)
texts = text_splitter.create_documents(pages)

print (texts)

[Document(page_content='Sample Lease or Rental Agreement from https://altoona.psu.edu/offices-divisions/student-affairs/off-campus-living/sample-lease-rental-agreement\n\nBy this agreement made at [location], PA on the [day of the month] day of [month], [year], the Landlord [landlord name] and the Tenant [tenant name] agree as follows:', metadata={}), Document(page_content='1. Property\n    The landlord hereby leases to Tenant for the term of this agreement\n        the property located at: [property address]\n        And\n        the furniture and appliances on that property:\n        [description of furniture and appliances, if applicable]\n    2. Term', metadata={}), Document(page_content='[description of furniture and appliances, if applicable]\n    2. Term\n    The term of this lease is for [duration of lease], beginning on [start date], and ending on [end date]. At the expiration of said term, the lease will automatically be renewed for a period of one month unless either party n

In [26]:
# dataset_path = 'hub://'+org+'/data'
embeddings = OpenAIEmbeddings()
db = DeepLake.from_documents(texts, embeddings, dataset_path=dataset_path, overwrite=True)

Your Deep Lake dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/will-nova/data
hub://will-nova/data loaded successfully.








Evaluating ingest: 100%|██████████████████████████████████████| 1/1 [00:13<00:00
|

Dataset(path='hub://will-nova/data', tensors=['embedding', 'ids', 'metadata', 'text'])

  tensor     htype     shape      dtype  compression
  -------   -------   -------    -------  ------- 
 embedding  generic  (16, 1536)  float32   None   
    ids      text     (16, 1)      str     None   
 metadata    json     (16, 1)      str     None   
   text      text     (16, 1)      str     None   


 

In [20]:
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)

retriever = db.as_retriever()
retriever.search_kwargs['distance_metric'] = 'cos'
retriever.search_kwargs['k'] = 4

qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/will-nova/data





hub://will-nova/data loaded successfully.





Deep Lake Dataset in hub://will-nova/data already exists, loading from the storage
Dataset(path='hub://will-nova/data', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])

  tensor     htype     shape      dtype  compression
  -------   -------   -------    -------  ------- 
 embedding  generic  (17, 1536)  float32   None   
    ids      text     (17, 1)      str     None   
 metadata    json     (17, 1)      str     None   
   text      text     (17, 1)      str     None   




In [21]:
query = input("Enter query:")
ans = qa({"query": query})

print("\nCustom-ChatGPT-app answer: " + ans['result'] + "\n\n")
print(ans)


Enter query: who is king charles?



Custom-ChatGPT-app answer:  I don't know.


{'query': 'who is king charles?', 'result': " I don't know.", 'source_documents': [Document(page_content='Signatures\n[include signatures and printed names of landlord and tenant]', metadata={}), Document(page_content='Landlord agrees to regularly maintain the building and grounds in a clean, orderly, and neat manner. Landlord further agrees not to maintain a public nuisance and not to conduct business or commercial activities on the premises.', metadata={}), Document(page_content='Any alternations to this Agreement shall be in writing and signed by all parties. We, the undersigned, agree to this Lease:', metadata={}), Document(page_content='on the checklist after the Tenant returns possession to the Landlord and shall give a copy to the other party.', metadata={})]}


In [23]:
query = input("Enter query:")
ans = qa({"query": query})

print("\nCustom-ChatGPT-app answer: " + ans['result'] + "\n\n")
print(ans)


Enter query: who pays for water?



Custom-ChatGPT-app answer:  The Tenant agrees to pay for water.


{'query': 'who pays for water?', 'result': ' The Tenant agrees to pay for water.', 'source_documents': [Document(page_content='water\n        garbage collection\n        snow removal\n        other\n    5. Deposit\n    Tenant has paid a deposit of $[deposit amount] of which Landlord acknowledges receipt. Upon regaining possession of the property, Landlord shall refund to Tenant the total amount of the deposit less any damages to the property, normal wear and tear expected, and less any unpaid rent.\n    6. Refund Procedures', metadata={}), Document(page_content='3. Rent\n    Tenant agrees to pay rent in the amount of [monthly rent amount] per month, each payment due on the [day of the month] day of each month and to be made at:\n    [Landlord address]\n    4. Utilities/Services\n    Landlord agrees to provide the utilities and services indicated: [circle all utilities and services that apply]\n        electricity\n     

In [24]:
query = input("Enter query:")
ans = qa({"query": query})

print("\nCustom-ChatGPT-app answer: " + ans['result'] + "\n\n")
print(ans)


Enter query: for what reasons and with what notice can landlord enter the property?



Custom-ChatGPT-app answer:  The Landlord may enter the property with at least 24 hours notice to inspect, repair, or show the premises to prospective buyers or tenants.


{'query': 'for what reasons and with what notice can landlord enter the property?', 'result': ' The Landlord may enter the property with at least 24 hours notice to inspect, repair, or show the premises to prospective buyers or tenants.', 'source_documents': [Document(page_content='The Landlord may not enter the premises without having given tenant at least 24 hours notice, except in case of emergency. Landlord may enter to inspect, repair, or show the premises to prospective buyers or tenants if notice is given.\n        Tenant agrees to occupy the premises and shall keep the same good condition, and shall not make any alternations thereon without the written consent of the landlord.', metadata={}), Document(page_content='Landlord agrees to regularly maintain the building and grounds in a clean, orderly, and neat ma