In [3]:
import openai
from openai import OpenAI
import os, sys
import requests, dotenv
from dotenv import load_dotenv
from langchain_community.vectorstores import Qdrant
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser


In [4]:
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

client = OpenAI(api_key=api_key)

In [5]:
loader = UnstructuredHTMLLoader('/home/biniyam/TenAcademy/AI-Contract-Lawyer/notebook/imdb_data/21_imdb.com.html')
document = loader.load()

In [6]:
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=0)
docs = text_splitter.split_documents(documents=document)

Created a chunk of size 545, which is longer than the specified 512
Created a chunk of size 1015, which is longer than the specified 512
Created a chunk of size 786, which is longer than the specified 512
Created a chunk of size 540, which is longer than the specified 512


In [7]:
embedding_function = OpenAIEmbeddings(model='text-embedding-3-large')
db = Qdrant.from_documents(docs, embedding_function, location=":memory:", collection_name="imdb_data")

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-gWSGS***************************************lIW4. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
query = 'What does it say about email communication'
res = db.similarity_search(query)
res[0]

Document(page_content='If you do not want to receive e-mail or other mail from us, please use our User Administration pages to adjust your preferences. (If you do not want to receive legal notices from us, such as this Privacy Notice, those notices will still govern your use of IMDb, and it is your responsibility to review them for changes.)\n\nIf you do not want us to use personal information that we gather to allow third parties to personalize advertisements we display to you, please adjust your Advertising Preferences.', metadata={'source': '/home/biniyam/TenAcademy/AI-Contract-Lawyer/notebook/imdb_data/21_imdb.com.html', '_id': '99a4ba12b5224ce598fb2e26b3fc1515', '_collection_name': 'imdb_data'})

### As retriever

In [13]:
from langchain.prompts import PromptTemplate
retriever = db.as_retriever()
template = """You are a my personal private legal contract lawyer who know a lot of stuff about contracts.
            You are responsible for assisting the user based on their respective questions about a certain contract
 
 {context}
 
 Question: {question}
 Helpful answer:"""
 
custom_rag_prompt = PromptTemplate.from_template(template)

### RAG Chain

In [14]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

In [15]:
for chunk in rag_chain.stream("What does it say about emails?"):
    print(chunk, end="", flush=True)

The contract states that if you do not want to receive e-mail or other mail from the company, you can adjust your preferences using the User Administration pages. However, it is important to note that if you do not want to receive legal notices from the company, such as the Privacy Notice, those notices will still govern your use of the platform. It is your responsibility to review them for any changes.

In [1]:
import openai
from openai import OpenAI
import os, sys
import requests, dotenv
from dotenv import load_dotenv
from langchain_community.vectorstores import Qdrant
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

client = OpenAI(api_key=api_key)


def setup_chatbot_chain(query: str) -> str:
    loader = UnstructuredHTMLLoader('/home/biniyam/TenAcademy/AI-Contract-Lawyer/notebook/imdb_data/21_imdb.com.html')
    document = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=0)
    docs = text_splitter.split_documents(documents=document)
    embedding_function = OpenAIEmbeddings(model='text-embedding-3-large')
    db = Qdrant.from_documents(docs, embedding_function, location=":memory:", collection_name="imdb_data")
    
    retriever = db.as_retriever()
    
    template = """You are a my personal private legal contract lawyer who know a lot of stuff about contracts.
            You are responsible for assisting the user based on their respective questions about a certain contract
 
    {context}
    
    Question: {question}
    Helpful answer:"""
    
    custom_rag_prompt = PromptTemplate.from_template(template)
    
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | custom_rag_prompt
        | llm
        | StrOutputParser()
    )
    
    for chunk in rag_chain.stream(query):
        print(chunk, end="", flush=True)
    
setup_chatbot_chain("what does the contract talk about?")

Created a chunk of size 545, which is longer than the specified 512
Created a chunk of size 1015, which is longer than the specified 512
Created a chunk of size 786, which is longer than the specified 512
Created a chunk of size 540, which is longer than the specified 512


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-gWSGS***************************************lIW4. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

### Gen Test Data with RAGAs

In [8]:
document

[Document(page_content='|||Last Updated, December 5, 2014 . To see what has changed click here.\n\n|||IMDb knows that you care how information about you is used and shared, and we appreciate your trust that we will do so carefully and sensibly.\n\n|||By visiting IMDb, you are accepting the practices described in this Privacy Notice.\n\n|||\n\nThe information we learn from users helps us personalize and continually improve your experience at IMDb. Here are the types of information we gather.\n\n|||\n\n|||\n\n|||\n\n|||\n\n|||\n\n|||\n\nCookies are alphanumeric identifiers that we transfer to your computer\'s hard drive through your Web browser to enable our systems to recognize your browser and to provide features such as My Movies, local show times, and browsing preferences.\n\nThe "help" portion of the toolbar on most browsers will tell you how to prevent your browser from accepting new cookies, how to have the browser notify you when you receive a new cookie, or how to disable cookie

In [10]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# documents = load your documents

# generator with openai models
generator = TestsetGenerator.with_openai()

# Change resulting question type distribution
distributions = {
    simple: 0.5,
    multi_context: 0.4,
    reasoning: 0.1
}

# use generator.generate_with_llamaindex_docs if you use llama-index as document loader
testset = generator.generate_with_langchain_docs(document, 10, distributions, raise_exceptions=False) 
testset.to_pandas()

Exception in thread Thread-6:                         
Traceback (most recent call last):
  File "/home/biniyam/anaconda3/envs/fooocus-api/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/biniyam/anaconda3/envs/fooocus-api/lib/python3.10/site-packages/ragas/executor.py", line 75, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/home/biniyam/anaconda3/envs/fooocus-api/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/home/biniyam/anaconda3/envs/fooocus-api/lib/python3.10/site-packages/ragas/executor.py", line 63, in _aresults
    raise e
  File "/home/biniyam/anaconda3/envs/fooocus-api/lib/python3.10/site-packages/ragas/executor.py", line 58, in _aresults
    r = await future
  File "/home/biniyam/anaconda3/envs/fooocus-api/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/home/biniyam/anacond

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exception=False` incase you want to show only a warning message instead.