In [1]:
import openai
from openai import OpenAI
import os, sys
import requests, dotenv
from dotenv import load_dotenv

In [4]:
# Load settings from a local .env file into the process environment so the
# API key never has to be written into the notebook itself.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Build the SDK client through the `openai` module instead of the bare name
# `OpenAI`: a later cell runs `from langchain_openai import OpenAI`, which
# rebinds that name to a different class. Going through the module keeps this
# cell correct when it is re-run after that import.
client = openai.OpenAI(api_key=api_key)

# Smoke test: a short multi-turn conversation whose final user question only
# makes sense if the earlier turns are taken into account.
response = client.chat.completions.create(
    model='gpt-3.5-turbo',
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
)

In [20]:
# Display the assistant's reply text from the first choice in the response.
response.choices[0].message.content

'The 2020 World Series was played at Globe Life Field in Arlington, Texas.'

In [21]:
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter

In [23]:
# Location of the contract HTML to index. Defaults to the original author's
# local copy; set the CONTRACT_HTML_PATH environment variable (e.g. in .env)
# to run the notebook on another machine without editing this cell.
contract_html_path = os.environ.get(
    'CONTRACT_HTML_PATH',
    '/home/biniyam/TenAcademy/AI-Contract-Lawyer/notebook/imdb_data/21_imdb.com.html',
)
loader = UnstructuredHTMLLoader(contract_html_path)
# NOTE: despite the singular name, `document` is what `load()` returns and is
# passed on as `documents=` to the splitter in a later cell.
document = loader.load()

In [26]:
# Character-based splitter using its default separator: aim for chunks of
# about 1000 characters and do not repeat any text between adjacent chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)

In [27]:
# Break the loaded contract into chunks for embedding. The splitter may warn
# when a piece exceeds the requested size (this run produced one of 1015).
docs = text_splitter.split_documents(document)

Created a chunk of size 1015, which is longer than the specified 1000


In [32]:
# Embedding model handed to the vector store when the chunks are indexed.
# No API key is passed here — presumably it is picked up from the environment
# populated by load_dotenv(); confirm if this cell is run in isolation.
embedding_function = OpenAIEmbeddings(model='text-embedding-3-large')

In [33]:
# Embed every chunk and index the vectors in a Chroma store.
db = Chroma.from_documents(documents=docs, embedding=embedding_function)

In [42]:
# Sanity-check the index with a plain similarity search before wiring it
# into the RAG chain.
query = 'What does it say about email communication'
res = db.similarity_search(query=query)

In [43]:
# Inspect the first hit returned by the search to judge whether the retrieved
# passage is relevant to the query.
res[0]

Document(page_content='|||Our business changes constantly. This Notice and the Terms and Conditions of Use will change also, and use of information that we gather now is subject to the Privacy Notice in effect at the time of use. We may e-mail periodic reminders of our notices and conditions, unless you have instructed us not to, but you should check our Web site frequently to see recent changes. Unless stated otherwise, our current Privacy Notice applies to all information that we have about you and your account. We stand behind the promises we make, however, and will never materially change our policies and practices to make them less protective of customer information collected in the past without the consent of affected customers.', metadata={'source': '/home/biniyam/TenAcademy/AI-Contract-Lawyer/notebook/imdb_data/21_imdb.com.html'})

In [50]:
from langchain.prompts import PromptTemplate
# Prompt for the RAG chain: {context} receives the retrieved contract text and
# {question} the user's question. The wording below is runtime text sent to
# the model and is kept exactly as written.
template = """You are a my personal private legal contract lawyer who know a lot of stuff about contracts.
            You are responsible for assisting the user based on their respective questions about a certain contract
 
 {context}
 
 Question: {question}
 Helpful answer:"""

custom_rag_prompt = PromptTemplate.from_template(template=template)

# Expose the vector store through the retriever interface the chain expects.
retriever = db.as_retriever()

In [51]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
# temperature=0 to minimise sampling randomness in the answers.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list returned by the retriever is interpolated into
    the prompt as-is, so the model sees ``Document(page_content=...,
    metadata=...)`` wrappers and local file paths instead of plain contract
    text.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The dict fans the incoming question out to both prompt variables: the
# retriever (plus formatting) fills {context}, and the question itself is
# passed through unchanged to fill {question}.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

In [52]:
# Stream the answer and echo each text fragment as soon as it arrives, with
# no separators added, so the reply builds up in place.
for chunk in rag_chain.stream("What does it say about emails?"):
    sys.stdout.write(chunk)
    sys.stdout.flush()

The contract states that the company may email periodic reminders of their notices and conditions, unless the user has instructed them not to. It is important for the user to check the website frequently for recent changes. Additionally, if the user does not want to receive email or other mail from the company, they can adjust their preferences using the User Administration pages.