In [98]:
from pathlib import Path
from pydantic.v1 import BaseModel, Field

from langchain.chat_models import ChatOpenAI
from langchain.agents import Tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)


# Plan

1. Use document comparison against the best summaries
2. Try prompt engineering for comparison

The section summaries seem to do the best job of capturing the details of the contracts, so I will use them.

In [13]:
# Locations used throughout the notebook, all relative to the working directory:
# comparison results are written under data/summary_comparison, and the two
# per-section summaries being compared are read from data/summaries.
save_dir = Path('data') / 'summary_comparison'
summaries_dir = Path('data') / 'summaries'
sections_2015_fname = summaries_dir / "2015_sections_summary.txt"
sections_2023_fname = summaries_dir / "2023_sections_summary.txt"


# Document Comparison Tool

In [6]:
# Argument schema handed to each retrieval Tool (via `args_schema`): the tool
# input is a single string field named `question`.
# Plain comments are used instead of a class docstring so the model declaration
# itself stays exactly as written.
class DocumentInput(BaseModel):
    question: str = Field()  # the question to ask about one document


# Chat model shared by the per-document retrieval chains below and by the
# agent that is initialised with `llm=llm` later in the notebook.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

# One entry per document to compare: "name" becomes the tool name the agent
# sees, "path" is the summary text file that backs the tool.
tools = []
files = [
    {
        "name": "2015-TandCS",
        "path": str(sections_2015_fname),
    },
    {
        "name": "2023-TandCS",
        "path": str(sections_2023_fname),
    },
]

# Neither the splitter nor the embedding client depends on the file being
# processed, so build them once instead of once per loop iteration.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
embeddings = OpenAIEmbeddings()

for file in files:
    loader = TextLoader(file["path"])
    # NOTE(review): load_and_split() already chunks the text with LangChain's
    # default splitter, so the content is split a second time on the next
    # line. Kept as-is to preserve the existing chunking; confirm it is intended.
    pages = loader.load_and_split()
    docs = text_splitter.split_documents(pages)
    retriever = FAISS.from_documents(docs, embeddings).as_retriever()

    # Wrap each retriever in a question-answering chain and expose it as a Tool
    # whose single argument is described by DocumentInput.
    tools.append(
        Tool(
            args_schema=DocumentInput,
            name=file["name"],
            description=f"useful when you want to answer questions about {file['name']}",
            func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),
        )
    )

In [7]:
# Build an OpenAI-functions agent that can call either document tool; verbose
# mode prints the tool invocations as the agent runs.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)

In [8]:
# Ask the agent to contrast the two documents, referring to them by tool name.
query = "What are the key differences between the 2015-TandCS and 2023-TandCS?"
agent_inputs = {"input": query}
output = agent(agent_inputs)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `2015-TandCS` with `{'question': 'key differences'}`


[0m[36;1m[1;3m{'query': 'key differences', 'result': 'Some key differences between the iTunes Store Service agreement and the Mac App Store, App Store, and iBooks Store agreement include:\n\n1. Scope: The iTunes Store Service agreement specifically covers the use of the iTunes Store Service provided by iTunes SARL, while the Mac App Store, App Store, and iBooks Store agreement covers the use of the Mac App Store, App Store, and iBooks Store, as well as purchasing licenses from these stores.\n\n2. Content: The iTunes Store Service agreement covers details about content availability, use of content, usage rules, content security, and service security. The Mac App Store, App Store, and iBooks Store agreement includes terms and conditions for using iBooks Store Products, including usage rules and restrictions.\n\n3. Age Requirement: Both agreements state that us

In [15]:
# Save the agent's final answer. The target directory is created first so the
# write also succeeds on a fresh checkout, and the encoding is pinned so the
# file does not depend on the platform's default text encoding.
comparison_output_path = Path(save_dir, 'document_comparison_output.txt')
comparison_output_path.parent.mkdir(parents=True, exist_ok=True)
with open(comparison_output_path, 'w', encoding='utf-8') as f:
    f.write(output['output'])

# Prompt Engineering - Davinci

In [66]:
# Completion-style model for the prompt-engineering comparison; temperature 0
# keeps the sampling as deterministic as the API allows.
openai = OpenAI(model="text-davinci-003", temperature=0)

In [67]:
# Load both section summaries fully into memory as text; they are substituted
# into the comparison prompt further down.
section_2015 = sections_2015_fname.read_text()
section_2023 = sections_2023_fname.read_text()

In [114]:
# Comparison instructions sent to the model. The 2015 text is delimited by
# triple backticks and the 2023 text by triple hash symbols so the model can
# tell the two inputs apart. `{text1}` / `{text2}` are filled in at format time.
# The same string is reused for the chat model via HumanMessagePromptTemplate.
# Spelling and grammar in the instructions were corrected ("relevant",
# "within", "specific") and stray whitespace-only lines were removed; the
# placeholders and delimiters are unchanged.
prompt = """Compare the two texts provided below and provide the key differences between them.
The output should be relevant for legal experts.
The first text includes information from 2015 and is contained within three backticks (```).
The second text contains information from 2023 and is contained within three hash symbols (###).
Your answer should be detailed and specific, formatted as bullet points and refer back to the text names when describing the differences.


2015 text: ```{text1}```

2023 text: ###{text2}###
"""

prompt_template = PromptTemplate(
    input_variables=["text1", "text2"],
    template=prompt
)

In [92]:
# Substitute the two summaries into the template to get the final prompt string.
prompt_template_formatted = prompt_template.format(
    text1=section_2015,
    text2=section_2023,
)

In [93]:
# Send the fully formatted prompt to the completion model; the result is the
# text that is printed and written to disk in the cells that follow.
prompt_engineering_output = openai(prompt_template_formatted)

In [94]:
# print() rather than a bare expression so newlines in the answer render as
# line breaks instead of escaped "\n" sequences.
print(prompt_engineering_output)



Key Differences Between 2015 and 2023 Texts:
- 2015 text: 
    - Outlines rules and responsibilities of family sharing and electronic contracting
    - Only available in the United Kingdom
    - Refunds available as per the refund policy
    - HD products can only be viewed on HD-capable devices
    - iTunes reserves the right to modify, suspend, or discontinue the service anytime without liability
    - Governed by English law
- 2023 text:
    - Outlines terms and conditions for using Apple's Services such as the App Store, Apple Music, Apple TV, among others
    - Family Sharing feature allows up to six family members to share eligible subscriptions
    - Ask to Buy feature requires organizers to approve transactions initiated by underage family members
    - Series Pass and Multi-Pass allow users to purchase and receive TV content as it becomes available
    - App licenses are provided by Apple or a third party developer, and apps may offer In-App Purchases
    - iCloud Music Libr

In [95]:
# Save the davinci answer. The target directory is created first so the write
# also succeeds on a fresh checkout, and the encoding is pinned so the file
# does not depend on the platform's default text encoding.
davinci_output_path = Path(save_dir, 'prompt_engineering_output_davinci.txt')
davinci_output_path.parent.mkdir(parents=True, exist_ok=True)
with open(davinci_output_path, 'w', encoding='utf-8') as f:
    f.write(prompt_engineering_output)

# Prompt Engineering - GPT

In [116]:
# Reuse the same comparison instructions as a single human turn for the chat model.
human_message_prompt = HumanMessagePromptTemplate.from_template(prompt)
message_templates = [human_message_prompt]
chat_prompt = ChatPromptTemplate.from_messages(message_templates)


In [117]:
# Fill the chat template with both summaries, convert it to a message list and
# send it to the chat model defined earlier in the notebook.
formatted_chat_prompt = chat_prompt.format_prompt(text1=section_2015, text2=section_2023)
formatted_messages = formatted_chat_prompt.to_messages()
gpt_prompt_engineering_output = llm(formatted_messages)

In [120]:
# The chat model's reply is a message object; its text lives in `.content`.
print(gpt_prompt_engineering_output.content)

Differences between the 2015 and 2023 texts relevant for legal experts:

- The 2015 text specifically refers to the iTunes Store Service provided by iTunes SARL, while the 2023 text outlines terms and conditions for using Apple's Services such as the App Store, Apple Music, and Apple TV, among others.
- The 2015 text states that the service is only available in the United Kingdom, while the 2023 text mentions that the services are available in the user's country of residence.
- The 2015 text includes specific details about the iTunes Store, such as purchasing licenses from the Mac App Store, App Store, and iBooks Store, while the 2023 text provides a more general overview of Apple's Services.
- The 2015 text mentions the requirement for users to have an account created by a parent or legal guardian if they are under 13 years old, while the 2023 text states that users must be at least 18 years old to be the family organizer for Family Sharing.
- The 2015 text includes information about 

In [121]:
# Save the chat model's answer. The target directory is created first so the
# write also succeeds on a fresh checkout, and the encoding is pinned so the
# file does not depend on the platform's default text encoding.
gpt_output_path = Path(save_dir, 'prompt_engineering_output_gpt.txt')
gpt_output_path.parent.mkdir(parents=True, exist_ok=True)
with open(gpt_output_path, 'w', encoding='utf-8') as f:
    f.write(gpt_prompt_engineering_output.content)