In [None]:
%pip install langchain
%pip install openai

In [25]:
%pip install chromadb

Collecting chromadb
  Downloading chromadb-0.4.13-py3-none-any.whl (437 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m437.8/437.8 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp311-cp311-macosx_11_0_arm64.whl (198 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.7/198.7 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.103.2-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)
  Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.5/59.5 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.0.2-py2.py3-

In [125]:
import datetime
# import time

In [1]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI
# from langchain.llms import OpenAI


In [145]:
# Create the chat model that the QA chains below are built on: OpenAI's gpt-4 with temperature=0.
chat = ChatOpenAI(model_name='gpt-4', temperature=0)

# llm = OpenAI(model_name='gpt-4', temperature=0.2)

In [182]:
def escape_markdown(md_text):
    """Return ``md_text`` with characters that cause trouble downstream neutralised.

    Every "#" and every double-quote character is removed, and each newline is
    rewritten as the two-character sequence backslash + "n". Add further
    (old, new) pairs to ``substitutions`` when more rules are needed.
    """
    substitutions = (
        ("#", ""),
        ('"', ''),
        ("\n", "\\n"),
    )
    for needle, replacement in substitutions:
        md_text = md_text.replace(needle, replacement)
    return md_text

# Function to split a markdown file into sections by header of 1st level (e.g. # Header)

def split_md_sections(filename, encoding="utf-8"):
    """Split a markdown file into sections at each first-level header.

    A new section starts at every line that begins with "# " (hash + space).
    Any text that precedes the first such header is returned as its own leading
    section. Deeper headers ("## ", "### ", ...) stay inside the section they
    appear in. Lines inside fenced code blocks are not treated specially.

    Args:
        filename: Path of the markdown file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that non-ASCII characters (e.g. "®" or curly quotes) are decoded
            the same way on every platform instead of depending on the locale.

    Returns:
        A list of strings, one per section, each including its header line and
        original line endings. An empty file yields an empty list.
    """
    sections = []
    current_lines = []

    with open(filename, 'r', encoding=encoding) as f:
        for line in f:
            # A first-level header closes the section collected so far (if any).
            if line.startswith("# ") and current_lines:
                sections.append("".join(current_lines))
                current_lines = []
            current_lines.append(line)

    # Flush the trailing section.
    if current_lines:
        sections.append("".join(current_lines))

    return sections

# Example usage
sections = split_md_sections("data/RecRoom_complete.md")
sections[0]

'# Privacy Policy\n\nEffective: July 25, 2023\n\nRec Room Inc., a Delaware corporation ("Company", "we", "our", and their derivatives) provides the websites, [http://www.recroom.com](http://www.recroom.com/), [http://www.rec.net](http://www.rec.net/), and the subdomains of each of the foregoing (collectively, the "Website") and the Rec Room® video game (the "Game" and, with the Website, the "Services").\n\nThis Policy sets forth how we collect, use, protect, store, and otherwise process your Personal Information (defined below). This Policy does NOT apply to information we collect offline or you provide to or is collected by any third party (except as otherwise provided below).\n\nFor our practices regarding children, please see the Children\'s section in Section 2 below.\n\n'

# Create a vector store of the legal code

Adapted from the LangChain guide: https://python.langchain.com/docs/use_cases/question_answering/integrations/openai_functions_retrieval_qa

In [22]:
# Load the legal code (the file named "CCPA", decoded as UTF-8) into LangChain documents,
# the format expected by the token text splitter used afterwards.

from langchain.document_loaders import TextLoader
loader = TextLoader("CCPA", encoding="utf-8")
documents = loader.load()


In [23]:
# Split the legal code into overlapping chunks (chunk_size=500, chunk_overlap=25;
# presumably measured in tokens by TokenTextSplitter - confirm).

from langchain.text_splitter import TokenTextSplitter
text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=25)
legal_code_splits = text_splitter.split_documents(documents)

# Tag every chunk with a source id of the form "<index>-pl". These ids are what the
# document prompt prints as "Source:" and what the answers list under "sources".
for i, text in enumerate(legal_code_splits):
    text.metadata["source"] = f"{i}-pl"

In [26]:
# Embed the legal code splits and index them in a Chroma vector store.
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

# OpenAI embedding model used for the chunks handed to Chroma below.
embed_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# NOTE(review): no persist directory is passed, so presumably the store lives only in
# this kernel session and is rebuilt (re-embedded) on every run - confirm.
legal_code_embedded = Chroma.from_documents(legal_code_splits, embed_model)


In [204]:
# Import Taylor's retriever

from src.OfCounselRetriever import SimilarityOfCounselRetriever

In [131]:
from src.tracking_inputs_outputs import DbInputsOutputs

db_inputs_outputs = DbInputsOutputs("data/tracking_inputs_outputs.csv")
db_inputs_outputs.print_data()

Database file does not exist.


# Create the retrievers and QA chains

In [205]:
# Create the question-answering chains: one LLM chain that answers with sources,
# wrapped so that retrieved legal-code chunks are inserted into its prompt.

from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain.chains import create_qa_with_sources_chain
from langchain.chains import RetrievalQA


# Default variant: the chain's result is a JSON-formatted string with "answer" and
# "sources" keys (see the printed answers further down in this notebook).
qa_chain = create_qa_with_sources_chain(chat)

# Pydantic variant: the result is parsed into an AnswerWithSources object. Parsing
# raises a ValidationError when the model's text is not strictly valid JSON.
qa_chain_pydantic = create_qa_with_sources_chain(chat, output_parser="pydantic")

# How each retrieved chunk is rendered before it is placed into the prompt:
# its text followed by the "<index>-pl" source id stored in the chunk metadata.
doc_prompt = PromptTemplate(
    template="*** Relevant piece from the legal code*** \n {page_content}\nSource: {source}",
    input_variables=["page_content", "source"],
)

# Feed the rendered chunks to the LLM chain through its "context" variable.
final_qa_chain = StuffDocumentsChain(
    llm_chain=qa_chain,
    document_variable_name="context",
    document_prompt=doc_prompt,
)

final_qa_chain_pydantic = StuffDocumentsChain(
    llm_chain=qa_chain_pydantic,
    document_variable_name="context",
    document_prompt=doc_prompt,
)

# Baseline: retrieve from the Chroma store built from the legal code.
# NOTE(review): 'fetch_k' presumably only has an effect with MMR search, not with the
# default similarity search - confirm whether it is needed here.
retrieval_qa_basic = RetrievalQA(
    retriever=legal_code_embedded.as_retriever(search_kwargs={"k": 5, 'fetch_k': 30}),
    combine_documents_chain=final_qa_chain
)

# NOTE(review): this retriever uses the default search settings, unlike
# retrieval_qa_basic above, so the two "basic" chains may not see the same chunks -
# confirm this difference is intended.
retrieval_qa_basic_pydantic = RetrievalQA(
    retriever=legal_code_embedded.as_retriever(),
    combine_documents_chain=final_qa_chain_pydantic
)

# Same answering chain, but with the project's own SimilarityOfCounselRetriever
# (the "Taylor" retriever in the comparisons below).
retrieval_qa = RetrievalQA(
    retriever=SimilarityOfCounselRetriever(),
    combine_documents_chain=final_qa_chain
)

# Run the chain

In [61]:
# simple test

query = "What are the General Duties of Businesses that Collect Personal Information?"

print(retrieval_qa.run(query))

'{\n"answer": "The general duties of businesses that collect personal information, according to the California Consumer Privacy Act of 2018, include:\n\n1. Informing consumers at or before the point of collection about the categories of personal information to be collected, the purposes for which they are collected or used, and whether that information is sold or shared. \n\n2. If sensitive personal information is collected, the business must inform consumers about the categories of sensitive personal information to be collected, the purposes for which they are collected or used, and whether that information is sold or shared.\n\n3. The business must disclose the length of time it intends to retain each category of personal information, including sensitive personal information, or if that is not possible, the criteria used to determine that period.\n\n4. If the business acts as a third party controlling the collection of personal information, it may satisfy its obligations by providing

In [None]:
# Harder test: Now pass the recroom notice with the pre-prompt

In [203]:
pre_prompt_analysis_only = """ACT as a corporate attorney whose job is to check the compliance of the company's policy with the state's new laws.

Your task is to identify if the provided policy section complies with the new law.

If the policy section complies with the new law, then say so and do nothing more.

Otherwise, if the policy section does not comply with the law, make a general assessment how bad is the violation, and make a numbered list of what is missing from the policy or what is wrong with the policy, prioritizing the most important issues first. Make sure to mention the name and/or the number of the policy section you are assessing.

Note on time limits: if the policy promises shorter times to react or respond to something than the law requires, eg the company responds within 30 days while 45 days is required by law, it is NOT a violation since sooner is better, and 30 is less than 45.

For example, if the policy is "The company's address is 123 Main Street, New York, NY 10001. The company's phone number is 212-555-1234." and the new law is "You have to specify the company's address, phone number, and email address.", then the missing part is "company's email", and so the policy is not fully compliant.

As another example, if the entire policy is "The company may collect the following types of personal information: name, address, phone number, email address, and credit card number." and the new law is "You have to specify a mechanism for consumers to delete their personal information.", then the missing part is "mechanism for consumers to delete their personal information", and so the policy is not fully compliant.

As another example, if the policy is "the company will respond to a verifiable consumer request within 30 days of its receipt" and the law is "the company is required to respond within 45 days of receiving a verifiable consumer request", then the policy is fully compliant because 30 is less than 45.
 """


In [86]:
# read file "recroom notice" into a string
# Decode explicitly as UTF-8: the notice contains curly quotes, and the platform's
# default encoding is not guaranteed to be UTF-8.
with open('data/recroom notice', 'r', encoding='utf-8') as f:
    policy_notice = f.read()
print(policy_notice)

Rec Room Inc., a Delaware corporation (“Company”, “we”, “us”, “our”), may collect and use the below categories of your Personal Information. Third parties such as Greenhouse also may collect such Personal Information on our behalf. “Personal Information” means information that identifies, relates to, describes, is reasonably capable of being associated with, or could reasonably be linked, directly or indirectly, with you.
* Full name
* Home address
* Telephone number
* Email address
* Resume information
* Interview feedback
* Compensation expectations
* Authorization to work in the United States and/or visa requirements, if applicable
The Company collects the above categories of Personal Information to use or disclose as appropriate to:
* Recruit and evaluate job applicants for employment
* Comply with all applicable laws and regulations
* Perform data analytics
* Exercise or defend the legal rights of the Company
If you have any questions about this Notice or need to access this Notic

In [70]:
# Harder test: the compliance pre-prompt followed by the full text of the recroom notice.
query = pre_prompt_analysis_only + f"""

*** Policy ***
{policy_notice}

"""

# Run the prompt through the chain that uses SimilarityOfCounselRetriever.
answer1 = retrieval_qa.run(query)
print(answer1)

{
"answer": "The policy does not fully comply with the new law. Here are the main issues:

1. The policy does not provide a mechanism for consumers to delete their personal information, as required by Section 1798.105 of the new law.
2. The policy does not provide a mechanism for consumers to correct inaccurate personal information, as required by Section 1798.106 of the new law.
3. The policy does not specify two or more designated methods for submitting requests for information, deletion, or correction. The law requires at least a toll-free telephone number and an email address for such requests, as per Section 1798.130.
4. The policy does not mention any measures to ensure compliance with the California Consumer Privacy Act of 2018, such as recordkeeping requirements or the appointment of a Chief Privacy Auditor.
5. The policy does not mention any measures to promote public awareness and understanding of the risks, rules, responsibilities, safeguards, and rights in relation to the c

In [148]:
# Next test: section 9 of recroom policy
RecRoom_policy_splits = split_md_sections("data/RecRoom_complete.md")
policy_section = RecRoom_policy_splits[9]
print(policy_section)

# 9. Additional Notice for California Residents

The following applies to California residents pursuant to the California Consumer Privacy Act of 2018 ("CCPA"):

- In the preceding 12 months, the Company may have disclosed the categories of Personal Information listed in Section 2 above to our service providers listed in Section 6 above for business purposes.
- In the preceding 12 months, the Company has not sold Personal Information. The Company only discloses Personal Information to service providers.
- You have the right to request that the Company disclose certain information to you about our collection and use of your Personal Information over the past 12 months. Once we receive and confirm your verifiable consumer request, we will disclose to you, to the extent retained by us:
  - The categories of Personal Information we collected about you.
  - The categories of sources for the Personal Information we collected about you.
  - Our business or commercial purpose for collecting or

In [206]:
# Build the full prompt: the compliance pre-prompt followed by the original section 9.
query = pre_prompt_analysis_only + f"""

*** Policy ***
{policy_section}

"""

# Send the identical prompt through both retrievers so the answers can be compared.
answer2_basic = retrieval_qa_basic.run(query)

answer2 = retrieval_qa.run(query)


In [207]:
print("BASIC STORE:\n" + answer2_basic)
print("TAYLOR STORE:\n" + answer2)

BASIC STORE:
{
  "answer": "The policy section does not comply with the new law. The violation is significant as it lacks several key requirements outlined in the law. Here are the main issues, prioritized by importance:\n\n1. The policy does not provide two or more designated methods for submitting requests for information disclosure, deletion, or correction as required by Section 1798.130(a)(1)(A) of the law. At a minimum, a toll-free telephone number or an email address (for businesses operating exclusively online) should be provided.\n\n2. The policy does not mention any mechanism for consumers to delete or correct their personal information, which is required by Sections 1798.105 and 1798.106 respectively.\n\n3. The policy does not specify a time frame within which the company will respond to a verifiable consumer request, which according to Section 1798.130(a)(2)(A) should be within 45 days of receiving the request.\n\n4. The policy does not mention any measures to ensure that th

In [208]:
# save inputs and outputs to disk
db_inputs_outputs.add_data({'query': query,
                            'policy_descr': "Section 9 original",
                            'llm': "gpt-4, temperature 0",
                            'retriever': "Basic retriever",
                            'answer': answer2_basic,
                            'notes': "",
                            "timestamp":  datetime.datetime.now()
                            })

db_inputs_outputs.add_data({'query': query,
                            'policy_descr': "Section 9 original",
                            'llm': "gpt-4, temperature 0",
                            'retriever': "Taylor retriever Similarity",
                            'answer': answer2,
                            'notes': "",
                            "timestamp":  datetime.datetime.now()
                            })

In [209]:
db_inputs_outputs.print_data()

                                               query        policy_descr  \
0  ACT as a corporate attorney whose job is to ch...  Section 9 original   
1  ACT as a corporate attorney whose job is to ch...  Section 9 original   
2  ACT as a corporate attorney whose job is to ch...  Section 9 original   

                     llm                    retriever  \
0  gpt-4, temperature .2              Basic retriever   
1   gpt-4, temperature 0              Basic retriever   
2   gpt-4, temperature 0  Taylor retriever Similarity   

                                              answer  notes  \
0  {\n  "answer": "The provided policy appears to...    NaN   
1  {\n  "answer": "The policy section does not co...    NaN   
2  {\n"answer": "The policy section 10 does not c...    NaN   

                    timestamp  
0  2023-10-06 16:02:53.385873  
1  2023-10-06 19:14:06.687054  
2  2023-10-06 19:14:06.689044  


In [210]:
# Next test is the redacted section 9 of the RecRoom policy
RecRoom_policy_redacted_splits = split_md_sections("data/RecRoom_complete_redacted.md")
policy_section_redacted = RecRoom_policy_redacted_splits[8]
print(policy_section_redacted)


# 8. Additional Notice for California Residents

The following applies to California residents pursuant to the California Consumer Privacy Act of 2018 ("CCPA"):

- In the preceding 12 months, the Company may have disclosed the categories of Personal Information listed in Section 2 above to our service providers listed in Section 6 above for business purposes.
- In the preceding 12 months, the Company has not sold Personal Information. The Company only discloses Personal Information to service providers.
- You have the right to request that the Company disclose certain information to you about our collection and use of your Personal Information over the past 12 months. Once we receive and confirm your verifiable consumer request, we will not disclose to you, to the extent retained by us:
  - The categories of Personal Information we collected about you.
  - The categories of sources for the Personal Information we collected about you.
  - Our business or commercial purpose for collectin

In [211]:
# Build the full prompt: the compliance pre-prompt followed by the redacted section.
query = pre_prompt_analysis_only + f"""

*** Policy ***
{policy_section_redacted}

"""

# Send the identical prompt through both retrievers so the answers can be compared.
answer3_basic = retrieval_qa_basic.run(query)

answer3 = retrieval_qa.run(query)


In [212]:
print("BASIC STORE:\n" + answer3_basic)
print("TAYLOR STORE:\n" + answer3)

BASIC STORE:
{
"answer": "The policy section titled '8. Additional Notice for California Residents' is not fully compliant with the California Consumer Privacy Act of 2018 (CCPA). 

1. The policy does not specify the methods for submitting requests for information, deletion, or correction. According to Section 1798.130 of the CCPA, businesses should provide at least two methods for submitting such requests, including a toll-free telephone number and, if the business operates exclusively online, an email address.

2. The policy does not mention any time frame within which the company will respond to a verifiable consumer request. As per Section 1798.130 of the CCPA, businesses are required to respond within 45 days of receiving a verifiable consumer request.

3. The policy does not provide any information about the company's recordkeeping requirements to ensure compliance with the CCPA, as required by Section 1798.185.

4. The policy does not mention the appointment of a Chief Privacy A

In [213]:
# Record the inputs and outputs of both runs on the redacted section.
# NOTE: "Section 9" is the section's number in the original policy; in the redacted
# file the same section carries the number 8 and was loaded from index 8.
db_inputs_outputs.add_data({'query': query,
                            'policy_descr': "Section 9 redacted",
                            'llm': "gpt-4, temperature 0",
                            'retriever': "Basic retriever",
                            'answer': answer3_basic,
                            'notes': "",
                            "timestamp":  datetime.datetime.now()
                            })

db_inputs_outputs.add_data({'query': query,
                            'policy_descr': "Section 9 redacted",
                            'llm': "gpt-4, temperature 0",
                            'retriever': "Taylor retriever Similarity",
                            'answer': answer3,
                            'notes': "",
                            "timestamp":  datetime.datetime.now()
                            })

## Parse and view the chain's answer

In [97]:
# parse the answer
# pick the answer from the chain's output, which to parse
answer_n = answer2_basic

import json

# The model frequently emits raw newlines inside the "answer" string, which strict JSON
# forbids ("Invalid control character at ..."). strict=False makes the decoder accept
# control characters inside strings. Other deviations from JSON (e.g. unescaped double
# quotes inside the answer) will still raise json.JSONDecodeError.
data_dict = json.loads(answer_n, strict=False)

answer = data_dict.get('answer', '')
sources_list = data_dict.get('sources', [])

print(answer)


The policy is not fully compliant with the new law. The non-compliance is moderate. Here are the missing parts or wrong parts in the policy, prioritized by importance:

1. The policy states that the company will discriminate against consumers for exercising their CCPA rights, which is a direct violation of the law. The company should not deny services, charge different prices, provide different quality of services, or suggest different rates or quality of services for consumers exercising their CCPA rights.

2. The policy does not provide a mechanism for consumers to opt-out of the sale or sharing of their personal information. The law requires businesses to define requirements and technical specifications for an opt-out preference signal.

3. The policy does not mention the company's responsibility to cooperate with law enforcement agencies and government agencies in certain situations, as required by the law.

4. The policy does not mention the company's responsibility to perform a c

In [77]:
sources_list

['4-pl', '10-pl', '11-pl', '32-pl']

In [78]:
# View the sources

# Put sources manually here if the parsing doesn't work
# sources_list = ["63-pl", "33-pl", "54-pl", "10-pl"]

# Find text chunks by source numbers

# Map each "<index>-pl" source id to the text of the corresponding legal-code chunk.
# The id is also (re)written into the chunk's metadata, using the same scheme as when
# the chunks were created.
source_to_text = {}
for position, chunk in enumerate(legal_code_splits):
    chunk_id = f"{position}-pl"
    chunk.metadata['source'] = chunk_id
    source_to_text[chunk_id] = chunk.page_content

# Look up the text of every cited source; ids without a matching chunk yield "Not found".
text_chunks = list(map(lambda cited: source_to_text.get(cited, "Not found"), sources_list))



In [79]:
# view the sources

print(text_chunks[1])

 Proposition 24.)

1798.130.  Notice, Disclosure, Correction, and Deletion Requirements
(a) In order to comply with Sections 1798.100, 1798.105, 1798.106, 1798.110, 1798.115, and 1798.125, a business shall, in a form that is reasonably accessible to consumers:
(1) (A) Make available to consumers two or more designated methods for submitting requests for information required to be disclosed pursuant to Sections 1798.110 and 1798.115, or requests for deletion or correction pursuant to Sections 1798.105 and 1798.106, respectively, including, at a minimum, a toll-free telephone number. A business that operates exclusively online and has a direct relationship with a consumer from whom it collects personal information shall only be required to provide an email address for submitting requests for information required to be disclosed pursuant to Sections 1798.110 and 1798.115, or for requests for deletion or correction pursuant to Sections 1798.105 and 1798.106, respectively.

(B) If the busin

# All sections of policy

For each section of the given policy, run it through the chain and save the answer together with its sources. Then condense the per-section answers into a single summary.

In [196]:
import pickle

In [140]:
def run_section_through_chain(pre_prompt: str, policy_section: str, retrieval_qa: RetrievalQA):
    """Ask the retrieval chain to assess one policy section.

    The prompt is ``pre_prompt``, a blank line, a "*** Policy section ***" marker and
    then ``policy_section``. The marker line and the first line of the section are
    each indented by four spaces, and the prompt ends with a blank line followed by
    four spaces.

    Args:
        pre_prompt: Instructions placed in front of the policy text.
        policy_section: Text of the policy section to assess.
        retrieval_qa: Chain whose ``run`` method is called with the assembled prompt.

    Returns:
        Whatever ``retrieval_qa.run`` returns for the assembled prompt.
    """
    prompt_parts = [
        f"{pre_prompt}\n",
        "\n",
        "    *** Policy section ***\n",
        f"    {policy_section}\n",
        "\n",
        "    ",
    ]
    return retrieval_qa.run("".join(prompt_parts))

a = run_section_through_chain(pre_prompt_analysis_only, policy_section, retrieval_qa_basic)
print(a)

{
  "answer": "The provided policy appears to be largely compliant with the California Consumer Privacy Act of 2018 (\"CCPA\"). However, there are a few areas that may need further attention to ensure full compliance:

1. **Methods for Submitting Requests**: The policy mentions that consumers can submit verifiable consumer requests via email. However, the law (Section 1798.130 (a)(1)(A)) requires businesses to provide two or more designated methods for submitting such requests. This could include a toll-free telephone number or a website form, in addition to the email address already provided. 

2. **Time Frame for Response**: The policy states that the company will respond to a verifiable consumer request within 30 days of its receipt. While this is not a violation since it's shorter than the 45 days required by the law (Section 1798.130 (a)(2)(A)), the policy should mention the possibility of an extension by an additional 45 days when reasonably necessary.

3. **Disclosure of Informa

In [164]:
b = run_section_through_chain(pre_prompt_analysis_only, policy_section, retrieval_qa_basic_pydantic)
b


ValidationError: 1 validation error for AnswerWithSources
__root__
  Invalid control character at: line 2 column 195 (char 196) (type=value_error.jsondecode; msg=Invalid control character at; doc={
  "answer": "The policy section '#9. Additional Notice for California Residents' is generally compliant with the new law. However, there are a few areas where it falls short of full compliance. 

1. The policy does not specify two or more designated methods for submitting requests for information or deletion. The law requires at least two methods, including a toll-free telephone number, unless the business operates exclusively online and has a direct relationship with the consumer. In this case, an email address is sufficient. The policy only mentions an email address for submitting requests.

2. The policy does not explicitly state that the business will disclose and deliver the required information, correct inaccurate personal information, or delete a consumer’s personal information within 45 days of receiving a verifiable consumer request. Although the policy mentions a 30-day response time, which is within the legal limit, it does not explicitly state that the business will take the necessary actions within this time frame.

3. The policy does not mention the possibility of extending the 45-day period by an additional 45 days when reasonably necessary, as required by the law.

4. The policy does not explicitly state that the business will not require the consumer to create an account in order to make a verifiable consumer request.

5. The policy does not mention the requirement to disclose the required information for the 12-month period preceding the business’ receipt of the verifiable consumer request, or the possibility of a consumer requesting information beyond this period under certain conditions.

6. The policy does not mention the requirement for a business that receives a verifiable consumer request to disclose any personal information it has collected about a consumer, directly or indirectly, including through or by a service provider or contractor, to the consumer.",
  "sources": ["10-pl", "11-pl", "54-pl", "32-pl"]
}; pos=196; lineno=2; colno=195)

In [168]:
b

AnswerWithSources(answer="The policy is compliant with the new law. It covers all the necessary points as required by the law, including the right to request information, the right to request deletion, the process for making a verifiable consumer request, and the company's response timing and format. The policy also specifies that it will not discriminate against consumers for exercising their rights, which is in line with the law.", sources=['10-pl', '11-pl', '32-pl', '33-pl'])

In [166]:
b.answer

"The policy is compliant with the new law. It covers all the necessary points as required by the law, including the right to request information, the right to request deletion, the process for making a verifiable consumer request, and the company's response timing and format. The policy also specifies that it will not discriminate against consumers for exercising their rights, which is in line with the law."

In [167]:
b.sources

['10-pl', '11-pl', '32-pl', '33-pl']

In [214]:
# get recroom policy splits
RecRoom_policy_splits = split_md_sections("data/RecRoom_complete.md")
print(len(RecRoom_policy_splits))
# escape problematic markdown characters
# RecRoom_policy_splits = [escape_markdown(x) for x in RecRoom_policy_splits]
# RecRoom_policy_splits = RecRoom_policy_splits[9:11]
print(RecRoom_policy_splits[0])

'# Privacy Policy\n\nEffective: July 25, 2023\n\nRec Room Inc., a Delaware corporation ("Company", "we", "our", and their derivatives) provides the websites, [http://www.recroom.com](http://www.recroom.com/), [http://www.rec.net](http://www.rec.net/), and the subdomains of each of the foregoing (collectively, the "Website") and the Rec Room® video game (the "Game" and, with the Website, the "Services").\n\nThis Policy sets forth how we collect, use, protect, store, and otherwise process your Personal Information (defined below). This Policy does NOT apply to information we collect offline or you provide to or is collected by any third party (except as otherwise provided below).\n\nFor our practices regarding children, please see the Children\'s section in Section 2 below.\n\n'

In [215]:
# Loop over the splits
import time

# Pause between requests to stay under the OpenAI tokens-per-minute quota
# (this account is limited to 10K TPM / 200 RPM).
SECONDS_BETWEEN_SECTIONS = 10

all_answers_list = []

# The loop uses its own variable names so that it does not overwrite the notebook-level
# `policy_section` and `answer` variables that earlier cells set and later re-runs rely on.
for section_idx, section_text in enumerate(RecRoom_policy_splits):
    print(f"Processing chunk {section_idx}...")
    section_answer = run_section_through_chain(pre_prompt_analysis_only, section_text, retrieval_qa_basic)
    all_answers_list.append(section_answer)
    time.sleep(SECONDS_BETWEEN_SECTIONS)


# save the list to the disk
with open("data/all_answers_list.pkl", "wb") as f:
    pickle.dump(all_answers_list, f)


Processing section 0...
Processing section 1...
Processing section 2...
Processing section 3...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


Processing section 4...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


Processing section 5...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


Processing section 6...
Processing section 7...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens p

Processing section 8...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


Processing section 9...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens p

Processing section 10...


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-rCiGgWNN24FEc7FbvDSLKfAT on tokens p

Processing section 11...


In [216]:
# Merge the per-section assessments from all_answers_list into one labelled text blob

# Flip this switch to restore the answers from the pickle file instead of using
# the in-memory list (only unpickle files you produced yourself).
if False:
    with open("data/all_answers_list.pkl", "rb") as f:
        all_answers_list = pickle.load(f)

# Each assessment gets a header carrying its position in the list.
all_answers = "\n".join(
    f"__Assessment of chunk {idx}__\n{assessment}\n"
    for idx, assessment in enumerate(all_answers_list)
)

print(all_answers)

__Assessment of chunk 0__
{
  "answer": "The provided policy section does not comply with the new law. The violation is significant as it lacks several key elements required by the California Consumer Privacy Act of 2018. Here are the main issues, prioritized by importance:

1. The policy does not specify two or more designated methods for consumers to submit requests for information disclosure, deletion, or correction (Section 1798.130 (a)(1)).
2. The policy does not mention any measures to protect the fundamental privacy rights of natural persons with respect to the use of their personal information (Section 1798.185 (c)).
3. The policy does not provide guidance to consumers regarding their rights under this title (Section 1798.185 (e)).
4. The policy does not establish a mechanism for persons doing business in California to voluntarily certify that they are in compliance with this title (Section 1798.185 (j)).
5. The policy does not mention any measures to promote public awareness a

In [200]:
# create summarization model
# No temperature is passed here, so the ChatOpenAI default applies
# (unlike the `chat` model created earlier in this notebook, which sets temperature=0).
llm_for_summary = ChatOpenAI(model_name='gpt-4')

In [217]:
# Ask the summarization model to merge the per-section assessments into one report.
# The system message sets the attorney persona; the human message carries the
# instruction followed by the full text of all_answers.
messages = [
    SystemMessage(
        content="ACT as a corporate attorney whose job is to check the compliance of the company's policy with the state's new laws.  "
    ),
    HumanMessage(
        content=(
            "Summarize the assessments of different sections of the policy into a single assessment, referencing individual sections of the policy as necessary. Make sure to include the relevant numbers of legal code sections for the human attorney to be able to verify: \n"
            + all_answers
        )
    ),
]
summary_answer = llm_for_summary(messages)

In [218]:
# Print the text of the summarization model's reply (its .content attribute).
print(summary_answer.content)

__Assessment of chunk 12__
{
  "answer": "The policy section #12 'How to Contact Us' is generally compliant with the new law. However, there are a few areas that need to be addressed to ensure full compliance:

1. Response Time: The policy states that the company will respond to a consumer's request within 30 days. While this is within the 45-day response time required by the new law, the law also allows for an extension of an additional 45 days when reasonably necessary. The policy should clarify whether such extensions are possible and under what circumstances (Source: 10-pl).

2. Contact Options: The policy provides an email address and mailing address for consumers to contact the company. However, the new law requires businesses to provide at least two methods for consumers to submit requests for information, deletion, or correction. This includes, at a minimum, a toll-free telephone number (Source: 10-pl). The policy should include at least one more method for consumers to submit 