In [1]:
import os
import requests
import json
import re
import asyncio

from dotenv import load_dotenv
load_dotenv()

True

#### Function for Scraping Text from URL

In [2]:
from bs4 import BeautifulSoup

def get_text_from_url(url, timeout=30):
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed to
            ``requests.get`` so a stalled connection cannot block the
            notebook indefinitely.

    Returns:
        The text extracted by BeautifulSoup, stripped of leading/trailing
        whitespace and with every run of four or more whitespace characters
        collapsed to a single space. ``None`` when the response status code
        is not 200.

    Exceptions raised by ``requests.get`` itself are not caught here and
    propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)

    # Anything other than a plain 200 is treated as a failure.
    if response.status_code != 200:
        print('Failed to retrieve the web page.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Collapse long whitespace runs left behind by the removed markup.
    return re.sub(r'\s{4,}', ' ', soup.get_text().strip())

#### Function for Extracting URL Domain

In [3]:
from urllib.parse import urlparse

def extract_domain(url):
    """Return the host name of ``url`` with a trailing ``com`` label removed.

    Args:
        url: Absolute URL, e.g. ``"https://openai.com/policies"``.

    Returns:
        The lower-cased host name without credentials or port, and without a
        final ``.com`` (``"openai.com"`` -> ``"openai"``). Other top-level
        domains are kept (``"example.org"`` stays ``"example.org"``). An empty
        string when the URL has no network location.
    """
    # ``hostname`` (unlike ``netloc``) excludes any ``user:pass@`` prefix and
    # ``:port`` suffix, so "openai.com:443" still ends in the "com" label.
    # It is None when the URL has no network location.
    host = urlparse(url).hostname or ""
    labels = host.split(".")
    if labels[-1] == "com":
        labels = labels[:-1]
    return ".".join(labels)

#### Function for Serp API

In [4]:
from serpapi import GoogleSearch

def get_serp_data(url):
    """Search Google (through SerpApi) for the domain of ``url`` and summarise the hits.

    The API key is read from the ``SERP_API_KEY`` environment variable; it is
    ``None`` when the variable is unset.

    Args:
        url: URL whose domain (as returned by ``extract_domain``) is used as
            the search query.

    Returns:
        A dict with the keys:
            ``candidate_names``: titles of the first five organic results.
            ``candidate_descriptions``: snippets of the same results.
            ``related_searches``: the ``query`` of each related search.
            ``knowledge_graph``: dict with ``title``, ``description`` and
                ``type`` (empty strings when absent).
        Every list is empty when the corresponding section is missing from
        the response.
    """
    api_key = os.getenv("SERP_API_KEY")

    query = extract_domain(url=url)

    # Create the search parameters
    params = {
        "engine": "google",
        "q": query,
        "api_key": api_key
    }

    # Make the API request
    search = GoogleSearch(params)
    data = search.get_dict()

    # Top 5 organic results. `.get` keeps a response without this section
    # from raising KeyError, matching how the other sections are read.
    top_results = data.get('organic_results', [])[:5]
    candidate_names = [result.get('title', '') for result in top_results]
    candidate_descriptions = [result.get('snippet', '') for result in top_results]

    # Related searches; entries without a 'query' field are skipped.
    related_searches = [
        item['query']
        for item in data.get('related_searches', [])
        if 'query' in item
    ]

    # Knowledge Graph fields default to empty strings when the section is absent.
    kg = data.get('knowledge_graph', {})

    # Consolidate all information into a single JSON-style object
    serp_data = {
        "candidate_names": candidate_names,
        "candidate_descriptions": candidate_descriptions,
        "related_searches": related_searches,
        "knowledge_graph": {
            "title": kg.get('title', ''),
            "description": kg.get('description', ''),
            "type": kg.get('type', '')
        }
    }

    return serp_data

In [5]:
# Test the function with an example URL
# print(get_serp_data("https://openai.com/policies/privacy-policy"))

#### Initialize Token Counter

In [6]:
import tiktoken
# Load the "cl100k_base" tiktoken encoding and sanity-check it: encoding and then
# decoding a sample string must give back the original text.
# NOTE(review): `enc` is not referenced again in the visible notebook; the token
# counts further down use a separate `encoding` object.
enc = tiktoken.get_encoding("cl100k_base")
assert enc.decode(enc.encode("hello world")) == "hello world"

#### LangChain Imports

In [7]:
from promptlayer.langchain.chat_models import PromptLayerChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

In [8]:
# TODO
# use streaming with the llms

#### Define LLM

In [9]:
# API keys are read from environment variables (the first cell calls load_dotenv()).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# NOTE(review): PROMPTLAYER_API_KEY is assigned here but never passed to anything in
# the visible notebook — presumably promptlayer reads the environment variable
# itself; confirm.
PROMPTLAYER_API_KEY = os.getenv("PROMPTLAYER_API_KEY")
# Model for the summarisation and context-extraction steps; also used to pick the
# tiktoken encoding. The scoring section later rebinds `llm` to a different model.
model_name = "gpt-3.5-turbo"

llm = PromptLayerChatOpenAI(pl_tags=["Privacy Scorecard", "Google"], model_name=model_name, temperature=0.3, openai_api_key=OPENAI_API_KEY)

In [10]:
# chain = LLMChain(llm=llm, prompt=PromptTemplate(template="analyze this text {try}", input_variables=["try"]))
# response = chain.run("test")

## 1. Analyze Policy: Generate Chunk Summaries

#### Compile System Prompt

In [11]:
# Read the system prompt text from disk and wrap it in a system-message template.
# The template is reused as the first message of the chat prompts built below.
with open("system_analyze_policy_prompt.txt", "r") as file:
    system_prompt = file.read()
system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt)

#### Compile Few-shot Human Prompt

In [12]:
# Load the few-shot data once: the instruction text ("prompt") and the list of
# per-category examples ("categories"). JSON is read as UTF-8 explicitly so the
# result does not depend on the platform's default encoding.
with open("chunk_category_summary_data.json", "r", encoding="utf-8") as file:
    chunk_summary_data = json.load(file)

prompt, categories = chunk_summary_data["prompt"], chunk_summary_data["categories"]

# Renders one example; each entry of `categories` must supply these four keys.
category_prompt_template = PromptTemplate(
    input_variables=["category", "prompt", "input", "output"],
    template="Category: {category}\n{prompt}\nInput Example: {input}\nExpected Output: {output}",
)

# Instruction text, then every rendered example, then the text to analyse.
few_shot_prompt = FewShotPromptTemplate(
    examples=categories,
    example_prompt=category_prompt_template,
    example_separator="\n\n",
    prefix=prompt,
    suffix="ANALYZE TEXT: {input}",
    input_variables=["input"],
)

#### Get Text from URL

In [13]:
# Policy to analyse; the commented lines are alternative sample policies.
url = "https://openai.com/policies/privacy-policy"
# url = "https://app.skiff.com/docs/db93c237-84c2-4b2b-9588-19a7cd2cd45a#tyGksN9rkqbo2uGYASxsA6HVLjUoly/wTYK8tncTto8="
# url = "https://policies.google.com/privacy?hl=en-US"
text = get_text_from_url(url)

# get_text_from_url returns None when the page could not be retrieved. Fail here
# with a clear message instead of later, inside the tokenizer, with an obscure one.
if text is None:
    raise RuntimeError(f"Could not retrieve policy text from {url}")

print(text)

Privacy policy CloseSearch Submit Skip to main contentSite NavigationResearchOverviewIndexProductOverviewChatGPTGPT-4DALL·E 2Customer storiesSafety standardsPricingDevelopersOverviewDocumentationAPI referenceExamplesSafetyCompanyAboutBlogCareersCharterSecuritySearch Navigation quick links Log inSign upMenu Mobile Navigation CloseSite NavigationResearchProductDevelopersSafetyCompany Quick Links Log inSign upSearch Submit Privacy policyUpdatedApril 27, 2023We at OpenAI OpCo, LLC (together with our affiliates, “OpenAI”, “we”, “our” or “us”) respect your privacy and are strongly committed to keeping secure any information we obtain from you or about you. This Privacy Policy describes our practices with respect to Personal Information we collect from or about you when you use our website and services (collectively, “Services”). This Privacy Policy does not apply to content that we process on behalf of customers of our business offerings, such as our API. Our use of that data is governed by 

In [14]:
# text = """
# Skiff Privacy Policy

# Last Updated June 6, 2022

# SKIFF PRIVACY POLICY

# Skiff World, Inc., (which operates the website skiff.org and certain services under the name “Skiff,” and is referred to as “Skiff” in this Privacy Policy), provides a secure collaboration platform for our customers. This Privacy Policy is designed to help you understand how we collect, use, and share your information, and to help you understand and exercise your privacy choices.

# Scope

# This Privacy Policy applies to information we collect from our site visitors and free users (“Platform Data”), including on our websites, mobile applications, and other online or offline offerings. To make this Privacy Policy easier to read, our websites, applications, and other offerings are collectively called “ .”‍An Important Note: This Privacy Policy does not apply to any Skiff information uploaded to our platform by enterprise customers’ end users (“Customer Data”). We process Customer Data in accordance with the contracts that we have in place with our customers and the instructions they provide us, not this Privacy Policy. If you are an end user of an enterprise customer account, any questions or requests relating to Customer Data should be directed to the customer that provides your instance of Skiff.

# Personal Information We Collect

# Skiff’s highest priority is to safeguard the privacy of the users on our platform. While you are on our platform, we collect only a minimal amount of data needed to provide you with our services. However, we may collect additional types of information when you visit our website or communicate with us via other channels, such as email or social media. We explain the types information we collect in each of these mediums below.‍     A. Information You Provide to Us Directly‍We may collect the following information that you provide to us.‍• Account Creation. We may collect information when you create an account, such as your email address and username. When setting up your account, you may optionally also provide your real name, and your interest in Skiff. Note that although you provide login credentials to access the Service, we do not store your plain-text password on our servers.• Purchases. We may collect information and details associated with your purchases, including payment information collected by our third-party payment processors (e.g., Stripe) on our behalf. We do not directly collect or store any payment card information, but may receive billing address or other non-sensitive information associated with your payment card from our third-party processors.• Digital Wallet information.  “Digital Wallet” means an application or software (e.g. any Ethereum wallet, such as MetaMask or Rainbow Wallet) that customers may use to secure their private keys and execute blockchain transactions. If you connect your Digital Wallet in order to access our Services, we may collect your Wallet public key address.• Interactive Features. We collect information you choose to share with us through our interactive features (e.g., content sharing, messaging and chat features, commenting functionalities, etc.). 
Any information you provide to us using the sharing features of the Services ( “User Content”) will be accessible to us and other users you share the User Content with.‍• Your Communications with Us. We may collect information such as your email address, when you request information about our Services, register for our newsletter, request customer or technical support, apply for a job, or otherwise communicate with us.• Surveys. We may contact you to participate in surveys about our Services. If you decide to participate, some surveys may ask you to provide certain information about yourself.• Job Applications. We may post job openings and opportunities on our Services. If you reply to one of these postings by submitting your application, CV and/or cover letter to us, we will collect and use your information to assess your qualifications.‍     B. Information Collected Automatically We may collect certain information automatically when you visit our website or use the services to provide and measure our services and for security and fraud prevention purposes:

# • Automatic Data Collection. In order to protect you and our platform from malicious activity and to prevent fraud, we may collect certain information automatically when you use our Services, such as your Internet protocol (IP) address (temporarily), user settings, and Skiff-provided authentication cookies.

# • Cookies and Other Technologies. Cookies are small text files placed in device browsers that store preferences and facilitate and enhance your experience. We, as well as third parties that provide content or other functionality on our Services, may use cookies, local storage, and other technologies (“Technologies”) to automatically collect information through your use of our Services.Our uses of these Technologies fall into the following general categories:‍• Operationally Necessary. This includes Technologies that allow you access to our Services and tools that are required to identify irregular website behavior, prevent fraudulent activity and improve security or that allow you to make use of our functionality;• Performance-Related. We may use Technologies to assess the performance of our Services, including as part of our analytic practices to help us understand how individuals use our Services (see Analytics below);• Functionality-Related. We may use Technologies that allow us to offer you enhanced functionality when accessing or using our Services. This may include identifying you when you sign into our Services or keeping track of your specified preferences;‍See “Your Privacy Choices” below to understand your choices regarding these Technologies.‍• Analytics. We may use Technologies and other third-party tools, including those provided by Matomo, to better understand how individuals use our platform. For more information, please visit Matomo’s Privacy Policy. Matomo, and other third party scripts, are only used on Skiff’s marketing site (“skiff.com”) and not Skiff services (“app.skiff.com”). ‍Platform: We temporarily collect only IP addresses on our platform during signup. We collect this information through operationally necessary security technology in order to provide you with our services and keep our platform online.‍

# How We Use Your Information

# Information collected through the platform: As noted above, we use the limited information collected from you while you are on our platform only to provide you with our services. We do not have access to the content you upload to Skiff, and therefore do not make any use of the information contained in that content.‍‍Other information: We use the information we collect from you through our website and other off-platform channels primarily to provide our Services, for security and fraud prevention, and for administrative purposes, and to market our products and Services, as described below.    A. Administering Our ServicesWe use information collected off-platform for administrative purposes, such as:‍• Administering your account;• Providing access to certain areas, functionalities, and features of our website;• Answering requests for customer or technical support;• Communicating with you about your account, activities on our Services, and policy changes;• Processing your financial information and other payment methods for products or Services purchased;• Processing applications if you apply for a job we post on our Services; and• Allowing you to register for events.    B. 
Operational Purposes‍We use your information to operate our business, such as:• Preventing and detecting security incidents, protecting against malicious, deceptive, fraudulent or illegal activity, and prosecuting those responsible for that activity;• Measuring and analyzing interest and engagement in our Services;• Short-term, transient use, such as contextual customization of ads;• Improving, upgrading or enhancing our Services;• Developing new products and Services;• Ensuring internal quality control and safety;• Authenticating and verifying individual identities, including requests to exercise your rights under this policy;• Debugging to identify and repair errors with our Services;• Auditing relating to interactions, transactions and other compliance activities;• Enforcing our agreements and policies; and• Complying with our legal obligations.‍    C. Marketing and Advertising our Products and ServicesWe may use information to market and advertise our products to you directly if you have signed up for the services and/or provided us with your email address. This includes marketing via email campaigns and notifications within the Platform. You can opt out of direct email marketing messages from us by clicking the “unsubscribe” button included in the footer of the emails we send you. For more choices about use of tracking technologies for advertising more generally, please see “Your Privacy Choices” below.‍    D. Other Purposes‍We also use your information for other purposes as requested by you or as permitted by applicable law.

# • Consent.  We may use your information for other purposes with your explicit consent.• De-identified and Aggregated Information. We may use information about you to create de-identified and/or aggregated information. We may use such aggregate or de-identified information for any purpose, and such information is not subject to the limitations set forth in this Policy.• Referral Services. Our referral services may allow you to share certain content with a friend or colleague, such as an email inviting your friend to use our Services. Please only share with us contact information of people with whom you have a relationship (e.g., relative, friend neighbor, or co-worker).

# How We Disclose Your Information

# We disclose your information to third parties for limited business purposes, including to provide payments processing, customer service, and IT support, to protect us or others, or in the event of a major business transaction such as a merger, sale, or asset transfer, as described below.    A. Disclosures to Provide our Services‍The content you upload to Skiff is stored in end-to-end encrypted form in a secure cloud database hosted by a cloud provider or on IPFS. We do not share user content with any other third party.‍We may share other information we collect with other third parties, as follows:• Service Providers. Skiff does not share any of your personal information with service providers. However, we may share your information with our third-party service providers who use that information to help us provide IT support, hosting, payment processing, customer service, and related services.• Other Users. As described above in “Personal Information We Collect,” our Services allow you to share your profile and/or User Content with other Users.    B. Disclosures to Protect Us or OthersWe may access, preserve, and disclose your information if we, in good faith, believe doing so is required or appropriate to: comply with law enforcement or national security requests and legal process, such as a court order or subpoena; protect your, our, or others’ rights, property, or safety; enforce our policies or contracts; collect amounts owed to us; or assist with an investigation or prosecution of suspected or actual illegal activity. Please note, however, that we do not have the ability to decrypt any content uploaded to our platform.     C. 
Disclosure in the Event of Merger, Sale, or Other Asset TransfersIf we are involved in a merger, acquisition, financing due diligence, reorganization, bankruptcy, receivership, purchase or sale of assets, or transition of service to another provider, your information may be sold or transferred as part of such a transaction, as permitted by law and/or contract.

# Your Privacy Choices and Rights

# • Email Communications. If you do not wish to receive marketing communications from us, you can use the unsubscribe link found at the bottom of the email to opt out of receiving future marketing emails. Note that you will continue to receive transaction-related emails regarding products or Services you have requested. We may also send you certain non-promotional communications regarding us and our Services, and you will not be able to opt out of those communications (e.g., communications regarding our Services or updates to our Terms or this Privacy Policy).• “Do Not Track.” Do Not Track (“DNT”) is a privacy preference that users can set in certain web browsers. Please note that we do not respond to or honor DNT signals or similar mechanisms transmitted by web browsers.• Cookies. You may stop or restrict the placement of Technologies on your device or remove them by adjusting your preferences as your browser or device permits. However, if you adjust your preferences, our Services may not work properly. Please note you must separately opt out in each browser and on each device.• Requests Regarding Your Personal Information. If you would like to request access to a copy of your personal information, or request correction or deletion of your information, please email us at the address listed in the “Contact Us” section below.

# • Mobile Settings. You can use your mobile device settings to limit use of the identifier(s) associated with your device for notifications or other settings.

# Security of Your Information

# We take steps to ensure that your information is treated securely and in accordance with this Privacy Policy. Unfortunately, no system is 100% secure, and we cannot ensure or warrant the security of any information you provide to us. We have taken appropriate safeguards to require that your personal information will remain protected and require our third-party service providers and partners to have appropriate safeguards as well.

# By using our Services or providing information to us, you agree that we may communicate with you electronically regarding security, privacy, and administrative issues relating to your use of our Services. If we are required to notify you of a security incident, we may attempt to notify you electronically by posting a notice on our Services, by mail or by sending an email to you.

# International Data Transfers

# All information processed by us may be transferred, processed, and stored anywhere in the world, including, but not limited to, the United States or other countries, which may have data protection laws that are different from the laws where you live. We endeavor to safeguard your information consistent with the requirements of applicable laws.

# Retention of Personal Information

# We store the personal information we collect as described in this Privacy Policy for as long as you use our Services or as necessary to fulfill the purpose(s) for which it was collected, provide our Services, resolve disputes, establish legal defenses, conduct audits, pursue legitimate business purposes, enforce our agreements, and comply with applicable laws.

# Supplemental Notice for Nevada Residents

# If you are a resident of Nevada, you have the right to opt-out of the sale of certain Personal Information to third parties who intend to license or sell that Personal Information. You can exercise this right by contacting us at hello@skiff.org with the subject line “Nevada Do Not Sell Request” and providing us with your name and the email address associated with your account. Please note that we do not currently sell your Personal Information as sales are defined in Nevada Revised Statutes Chapter 603A.

# Children's Information

# The Services are not directed to children under 18 , and we do not knowingly collect personal information from children.If you are a parent or guardian and believe your child has uploaded personal information to our site without your consent, you may contact us as described in “Contact Us” below. If we become aware that a child has provided us with personal information in violation of applicable law, we will delete any personal information we have collected, unless we have a legal obligation to keep it, and terminate the child’s account if applicable.

# Other Provisions

# Third-Party Websites/Applications. The Services may contain links to other websites/applications and other websites/applications may reference or link to our Services. These third-party services are not controlled by us. We encourage our users to read the privacy policies of each website and application with which they interact. We do not endorse, screen or approve, and are not responsible for, the privacy practices or content of such other websites or applications. Providing personal information to third-party websites or applications is at your own risk.Changes to our Privacy Policy. We may revise this Privacy Policy from time to time in our sole discretion. If there are any material changes to this Privacy Policy, we will notify you as required by applicable law. You understand and agree that you will be deemed to have accepted the updated Privacy Policy if you continue to use our Services after the new Privacy Policy takes effect.

# Use of GMail import feature

# Skiff’s use and transfer to any other app of information received from Google APIs will adhere to Google API Services User Data Policy, including the Limited Use requirements.

# CONTACT USIf you have any questions about our privacy practices or this Privacy Policy, or to make requests about your information as detailed in this Privacy Policy, please contact us at:Skiffhello@skiff.org

# +1 (415) 320-8575
# """

In [15]:
# Remove
# with open("sample_policy.txt", "r") as file:
#     text = file.read()

In [16]:
# Count the tokens of the scraped policy text with the tiktoken encoding selected
# for `model_name`. `encoding` is reused for the token counts printed further down.
encoding = tiktoken.encoding_for_model(model_name=model_name)
num_tokens = len(encoding.encode(text))
print(num_tokens)

3547


#### Split Policy Text into Chunks

In [17]:
# Split the policy text into chunks, preferring paragraph breaks, then line
# breaks, then sentence ends. The split runs for every policy so `text_chunks`
# is always defined; text shorter than `chunk_size` is expected to come back as
# a single chunk.
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", "."], chunk_size=6000, chunk_overlap=0)
text_chunks = text_splitter.create_documents([text])

print(text_chunks)

[Document(page_content='Privacy policy CloseSearch Submit Skip to main contentSite NavigationResearchOverviewIndexProductOverviewChatGPTGPT-4DALL·E 2Customer storiesSafety standardsPricingDevelopersOverviewDocumentationAPI referenceExamplesSafetyCompanyAboutBlogCareersCharterSecuritySearch Navigation quick links Log inSign upMenu Mobile Navigation CloseSite NavigationResearchProductDevelopersSafetyCompany Quick Links Log inSign upSearch Submit Privacy policyUpdatedApril 27, 2023We at OpenAI OpCo, LLC (together with our affiliates, “OpenAI”, “we”, “our” or “us”) respect your privacy and are strongly committed to keeping secure any information we obtain from you or about you. This Privacy Policy describes our practices with respect to Personal Information we collect from or about you when you use our website and services (collectively, “Services”). This Privacy Policy does not apply to content that we process on behalf of customers of our business offerings, such as our API. Our use of t

In [18]:
# Show how many chunks were produced and the text of the first one.
print(len(text_chunks))
print(text_chunks[0].page_content)

4
Privacy policy CloseSearch Submit Skip to main contentSite NavigationResearchOverviewIndexProductOverviewChatGPTGPT-4DALL·E 2Customer storiesSafety standardsPricingDevelopersOverviewDocumentationAPI referenceExamplesSafetyCompanyAboutBlogCareersCharterSecuritySearch Navigation quick links Log inSign upMenu Mobile Navigation CloseSite NavigationResearchProductDevelopersSafetyCompany Quick Links Log inSign upSearch Submit Privacy policyUpdatedApril 27, 2023We at OpenAI OpCo, LLC (together with our affiliates, “OpenAI”, “we”, “our” or “us”) respect your privacy and are strongly committed to keeping secure any information we obtain from you or about you. This Privacy Policy describes our practices with respect to Personal Information we collect from or about you when you use our website and services (collectively, “Services”). This Privacy Policy does not apply to content that we process on behalf of customers of our business offerings, such as our API. Our use of that data is governed b

#### Output Policy Chunks into Text File

In [19]:
# with open('policy_chunks.txt', 'w') as file:
#     for element in text_chunks:
#         file.write(element.page_content + "\n")

#### Format Human Prompts with Policy Chunks

In [20]:
# One human message per chunk. The few-shot prompt is rendered to its final text
# first and then wrapped in a message template. Because that wrapper parses its
# input as a template again, literal braces in the rendered text (from the policy
# or the examples) are doubled so they are not mistaken for template variables;
# formatting the message turns them back into single braces.
human_message_prompts = [
    HumanMessagePromptTemplate.from_template(
        few_shot_prompt.format_prompt(input=chunk.page_content)
        .to_string()
        .replace("{", "{{")
        .replace("}", "}}")
    )
    for chunk in text_chunks
]

# print(human_message_prompts)

#### Format Chat Prompts with System & Human Prompt Messages

In [21]:
# Pair the shared system message with each chunk's human message: one chat prompt per chunk.
formatted_chat_prompts = [ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) for human_message_prompt in human_message_prompts]

# print(formatted_chat_prompts)

In [22]:
# print(len(encoding.encode(formatted_chat_prompts[0].to_string())))
# print(formatted_chat_prompts[0].to_string())

#### Get Summaries for All Chunks

In [23]:
# TODO
# make the response calls run concurrently

In [24]:
# responses = [llm(formatted_chat_prompt.to_messages()).content for formatted_chat_prompt in formatted_chat_prompts]

In [26]:
responses = []
async def async_generate(chain):
    resp = await chain.arun(input=[])
    responses.append(resp)

tasks = [async_generate(chain=LLMChain(llm=llm, prompt=formatted_chat_prompt)) for formatted_chat_prompt in formatted_chat_prompts[:1]]
await asyncio.gather(*tasks)



[None]

In [None]:
# print(responses)
# Token count of every chunk summary, then the total across all summaries.
tokens = [
    len(encoding.encode(summary))
    for summary in responses
]
# print(tokens)
print(sum(tokens))

## 2. Analyze Policy: Generate Coherent Summary

#### Consolidate Chunk Summaries

In [None]:
# Label each chunk summary "Summary N:" (numbered from 1) and separate the
# labelled summaries with a blank line.
analyzed_summaries = "\n\n".join(
    "Summary {}:\n{}".format(position, summary)
    for position, summary in enumerate(responses, start=1)
)
print(analyzed_summaries)

#### Format Human & Chat Prompt

In [None]:

# Load the human prompt for the final summary; it is used as a template with a
# single variable, "summaries".
with open("final_category_summary_prompt.txt", "r") as file:
    human_prompt = file.read()

human_prompt_template = PromptTemplate(input_variables=["summaries"], template=human_prompt)
human_message_prompt = HumanMessagePromptTemplate(prompt=human_prompt_template)

# Chat prompt = system message + human message, filled in with the consolidated chunk summaries.
chat_prompt = ChatPromptTemplate(input_variables=["summaries"], messages=[system_message_prompt, human_message_prompt])
formatted_chat_prompt = chat_prompt.format_prompt(summaries=analyzed_summaries)

# Inspect both rendered messages and the token count of the human message.
print(formatted_chat_prompt.to_messages()[0].content)
print(formatted_chat_prompt.to_messages()[1].content)
print(len(encoding.encode(formatted_chat_prompt.to_messages()[1].content)))

#### Get Summary Report

In [None]:
# Send the rendered messages to the LLM, then print the reply object and the
# token count of its content.
response = llm(formatted_chat_prompt.to_messages())
print(response)
print(len(encoding.encode(response.content)))

In [None]:
# Keep the text of the summary report; the scoring section feeds it into its prompt.
final_report = response.content

#### Output Final Report to Text File

In [None]:
# with open("final_report.txt", "w") as file:
#     file.write(response.content + "\n")

## 3. Perform Scoring to Generate Privacy Scorecard

In [None]:
# TODO
# modify prompt to ignore categories like contact information

In [None]:
# with open("final_report.txt", "r") as file:
#     final_report = file.read()
# print(final_report)

#### Generate Company Context

In [None]:
# Look up search results for the policy URL's domain (see get_serp_data).
serp_data = get_serp_data(url=url)

In [None]:
# Load the context-extraction prompt; it is used as a template with three
# variables filled from the search data.
with open("context_extraction_prompt.txt", "r") as file:
    context_extraction = file.read()

context_extraction_prompt = PromptTemplate(
    input_variables=["candidate_names", "candidate_descriptions", "knowledge_graph"],
    template=context_extraction,
)

# NOTE(review): the values passed here are Python lists and a dict, so they are
# presumably inserted using their default string form — confirm that is what the
# prompt expects.
formatted_context_prompt = context_extraction_prompt.format_prompt(candidate_names=serp_data["candidate_names"], candidate_descriptions=serp_data["candidate_descriptions"], knowledge_graph=serp_data["knowledge_graph"])
print(formatted_context_prompt.to_string())

In [None]:
# Ask the LLM to produce the company context from the rendered context-extraction prompt.
context_response = llm(formatted_context_prompt.to_messages())

In [None]:
company_context = context_response.content
print(company_context)

#### Define LLM

In [None]:
# Rebind `llm` to a "gpt-4" client with temperature 0.1 for the scoring step.
# NOTE(review): this overwrites the gpt-3.5-turbo client defined earlier, so any
# earlier cell re-run after this one would use this client instead. The token
# counts below still use `encoding`, which was built from `model_name`.
llm = PromptLayerChatOpenAI(pl_tags=["Privacy Scorecard", "Google"], model_name="gpt-4", temperature=0.1, openai_api_key=OPENAI_API_KEY)

#### Load Scoring Guide

In [None]:
# Load the scoring guide once (the file is parsed a single time) and pull out
# the four pieces used below. JSON is read as UTF-8 explicitly so the result
# does not depend on the platform's default encoding.
with open("scoring_guide_data.json", "r", encoding="utf-8") as file:
    scoring_guide = json.load(file)

system_prompt = scoring_guide["system_prompt"]
human_prompt_prefix = scoring_guide["human_prompt_prefix"]
human_prompt_suffix = scoring_guide["human_prompt_suffix"]
categories = scoring_guide["categories"]

#### Compile System Prompt

In [None]:
# Rebuild the system message from the scoring guide's system prompt; this replaces
# the template created for the summarisation step.
system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt)

#### Compile & Format Few-shot Human Prompt

In [None]:
# Renders one scoring category: its name followed by its guidelines.
category_prompt_template = PromptTemplate(
    input_variables=["category", "guidelines"],
    template="{category}\n{guidelines}",
)

# Prefix, then every rendered category, then the suffix; the two input variables
# are filled with the company context and the summary report.
few_shot_prompt = FewShotPromptTemplate(
    examples=categories,
    example_prompt=category_prompt_template,
    example_separator="\n\n",
    prefix=human_prompt_prefix,
    suffix=human_prompt_suffix,
    input_variables=["company_context", "final_report"],
)

formatted_few_shot_prompt = few_shot_prompt.format_prompt(company_context=company_context, final_report=final_report)

# The rendered text is wrapped in a message template, which parses it as a
# template again. Literal braces in the LLM-generated context or report (or in
# the guide) are doubled so they are not mistaken for template variables;
# formatting the message turns them back into single braces.
human_message_prompt = HumanMessagePromptTemplate.from_template(
    formatted_few_shot_prompt.to_string().replace("{", "{{").replace("}", "}}")
)

#### Format Chat Prompt

In [None]:
# Combine the scoring system message and the rendered human message into the final
# chat prompt; format_prompt() is called without arguments here.
formatted_chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]).format_prompt()

# Inspect both rendered messages and the token count of the human message.
print(formatted_chat_prompt.to_messages()[0].content)
print(formatted_chat_prompt.to_messages()[1].content)
print(len(encoding.encode(formatted_chat_prompt.to_messages()[1].content)))

In [None]:
# Request the scorecard from the LLM; print its text and the token count of that text.
response = llm(formatted_chat_prompt.to_messages())
print(response.content)
print(len(encoding.encode(response.content)))

In [None]:
# Save the scorecard text. The encoding is set explicitly so non-ASCII characters
# in the LLM output are written the same way on every platform instead of
# depending on the locale's default encoding.
with open('privacy_scorecard_1.txt', 'w', encoding='utf-8') as file:
    file.write(response.content + "\n")

In [None]:
# TODO
# explore guardrails for output parsing