In [3]:
from dotenv import load_dotenv
# Load settings from a local .env file before any model is created
# (presumably the OpenAI credentials used below — TODO confirm).
load_dotenv()

True

In [4]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain import LLMChain
# Completion-style model used by the summarisation cells; temperature=0 is
# passed to the OpenAI wrapper.
llm = OpenAI(temperature=0)

In [5]:
# Smoke-test the completion model with a trivial prompt.
llm.predict("hello world")

"\n\nHello world! It's nice to meet you."

In [8]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate

# Re-creates `llm` with the same arguments as the earlier cell, this time
# using the OpenAI class imported from the top-level `langchain` package.
llm = OpenAI(temperature=0)

In [29]:
def _read_terms(path):
    """Return the full text of the terms file at ``path``."""
    with open(path) as handle:
        return handle.read()


baseline = _read_terms('sample_terms/baseline.txt')
malicious = _read_terms('sample_terms/malicious.txt')

# Split both documents with the splitter's default settings so each piece
# can later be wrapped in a Document.
text_splitter = CharacterTextSplitter()
baseline_texts, malicious_texts = (
    text_splitter.split_text(raw_terms) for raw_terms in (baseline, malicious)
)

# Number of chunks produced for each document.
len(baseline_texts), len(malicious_texts)

(5, 1)

In [31]:
from langchain.docstore.document import Document


def _as_documents(chunks):
    """Wrap every text chunk in a ``Document`` whose page_content is the chunk."""
    return [Document(page_content=chunk) for chunk in chunks]


baseline_docs = _as_documents(baseline_texts)
malicious_docs = _as_documents(malicious_texts)

In [14]:
from langchain.chains.summarize import load_summarize_chain

# Summarise the baseline policy with the default "stuff" prompt.
# The cell used to run on `docs`, a name that no cell in this notebook
# defines (left over from an earlier kernel session); `baseline_docs` is the
# document list built from sample_terms/baseline.txt.
chain = load_summarize_chain(llm, chain_type="stuff")
chain.run(baseline_docs)

' This Privacy Policy outlines the collection, use, and disclosure of personal data when using the Service. It includes information such as email address, first and last name, phone number, address, usage data, and information from third-party social media services. The Company may use this data to provide and maintain the Service, contact You, provide You with news and offers, manage Your requests, and for other purposes. The Company may share Your personal information with Service Providers, Affiliates, business partners, other users, and with Your consent. The Company will retain Your Personal Data only for as long as is necessary and may transfer Your Personal Data to other locations. You have the right to delete or request that We assist in deleting the Personal Data that We have collected about You.'

In [45]:
from pydantic import BaseModel, Field, validator
# Structured summary of a privacy policy; handed to PydanticOutputParser so
# the field descriptions below become part of the format instructions.
# NOTE(review): documented with `#` comments rather than a class docstring
# because pydantic appears to copy a model docstring into the generated JSON
# schema, which would change the prompt text — confirm before converting.
class PrivacyPolicy(BaseModel):
    # Individual personal-data items the service collects.
    data_collected: list[str] = Field(description="list of personal data item collected from user")
    # What the collected data may be used for.
    permitted_usage: str = Field(description="summary of how the collected data are used")
    # How, and for how long, the data are kept.
    retention_policy: str = Field(description="summary of how and for how long the data are retained")
    # Sharing with third parties / movement across jurisdictions.
    transfer_policy: str = Field(description="summary of how the data may be transferred to third parties or other jurisdictions")



from langchain.output_parsers import PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=PrivacyPolicy)

# Prompt text kept at module level so the function body stays short; the
# leading whitespace is part of the prompt and is intentionally unchanged.
POLICY_SUMMARY_TEMPLATE = """
    You are to produce summarise terms and conditions of some online services.

    {format_instructions}
    
    Text:
    {text}
    """


def create_policy_summary(text):
    """Ask the completion model for a structured summary of ``text``.

    Args:
        text: Full terms-and-conditions / privacy-policy text to summarise.

    Returns:
        The raw string produced by the chain. It is deliberately NOT passed
        through ``parser.parse``: the recorded output shows the model echoing
        the schema back, so parsing is left to the caller once the prompt is
        reliable.
    """
    policy_summary_prompt = PromptTemplate.from_template(POLICY_SUMMARY_TEMPLATE)
    chain = LLMChain(llm=llm, prompt=policy_summary_prompt)
    # The format instructions are read from the module-level `parser` at call
    # time, so they follow whatever `parser` is currently bound to.
    return chain.run(
        text=text,
        format_instructions=parser.get_format_instructions(),
    )


# `raw` stays a notebook-level name so it can be inspected in later cells.
raw = create_policy_summary(baseline)
raw

'\n\nOutput:\n{\n    "properties": {\n        "data_collected": {\n            "title": "Data Collected",\n            "description": "list of personal data item collected from user",\n            "type": "array",\n            "items": {\n                "type": "string"\n            }\n        },\n        "permitted_usage": {\n            "title": "Permitted Usage",\n            "description": "summary of how the collected data are used",\n            "type": "string"\n        },\n        "retention_policy": {\n            "title": "Retention Policy",\n            "description": "summary of how and for how long the data are retained",\n            "type": "string"\n        },\n        "transfer_policy": {\n            "title": "Transfer Policy",\n            "description": "summary of how the data may be transferred to third parties or other jurisdictions",\n            "type": "string"\n        }\n    },\n    "required": [\n        "data_collected",\n        "permitted_usage",\n     

In [50]:
# Inspect the exact format instructions that get substituted into the prompt.
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"data_collected": {"title": "Data Collected", "description": "list of personal data item collected from user", "type": "array", "items": {"type": "string"}}, "permitted_usage": {"title": "Permitted Usage", "description": "summary of how the collected data are used", "type": "string"}, "retention_policy": {"title": "Retention Policy", "description": "summary of how and for how long the data are retained", "type": "string"}, "transfer_policy": {"title": "Transfer Policy", "description": "summary of how the data may be tr

In [44]:
# Pretty-print the raw policy-summary output. The cell used to print `x`,
# which no cell defines; the recorded output is the `raw` string produced by
# the policy-summary cell.
print(raw)



Output:
{
    "properties": {
        "data_collected": {
            "title": "Data Collected",
            "description": "list of personal data item collected from user",
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "permitted_usage": {
            "title": "Permitted Usage",
            "description": "summary of how the collected data are used",
            "type": "string"
        },
        "retention_policy": {
            "title": "Retention Policy",
            "description": "summary of how and for how long the data are retained",
            "type": "string"
        },
        "transfer_policy": {
            "title": "Transfer Policy",
            "description": "summary of how the data may be transferred to third parties or other jurisdictions",
            "type": "string"
        }
    },
    "required": [
        "data_collected",
        "permitted_usage",
        "retention_policy",
        "t

In [68]:
# Custom prompt asking for a compact JSON summary with a fixed set of keys.
prompt_template = """
- Analyse the terms and conditions given.
- Return in JSON format with the following keys and values

    - collect_what: a list of strings of what data is collected, use at most three words to summarise each item collected
    - retention: how long the data is retained, e.g. 1 year, 3 years, as long as it takes; if not specified, say unspecified
    - usage: a list of strings of usages, use at most three words to summarise each usage
    - transfer: summary how data may be transfered to third parties
    - other: summary of information not mentioned above

Each of the summary should be concise and under 30 words.    

Terms and conditions: {text}
Concise summary in JSON:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

# Build the "stuff" chain once and reuse it for both document sets; the
# previous version constructed an identical second chain by copy-paste.
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
baseline_out = chain.run(baseline_docs)
malicious_out = chain.run(malicious_docs)

In [69]:
# print() (rather than a bare expression) so the newlines inside the model
# output are rendered instead of shown as escape sequences.
print(baseline_out)
print(malicious_out)



{
    "collect_what": ["email address", "name", "phone number", "address", "device info", "contact list", "pictures", "social media info"],
    "retention": "as long as necessary",
    "usage": ["provide service", "manage account", "contact", "business transfers", "data analysis"],
    "transfer": "may transfer to other countries with adequate controls in place",
    "other": "may share with service providers, affiliates, business partners, other users, with consent"
}


{
    collect_what: ["email", "name", "social security number", "passport number", "driver license number", "bank account number", "credit card number", "phone number", "address", "device info", "contacts list", "pictures", "GPS logs", "motion & audio sensor logs", "input & operation", "view history", "search strings"],
    retention: "as long as necessary",
    usage: ["provide & maintain service", "manage account", "performance of contract", "contact user", "provide news & offers", "manage requests", "business tran

In [55]:
# Bare expression: displays the repr of the baseline summary string,
# escape sequences included.
baseline_out

'\n\n{\n    "collect_what": ["email address", "first name and last name", "phone number", "address, state, province, zip/postal code, city", "information from your device\'s phone book (contacts list)", "pictures and other information from your device\'s camera and photo library"],\n    "retention": "The Company will retain Your Personal Data only for as long as is necessary for the purposes set out in this Privacy Policy. We will retain and use Your Personal Data to the extent necessary to comply with our legal obligations (for example, if we are required to retain your data to comply with applicable laws), resolve disputes, and enforce our legal agreements and policies.",\n    "usage": "The Company may use Personal Data for the following purposes: to provide and maintain our Service, including to monitor the usage of our Service; to manage Your Account; for the performance of a contract; to contact You; to provide You with news, special offers and general information; to manage Your 

In [10]:
# generate material into foreign language
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# Chat model read as a global by create_translation and create_question_set.
chat = ChatOpenAI(temperature=0)

def create_translation(text, input_language="English", output_language="French"):
    """Translate ``text`` with the chat model, asking for beginner-friendly wording.

    Args:
        text: The passage to translate.
        input_language: Language ``text`` is written in. Defaults to
            "English", the value that used to be hard-coded.
        output_language: Language to translate into. Defaults to "French",
            the value that used to be hard-coded.

    Returns:
        The string returned by running the chain.
    """
    template = """
    You will translate {input_language} to {output_language}.
    You will use simple words and grammatical structure in the output.
    The output should be easy for beginners to understand
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    human_template = "{text}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    # Translate the `text` argument; the function previously ignored it and
    # read a global named `reading` instead.
    return chain.run(
        input_language=input_language,
        output_language=output_language,
        text=text,
    )


"Les humains et les machines : une combinaison parfaite pour augmenter la productivité. Notre espèce n'aurait pas pu aller très loin sans nos chevaux de travail mécanisés. De la roue qui a révolutionné l'agriculture à la vis qui a maintenu ensemble des projets de construction de plus en plus complexes jusqu'aux chaînes de montage robotisées d'aujourd'hui, les machines ont rendu possible la vie telle que nous la connaissons. Et pourtant, malgré leur utilité apparemment infinie, les humains ont depuis longtemps craint les machines - plus précisément, la possibilité que les machines acquièrent un jour une intelligence humaine et agissent de leur propre chef."

In [37]:
from pydantic import BaseModel, Field, validator
# One comprehension question together with its answer.
# Documented with `#` comments rather than a class docstring so the schema
# text derived from this model is left exactly as it was.
class QuestionAndAnswer(BaseModel):
    question: str = Field(description="a question about the text")
    answer: str = Field(description="an answer to the question")

    @validator('question')
    def question_ends_with_question_mark(cls, field):
        """Validate that the question text ends with '?'.

        Raises:
            ValueError: If the text does not end with a question mark,
                including when it is empty.
        """
        # endswith() instead of field[-1]: indexing an empty string raises
        # IndexError, which escapes as a raw exception rather than being
        # reported as a validation failure.
        if not field.endswith('?'):
            raise ValueError("Badly formed question!")
        return field

# Translated passage plus the questions generated about it.
# Documented with `#` comments rather than a class docstring so no extra
# text is added to the schema derived from this model.
class QuestionSet(BaseModel):
    # The text must be passed as `description=`: Field's first positional
    # argument is the DEFAULT value, so the old form silently used this
    # sentence as the translation whenever the model omitted the field.
    translated_text: str = Field(description="a translated version of the give text")
    questions: list[QuestionAndAnswer]

from langchain.output_parsers import PydanticOutputParser
# Rebinds the notebook-level `parser` (earlier bound to the PrivacyPolicy
# parser) to one targeting QuestionSet; create_question_set reads this global.
parser = PydanticOutputParser(pydantic_object=QuestionSet)

def create_question_set(text, input_language, output_language):
    """Translate ``text`` and generate comprehension questions about it.

    A single system prompt asks the chat model to translate the text from
    ``input_language`` into ``output_language`` and to write 1 to 5 questions
    with answers in ``output_language``. The reply is then handed to the
    module-level ``parser``.

    Args:
        text: Source passage.
        input_language: Language the passage is written in.
        output_language: Language for the translation, questions and answers.

    Returns:
        Whatever ``parser.parse`` returns for the model output.
    """
    format_instructions = parser.get_format_instructions()

    system_template = """
        You are to produce a few question to test the reader's understanding of a text.

        Steps:
        0. Translate the text from {input_language} into {output_language}
        1. Analyse the Text
        2. Summarise it
        3. Produce 1 to 5 questions about the text
        4. Produce an answer to each question

        The questions and answers must be in {output_language}

        {format_instructions}
        
        Text:
        {text}
        """
    qna_prompt = SystemMessagePromptTemplate.from_template(
        system_template,
        input_variables=['text', 'input_language', 'output_language'],
        partial_variables={'format_instructions': format_instructions},
    )

    question_chain = LLMChain(
        llm=chat,
        prompt=ChatPromptTemplate.from_messages([qna_prompt]),
    )
    # format_instructions is supplied here as well as via partial_variables,
    # exactly as before.
    run_arguments = {
        'text': text,
        'input_language': input_language,
        'output_language': output_language,
        'format_instructions': format_instructions,
    }
    model_reply = question_chain.run(**run_arguments)
    return parser.parse(model_reply)


# NOTE(review): `reading` is not defined in any visible cell of this
# notebook — it must be assigned before this line can run on a fresh kernel.
qna = create_question_set(reading, 'english', 'french')

In [38]:
qna.translated_text

"Les humains et les machines : un mariage parfait pour la productivité. Notre espèce n'aurait pas pu aller très loin sans nos chevaux de travail mécanisés. De la roue qui a révolutionné l'agriculture à la vis qui a maintenu ensemble des projets de construction de plus en plus complexes aux lignes d'assemblage robotisées d'aujourd'hui, les machines ont rendu possible la vie telle que nous la connaissons. Et pourtant, malgré leur utilité apparemment sans fin, les humains ont depuis longtemps craint les machines - plus précisément, la possibilité que les machines acquièrent un jour une intelligence humaine et se rebellent."

In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

# Fresh chat model for the message-based examples that follow.
chat = ChatOpenAI(temperature=0)
# Single human message, no system prompt; the instruction and the sentence to
# translate are both in the message content.
chat.predict_messages([HumanMessage(content="Translate this sentence from English to French. I love programming.")])
# >> AIMessage(content="J'aime programmer.", additional_kwargs={})

AIMessage(content="J'aime programmer.", additional_kwargs={}, example=False)

QuestionSet(questions=[QuestionAndAnswer(question='What was the first machine that revolutionized agriculture?', answer='The wheel'), QuestionAndAnswer(question='What are robots used for today?', answer='Robots are used for robot-enabled assembly lines.'), QuestionAndAnswer(question='What have humans feared about machines?', answer='Humans have feared that machines might someday acquire human intelligence and strike out on their own.')])

In [4]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)


chat = ChatOpenAI(temperature=0)

# Raw template strings first, then the message templates built from them.
template = "You are a helpful assistant that translates {input_language} to {output_language}."
human_template = "{text}"
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Render the prompt into concrete messages to see what the model would receive.
chat_prompt.format_messages(
    text="I love programming.",
    input_language="English",
    output_language="French",
)

[SystemMessage(content='You are a helpful assistant that translates English to French.', additional_kwargs={}),
 HumanMessage(content='I love programming.', additional_kwargs={}, example=False)]

In [5]:
from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

chat = ChatOpenAI(temperature=0)

# System + human message templates combined into one chat prompt.
template = "You are a helpful assistant that translates {input_language} to {output_language}."
human_template = "{text}"
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Run the prompt through the chat model as a chain and show the result.
chain = LLMChain(prompt=chat_prompt, llm=chat)
chain.run(
    text="I love programming.",
    input_language="English",
    output_language="French",
)

"J'adore la programmation."