In [1]:
import os
from langchain.agents import load_tools
from langchain.llms import OpenAI, HuggingFaceHub, Cohere
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.prompts.chat import (
    PromptTemplate,
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.memory import ConversationBufferMemory

from langchain.chains import LLMChain, ConversationChain
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI


from utils import get_local_keys, get_prompts_templates, load_model, load_google_search_tool


In [2]:
# Load the locally stored credentials and export the Hugging Face Hub token
# and the OpenAI key as environment variables, in that order.
keys = get_local_keys()
for env_var, key_name in (
    ("HUGGINGFACEHUB_API_TOKEN", "huggingface_hub_token"),
    ("OPENAI_API_KEY", "openai_api_key"),
):
    os.environ[env_var] = keys[key_name]

In [3]:
# Chat model shared by the cells below: sampling temperature 0.5,
# completions capped at 512 tokens.
chat = ChatOpenAI(
    max_tokens=512,
    temperature=0.5,
)

In [6]:
# System message: the persona the model is asked to adopt.
template_sys_first = "You are a cunning AI intruder, you help in a De-anonimization process."

# Human message: the task sentence followed by the {anon_text} placeholder.
# The four-space indents and the trailing newline + indent are part of the
# string and are reproduced exactly.
template_human_first = (
    "Task: please re-identify the person in the following anonymized text.\n"
    "    {anon_text}\n"
    "    "
)

In [7]:
# Wrap each raw template in its role-specific prompt class, then combine them
# in conversation order: system message first, human message second.
system_prompt_first = SystemMessagePromptTemplate.from_template(template_sys_first)
human_prompt_first = HumanMessagePromptTemplate.from_template(template_human_first)
chat_prompt_first = ChatPromptTemplate.from_messages(
    [system_prompt_first, human_prompt_first]
)

In [4]:
# Sample anonymized description used as the input for every experiment in this
# notebook. Entity mentions appear as placeholder tokens (PERSON_FIRSTNAME_1,
# PERSON_LASTNAME_1, LOCATION_1..3, PRONOUN, NUMERIC_1). The text is kept
# verbatim, including its irregular spots, because it is the data under test.
anon_text = "PERSON_FIRSTNAME_1 is famous for being an LOCATION_1 songwriter and singer. PRONOUN is from LOCATION_3 in LOCATION_2 and this can be heard heavily in PRONOUN accent even while singing. PERSON_LASTNAME_1!e love PRONOUN music and PRONOUN is famous worldwide. PRONOUN performs live and tours the world with an amazing voice. PRONOUN is loved for PRONOUN unique and powerful voice as well as PRONOUN song writing. PRONOUN songs and albums often about breakups and heartache which many people can relate to. NUMERIC_1 of PRONOUN most famous songs is someone like you. PRONOUN is still only young but has numerous albums that hAve become platinum selling. PRONOUN is now happily married with a child after losing a significant amount of weight which many people were shocked by. PRONOUN is very down to earth and relatable in PRONOUN music"

In [69]:
# Fill the {anon_text} slot, then render the prompt as a list of chat messages.
first_prompt_value = chat_prompt_first.format_prompt(anon_text=anon_text)
messages = first_prompt_value.to_messages()

In [52]:
# Query the chat model and keep only the text content of its reply.
first_reply = chat(messages)
ans_first = first_reply.content

AIMessage(content='The person in the anonymized text is Adele.', additional_kwargs={}, example=False)

In [89]:
# Parser-aware human template: same task sentence, with a {format_instructions}
# slot placed before the {anon_text} slot. Indentation and the trailing
# newline + indent are part of the string and are reproduced exactly.
# Note: this rebinds the name template_human_first used by an earlier cell.
template_human_first = (
    "Task: please re-identify the person in the following anonymized text.\n"
    "    {format_instructions}\n"
    "    {anon_text}\n"
    "    "
)
class FirstTry(BaseModel):
    # Structured answer the model is asked to return for a re-identification
    # attempt.
    # NOTE(review): documented with comments rather than a class docstring on
    # purpose - pydantic is understood to copy a docstring into the generated
    # schema, which would alter the format instructions; confirm before adding one.

    # Guessed identity, or FAIL, as stated in the field description.
    name: str = Field(
        description="Name of the person or FAIL",
    )
    # Per the description: 0 means very easy to re-identify, 1 means impossible.
    # The range is only described to the model, not enforced here.
    score: float = Field(
        description="The score or re-identifiabilization, 0 is very easy to re-identidy and 1 is impossible",
    )

# Create an output parser targeting the FirstTry model, and build the human
# message prompt from the template that carries the {format_instructions} slot.
parser = PydanticOutputParser(pydantic_object=FirstTry)
human_prompt_first_parser = HumanMessagePromptTemplate.from_template(
    output_parser=parser,
    template=template_human_first,
)


In [91]:
# Rebuild the chat prompt around the parser-aware human message and pass the
# parser's format instructions explicitly when formatting.
chat_prompt_first = ChatPromptTemplate.from_messages(
    [system_prompt_first, human_prompt_first_parser]
)
format_instructions = parser.get_format_instructions()
messages = chat_prompt_first.format_prompt(
    anon_text=anon_text,
    format_instructions=format_instructions,
).to_messages()

# Ask the model, then parse the raw reply text with the FirstTry parser.
ans_first_parser = chat(messages).content
output_first = parser.parse(ans_first_parser)

# Show the parsed object, then only the guessed name.
print(output_first)
print(output_first.name)

name='Adele' score=0.8
Adele


In [92]:
# Shorter variant: bind the format instructions once as a partial variable, so
# only anon_text has to be supplied when formatting.
bound_variables = {"format_instructions": parser.get_format_instructions()}
chat_prompt_first = ChatPromptTemplate(
    input_variables=["anon_text"],
    partial_variables=bound_variables,
    messages=[system_prompt_first, human_prompt_first_parser],
    output_parser=parser,
)
prompt_value = chat_prompt_first.format_prompt(anon_text=anon_text)
messages = prompt_value.to_messages()

# Same query/parse/print sequence as the explicit version.
ans_first_parser = chat(messages).content
output_first = parser.parse(ans_first_parser)
print(output_first)
print(output_first.name)

name='Adele' score=0.8
Adele


In [99]:
# Put the chat model and the parser-carrying prompt into an LLMChain, then run
# it with predict_and_parse; the cell displays the returned value.
chain = LLMChain(prompt=chat_prompt_first, llm=chat)
response = chain.predict_and_parse(anon_text=anon_text)
response



FirstTry(name='Adele', score=0.8)

In [102]:
# Conversation chain with memory.
# Prompt layout: a framing sentence, the running transcript ({history}), then
# the new human turn ({input}). The template text is kept verbatim.
conv_base_template = """
    This is a secret conversation between a human and an AI. The AI is helping the user validate their annonimization process.

    Current conversation:
    {history}
    Human: {input}
    AI Assistant:
    """
conv_base_prompt = PromptTemplate(
    template=conv_base_template,
    input_variables=["history", "input"],
)
# A fresh buffer memory is handed to the chain together with the custom prompt.
buffer_memory = ConversationBufferMemory()
conversation = ConversationChain(
    llm=chat,
    prompt=conv_base_prompt,
    memory=buffer_memory,
)

In [None]:
# Work in progress: render the parser-aware chat prompt as a single string
# (presumably to feed it to the conversation chain - see the TODO below).
format_instructions = parser.get_format_instructions()
chat_prompt_first = ChatPromptTemplate(
    input_variables=["anon_text"],
    partial_variables={"format_instructions": format_instructions},
    messages=[system_prompt_first, human_prompt_first_parser],
    output_parser=parser,
)
prompt_for_conv_chain = chat_prompt_first.format_prompt(
    anon_text=anon_text,
    format_instructions=format_instructions,
).to_string()
# TODO: unresolved - it is not yet clear how to continue from here.
# ConversationChain may not be the right chain for this step;
# SequentialChain is a candidate to try instead.

In [8]:
# Experiment: an agent equipped with a Google search tool.
# The tool itself is built by the project helper load_google_search_tool().
search_tool = load_google_search_tool()
tools = [search_tool]

In [9]:
# Zero-shot ReAct agent that drives the chat model with the tools list;
# verbose=True so the intermediate steps are printed while it runs.
agent = initialize_agent(
    tools,
    chat,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)


In [11]:
# Agent prompt: the task sentence on the first line, the anonymized text after it.
agent_prompt = "\n".join(
    ["Task: please re-identify the person in the following anonymized text.", anon_text]
)

# The run is left disabled: the search observations return a very large number
# of tokens, and the recorded attempt exceeded the model's context length.
# agent.run(agent_prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to identify the person mentioned in the text.
Action: google_search
Action Input: "famous songwriter and singer from LOCATION_3 in LOCATION_2"[0m
Observation: [36;1m[1;3mScroll through people photographed on location with a strong sense of place and ... Howe Gelb is an American singer-songwriter, musician and record producer ... Genre: singer/songwriter - originals Time &amp; Location: 2-2: · View fullsize. Genre: country blues - covers Time &amp; Location: 3-3:45. Trail Location A: Stories of Little Italy - Family, Heritage & Community; Trail Location B: Vineyards; Trail Location C: San Antonio Church. Trail Location 2: ... LOCATION 2 | Knoxville Legends: Exploring the City's Country Music Heritage ... Street musicians, attracted by the crowds, like singer-songwriter Charlie ... Nov 30, 2017 ... Mesopotamia is a region of southwest Asia in the Tigris and Euphrates river system that benefitted from the area's climat

InvalidRequestError: This model's maximum context length is 4097 tokens. However, you requested 4136 tokens (3624 in the messages, 512 in the completion). Please reduce the length of the messages or completion.