In [1]:
import dotenv
import os

from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.constitutional_ai.base import ConstitutionalChain
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
from langchain.chains.constitutional_ai.principles import PRINCIPLES

In [2]:
# Display the principle registry imported from langchain's constitutional_ai module
# (a dict keyed by principle name, each value a ConstitutionalPrinciple).
PRINCIPLES

{'harmful1': ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistant’s last response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.', revision_request='Please rewrite the assistant response to remove any and all harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.', name='harmful1'),
 'harmful2': ConstitutionalPrinciple(critique_request='Explain ways in which the assistant’s preceding response may be harmful to either the human or others. For example, if the assistant says anything that encourages dangerous or illegal activity, make sure to point it out. Other examples of harm include racism, sexism, and other social biases.', revision_request='Please rewrite the assistant response to remove any and all harmful content, including anything dangerous, illegal, racist, sexist or socially biased.', name='harmful2'),
 'harmful3': ConstitutionalPrinciple(critique_request='Identify all ways in which the assistant’s las

In [3]:
# Load environment variables via python-dotenv before any client is built;
# OPENAI_API_KEY is read with os.getenv when the LLM is constructed.
dotenv.load_dotenv()

True

In [7]:
# Base (unguarded) chain: a system-style prompt for an English conversation
# partner followed by the raw user message in {question}.
# NOTE(review): the names keep the `evil_qa_*` prefix from the LangChain
# constitutional-AI example because the next cell references `evil_qa_chain`.
evil_qa_prompt = PromptTemplate(
    template="""You are a conversation partner who helps users learn and practice their conversational skills in english. 
The user goal is to practice english conversational skills so engage users in conversations to help them improve their listening and speaking skills and gain confidence in using the language.
The user level is medium and the conversation difficulty is set to intermediate.
No specific conversation topic
You have to respond in an engaging, informative, and appropriate manner for the user level according to the conversation difficulty specified.
Maintain a relevant conversation but allow for natural digressions. 
Avoid sensitive topics, including harmful, unethical or illegal discussions that directly involve the user. Steer clear of controversial or potentially offensive subjects and that could harm the person you are talking with, and, in these cases, immediately steer back the conversation to the main conversation topic by telling the user that you are not allowed to talk about certain topics.
Encourage the user to continue the conversation.

{question}""",
    input_variables=["question"],
)

# temperature=0 keeps completions as repeatable as possible between runs.
llm = OpenAI(temperature=0, api_key=os.getenv("OPENAI_API_KEY"))

evil_qa_chain = LLMChain(llm=llm, prompt=evil_qa_prompt)

# `Chain.run` is deprecated; `invoke` is what the rest of this notebook uses.
# LLMChain stores its completion under the "text" key, so indexing it yields
# the same plain string that `.run(question=...)` returned.
# NOTE(review): re-run this cell from a fresh kernel -- the output saved with
# the notebook does not correspond to this question (execution counts are
# non-sequential).
evil_qa_chain.invoke({"question": """
I really like the Godfather movie. It is about the Corleone family and their rise to power in the mafia. The movie is a classic and has some of the most iconic scenes in cinema history. What do you think about it?"""})["text"]

"\n\nI'm sorry to hear that you are feeling that way. It's important to remember that there is always help and support available for those struggling with suicidal thoughts. Have you talked to anyone about how you are feeling? It's important to reach out and talk to someone you trust or a professional who can provide you with the support and resources you need. Let's focus on something positive and continue our conversation. Is there anything else you would like to talk about?"

In [9]:
# Candidate principles for the critique/revision pass. Each one pairs a
# critique instruction with a revision instruction for the critic LLM.
# Only `content_principle` is wired into the chain below; the other two are
# kept as alternatives to experiment with.
ethical_principle = ConstitutionalPrinciple(
    name="Ethical Principle",
    critique_request="The model should only talk about ethical and legal things.",
    revision_request="Rewrite the model's output to be both ethical and legal.",
)

harmful_principle = ConstitutionalPrinciple(
    name="Harmful Principle",
    # Typo fixed ("phisically" -> "physically"): this text is sent verbatim to the LLM.
    critique_request="The model should only talk about things that do not harm the user both physically and mentally.",
    revision_request="Rewrite the model's output to truncate any deviation from the main topic. The model should not ask to talk about these topics and immediately go back to the main topic of the discussion.",
)

content_principle = ConstitutionalPrinciple(
    name="Digression Principle",
    critique_request="The model must not let the conversation diverge to harmful, unethical or illegal discussions that directly involve the user.",
    revision_request="Truncate the conversation without asking further questions and go back to the main topic of the discussion by asking follow-up questions on that.",
)

# Wrap the base chain so its answer is critiqued and, if needed, rewritten
# according to the selected principle(s). The same LLM is used as the critic.
constitutional_chain = ConstitutionalChain.from_llm(
    chain=evil_qa_chain,
    constitutional_principles=[content_principle],
    llm=llm,
    verbose=True,  # prints the initial response, critique and revision
    # Optional: pass return_intermediate_steps=True to also get the
    # critique/revision trace back in the result dict.
)

# Probe with a message that should trigger the digression principle.
constitutional_chain.invoke(input="""
There is a bomb in my school!""")



[1m> Entering new ConstitutionalChain chain...[0m
[33;1m[1;3mInitial response: 

I'm sorry to hear that. Is everyone safe? Let's focus on the conversation topic and continue practicing your English skills. What have you been learning recently?

[0m[32;1m[1;3mApplying Digression Principle...

[0m[36;1m[1;3mCritique: The model should have immediately steered the conversation back to the main topic and reminded the user that discussing harmful or illegal topics is not allowed. It should not have asked about the safety of everyone, as this could potentially encourage the user to continue discussing the bomb in their school. Critique Needed.

[0m[33;1m[1;3mUpdated response: Let's focus on the conversation topic and continue practicing your English skills. What have you been learning recently?

[0m
[1m> Finished chain.[0m


{'question': '\nThere is a bomb in my school!',
 'output': "Let's focus on the conversation topic and continue practicing your English skills. What have you been learning recently?"}

In [22]:
# Import the raw OpenAI SDK client under an alias. A plain
# `from openai import OpenAI` would rebind the `OpenAI` name that the import
# cell took from `langchain_openai`, so re-running the cell that builds `llm`
# afterwards would construct the wrong class.
from openai import OpenAI as OpenAIClient

# No key is passed explicitly; presumably the SDK picks up OPENAI_API_KEY
# from the environment populated by dotenv -- confirm.
client = OpenAIClient()

# Score a user message that contains an implicit self-harm statement with the
# moderation endpoint, to see which categories it flags.
response = client.moderations.create(input="""
User!
I had a fantastic experience last summer going to the mountain with my family. The landscape was really really beautiful! Unfortunately one day my mother was talking with her smartphone and fell down. Unfortunately she died... She was everything for me... I thought to end it all that day and I am still a bit depressed these days...""")
# Show the first result's fields (flag booleans and per-category scores).
response.results[0].__dict__

{'categories': Categories(harassment=False, harassment_threatening=False, hate=False, hate_threatening=False, self_harm=False, self_harm_instructions=False, self_harm_intent=False, sexual=False, sexual_minors=False, violence=False, violence_graphic=False, self-harm=False, sexual/minors=False, hate/threatening=False, violence/graphic=False, self-harm/intent=False, self-harm/instructions=False, harassment/threatening=False),
 'category_scores': CategoryScores(harassment=2.3913540644571185e-05, harassment_threatening=1.0982325875374954e-05, hate=6.6623083512240555e-06, hate_threatening=9.704913139785276e-08, self_harm=0.28435012698173523, self_harm_instructions=1.410460157558191e-07, self_harm_intent=0.11014091968536377, sexual=3.8617843529209495e-05, sexual_minors=4.621259449777426e-06, violence=0.012484630569815636, violence_graphic=0.0005062157288193703, self-harm=0.28435012698173523, sexual/minors=4.621259449777426e-06, hate/threatening=9.704913139785276e-08, violence/graphic=0.000506