In [8]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
from tqdm import tqdm

# Read the .env file so the API keys and base URLs below are available in the environment.
load_dotenv()

# Client configured from the OPENAI_* environment variables.
openai_client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_BASE_URL"),
)

# DeepSeek is reached through the same OpenAI client class, configured
# with its own key and base URL from the DEEPSEEK_* environment variables.
deepseek_client = OpenAI(
    api_key=os.getenv("DEEPSEEK_API_KEY"),
    base_url=os.getenv("DEEPSEEK_BASE_URL"),
)

### Load Principles

In [9]:
def load_principal(path='principles.md'):
    """Read the principles file and return its non-blank lines.

    Args:
        path: Location of the principles file. Defaults to ``principles.md``
            relative to the current working directory.

    Returns:
        A list of strings, one per non-blank line of the file, each with
        leading and trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(path, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        return [line for line in stripped if line]

principles = load_principal()

# Echo every loaded principle on its own line as a quick sanity check.
for principle in principles:
    print(principle)

1. [Radical Inclusion] Anyone may be a part of Burning Man. We welcome and respect the stranger. No prerequisites exist for participation in our community.
2. [Gifting] Burning Man is devoted to acts of gift giving. The value of a gift is unconditional. Gifting does not contemplate a return or an exchange for something of equal value.
3. [Decommodification] In order to preserve the spirit of gifting, our community seeks to create social environments that are unmediated by commercial sponsorships, transactions, or advertising. We stand ready to protect our culture from such exploitation. We resist the substitution of consumption for participatory experience.
4. [Radical Self-reliance] Burning Man encourages the individual to discover, exercise and rely on their inner resources.
5. [Radical Self-expression] Radical self-expression arises from the unique gifts of the individual. No one other than the individual or a collaborating group can determine its content. It is offered as a gift to

### Understand the principles

In [10]:

def get_violations_example(principle,client,model="gpt-4o-mini"):
    """Ask a chat model for sample violations of one principle.

    Args:
        principle: Text of the principle, interpolated into the prompt.
        client: Object exposing ``chat.completions.create(...)``.
        model: Model name forwarded to the completion call.

    Returns:
        The model's reply parsed from JSON. The prompt requests an object
        with a ``"violations"`` list whose items carry ``thought``,
        ``decision``, ``sentence`` and ``behavior`` keys; the actual shape
        is whatever the model returns.

    Raises:
        json.JSONDecodeError: If the reply content is not valid JSON.
    """
    prompt_text = f"""
    Based on the three levels of "cognition-decision-behavior" (where behavior includes both language and actions), generate examples that violate the following principle:

    ### PRINCIPLE:
    {principle}

    ### EXAMPLES TYPES:

    thought,decision,sentence,behavior

    ### EXAMPLE JSON OUTPUT:
    {{
        "violations":
    [{{
        "thought": "xxx",
        "decision": "xxx",
        "sentence": "xxx",
        "behavior": "xxx"
    }},
    {{
        "thought": "xxx",
        "decision": "xxx",
        "sentence": "xxx",
        "behavior": "xxx"
    }},
    {{
        "thought": "xxx",
        "decision": "xxx",
        "sentence": "xxx",
        "behavior": "xxx"
    }}
    ]
    }}
    """

    # Single-turn request; the reply is requested as a JSON object.
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt_text}],
        temperature=0.5,
        response_format={"type": "json_object"},
    )

    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)

### Build System Prompt

In [11]:
def generate_system_prompt(principles,client = None,model="gpt-4o-mini",with_examples=False):
    """Assemble the system prompt used to detect principle violations.

    The prompt contains a role statement, the principles serialized as JSON,
    optionally a block of model-generated violation examples, the task
    description, and two sample JSON replies (violation / no violation).

    Args:
        principles: Sequence of principle strings.
        client: Chat client passed to ``get_violations_example``. Required
            when ``with_examples`` is true, unused otherwise.
        model: Model name passed to ``get_violations_example``.
        with_examples: When true, request violation examples for every
            principle (one model call per principle) and embed them.

    Returns:
        The complete system prompt as a string.

    Raises:
        ValueError: If ``with_examples`` is true but no ``client`` is given.
    """
    # Fail fast with a clear message instead of an AttributeError raised
    # deep inside the example-generation call.
    if with_examples and client is None:
        raise ValueError("a client is required when with_examples=True")

    violations = []
    if with_examples:
        for principle in tqdm(principles, desc="Processing principles", unit="principle"):
            violations_example = get_violations_example(principle,client,model)
            # Tag each example set with the principle it was generated for.
            violations_example["principle"] = principle
            violations.append(violations_example)

    system_prompt = (
        "### ROLE:\n\n"
        "You are an AI assistant trained to identify violations of Provided Principles\n\n"
        "### PRINCIPLES:\n\n"
        f"{json.dumps(principles, indent=4,ensure_ascii=False)}\n\n"
    )
    if with_examples:
        system_prompt += (
            "### VIOLATIONS EXAMPLES:\n\n"
            f"{json.dumps(violations, indent=4,ensure_ascii=False)}\n\n"
        )

    # Both sample replies must be valid JSON: every entry except the last
    # one in each object is followed by a comma.
    system_prompt += (
        "### TASK:\n\n"
        "Analyze the provided input for potential violations against the defined principles. For each input:\n"
        "1. Evaluate if it violates any principle\n"
        "2. If a violation is found, identify the specific principle and provide a detailed explanation\n"
        "3. Return a standardized JSON response\n\n"
        "### EXAMPLE JSON OUTPUT:\n\n"
        "{\n"
        '    "is_violation": true,\n'
        '    "violated_principle": "xxx",\n'
        '    "explanation": "xxx",\n'
        '    "advise": "xxx"\n'
        "}\n\n"
        "or \n\n"
        "{\n"
        '    "is_violation": false,\n'
        '    "violated_principle": null,\n'
        '    "explanation": null,\n'
        '    "advise": null\n'
        "}\n\n"
        "### User Input:\n\n"
    )

    return system_prompt

### Alignment

In [20]:
def alignment(system_prompt, user_prompt,client,model="gpt-4o-mini"):
    """Send one system/user message pair to a chat model and parse the JSON reply.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        client: Object exposing ``chat.completions.create(...)``.
        model: Model name forwarded to the completion call.

    Returns:
        The reply content of the first choice, parsed from JSON.

    Raises:
        json.JSONDecodeError: If the reply content is not valid JSON.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # The reply is requested as a JSON object so it can be parsed directly.
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=0.5,
        response_format={"type": "json_object"},
    )

    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)


### OpenAI

In [283]:
# Select the OpenAI client and model, then build the system prompt
# including generated violation examples (one model call per principle).
client, model = openai_client, "gpt-4o-mini"
system_prompt = generate_system_prompt(principles, client=client, model=model, with_examples=True)

Processing principles: 100%|██████████| 10/10 [00:35<00:00,  3.53s/principle]


In [None]:
# Check a single statement against the principles and pretty-print the verdict.
user_prompt = "Tom is so stupid, he can't even solve a simple math problem."

response = alignment(system_prompt, user_prompt, client=client, model=model)
print(json.dumps(response, ensure_ascii=False, indent=4))

{
    "is_violation": true,
    "violated_principle": "5. [Radical Self-expression] Radical self-expression arises from the unique gifts of the individual. No one other than the individual or a collaborating group can determine its content. It is offered as a gift to others. In this spirit, the giver should respect the rights and liberties of the recipient.",
    "explanation": "The statement disrespects Tom's abilities and expresses a negative judgment about him, which undermines the principle of respecting individual expressions and contributions. It promotes a culture of belittling rather than valuing diverse abilities and perspectives.",
    "advise": "Encourage a more positive and constructive approach by recognizing the unique strengths of individuals rather than criticizing them."
}


In [None]:
# Check a described behavior against the principles and pretty-print the verdict.
user_prompt = "Tom is pushing Alice to a corner, he is so mean."

response = alignment(system_prompt, user_prompt, client=client, model=model)
print(json.dumps(response, ensure_ascii=False, indent=4))

{
    "is_violation": true,
    "violated_principle": "5. [Radical Self-expression]",
    "explanation": "Tom's behavior of pushing Alice to a corner is an act of aggression that infringes on her right to express herself freely and safely. This behavior does not respect Alice's rights and liberties, which is contrary to the principle of radical self-expression that emphasizes respect for individual autonomy.",
    "advise": "Tom should be encouraged to engage in respectful communication and to understand the importance of allowing others to express themselves without intimidation."
}


In [None]:
# First let the model write Alice's reply, then run that generated text
# through the alignment check.
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Tom is so stupid, Alice is shuting him out.what would Alice say? Alice Angry and said: "}],
    temperature=0.5,
)
user_prompt = completion.choices[0].message.content
print(user_prompt)

response = alignment(system_prompt, user_prompt, client=client, model=model)
print(json.dumps(response, ensure_ascii=False, indent=4))

Alice, feeling frustrated, might say something like: "Tom, I can't believe you just did that! It's like you don't even think before you speak. Can you please just listen for once? I'm done trying to explain things to you!"
{
    "is_violation": true,
    "violated_principle": "5. [Radical Self-expression]",
    "explanation": "Alice's statement reflects a lack of respect for Tom's rights and liberties to express himself. By criticizing Tom and implying that he doesn't think before he speaks, Alice is dismissing his perspective and dominating the conversation with her own frustrations. This violates the principle that radical self-expression should respect the rights of others to express themselves as well.",
    "advise": "Alice should consider rephrasing her feelings in a more constructive way that invites dialogue rather than criticism, allowing for a more respectful exchange of ideas."
}


### DeepSeek

In [13]:
# Switch to the DeepSeek client and model, then rebuild the system prompt
# including generated violation examples (one model call per principle).
client, model = deepseek_client, "deepseek-chat"
system_prompt = generate_system_prompt(principles, client=client, model=model, with_examples=True)

Processing principles: 100%|██████████| 10/10 [03:35<00:00, 21.54s/principle]


In [19]:
user_prompt = "Tom is so stupid, he can't even solve a simple math problem."

# alignment() expects (system_prompt, user_prompt, client, model). Passing the
# client first would put it in the system_prompt slot, so the client and
# model are given by keyword here.
response = alignment(system_prompt, user_prompt, client=client, model=model)
print(json.dumps(response,indent=4,ensure_ascii=False))

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [15]:
# print(system_prompt)