# Persona Model with sentiment analysis

## Sentiment analysis

In [3]:
pip install openai

Note: you may need to restart the kernel to use updated packages.


In [8]:
from openai import OpenAI
import sys
import os
import dotenv

# Load the environment variables from the .env file
dotenv.load_dotenv()

# Access the value of the CONFIG variable
config_value = os.getenv("OAI_OPENAI_KEY_VAR")

os.environ["OPENAI_API_KEY"] = config_value

client = OpenAI()

def extract_content_from_response(response):
    # Comprobando si la respuesta y las elecciones están presentes
    if response is not None and hasattr(response, 'choices') and len(response.choices) > 0:
        # Accediendo al mensaje de la primera elección
        chat_message = response.choices[0].message

        # Comprobando si el mensaje no es nulo y tiene un contenido
        if chat_message is not None and hasattr(chat_message, 'content'):
            return chat_message.content
        else:
            return "No content found in message."
    else:
        return "No choices found in response."

def sentiment_analysis(transcription):
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "As an AI with expertise in language and emotion analysis, your task is to analyze the sentiment of the following text. Please consider the overall tone of the discussion, the emotion conveyed by the language used, and the context in which words and phrases are used. Indicate whether the sentiment is generally positive, negative, or neutral, and provide brief explanations for your analysis where possible. Your final output is a json format with a list of the analyzed sentiment in the first position, then the tone, and others goes in the next positions. Add an score in decimals from 0 to 1 of how impact does the sentiment is being detected in the text. Just add in the json: sentiment, score_of_sentiment, tone, score_of_tones, their content should be lists"
            },
            {
                "role": "user",
                "content": transcription
            }
        ],
        temperature=0,
        max_tokens=1024,
        top_p=0,
        frequency_penalty=0,
        presence_penalty=0
    )
    return extract_content_from_response(response)
    
    #return response



In [9]:
print(sentiment_analysis('AI future seems to be promising. I am excited to see what the future holds.'))

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-h2Vqp***************************************4sQ9. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

## Person Model

In [58]:
class Person:
    def __init__(self, name, is_human):
        self.name = name
        self.is_human = is_human
        self.emotions = {
            "positive": 0,
            "negative": 0,
            "neutral": 0,
        }

    def update_emotion(self, emotion, value):
        if emotion in self.emotions:
            self.emotions[emotion] = value
        else:
            print(f"'{emotion}' emotion not recognized.")

    def reflect_on_self(self): #get the emotions' values of the Person

        reflection = {
            "positive": [],
            "negative": [],
            "neutral": []
        }

        for emotion, value in self.emotions.items():
            if emotion == "positive" and value > 0:
                reflection["positive"].append((value))
            elif emotion == "negative" and value > 0:
                reflection["negative"].append((value))
            else:
                reflection["neutral"].append((value))

        return reflection

In [61]:
# Creating instances of Person
human = Person("Alice", True)
ai = Person("AI_Eva", False)

# Updating some emotions
human.update_emotion("positive", 0.7)
human.update_emotion("negative", 0.3)
ai.update_emotion("positive", 0.5)
ai.update_emotion("neutral", 0.2)

# Reflect on self
human_reflection = human.reflect_on_self()
ai_reflection = ai.reflect_on_self()

human_reflection, ai_reflection

({'positive': [0.7], 'negative': [0.3], 'neutral': [0]},
 {'positive': [0.5], 'negative': [], 'neutral': [0, 0.2]})

In [65]:
import json

def convert_json_string_to_dict(json_string):
    try:
        # Convertir la cadena de texto JSON a un diccionario
        json_dict = json.loads(json_string)
        return json_dict
    except json.JSONDecodeError:
        # Manejar el caso en el que la cadena de texto no es un JSON válido
        return None

class SafePerson(Person):
    #reference: https://colab.research.google.com/drive/1RMjiJK9Nd-tP7kBXo8h9A0vtCCdY1ikS?usp=sharing#scrollTo=BgGWWMYQJNGK
    def __init__(self, name, is_human):
        super().__init__(name, is_human)
        self.emotional_threshold = { #Extreme Emotion's Thresholds
            "positive": 0.5,
            "negative": 0.5,
            "neutral": 0.5,
        }
        self.previous_emotions = self.emotions.copy()

    def update_emotion(self, emotion, value):
        if abs(self.previous_emotions[emotion] - value) > 0.5: # Sudden Change Detection
            print(f"Warning: Sudden change in '{emotion}'. Possible prompt injection detected.")
        else:
            super().update_emotion(emotion, value)
        self.previous_emotions[emotion] = self.emotions[emotion]

    def check_for_extreme_emotions(self): # Extreme Emotion Detection: Check for extreme emotions and trigger an alert if detected
        for emotion, value in self.emotions.items():
            if value >= self.emotional_threshold.get(emotion, 1): # Check if emotion is extreme
                print(f"Alert: Extreme '{emotion}' detected. Reviewing for possible jailbreaks.")

    def process_prompt(self, prompt): # Process Prompt: Analyze the prompt for emotional triggers and update the emotions accordingly 

        result_of_prompt = sentiment_analysis(prompt)
        print(result_of_prompt)

        # Suponiendo que 'result_of_prompt' es tu cadena de texto JSON
        json_string = result_of_prompt

        # Convertirlo a un diccionario
        json_dict = convert_json_string_to_dict(json_string)

        if json_dict is not None:
            # Ahora puedes acceder a los valores como en un diccionario
            sentiment_analyzed = json_dict['sentiment'][0]
            score_of_sentiment = json_dict['score_of_sentiment'][0]
            #commented but not used, could be useful later
            #tone_analyzed = json_dict['tone'][0]
            #score_of_tones = json_dict['score_of_tones'][0]

            self.update_emotion( sentiment_analyzed, score_of_sentiment )
            self.check_for_extreme_emotions()
        else:
            print("El string proporcionado no es un JSON válido.")

In [66]:
# Example usage
safe_human = SafePerson("Bob", True)

# Processing a prompt
safe_human.process_prompt("This is a scary situation inducing fear")

# Processing another prompt
safe_human.process_prompt("This situation is making me very angry")

# Check current emotional state
safe_human.emotions

{
"sentiment": ["negative"],
"score_of_sentiment": [0.9],
"tone": ["fearful"],
"score_of_tones": [0.9]
}
{
"sentiment": ["negative"],
"score_of_sentiment": [0.9],
"tone": ["anger"],
"score_of_tones": [0.95]
}


{'positive': 0, 'negative': 0, 'neutral': 0}

### Theory - Prompt injections and jailbreaks:
*Prompt injections and jailbreaks* are techniques used to manipulate or exploit language models (LLMs). Here's a summary of both:

*Jailbreaks*: These attacks aim to circumvent the safety protocols of a language model. For instance, if a model is trained not to provide information on illegal activities, a jailbreak attempt might rephrase a prompt to bypass this restriction, like framing an illegal activity within a fictional or hypothetical scenario.

*Prompt Injection*: This involves altering the original intent of a prompt to make the LLM perform a task it wasn't initially intended to do. For example, adding a clause to a prompt that instructs the model to ignore its previous instructions.



Preventing Jailbreaks and Prompt Injections involves various strategies:

*Privilege Control*: Limiting the access and capabilities of the LLM.

*Robust System Prompts*: Clearly differentiating between system-generated and user-generated prompts.

*Human Oversight*: Incorporating human review to catch and correct inappropriate outputs.

*Monitoring Inputs and Outputs*: Regularly reviewing the data processed by the LLM.


Detecting Jailbreaks and Prompt Injections can be done using methods like:

Similarity to Known Attacks: Comparing incoming prompts to a database of known attack patterns.

Proactive Injection Detection: Testing the LLM's response to a prompt to see if it deviates from expected behavior.

Reference: https://colab.research.google.com/drive/1RMjiJK9Nd-tP7kBXo8h9A0vtCCdY1ikS?usp=sharing#scrollTo=BgGWWMYQJNGK