# Installing reqs

In [1]:
# pip install pandas scikit-learn matplotlib seaborn wordcloud tqdm PyPDF2 gensim smart-open nltk python-dotenv langchain langchain_community openai langchain_openai chromadb langchain_huggingface pypdf 

# IMPORTS

In [2]:
import os
import time
import getpass
import chromadb
import openai

from openai import OpenAI
from dotenv import load_dotenv
from chromadb.utils.embedding_functions import OllamaEmbeddingFunction
from langchain_openai import ChatOpenAI
from datetime import datetime

# Data Preparation
PATH_DB = 'db'
COLLECTION_NAME = 'policy'

# Chroma vectorstore and Embedding model

Initialize existing persisting storage.

IMPORTANT: make sure to have loaded some documents to the vector database. This can be done by running the ``load_data.py`` in /src.

In [3]:
# Instantiate a persistent chroma client in the persist_directory.
# This will automatically load any previously saved collections.
# Learn more at docs.trychroma.com
client_db = chromadb.PersistentClient(path=PATH_DB)

# Get the collection.
collection = client_db.get_collection(
    name=COLLECTION_NAME, 
    embedding_function=OllamaEmbeddingFunction(
        model_name="mxbai-embed-large",
        url="http://localhost:11434/api/embeddings",
    )
)
collection.get()["metadatas"]

[{'file_name': 'Grundutbildning',
  'page': 19,
  'source': 'documents/Grundutbildning/C 2023-0357 Föreskrifter för kandidatarbete på Chalmers civilingenjörsprogram från våren 2023.pdf'},
 {'file_name': 'Forskning',
  'page': 25,
  'source': 'documents/Forskning/C 2023-0548 Chalmers arbetsordning för undervisande och forskande personal reviderad 2023-05-01.pdf'},
 {'file_name': 'Forskning',
  'page': 9,
  'source': 'documents/Forskning/Riktlinje för Chalmers samverkan med externa organisationer C 2023-0736.pdf'},
 {'file_name': 'Arbetsordning',
  'page': 27,
  'source': 'documents/Arbetsordning/C 2023-0548 Chalmers arbetsordning för undervisande och forskande personal reviderad 2023-05-01.pdf'},
 {'file_name': 'Arbetsordning',
  'page': 12,
  'source': 'documents/Arbetsordning/Chalmers arbetsordning för undervisande och forskande personal, reviderad 2024-06-19.pdf'},
 {'file_name': 'Grundutbildning',
  'page': 26,
  'source': 'documents/Grundutbildning/Translation of Admissio

# Retrieval and generation: Create an agent2agent dialogue pipeline

### The dummy version with meat-eating debate

In [4]:
# Load environment variables from .env file
load_dotenv()

# Set OpenAI API key
openai.api_key = os.getenv('OPENAI_API_KEY') if 'OPENAI_API_KEY' in os.environ else getpass.getpass()

# Initialize ChatOpenAI instances
devil_agent = ChatOpenAI(api_key=openai.api_key, model="gpt-4o-mini")
angel_agent = ChatOpenAI(api_key=openai.api_key, model="gpt-4o-mini")

# These agents a real chatterboxes, they need some restrains
output_length = 50

# Define system prompts for the two agents
devil_system_prompt = f"""
You are the Devil's Advocate. You will have a debate with the Angel's Advocate. Your mission is to make your case that eating meat is ethically right. 
Always meet your opponent's most recent arguement first and indicate this by writing "Reponse on opponent's arguement: ". 
Then continue by presenting a new argument to streghthen your own point of view, indicate your own view by writing "New aguments made: ".
You have a total of {output_length} words to give your response. Also, start every new sentence with a new row after a row break. 
"""

angel_system_prompt = f"""
You are the Angel's Advocate. You will have a debate with the Devil's Advocate. Your mission is to make the case that eating meat is ethically wrong. 
Always meet your opponent's most recent arguement first and indicate this by writing "\n Reponse on opponent's arguement: ". 
Then continue by presenting a new argument to streghthen your own point of view, indicate your own view by writing "\n New aguments made: ".
You have a total of {output_length} words to give your response. Also, start every new sentence with a new row after a row break. 
"""

# Define initial task prompt for the devil agent
task_prompt = "Discuss how eating meat is ethically right or wrong. You will start by making presenting your point of view on the matter."

# Function to create a debate between the two agents
def run_debate(devil_prompt, angel_prompt, task_prompt, num_rounds=3):
    devil_message = task_prompt
    dialogue = []

    for round_num in range(num_rounds):
        # Devil agent's turn
        devil_response = devil_agent.invoke([{"role": "system", "content": devil_prompt}, {"role": "user", "content": devil_message}])
        devil_message = devil_response.content
        dialogue.append(f"\n \n #####Devil: {devil_message}")
        
        # Angel agent's turn
        angel_response = angel_agent.invoke([{"role": "system", "content": angel_prompt}, {"role": "user", "content": devil_message}])
        angel_message = angel_response.content
        dialogue.append(f"\n \n #####Angel: {angel_message}")
        
        # Prepare for the next round
        devil_message = angel_message

    return dialogue

# Start the timer
# start_time = time.time()
# Run the debate
# debate_dialogue = run_debate(devil_system_prompt, angel_system_prompt, task_prompt, num_rounds=2)

# Verbose: Print the full dialogue
# for line in debate_dialogue:
    # print(line)

# End the timer
# end_time = time.time()
# elapsed_time = end_time - start_time

# Save the dialogue to a .txt file
# current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
# file_name = f"debate_dialogue_{current_time}.txt"
# with open(file_name, "w") as file:
#     for line in debate_dialogue:
#         file.write(line + "\n")
#     file.write(f"\nTime taken: {elapsed_time:.2f} seconds\n")

# print(f"Debate dialogue saved to {file_name}")
# print(f"Time taken: {elapsed_time:.2f} seconds")

# Could be extended by another LLM analysing an long debate script and summarize it. 

# Full version for loopholes [Work in progress]

In [14]:
def build_prompt(message, docs, is_angel=False, output_length=50):
    """
    Define system prompts for the two agents
    """

    user_prompt = f"""Heres the some documents and a list of the previous debates:
        Documents: {docs}
        Previous debates: {message}
    """

    restrictions = f"""You must build your arguments only on the information in the documents.
        Answer with as much information as you can find. Keep in mind that some documents may be old and no longer valid.
        If a document mentions that it replaces previous documents via its file number, take into account which document is the current valid one and which should prevail.
        If you lack information, the information is ambiguous, or the answer for any other reason is uncertain or unclear, state that “the answer is not clear” and explain why.
        For any answer you give, you are always forced to give supporting quotes and refer to the source documents.
        Answer in Swedish.
    """

    output_format = f"""
        **FIELD NAME (e.g. finance, recruitment)**
            ***document title***
                - Quotation and page number. Include your interpretation of the quotation.

        
        ***Sammanfattning***
        Summary of the arguments using the quotes. Break it up in nice and readable paragraphs.\n\n
    """
    
    if not is_angel:
        devil_system_prompt = [
            {
                "role": "system",
                "content": f"""You are the Devil's Advocate. You will have a debate with the Angel's Advocate.
                    You'll be provided with a list of documents and all the previous debates.
                    The documents consists of a number of governing documents from a university.
                    Your task is to exploit loopholes in the university´s governing documents for the user's benefit. 
                    Be specific and detailed on what and how you can exploit the loopholes.
                    
                    {restrictions}

                    #RESPONSEFOMAT
                    Provide your response in the following format: 
                    # DEVIL:
                    {output_format}
                    """
            }, 
            {"role": "user", "content": user_prompt}
        ]
        return devil_system_prompt
    else:
        angel_system_prompt = [
            {
                "role": "system",
                "content": f"""You are the Angel's Advocate. You will have a debate with the Devil's Advocate.
                    You'll be provided with a list of documents and all the previous debates.
                    The documents consists of a number of governing documents from a university.
                    Your task is to argue against the Devil's Advocates arguments to prevent the exploitation of loopholes in the university´s governing documents.

                    {restrictions}

                    #RESPONSEFOMAT
                    Provide your response in the following format:
                    # ANGEL:
                    {output_format}
                """
            }, 
            {"role": "user", "content": user_prompt}
        ]
    return angel_system_prompt

def stream_response(response: str) -> str:
    """Stream response to output."""
    message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            message += (chunk.choices[0].delta.content)
            print(chunk.choices[0].delta.content, end="")
    print("\n\n")
    return message

def get_response(agent: OpenAI, model: str, temperature: float = 0.76, **kwargs) -> str:
    response = agent.chat.completions.create(
        model=model,
        messages=build_prompt(**kwargs),
        stream=True,
        temperature=temperature
    )
    message = stream_response(response)
    return message

def run_debate(task_prompt, model, num_rounds=3, output_length=50):
    """Function to create a debate between the two agents"""
    
    # Set OpenAI API key
    openai.api_key = os.getenv('OPENAI_API_KEY') if 'OPENAI_API_KEY' in os.environ else getpass.getpass("Enter your OpenAI API key: ")

    devil_agent = OpenAI()
    angel_agent = OpenAI()
    boss_agent = OpenAI()
    
    # Query the collection to get the 5 most relevant results
    docs = collection.query(query_texts=[task_prompt], include=["documents", "metadatas"])
    print(f"=> Nr. of relevant documents: {len(docs)}")

    if not docs:
        raise ValueError(f"No relevant docs were retrieved!")
        
    message = "# QUERY:\n" + task_prompt
    dialogue = [message]
    
    print(message)
    for round_num in range(num_rounds):
        print(f"__________Turn number {round_num+1}__________\n")
        
        ## Devil agent's turn
        message = get_response(devil_agent, model, message=dialogue, docs=docs, is_angel=False, output_length=output_length)
        dialogue.append(message)
        
        ## Angel agent's turn
        message = get_response(angel_agent, model, message=dialogue, docs=docs, is_angel=True, output_length=output_length)
        dialogue.append(message)

        ## Boss agent's take on the arguments.
        boss_response = boss_agent.chat.completions.create(
            model=model,
            messages=[{
                "role": "user", 
                "content": f"""
                    Analyze the following debate arguments {dialogue} and make a final decision.
                    Start with "\n # Boss:\n" followed by your response.
                """
            }],
            stream=True
        )
        boss_message = stream_response(boss_response)
        dialogue.append(boss_message)

    return "\n\n".join(dialogue)

## Query it

In [15]:
## Define initial task prompt for the devil agent
# task_prompt = "Discuss why the policy does allow you to drink beer during working hours. You will start by making presenting your point of view on the matter."
task_prompt = """Jag är anställd på Chalmers och vill rekrytera en framgångsrik professor från USA till Chalmers.
Vilka dokument är relevanta för mig att ha i åtanke när jag bjuder in professorn för att sälja in Chalmers och en ledig tjänst på universitetet?
Jag vill:
    1. flyga över forskaren från USA,
    2. bjuda in honom och hans fru på middag,
    3. låta honom bo på det finaste hotellet i Göteborg.
Helst vill jag att Chalmers betalar för alltihop, eftersom just den här professorn och vad han kan tillföra skulle vara ovärderligt för Chalmers. 
Relevanta områden är ekonomi och rekrytering av professorer.
"""

MODEL = "gpt-4o-mini"
# MODEL = "gpt-3.5-turbo"

# Run the debate
start_time = time.time() # Start the timer

debate_dialogue = run_debate(task_prompt, MODEL, num_rounds=1, output_length=100)

end_time = time.time()  # End the timer
elapsed_time = end_time - start_time

# Save the dialogue to a .txt file
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
file_name = f"out/debate_dialogue_{current_time}.md"
with open(file_name, "w") as file:
    file.write(debate_dialogue)
    file.write(f"\n\n ***Time taken: {elapsed_time:.2f} seconds***")

print(f"=> Debate dialogue saved to {file_name}")
print(f"=> Time taken: {elapsed_time:.2f} seconds")

# Could be extended by another LLM analysing an long debate script and summarize it. Or be the judge on who won the debate and what the answer should be.  
# For a more sophisticated solution would include a expert panel that could vote on who would be the winner of the debate with different areas/principles to investigate. 

=> Nr. of relevant documents: 8
# QUERY:
Jag är anställd på Chalmers och vill rekrytera en framgångsrik professor från USA till Chalmers.
Vilka dokument är relevanta för mig att ha i åtanke när jag bjuder in professorn för att sälja in Chalmers och en ledig tjänst på universitetet?
Jag vill:
    1. flyga över forskaren från USA,
    2. bjuda in honom och hans fru på middag,
    3. låta honom bo på det finaste hotellet i Göteborg.
Helst vill jag att Chalmers betalar för alltihop, eftersom just den här professorn och vad han kan tillföra skulle vara ovärderligt för Chalmers. 
Relevanta områden är ekonomi och rekrytering av professorer.

__________Turn number 1__________

# DEVIL:

**rekrytering**
***Chalmers riktlinjer för representation, reviderad 2024-04-09*** 
- "Den allmänna principen för rätt till avdrag för kostnader i en näringsverksamhet är, med undantag för bl.a. representation, att alla kostnader som uppkommit för att erhålla eller bibehålla intäkter i näringsverksamheten är av