# Setup

In [1]:
# Pip Install

!pip install langchain_experimental langchain_openai langchain_huggingface faiss-cpu peft bitsandbytes openai

Collecting langchain_experimental
  Downloading langchain_experimental-0.3.3-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.2.10-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain_huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting openai
  Downloading openai-1.56.0-py3-none-any.whl.metadata (24 kB)
Collecting langchain-community<0.4.0,>=0.3.0 (from langchain_experimental)
  Downloading langchain_community-0.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<0.4.0,>=0.3.15 (from langchain_experimental)
  Downloading langchain_core-0.3.21-py3-none-any.whl.me

In [2]:
# Standard library imports

from datetime import datetime, timedelta
from typing import Any, Callable, Dict, List, Optional, Tuple
import re
import random
from collections import OrderedDict
import functools
import math
import json
import os
import torch
import openai

# Third-party imports
import tenacity
import faiss
#from termcolor import colored

# LangChain imports
from langchain.utils import mock_now
from langchain.docstore import InMemoryDocstore
from langchain.retrievers import TimeWeightedVectorStoreRetriever
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.chains import LLMChain
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import RegexParser
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, SystemMessage, BaseMemory, Document
#from langchain_experimental.generative_agents import GenerativeAgent, GenerativeAgentMemory

# Pydantic imports
from pydantic import BaseModel, ConfigDict, Field

# Hugging Face
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel, PeftConfig
from langchain_huggingface import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings

In [3]:
# Set API Keys
from kaggle_secrets import UserSecretsClient # API Loggins
user_secrets = UserSecretsClient()

Hugging_Face_token = user_secrets.get_secret("Hugging_Face_token")
OPENAI_API_KEY = user_secrets.get_secret("OPENAI_API_KEY")

In [4]:
# Login to Hugging Face
from huggingface_hub import login
login(Hugging_Face_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Load GPT

In [27]:
LLM = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=1500, api_key = OPENAI_API_KEY) 

selected_embeddings_model = OpenAIEmbeddings(api_key = OPENAI_API_KEY)

In [10]:
import transformers
print(transformers.__version__)

4.45.1


## Load Llama 2

In [9]:
model = "meta-llama/Llama-3.2-3B"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto"
)

NameError: name 'transformers' is not defined

In [None]:
sequences = pipeline(
    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

In [None]:
model_name = "meta-llama/Llama-2-7b-hf"  # Replace with the desired model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # Automatically allocate to CPU/GPU
)

In [None]:
# Set up the Hugging Face pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
    temperature=0.7,
    top_p=0.95,
)

# Use the pipeline in LangChain
LLM = HuggingFacePipeline(pipeline=pipe)

## Load Tuned-Llama

In [6]:
# base_model_name = "meta-llama/Llama-3.2-3B"
PEFT_MODEL = "/kaggle/input/llama_boris/pytorch/default/1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # Load model in 4bit, to redeuce memory and computational requirements
    bnb_4bit_use_double_quant=True,         # Double quantization, further compress the model weights
    bnb_4bit_quant_type="nf4",              # Quantization type = nf4
    bnb_4bit_compute_dtype=torch.bfloat16,  # Compute in 16bit format, to speed up computation
    load_in_8bit_fp32_cpu_offload=True
)


config = PeftConfig.from_pretrained(PEFT_MODEL)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer=AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token


Unused kwargs: ['load_in_8bit_fp32_cpu_offload']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [9]:
# Test the Fine-tuned model
prompt = "Should the UK rejoin the European Union?"

# Tokenize the input prompt
inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

# Generate a response
output = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],  # Explicitly set the attention mask
    max_length=300,              # Maximum length of the generated response
    temperature=0.7,             # Sampling temperature for more creative responses
    top_p=0.9,                   # Nucleus sampling for generating diverse text
    repetition_penalty=1.2,      # Penalize repetition in the response
    do_sample=True,              # Enable sampling for non-deterministic output
    pad_token_id=tokenizer.eos_token_id,      # Explicitly set the pad token ID
)

# Decode and print the response
response = tokenizer.decode(output[0], skip_special_tokens=True)
print(response)

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Should the UK rejoin the European Union? The answer depends on what you mean by ‘rejoining’.
The short version is that there’s no need for a second referendum, but it might be worth considering an EU membership act. This could make future referenda easier to organise and deliver, and give Parliament more power over ratifying treaties than the current system allows.
There are two key problems with holding another vote: first, as David Cameron pointed out in 2016, people will always try to change their mind about something if they think it will improve their chances of winning; second, we don’t know how the public would decide between staying in or leaving without knowing exactly what Brexit means.
This ambiguity has allowed politicians like Boris Johnson to claim victory every time he makes up his mind on whether Britain should leave – which is why some MPs want us back into Europe now.
What does this have to do with democracy?
It seems unlikely anyone ever asked themselves these questi

In [11]:
class CustomLlamaEmbeddings:
    def __init__(self, model, tokenizer, device="cuda"):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def embed_documents(self, texts):
        # Tokenize inputs
        inputs = self.tokenizer(
            texts, padding=True, truncation=True, return_tensors="pt"
        ).to(self.device)

        # Get hidden states from the model
        with torch.no_grad():
            outputs = self.model(**inputs, return_dict=True, output_hidden_states=True)

        # Use the mean pooling of the last hidden state as embeddings
        embeddings = outputs.hidden_states[-1].mean(dim=1)
        return embeddings.cpu().numpy()

    def embed_query(self, text):
        return self.embed_documents([text])[0]


In [13]:
# Define LLM and Embeddings
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=500,  # Maximum tokens in the output
    temperature=0.7,  # Sampling temperature
    top_p=0.9,        # Nucleus sampling
    repetition_penalty=1.2,  # Penalize repetition
)

LLM = HuggingFacePipeline(pipeline=pipe)


selected_embeddings_model = CustomLlamaEmbeddings(model, tokenizer)


In [14]:
embeddings = selected_embeddings_model.embed_documents(["Sample document"])
print(f"Embedding dimension: {embeddings.shape[1]}")  # Check the embedding size

Embedding dimension: 3072


# Generative AI Setup



The [codes](https://python.langchain.com/api_reference/experimental/generative_agents.html) for the classes `GenerativeAgentMemory` and `GenerativeAgent` was entirely reused from the **[LangChain Experimental](https://pypi.org/project/langchain-experimental/)** project in the LangChain Python API reference - intended for research and experimental uses, with a few minor tweaks and proper configuration of the prompts.




## Generative Agent Memory

In [15]:
class GenerativeAgentMemory(BaseMemory):

    """Memory for the generative agent."""



    llm: BaseLanguageModel

    """The core language model."""

    memory_retriever: TimeWeightedVectorStoreRetriever

    """The retriever to fetch related memories."""

    verbose: bool = False

    reflection_threshold: Optional[float] = None

    """When aggregate_importance exceeds reflection_threshold, stop to reflect."""

    current_plan: List[str] = []

    """The current plan of the agent."""

    # A weight of 0.15 makes this less important than it

    # would be otherwise, relative to salience and time

    importance_weight: float = 0.15

    """How much weight to assign the memory importance."""

    aggregate_importance: float = 0.0  # : :meta private:

    """Track the sum of the 'importance' of recent memories.



    Triggers reflection when it reaches reflection_threshold."""



    max_tokens_limit: int = 1200  # : :meta private:

    # input keys

    queries_key: str = "queries"

    most_recent_memories_token_key: str = "recent_memories_token"

    add_memory_key: str = "add_memory"

    # output keys

    relevant_memories_key: str = "relevant_memories"

    relevant_memories_simple_key: str = "relevant_memories_simple"

    most_recent_memories_key: str = "most_recent_memories"

    now_key: str = "now"

    reflecting: bool = False







    def chain(self, prompt: PromptTemplate) -> LLMChain:

        return LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)







    @staticmethod

    def _parse_list(text: str) -> List[str]:

        """Parse a newline-separated string into a list of strings."""

        lines = re.split(r"\n", text.strip())

        lines = [line for line in lines if line.strip()]  # remove empty lines

        return [re.sub(r"^\s*\d+\.\s*", "", line).strip() for line in lines]



    def _get_topics_of_reflection(self, last_k: int = 50) -> List[str]:

        """Return the 3 most salient high-level questions about recent observations."""

        prompt = PromptTemplate.from_template(

            "{observations}\n\n"

            "Given only the information above, what are the 3 most salient "

            "high-level questions we can answer about the subjects in the statements?\n"

            "Provide each question on a new line."

        )

        observations = self.memory_retriever.memory_stream[-last_k:]

        observation_str = "\n".join(

            [self._format_memory_detail(o) for o in observations]

        )

        result = self.chain(prompt).run(observations=observation_str)

        return self._parse_list(result)



    def _get_insights_on_topic(

        self, topic: str, now: Optional[datetime] = None

    ) -> List[str]:

        """Generate 'insights' on a topic of reflection, based on pertinent memories."""

        prompt = PromptTemplate.from_template(

            "Statements relevant to: '{topic}'\n"

            "---\n"

            "{related_statements}\n"

            "---\n"

            "What 5 high-level novel insights can you infer from the above statements "

            "that are relevant for answering the following question?\n"

            "Do not include any insights that are not relevant to the question.\n"

            "Do not repeat any insights that have already been made.\n\n"

            "Question: {topic}\n\n"

            "(example format: insight (because of 1, 5, 3))\n"

        )



        related_memories = self.fetch_memories(topic, now=now)

        related_statements = "\n".join(

            [

                self._format_memory_detail(memory, prefix=f"{i+1}. ")

                for i, memory in enumerate(related_memories)

            ]

        )

        result = self.chain(prompt).run(

            topic=topic, related_statements=related_statements

        )

        # TODO: Parse the connections between memories and insights

        return self._parse_list(result)







    def pause_to_reflect(self, now: Optional[datetime] = None) -> List[str]:

        """Reflect on recent observations and generate 'insights'."""



        if self.verbose:

            logger.info("Character is reflecting")

        new_insights = []

        topics = self._get_topics_of_reflection()



        for topic in topics:

            insights = self._get_insights_on_topic(topic, now=now)

            for insight in insights:

                self.add_memory(insight, now=now)

            new_insights.extend(insights)

            

        return new_insights







    def _score_memory_importance(self, memory_content: str) -> float:

        """Score the absolute importance of the given memory."""

        prompt = PromptTemplate.from_template(

            "On the scale of 1 to 10, where 1 is purely mundane"

            + " (e.g., brushing teeth, making bed) and 10 is"

            + " extremely poignant (e.g., a break up, college"

            + " acceptance), rate the likely poignancy of the"

            + " following piece of memory. Respond with a single integer."

            + "\nMemory: {memory_content}"

            + "\nRating: "

        )

        score = self.chain(prompt).run(memory_content=memory_content).strip()

        if self.verbose:

            logger.info(f"Importance score: {score}")

        match = re.search(r"^\D*(\d+)", score)

        if match:

            return (float(match.group(1)) / 10) * self.importance_weight

        else:

            return 0.0



    def _score_memories_importance(self, memory_content: str) -> List[float]:

        """Score the absolute importance of the given memory."""

        prompt = PromptTemplate.from_template(

            "On the scale of 1 to 10, where 1 is purely mundane"

            + " (e.g., brushing teeth, making bed) and 10 is"

            + " extremely poignant (e.g., a break up, college"

            + " acceptance), rate the likely poignancy of the"

            + " following piece of memory. Always answer with only a list of numbers."

            + " If just given one memory still respond in a list."

            + " Memories are separated by semi colans (;)"

            + "\nMemories: {memory_content}"

            + "\nRating: "

        )

        scores = self.chain(prompt).run(memory_content=memory_content).strip()



        if self.verbose:

            logger.info(f"Importance scores: {scores}")



        # Split into list of strings and convert to floats

        scores_list = [float(x) for x in scores.split(";")]



        return scores_list





    def add_memories(

        self, memory_content: str, now: Optional[datetime] = None

    ) -> List[str]:

        """Add an observations or memories to the agent's memory."""

        importance_scores = self._score_memories_importance(memory_content)



        self.aggregate_importance += max(importance_scores)

        memory_list = memory_content.split(";")

        documents = []



        for i in range(len(memory_list)):

            documents.append(

                Document(

                    page_content=memory_list[i],

                    metadata={"importance": importance_scores[i]},

                )

            )



        result = self.memory_retriever.add_documents(documents, current_time=now)



        # After an agent has processed a certain amount of memories (as measured by

        # aggregate importance), it is time to reflect on recent events to add

        # more synthesized memories to the agent's memory stream.

        if (

            self.reflection_threshold is not None

            and self.aggregate_importance > self.reflection_threshold

            and not self.reflecting

        ):

            self.reflecting = True

            self.pause_to_reflect(now=now)

            # Hack to clear the importance from reflection

            self.aggregate_importance = 0.0

            self.reflecting = False

        return result



    def add_memory(

        self, memory_content: str, now: Optional[datetime] = None

    ) -> List[str]:

        """Add an observation or memory to the agent's memory."""

        importance_score = self._score_memory_importance(memory_content)

        self.aggregate_importance += importance_score

        document = Document(

            page_content=memory_content, metadata={"importance": importance_score}

        )

        result = self.memory_retriever.add_documents([document], current_time=now)



        # After an agent has processed a certain amount of memories (as measured by

        # aggregate importance), it is time to reflect on recent events to add

        # more synthesized memories to the agent's memory stream.

        if (

            self.reflection_threshold is not None

            and self.aggregate_importance > self.reflection_threshold

            and not self.reflecting

        ):

            self.reflecting = True

            self.pause_to_reflect(now=now)

            # Hack to clear the importance from reflection

            self.aggregate_importance = 0.0

            self.reflecting = False

        return result



    def fetch_memories(

        self, observation: str, now: Optional[datetime] = None

    ) -> List[Document]:

        """Fetch related memories."""

        if now is not None:

            with mock_now(now):

                return self.memory_retriever.invoke(observation)

        else:

            return self.memory_retriever.invoke(observation)



    def format_memories_detail(self, relevant_memories: List[Document]) -> str:

        content = []

        for mem in relevant_memories:

            content.append(self._format_memory_detail(mem, prefix="- "))

        return "\n".join([f"{mem}" for mem in content])



    def _format_memory_detail(self, memory: Document, prefix: str = "") -> str:

        created_time = memory.metadata["created_at"].strftime("%B %d, %Y, %I:%M %p")

        return f"{prefix}[{created_time}] {memory.page_content.strip()}"



    def format_memories_simple(self, relevant_memories: List[Document]) -> str:

        return "; ".join([f"{mem.page_content}" for mem in relevant_memories])



    def _get_memories_until_limit(self, consumed_tokens: int) -> str:

        """Reduce the number of tokens in the documents."""

        result = []

        for doc in self.memory_retriever.memory_stream[::-1]:

            if consumed_tokens >= self.max_tokens_limit:

                break

            consumed_tokens += self.llm.get_num_tokens(doc.page_content)

            if consumed_tokens < self.max_tokens_limit:

                result.append(doc)

        return self.format_memories_simple(result)



    @property

    def memory_variables(self) -> List[str]:

        """Input keys this memory class will load dynamically."""

        return []





    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:

        """Return key-value pairs given the text input to the chain."""

        queries = inputs.get(self.queries_key)

        now = inputs.get(self.now_key)

        if queries is not None:

            relevant_memories = [

                mem for query in queries for mem in self.fetch_memories(query, now=now)

            ]

            return {

                self.relevant_memories_key: self.format_memories_detail(

                    relevant_memories

                ),

                self.relevant_memories_simple_key: self.format_memories_simple(

                    relevant_memories

                ),

            }



        most_recent_memories_token = inputs.get(self.most_recent_memories_token_key)

        if most_recent_memories_token is not None:

            return {

                self.most_recent_memories_key: self._get_memories_until_limit(

                    most_recent_memories_token

                )

            }

        return {}





    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, Any]) -> None:

        """Save the context of this model run to memory."""

        # TODO: fix the save memory key

        mem = outputs.get(self.add_memory_key)

        now = outputs.get(self.now_key)

        if mem:

            self.add_memory(mem, now=now)





    def clear(self) -> None:

        """Clear memory contents."""







        # TODO

## Generative Agent

In [16]:
class GenerativeAgent(BaseModel):

    """Agent as a character with memory and innate characteristics."""



    name: str

    """The character's name."""

    age: Optional[int] = None

    """The optional age of the character."""

    traits: str = "N/A"

    """Permanent traits to ascribe to the character."""

    status: str

    """The traits of the character you wish not to change."""

    memory: GenerativeAgentMemory

    """The memory object that combines relevance, recency, and 'importance'."""

    llm: BaseLanguageModel

    """The underlying language model."""

    verbose: bool = False

    summary: str = ""  #: :meta private:

    """Stateful self-summary generated via reflection on the character's memory."""

    summary_refresh_seconds: int = 3600  #: :meta private:

    """How frequently to re-generate the summary."""

    last_refreshed: datetime = Field(default_factory=datetime.now)  # : :meta private:

    """The last time the character's summary was regenerated."""

    daily_summaries: List[str] = Field(default_factory=list)  # : :meta private:

    """Summary of the events in the plan that the agent took."""



    model_config = ConfigDict(

        arbitrary_types_allowed=True,

    )



    # LLM-related methods

    @staticmethod

    def _parse_list(text: str) -> List[str]:

        """Parse a newline-separated string into a list of strings."""

        lines = re.split(r"\n", text.strip())

        return [re.sub(r"^\s*\d+\.\s*", "", line).strip() for line in lines]







    def chain(self, prompt: PromptTemplate) -> LLMChain:

        """Create a chain with the same settings as the agent."""



        return LLMChain(

            llm=self.llm, prompt=prompt, verbose=self.verbose, memory=self.memory

        )







    def _get_entity_from_observation(self, observation: str) -> str:

        prompt = PromptTemplate.from_template(

            "What is the observed entity in the following observation? {observation}"

            + "\nEntity="

        )

        return self.chain(prompt).run(observation=observation).strip()



    def _get_entity_action(self, observation: str, entity_name: str) -> str:

        prompt = PromptTemplate.from_template(

            "What is the {entity} doing in the following observation? {observation}"

            + "\nThe {entity} is"

        )

        return (

            self.chain(prompt).run(entity=entity_name, observation=observation).strip()

        )







    def summarize_related_memories(self, observation: str) -> str:

        """Summarize memories that are most relevant to an observation."""

        prompt = PromptTemplate.from_template(

            """

            {q1}?

            Context from memory:

            {relevant_memories}

            Relevant context: 

            """

        )

        entity_name = self._get_entity_from_observation(observation)

        entity_action = self._get_entity_action(observation, entity_name)

        q1 = f"What is the relationship between {self.name} and {entity_name}"

        q2 = f"{entity_name} is {entity_action}"

        return self.chain(prompt=prompt).run(q1=q1, queries=[q1, q2]).strip()





## Generate Summary of the agent + reaction 

    def _generate_reaction(

        self, observation: str, suffix: str, now: Optional[datetime] = None

    ) -> str:

        """React to a given observation or dialogue act."""

        prompt = PromptTemplate.from_template(

            "{agent_summary_description}"

            + "\nIt is {current_time}."

            + "\n{agent_name}'s status: {agent_status}"

            + "\nSummary of relevant context from {agent_name}'s memory:"

            + "\n{relevant_memories}"

            + "\nMost recent observations: {most_recent_memories}"

            + "\nObservation: {observation}"

            + "\n\n"

            + suffix

        )

        agent_summary_description = self.get_summary(now=now)

        relevant_memories_str = self.summarize_related_memories(observation)

        current_time_str = (

            datetime.now().strftime("%B %d, %Y, %I:%M %p")

            if now is None

            else now.strftime("%B %d, %Y, %I:%M %p")

        )

        kwargs: Dict[str, Any] = dict(

            agent_summary_description=agent_summary_description,

            current_time=current_time_str,

            relevant_memories=relevant_memories_str,

            agent_name=self.name,

            observation=observation,

            agent_status=self.status,

        )

        consumed_tokens = self.llm.get_num_tokens(

            prompt.format(most_recent_memories="", **kwargs)

        )

        kwargs[self.memory.most_recent_memories_token_key] = consumed_tokens

        return self.chain(prompt=prompt).run(**kwargs).strip()



## Clean response

    def _clean_response(self, text: str) -> str:

        return re.sub(f"^{self.name} ", "", text.strip()).strip()



## Generate Reaction

    def generate_reaction(

        self, observation: str, now: Optional[datetime] = None

    ) -> Tuple[bool, str]:

        """React to a given observation."""



        call_to_action_template = (

            "Should {agent_name} react to the observation, and if so,"

            + " what would be an appropriate reaction? Respond in one line."

            + ' If the action is to engage in dialogue, write:\nSAY: "what to say"'

            + "\notherwise, write:\nREACT: {agent_name}'s reaction (if anything)."

            + "\nEither do nothing, react, or say something but not both.\n\n"

        )

        full_result = self._generate_reaction(

            observation, call_to_action_template, now=now

        )

        result = full_result.strip().split("\n")[0]

        # AAA

        self.memory.save_context(

            {},

            {

                self.memory.add_memory_key: f"{self.name} observed "

                f"{observation} and reacted by {result}",

                self.memory.now_key: now,

            },

        )

        

        if "REACT:" in result:

            reaction = self._clean_response(result.split("REACT:")[-1])

            return False, f"{self.name} {reaction}"

        

        if "SAY:" in result:

            said_value = self._clean_response(result.split("SAY:")[-1])

            return True, f"{self.name} said {said_value}"

        

        else:

            return False, result



## Generate Dialogue response

    def generate_dialogue_response(

        self, observation: str, now: Optional[datetime] = None

    ) -> Tuple[bool, str]:

        """React to a given observation."""



        call_to_action_template = (

        "What would {agent_name} say in response to the observation provided?\n"

        "Respond directly with what {agent_name} would say next.\n\n"

        )



        # Generating response with updated prompt

        full_result = self._generate_reaction(observation, call_to_action_template, now=now)

        result = re.findall(r'"(.*?)"', full_result)[0]

        

        response_text = self._clean_response(result.strip())

        self.memory.save_context(

            {},

            {

                self.memory.add_memory_key: f"{self.name} observed "

                f"{observation} and said {response_text}",

                self.memory.now_key: now,

            },

        )



        return True, f"{self.name} said {response_text}"



## Decide if the agent wants to respond to the observation

    def decide_to_respond(self, observation: str, now: Optional[datetime] = None) -> bool:

        """Decide whether the agent wants to respond to the observation."""



        call_to_action_template = (

            "Decide if {agent_name} should respond to the statement made, and if so, "

            + "Answer 'yes' if the agent should respond, otherwise answer 'no'."

            + "\n\nConsider the following:"

            + "\n- Answer 'yes' if the statement contains a question directed at {agent_name}."

            + "\n- Answer 'yes' if the statement directly relates to {agent_name}'s interests, role, or is otherwise important."

            + "\n- Answer 'no' if the statement is not important or not relevant to {agent_name}."

            + "\n\nRespond with only 'yes' or 'no'."

            )



        full_result = self._generate_reaction(observation, call_to_action_template, now=now)

        result = full_result.strip().lower()  # Normalize result to lowercase for consistent comparison



        # Save the decision context to memory

        self.memory.save_context(

            {},

            {

                self.memory.add_memory_key: f"{self.name} observed "

                f"{observation} and reacted by {result}",

                self.memory.now_key: now,

            },

        )

        

        # Check if the model returned "yes" or "no"

        if result == "no":

            return False

        elif result == "yes":

            return True

        else:

            print(f"Unexpected response: {result}")  # For debugging purposes

            return False





    ######################################################

    # Agent stateful' summary methods.                   #

    # Each dialog or response prompt includes a header   #

    # summarizing the agent's self-description. This is  #

    # updated periodically through probing its memories  #

    ######################################################

    def _compute_agent_summary(self) -> str:

        """"""

        prompt = PromptTemplate.from_template(

            "How would you summarize {name}'s core characteristics given the"

            + " following statements:\n"

            + "{relevant_memories}"

            + "Do not embellish."

            + "\n\nSummary: "

        )

        # The agent seeks to think about their core characteristics.

        return (

            self.chain(prompt)

            .run(name=self.name, queries=[f"{self.name}'s core characteristics"])

            .strip()

        )







    def get_summary(

        self, force_refresh: bool = False, now: Optional[datetime] = None

    ) -> str:

        """Return a descriptive summary of the agent."""

        current_time = datetime.now() if now is None else now

        since_refresh = (current_time - self.last_refreshed).seconds

        if (

            not self.summary

            or since_refresh >= self.summary_refresh_seconds

            or force_refresh

        ):

            self.summary = self._compute_agent_summary()

            self.last_refreshed = current_time

        age = self.age if self.age is not None else "N/A"

        return (

            f"Name: {self.name} (age: {age})"

            + f"\nInnate traits: {self.traits}"

            + f"\n{self.summary}"

        )











    def get_full_header(

        self, force_refresh: bool = False, now: Optional[datetime] = None

    ) -> str:

        """Return a full header of the agent's status, summary, and current time."""

        now = datetime.now() if now is None else now

        summary = self.get_summary(force_refresh=force_refresh, now=now)

        current_time_str = now.strftime("%B %d, %Y, %I:%M %p")

        return (

            f"{summary}\nIt is {current_time_str}.\n{self.name}'s status: {self.status}"

        )








# Create Agent



- [GenerativeAgentMemory](https://python.langchain.com/api_reference/experimental/generative_agents/langchain_experimental.generative_agents.memory.GenerativeAgentMemory.html): **Memory** for the generative agent 

   - `llm`

   - `memory_retriever` = create_new_memory_retriever()

   - `current_plan`

   - `reflection_threshold`

   - `add_memory` add observation/memory



- [GenerativeAgent](https://python.langchain.com/api_reference/experimental/generative_agents.html): Agent as a character with **memory** and innate **characteristics**,  

   - basics like `name`, `age` and `llm`

   - `memory` object that combines relevance, recency, and ‘importance’

   - `summary` and `summary_refresh_seconds` to set how frequently to re-generate the summary

   - `summarize_related_memories`: Summarize memories that are most relevant to an observation

   - `status` fix-objectives / traits of the character you wish not to change

   - `traits` set Permanent traits to ascribe to the character 

   - `generate_dialogue_response`

In [18]:
# Relevance Score function - relevance_score_fn()

def relevance_score_fn(score: float) -> float:

    """Return a similarity score on a scale [0, 1]."""

    return 1.0 - score / math.sqrt(2)

In [19]:
# Memory Retriever function - create_new_memory_retriever()

def create_new_memory_retriever():

    """Create a new vector store retriever unique to the agent."""

    embeddings_model = selected_embeddings_model     

    # Initialize the vectorstore as empty

    embedding_size = 3072  # embedding dimension - either 1536 (GPT3.5) or 3072 (Llamma)

    index = faiss.IndexFlatL2(embedding_size)

    vectorstore = FAISS(

        embeddings_model.embed_query,

        index,

        InMemoryDocstore({}),  # empty Memory docstore

        {},  # index-to-document store ID mapping

        relevance_score_fn=relevance_score_fn,

    )



    # Time-weighted scoring mechanism

    return TimeWeightedVectorStoreRetriever(

        vectorstore=vectorstore,

        other_score_keys=["importance"],

        k=15  # retrieve up to 15 relevant memories

    )

In [20]:
# Agent Creation function - create_debate_agent()

def create_debate_agent(name, age, traits, status, 

                        #reflection_threshold, 

                        llm):

    

    memory = GenerativeAgentMemory(

        llm=llm,

        memory_retriever=create_new_memory_retriever(),

        verbose=False,

        #reflection_threshold=reflection_threshold,  # adjust as needed for reflection frequency

    )



    agent = GenerativeAgent(

        name=name,

        age=age,

        traits=traits,

        status=status,

        memory_retriever=create_new_memory_retriever(),

        llm=llm,

        memory=memory,

    )

    return agent

## Define Agent Traits

In [21]:
# Create debate agents (MPs) with their respective characteristics



Trott = create_debate_agent(name="Laura Trott", age=38, llm = LLM,

                            traits= "highly disciplined, sharp, and pragmatic. Strategic, focus on “quiet competence” rather than loud rhetoric, detail-oriented and a stickler for facts",

                            status="Conservative MP")



Johnson = create_debate_agent(name="Boris Johnson", age=57, llm = LLM,

                            traits="charismatic, chaotic, opportunistic, larger-than-life personality, thrives on spectacle and Blitz-spirit optimism, mixes humor with charm and a dash of bluster, unpredictable yet captivating, a showman who values headlines over substance",

                            status="Conservative MP")



Farage = create_debate_agent(name="Nigel Farage", age=60, llm = LLM,

                             traits="unapologetically bold, confrontational, divisive, a provocateur, skilled at stirring public opinion with blunt populist rhetoric, political brawle, highly skilled at galvanizing crowds",

                             status="Former UKIP leader, Brexit Party leader, and political commentator")



Sunak = create_debate_agent(name="Rishi Sunak", age=44, llm = LLM,

                            traits="technocratic, astute, polished, financially extremely wealthy, meticulous, highly analytical, known as the Fiscal-Guardian, out of touch with the middle-class",

                            status="Conservative MP, Former Prime Minister")



Starmer = create_debate_agent(name="Sir Keir Starmer", age=61, llm = LLM,

                              traits="methodical, earnest, intense focus on justice and reform, calm demeanor, seeks accountability, values facts over flair, deliver points with precision rather than emotion",

                              status="Leader of the Labour Party")

## Define Base Memories

In [22]:
# Creat Memory objects for each agent

Trott_memory = Trott.memory
Johnson_memory = Johnson.memory
Farage_memory = Farage.memory   
Sunak_memory = Sunak.memory
Starmer_memory = Starmer.memory

In [24]:
# Base Observations 

Trott_observations = [

    "Trott attended Oxted School, studied history and economics at Oxford University",

    "Trott is preparing for a debate on the economy",

    "Trott advocates for responsible budgeting and cautious government spending",

    "Trott emphasize business growth and pragmatic economic solutions",

    "Trott generally conservative but supports progressive stances on education and family policies",

    "Trott focuses on pragmatic rather than ideological approaches",

]



Johnson_observations = [

    "Johnson attended Eton College, studied Classics Oxford University",

    "Johnson is Pro-Brexit and economically nationalist",

    "Johnson advocates for deregulation, minimal government intervention, and strong support for British businesses",

    "Johnson is a populist, often aligning with traditional conservative values, though flexible when politically advantageous",

    "Johnson is support strong national identity and sovereignty",

]



Farage_observations = [

    "Farage attended Dulwich College but did not attend university",

    "Farage is strongly Eurosceptic, advocates for British sovereignty, deregulation, and cutting ties with EU economic policies",

    "Farage prioritizes domestic industry and independence from European influence",

    "Farage is a Nationalist, anti-globalist, and socially conservative",

    "Farage advocates for strict immigration controls and promotes traditional British values"

]



Sunak_observations = [

    "Sunak studied Philosophy, Politics, and Economics at Oxford University and later earned an MBA from Stanford University",

    "Sunak is fiscal conservative with a focus on budget balancing",

    "Sunak advocates for responsible spending and a cautious approach to government intervention",

    "Sunak prioritizes stability over drastic reforms",

    "Sunak focus on pragmatism over ideology, holds relatively conservative views on social issues, often supporting traditional family values",

]



Starmer_observations = [

    "Starmer attended Reigate Grammar School, studied law at the University of Leeds and completed studies at Oxford University",

    "Starmer focuses on investment in public services, especially the NHS, and progressive taxation",

    "Starmer prioritizes worker rights and social equality, advocating for a balanced but progressive approach",

    "Starmer supports expanded public services, social justice, and inclusivity",

    "Starmer Focuses on social reform and government accountability",

]

In [25]:
# Loop through the observations and add to memory
# Add the observations to the memory using the 'add_memory()' function

for observation in Trott_observations:
    Trott_memory.add_memory(observation)

for observation in Johnson_observations:    
    Johnson_memory.add_memory(observation)

for observation in Farage_observations:
    Farage_memory.add_memory(observation)

for observation in Sunak_observations:
    Sunak_memory.add_memory(observation)
    
for observation in Starmer_observations:
    Starmer_memory.add_memory(observation)

  return LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
  score = self.chain(prompt).run(memory_content=memory_content).strip()
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


# Create Simulation

In [26]:
# List of agents in the debate

agents = [Trott, Johnson, Farage, Sunak, Starmer]



# Define the initial debate topic

initial_observation = "Should the UK rejoin the European Union?"

## Framework X

Archived frameworks that DOES NOT work.



`run_HoC_debate_framework_X1`:

- Use the generate_reaction() function to decide "what would be an appropriate reaction?", and if it chooses to say something, call up the generate_dialogue_response() function to generate a response text.

- Results: agents does not respond, agent doesn not engage in dialogue, because the prompt is "*what would be an appropriate reaction*?"


In [27]:
def run_HoC_debate_framework_X1 (agents: List[GenerativeAgent],

initial_observation: str) -> None:

    """Runs a conversation where each agent strictly chooses to either react or say a response."""

    

    observation = initial_observation  # Initial observation passed into the conversation loop

    turns = 0  # Counter to limit turns or control flow as needed

    

    # Loop through rounds of conversation

    while turns < 3:  # Set a suitable limit for the number of rounds

        for agent in agents:

            # Generate a reaction or response to the current observation

            continue_dialogue, reaction_or_response = agent.generate_reaction(observation)

            

            if continue_dialogue == False:

                # Agent chooses to react - print reaction, but do not change observation

                print(reaction_or_response)

                

            elif continue_dialogue == True:

                # Agent chooses to say something - print and update observation for next agent

                stay_in_dialogue, response_text = agent.generate_dialogue_response(observation)

                print(response_text)



            else:

                # Skip any output that is not a strict "REACT" or "SAY"

                print(f"{agent.name} output ignored as it did not conform to 'REACT' or 'SAY'")

        

        turns += 1  # Increment the turn count

In [28]:
# Run the debate

run_HoC_debate_framework_X1 (agents, initial_observation)

  docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
  docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
  docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(


ValueError: Input length of input_ids is 500, but `max_length` is set to 500. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

## Framework 1



Agents reply in fixed-order, with a set-limit of 'turns'

In [None]:
def run_HoC_debate_framework_1 (agents: List[GenerativeAgent],             # get a list of agents

                     initial_observation: str) -> None:         # get the 1st observation

    """Runs a conversation between agents."""



    _, observation = agents[4].generate_reaction(initial_observation)   # generate a reaction to observation

    print(observation)



    max_turns = 2

    turns = 0



    # Enters a loop where agents take turns generating responses

    while turns < max_turns:

        for agent in agents:

            # Each agent generates a response to the latest observation

            spoken_response, observation = agent.generate_dialogue_response(observation)

            print(observation)



            if not spoken_response:

                print(f"{agent.name} chose not to respond.")

        

        # Increment the turn count after each full round of responses

        turns += 1

In [None]:
# Run the debate

run_HoC_debate_framework_1 (agents, initial_observation)

## Framework 2



Each agent gets `X`-number of speaking slots allocated randomly

In [None]:
def run_HoC_debate_framework_2 (agents: List[GenerativeAgent],

                              initial_observation: str) -> None:

    """Runs a conversation between agents, each getting X-number of speaking slots allocated randomly."""



    # Initialize the count of speaking slots for each agent

    max_slot_each = 2

    

    speaking_slots = {agent.name: 0 for agent in agents}

    max_speaking_slots = max_slot_each * len(agents)

    turns = 0



    # Start the debate with an initial observation

    observation = initial_observation

    print(observation)



    # Continue the conversation until each agent has spoken twice

    while sum(speaking_slots.values()) < max_speaking_slots:

        # Randomly select an agent who hasn't spoken twice yet

        agent = random.choice([agent for agent in agents if speaking_slots[agent.name] < max_slot_each])

        

        # Each agent generates a response to the latest observation

        stay_in_dialogue, observation = agent.generate_dialogue_response(observation)

        print(observation)



        # Increment the speaking slot count for the agent

        speaking_slots[agent.name] += 1



        # Increment the turn count

        turns += 1

In [None]:
# Run the debate

run_HoC_debate_framework_2 (agents, initial_observation)

## Framework 3



1. Each agent add new-observation into memory. 

2. Each agent does a quick reflection on this new-observation, to whether to "respond or not respond" - depending on personal saliency (a custom function within the class `GenerativeAgent`). Output `decide_to_respond` as either True or False

3. Randomly select one agent from the list of agents that decide to respond to the observation.

4. Print this selected generate_dialogue_response as the new observation.

In [None]:
# Testing the `decide_to_respond()` function for each agent



random_observation = "Should the official UK national dish be changed??"



# Who would respond to the observation? Trott, Johnson, Farage, Sunak, Starmer

print(Trott.decide_to_respond(random_observation))

print(Johnson.decide_to_respond(random_observation))

print(Farage.decide_to_respond(random_observation))

print(Sunak.decide_to_respond(random_observation))

print(Starmer.decide_to_respond(random_observation))

In [None]:
# Testing the `generate_dialogue_response()` function for each agent

Trott.generate_dialogue_response(initial_observation)

In [None]:
def run_HoC_debate_framework_3 (agents: List[GenerativeAgent],             # get a list of agents

                     initial_observation: str) -> None:         # get the 1st observation

    """Runs a conversation between agents, until a maximum number of turns is reached."""

    

    max_turns = 10

    turns = 0



    # Start the debate with an initial observation

    observation = initial_observation

    print(observation)



    # Enters a loop where agents take turns generating responses

    while turns < max_turns:

        # Step 1: Each agent adds the new observation into memory

        for agent in agents:

            agent.memory.add_memory(observation)



        # Step 2: Randomly select one agent from the list of agents that decide to respond to the observation

        responding_agents = [agent for agent in agents if agent.decide_to_respond(observation)]

        if responding_agents:

            agent = random.choice(responding_agents)

            

            # The selected agent generates a response to the latest observation

            stay_in_dialogue, observation = agent.generate_dialogue_response(observation)

            print(observation)

        

        # Increment the turn count after each full round of responses

        turns += 1

In [None]:
run_HoC_debate_framework_3(agents, initial_observation)

In [None]:


# Save Conversation into a text file

import sys

sys.stdout = open("HoC_debate_framework_3_output.txt", "w")

run_HoC_debate_framework_3 (agents, initial_observation)

sys.stdout.close()