# Setup

In [5]:
# Standard library imports
import os
import re
import math
import json
import random
import logging
import functools
from datetime import datetime, timedelta
from typing import Any, Callable, Dict, List, Optional, Tuple
from collections import OrderedDict

# Third-party imports
import pandas as pd
import torch
import openai
import faiss
import tenacity

# LangChain imports
from langchain.utils import mock_now
from langchain.docstore import InMemoryDocstore
from langchain.retrievers import TimeWeightedVectorStoreRetriever
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.chains import LLMChain
from langchain_core.language_models import BaseLanguageModel
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, SystemMessage, BaseMemory, Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.output_parsers import RegexParser

# Pydantic imports
from pydantic import BaseModel, Field, ConfigDict

# Hugging Face imports
import transformers
from transformers import (AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline, AutoModel)
from transformers import BitsAndBytesConfig
from peft import PeftModel, PeftConfig
from langchain_huggingface import HuggingFacePipeline

In [6]:
# Set API Keys
# Credentials are read from Kaggle's secret store instead of being hard-coded.
from kaggle_secrets import UserSecretsClient # API logins
user_secrets = UserSecretsClient()

## Hugging Face
# Token passed to `huggingface_hub.login` in the next cell.
Hugging_Face_token = user_secrets.get_secret("Hugging_Face_token")

## Openai
# Key passed explicitly to ChatOpenAI / OpenAIEmbeddings further down.
OPENAI_API_KEY = user_secrets.get_secret("OPENAI_API_KEY")

#from dotenv import load_dotenv # Get OPENAI_API_KEY from .env file
#load_dotenv()
#openai.api_key = os.getenv("OPENAI_API_KEY") # Set API Key



In [7]:
# Login to Hugging Face
# Authenticates this session with the token fetched from the Kaggle secrets above.
from huggingface_hub import login

login(Hugging_Face_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Load Data

In [8]:
# Read the raw df_HoC_2000s CSV from the Kaggle input directory; this frame is kept untouched.
df_HoC_2000s_raw = pd.read_csv('/kaggle/input/parlspeech/df_HoC_2000s.csv')

In [9]:
# Keep only the six columns used in the rest of the notebook.
df_HoC_2000s = df_HoC_2000s_raw[['date', 'agenda', 'speechnumber', 'speaker', 'party','text']]
# Trailing expression: display the resulting column index as the cell output.
df_HoC_2000s.columns


Index(['date', 'agenda', 'speechnumber', 'speaker', 'party', 'text'], dtype='object')

In [10]:
# Display (n_rows, n_columns) of the reduced frame.
df_HoC_2000s.shape

(1332311, 6)

In [11]:
def get_boris_dialogues(df, gap_threshold=5, context=2, speaker='Boris Johnson'):
    """Extract the exchanges in which ``speaker`` takes part, with surrounding context.

    For every (date, agenda) debate in which ``speaker`` appears, the speaker's
    rows are clustered: consecutive speaker rows whose ``speechnumber`` differs
    by at most ``gap_threshold`` belong to the same cluster. Each cluster is
    returned together with ``context`` rows before its first row and ``context``
    rows after its last row, clipped to the boundaries of the debate.

    Context windows are computed on row *positions* inside the debate, not on
    index labels, so the result is correct even when the frame's index is not a
    consecutive integer range (e.g. after filtering or when a debate's rows are
    interleaved with another agenda item).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'date', 'agenda', 'speechnumber' and 'speaker'.
    gap_threshold : int, default 5
        Largest ``speechnumber`` difference between two successive speaker rows
        that still counts as the same exchange.
    context : int, default 2
        Number of rows to include before and after each exchange.
    speaker : str, default 'Boris Johnson'
        Value of the 'speaker' column to look for.

    Returns
    -------
    pandas.DataFrame
        The selected rows (original index preserved), de-duplicated and sorted
        by 'date' then 'speechnumber'. An empty frame with ``df``'s columns is
        returned when the speaker never appears.
    """
    speaker_rows = df[df['speaker'] == speaker]
    if speaker_rows.empty:
        return pd.DataFrame(columns=df.columns)

    # Only debates on days the speaker actually spoke can contribute; narrowing
    # first keeps the groupby small on a frame with more than a million rows.
    candidates = df[df['date'].isin(speaker_rows['date'].unique())]

    segments = []
    for _, sub_df in candidates.groupby(['date', 'agenda'], sort=False):
        is_speaker = (sub_df['speaker'] == speaker).to_numpy()
        speaker_positions = is_speaker.nonzero()[0]
        if len(speaker_positions) == 0:
            continue

        speech_numbers = sub_df['speechnumber'].to_numpy()

        # Cluster the speaker's rows into (first_position, last_position) spans.
        spans = []
        span_start = previous = speaker_positions[0]
        for position in speaker_positions[1:]:
            gap = speech_numbers[position] - speech_numbers[previous]
            if gap <= gap_threshold:
                previous = position
            else:
                spans.append((span_start, previous))
                span_start = previous = position
        spans.append((span_start, previous))

        # Widen every span by `context` rows on each side, clipped to the debate.
        for first, last in spans:
            start_pos = max(first - context, 0)
            end_pos = min(last + context + 1, len(sub_df))
            segments.append(sub_df.iloc[start_pos:end_pos])

    if not segments:
        return pd.DataFrame(columns=df.columns)

    # Overlapping windows yield repeated rows; drop them before sorting.
    return pd.concat(segments).drop_duplicates().sort_values(['date', 'speechnumber'])

# Call the function and get Boris dialogues
# gap_threshold=3 overrides the function default of 5; context=2 matches the default.
boris_dialogues = get_boris_dialogues(df_HoC_2000s, gap_threshold=3, context=2)
# Trailing expression: display the extracted dialogues as the cell output.
boris_dialogues

Unnamed: 0,date,agenda,speechnumber,speaker,party,text
96084,2001-07-12,The Countryside,303,Alan Beith,LibDem,Farmers in my constituency are usually very cr...
96085,2001-07-12,The Countryside,304,Joyce Quin,Lab,Indeed I am The right hon. Gentleman's constit...
96086,2001-07-12,The Countryside,305,Boris Johnson,Con,It is a great pleasure to follow the right hon...
96087,2001-07-12,The Countryside,306,David Hamilton,Lab,I do not intend to attempt to try to follow th...
96088,2001-07-12,The Countryside,307,Hugo Swire,Con,"It may seem presumptuous for me, as I am about..."
...,...,...,...,...,...,...
1332296,2019-12-17,Election of Speaker,4,CHAIR,,"Before I take the Chair as Speaker-Elect, I wi..."
1332297,2019-12-17,Election of Speaker,5,CHAIR,,"Before I call the Prime Minister, I just say t..."
1332298,2019-12-17,Election of Speaker,6,Boris Johnson,Con,"Mr Speaker-Elect, I am sure that the whole Hou..."
1332299,2019-12-17,Election of Speaker,7,Jeremy Corbyn,Lab,May I join the Prime Minister in remembering t...


In [36]:
# Filter: keep only the dialogues from 2015.
# The dates are 'YYYY-MM-DD' strings, so the year is the prefix. `startswith`
# anchors the match there, whereas `contains` is an unanchored regex search.
# `na=False` treats missing dates as non-matching.
boris_dialogues_2015 = boris_dialogues[boris_dialogues['date'].str.startswith('2015', na=False)]
boris_dialogues_2015

Unnamed: 0,date,agenda,speechnumber,speaker,party,text
974168,2015-05-27,Debate on the Address,95,Geoffrey Clifton-Brown,Con,My right hon. Friend is making a very cogent s...
974169,2015-05-27,Debate on the Address,96,Cheryl Gillan,Con,I am grateful for my hon. Friend's support. I ...
974170,2015-05-27,Debate on the Address,97,Boris Johnson,Con,Does my right hon. Friend agree that her const...
974171,2015-05-27,Debate on the Address,98,Cheryl Gillan,Con,My hon. Friend has an impeccable record of pro...
974172,2015-05-27,Debate on the Address,99,Dominic Grieve,Con,Will my right hon. Friend give way?
...,...,...,...,...,...,...
1018722,2015-12-16,West London Coroner's Court,600,Andrew Slaughter,Lab,I am grateful for the comment that the hon. Ge...
1018723,2015-12-16,West London Coroner's Court,601,James Berry,Con,I do not intend to go into the technicalities ...
1018724,2015-12-16,West London Coroner's Court,602,Boris Johnson,Con,I congratulate my hon. Friend on securing this...
1018725,2015-12-16,West London Coroner's Court,603,James Berry,Con,That encapsulates the sentiment of many MPs on...


# Load Model

## Load Model: GPT

In [12]:
# Chat model used as the agent's LLM; each completion is capped at 1500 tokens.
# The API key comes from the Kaggle secret loaded in the setup section.
LLM = ChatOpenAI(model="gpt-3.5-turbo", 
                 max_tokens=1500, 
                 api_key = OPENAI_API_KEY) 

In [13]:
# Embedding model paired with the GPT LLM (library default model; only the API key is set here).
selected_embeddings_model = OpenAIEmbeddings(api_key = OPENAI_API_KEY)

In [14]:
# Measure the embedding dimensionality empirically by embedding a probe sentence.
embedding_size_selectedLLM = len(selected_embeddings_model.embed_query("This is a test."))
print(f"Embedding size: {embedding_size_selectedLLM}")

Embedding size: 1536


## Load Model: Llama

In [None]:
# Set up Tokenizer & Model & Pipeline
model_id = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Padding is needed for batched tokenisation. Reuse the EOS token as the pad
# token rather than adding a brand-new '[PAD]' token: a new token gets an id
# beyond the model's embedding matrix, which is never resized here, so any
# padded batch would index out of range.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Bare transformer (no LM head), loaded in half precision; used for embeddings.
model = AutoModel.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

In [10]:
# Define LLM Pipeline
# NOTE(review): this assignment rebinds the name `pipeline`, which the setup
# cell imported from transformers as the pipeline factory. After this cell,
# `pipeline(...)` calls this text-generation pipeline object, not the factory.
# The model is given as the id string `model_id`, not the `model` object
# loaded in the previous cell.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    max_new_tokens = 500,  # Maximum new tokens to generate
    torch_dtype=torch.float16,
    device_map="auto",
    truncation=True,
    #temperature=0.7,  # Sampling temperature
    #top_p=0.9,        # Nucleus sampling
    #repetition_penalty=1.2,  # Penalize repetition
)

# Wrap the Hugging Face pipeline so it can be used as a LangChain LLM;
# this replaces the `LLM` defined in the GPT section.
LLM = HuggingFacePipeline(pipeline=pipeline)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

[**Decoding Strategies**](https://towardsdatascience.com/decoding-strategies-that-you-need-to-know-for-response-generation-ba95ee0faadc)

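`do_sample=True` uses sampling-based decoding (the next token is drawn from the predicted distribution).

`do_sample=False` uses greedy decoding (the most likely token is always chosen).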

In [None]:
# Test Model Output
def response(prompt):
    """Run ``prompt`` through the global text-generation ``pipeline`` and print the completion.

    Generation uses sampling restricted to the 10 most likely tokens per step.
    The pipeline output begins with the prompt text, so that prefix is sliced
    off and only the newly generated part is printed. Nothing is returned.
    """
    generations = pipeline(prompt, do_sample=True, top_k=10)
    completion = generations[0]['generated_text'][len(prompt):]
    print('response: ', completion + '\n')


# Smoke-test prompt: ask the model, in character, to rate a discussion's relevance.
discussion = "Should the UK rejoin the European Union?"
agent_name = "Boris Johnson"
prompt = (
    f"Consider the following discussion:\n\n"
    f"{discussion}\n\n"
    f"As {agent_name}, on a scale of 1 to 10, how relevant is this discussion to you? "
    # Trailing space added: the next fragment used to be glued on as "relevant).Output".
    f"Provide a number between 1 (not relevant at all) and 10 (extremely relevant). "
    f"Output only a single number, nothing else!"
)


response(prompt)

When switching to any other LLaMA-based model, you need to replace the embedding model (OpenAIEmbeddings) with an embedding generation mechanism that works with your local LLaMA model. OpenAI provides embeddings as a service, but with LLaMA, **you need to generate embeddings using the model locally**.

In [None]:
# Custom Llama Embeddings
class CustomLlamaEmbeddings:   # Copilot generated
    """Embedding adapter that mean-pools a transformer's last hidden state.

    Exposes the two methods LangChain vector stores call on an embedding
    object: ``embed_documents`` and ``embed_query``.

    Parameters
    ----------
    model : callable
        Called as ``model(**inputs, return_dict=True, output_hidden_states=True)``;
        the result must expose ``hidden_states``.
    tokenizer : callable
        Called with ``padding=True, truncation=True, return_tensors="pt"``;
        the result must support ``.to(device)``.
    device : str, default "cuda"
        Device the tokenised inputs are moved to before the forward pass.
    """

    def __init__(self, model, tokenizer, device="cuda"):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def embed_documents(self, texts):
        """Generate one embedding per text in ``texts``.

        Returns a 2-D float32 NumPy array of shape ``(len(texts), hidden_size)``.
        """
        # Tokenize inputs (shorter texts are padded up to the longest in the batch)
        inputs = self.tokenizer(
            texts, padding=True, truncation=True, return_tensors="pt").to(self.device)

        # Get hidden states from the model
        with torch.no_grad():
            outputs = self.model(**inputs, return_dict=True, output_hidden_states=True)

        # Pool in float32 so half-precision models do not lose accuracy in the mean.
        last_hidden = outputs.hidden_states[-1].float()

        if "attention_mask" in inputs:
            # Mean over real tokens only. Padding positions are zeroed out so
            # an embedding does not depend on what else is in the batch.
            mask = inputs["attention_mask"].unsqueeze(-1).to(last_hidden.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            embeddings = (last_hidden * mask).sum(dim=1) / token_counts
        else:
            # No mask available: fall back to a plain mean over all positions.
            embeddings = last_hidden.mean(dim=1)
        return embeddings.cpu().numpy()

    def embed_query(self, text):
        """Generate the embedding for a single query string (1-D NumPy array)."""
        return self.embed_documents([text])[0]

# Replace the OpenAI embeddings with embeddings from the locally loaded Llama model
# (the constructor's default device, "cuda", is used).
selected_embeddings_model = CustomLlamaEmbeddings(model=model, tokenizer=tokenizer)

In [None]:
# Read the model configuration for `model_id` to obtain the hidden size without a forward pass.
config = AutoConfig.from_pretrained(model_id)
# Print the embedding size
# The mean-pooled hidden state has one value per hidden unit, so the
# embedding dimension equals the model's hidden size.
embedding_size_selectedLLM = config.hidden_size
print(f"Embedding Size (hidden size): {config.hidden_size}")

## Load Model: Tuned Boris

In [None]:
# base_model_name = "meta-llama/Llama-3.2-3B"
# Directory holding the fine-tuned PEFT adapter (Kaggle model input).
PEFT_MODEL = "/kaggle/input/llama_boris/pytorch/default/1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # Load model in 4bit, to reduce memory and computational requirements
    bnb_4bit_use_double_quant=True,         # Double quantization, further compress the model weights
    bnb_4bit_quant_type="nf4",              # Quantization type = nf4
    bnb_4bit_compute_dtype=torch.bfloat16,  # Compute in 16bit format, to speed up computation
    # Supported name of the CPU-offload switch; the previous
    # `load_in_8bit_fp32_cpu_offload` is not a BitsAndBytesConfig option.
    llm_int8_enable_fp32_cpu_offload=True,
)
# The adapter config records which base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
# Attach the fine-tuned adapter weights. Without this step `model` is only the
# unmodified base checkpoint and none of the fine-tuning is used.
model = PeftModel.from_pretrained(model, PEFT_MODEL)
model.eval()  # inference only
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Reuse EOS as the padding token so batched tokenisation with padding works.
tokenizer.pad_token = tokenizer.eos_token


In [None]:
# Test the Fine-tuned model
prompt = "Should the UK rejoin the European Union?"
# Tokenize the input prompt and move it to the device the model's weights are on
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate a response
output = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],  # Explicitly set the attention mask
    max_length=300,              # Maximum total length (prompt + generated tokens)
    temperature=0.7,             # Sampling temperature for more creative responses
    top_p=0.9,                   # Nucleus sampling for generating diverse text
    repetition_penalty=1.2,      # Penalize repetition in the response
    do_sample=True,              # Enable sampling for non-deterministic output
    pad_token_id=tokenizer.eos_token_id,      # Explicitly set the pad token ID
)
# Decode and print the response.
# Stored under its own name so the `response()` helper defined in the Llama
# section is not overwritten by a string.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

In [None]:
class CustomLlamaEmbeddings:
    """Embedding adapter that mean-pools a transformer's last hidden state.

    Exposes the two methods LangChain vector stores call on an embedding
    object: ``embed_documents`` and ``embed_query``.

    Parameters
    ----------
    model : callable
        Called as ``model(**inputs, return_dict=True, output_hidden_states=True)``;
        the result must expose ``hidden_states``.
    tokenizer : callable
        Called with ``padding=True, truncation=True, return_tensors="pt"``;
        the result must support ``.to(device)``.
    device : str, default "cuda"
        Device the tokenised inputs are moved to before the forward pass.
    """

    def __init__(self, model, tokenizer, device="cuda"):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def embed_documents(self, texts):
        """Generate one embedding per text in ``texts``.

        Returns a 2-D float32 NumPy array of shape ``(len(texts), hidden_size)``.
        """
        # Tokenize inputs (shorter texts are padded up to the longest in the batch)
        inputs = self.tokenizer(
            texts, padding=True, truncation=True, return_tensors="pt").to(self.device)

        # Get hidden states from the model
        with torch.no_grad():
            outputs = self.model(**inputs, return_dict=True, output_hidden_states=True)

        # Pool in float32 so half-precision models do not lose accuracy in the mean.
        last_hidden = outputs.hidden_states[-1].float()

        if "attention_mask" in inputs:
            # Mean over real tokens only. Padding positions are zeroed out so
            # an embedding does not depend on what else is in the batch.
            mask = inputs["attention_mask"].unsqueeze(-1).to(last_hidden.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            embeddings = (last_hidden * mask).sum(dim=1) / token_counts
        else:
            # No mask available: fall back to a plain mean over all positions.
            embeddings = last_hidden.mean(dim=1)
        return embeddings.cpu().numpy()

    def embed_query(self, text):
        """Generate the embedding for a single query string (1-D NumPy array)."""
        return self.embed_documents([text])[0]


In [None]:
# Define LLM and Embeddings
# Build the pipeline through the `transformers` module explicitly: the bare
# name `pipeline` is rebound to a pipeline *object* in the Llama section, so
# calling `pipeline("text-generation", ...)` would run generation instead of
# constructing a new pipeline.
pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,  # Maximum tokens in the output (the prompt is not counted)
    do_sample=True,      # Needed for temperature / top_p to take effect
    temperature=0.7,  # Sampling temperature
    top_p=0.9,        # Nucleus sampling
    repetition_penalty=1.2,  # Penalize repetition
)
# Expose the pipeline as a LangChain LLM and pair it with local embeddings.
LLM = HuggingFacePipeline(pipeline=pipe)
selected_embeddings_model = CustomLlamaEmbeddings(model, tokenizer)


In [1]:
# Sanity check of the embedding dimensionality.
# Needs `selected_embeddings_model` to be defined in the current kernel; the
# recorded output below is a NameError because it was not defined when this
# cell ran.
embeddings = selected_embeddings_model.embed_documents(["Sample document"])
print(f"Embedding dimension: {embeddings.shape[1]}")  # Check the embedding size

NameError: name 'selected_embeddings_model' is not defined

# Generative AI Setup
The [code](https://python.langchain.com/api_reference/experimental/generative_agents.html) for the classes `GenerativeAgentMemory` and `GenerativeAgent` is reused almost verbatim from the **[LangChain Experimental](https://pypi.org/project/langchain-experimental/)** project (documented in the LangChain Python API reference and intended for research and experimental use), with a few minor tweaks and with the prompts configured for this project.


## Generative Agent Memory

In [21]:
# Module-level logger used by the verbose code paths of GenerativeAgentMemory.
logger = logging.getLogger(__name__)


class GenerativeAgentMemory(BaseMemory):
    """Memory for the generative agent.

    Memories are stored as documents in a time-weighted vector-store
    retriever. Each stored memory is given an LLM-rated importance score; once
    the accumulated importance exceeds ``reflection_threshold`` the agent
    pauses to reflect, and the insights produced are stored as new memories.
    """

    llm: BaseLanguageModel
    """The core language model."""

    memory_retriever: TimeWeightedVectorStoreRetriever
    """The retriever to fetch related memories."""

    verbose: bool = False
    reflection_threshold: Optional[float] = None
    """When aggregate_importance exceeds reflection_threshold, stop to reflect."""

    current_plan: List[str] = []
    """The current plan of the agent."""

    # A weight of 0.15 makes this less important than it
    # would be otherwise, relative to salience and time
    importance_weight: float = 0.15
    """How much weight to assign the memory importance."""

    aggregate_importance: float = 0.0  # : :meta private:
    """Track the sum of the 'importance' of recent memories.
    Triggers reflection when it reaches reflection_threshold."""

    max_tokens_limit: int = 1200  # : :meta private:

    # input keys
    queries_key: str = "queries"
    most_recent_memories_token_key: str = "recent_memories_token"
    add_memory_key: str = "add_memory"

    # output keys
    relevant_memories_key: str = "relevant_memories"
    relevant_memories_simple_key: str = "relevant_memories_simple"
    most_recent_memories_key: str = "most_recent_memories"
    now_key: str = "now"
    reflecting: bool = False

    def chain(self, prompt: PromptTemplate) -> LLMChain:
        """Build an LLMChain for ``prompt`` using this memory's LLM and verbosity."""
        return LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)

    @staticmethod
    def _parse_list(text: str) -> List[str]:
        """Parse a newline-separated string into a list of strings.

        Blank lines are dropped and a leading ``"<number>. "`` enumeration
        prefix is removed from every remaining line.
        """
        lines = re.split(r"\n", text.strip())
        lines = [line for line in lines if line.strip()]  # remove empty lines
        return [re.sub(r"^\s*\d+\.\s*", "", line).strip() for line in lines]

    def _get_topics_of_reflection(self, last_k: int = 50) -> List[str]:
        """Return the 3 most salient high-level questions about recent observations.

        Only the ``last_k`` most recently stored memories are shown to the LLM.
        """
        prompt = PromptTemplate.from_template(
            "{observations}\n\n"
            "Given only the information above, what are the 3 most salient "
            "high-level questions we can answer about the subjects in the statements?\n"
            "Provide each question on a new line."
        )
        observations = self.memory_retriever.memory_stream[-last_k:]
        observation_str = "\n".join(
            [self._format_memory_detail(o) for o in observations]
        )
        result = self.chain(prompt).run(observations=observation_str)
        return self._parse_list(result)

    def _get_insights_on_topic(
        self, topic: str, now: Optional[datetime] = None
    ) -> List[str]:
        """Generate 'insights' on a topic of reflection, based on pertinent memories."""
        prompt = PromptTemplate.from_template(
            "Statements relevant to: '{topic}'\n"
            "---\n"
            "{related_statements}\n"
            "---\n"
            "What 5 high-level novel insights can you infer from the above statements "
            "that are relevant for answering the following question?\n"
            "Do not include any insights that are not relevant to the question.\n"
            "Do not repeat any insights that have already been made.\n\n"
            "Question: {topic}\n\n"
            "(example format: insight (because of 1, 5, 3))\n"
        )
        related_memories = self.fetch_memories(topic, now=now)
        # Number the statements so the LLM can cite them ("because of 1, 5, 3").
        related_statements = "\n".join(
            [
                self._format_memory_detail(memory, prefix=f"{i+1}. ")
                for i, memory in enumerate(related_memories)
            ]
        )
        result = self.chain(prompt).run(
            topic=topic, related_statements=related_statements
        )
        # TODO: Parse the connections between memories and insights
        return self._parse_list(result)

    def pause_to_reflect(self, now: Optional[datetime] = None) -> List[str]:
        """Reflect on recent observations and generate 'insights'.

        Every generated insight is stored as a new memory; the full list of
        insights is returned.
        """
        if self.verbose:
            logger.info("Character is reflecting")
        new_insights = []
        topics = self._get_topics_of_reflection()
        for topic in topics:
            insights = self._get_insights_on_topic(topic, now=now)
            for insight in insights:
                self.add_memory(insight, now=now)
            new_insights.extend(insights)
        return new_insights

    def _reflect_if_needed(self, now: Optional[datetime] = None) -> None:
        """Trigger a reflection when the accumulated importance exceeds the threshold.

        After an agent has processed a certain amount of memories (as measured
        by aggregate importance), it is time to reflect on recent events to add
        more synthesized memories to the agent's memory stream. The
        ``reflecting`` flag stops the memories added during a reflection from
        starting a nested one; it is always cleared again, even if the
        reflection raises, so that later reflections are not blocked.
        """
        if (
            self.reflection_threshold is not None
            and self.aggregate_importance > self.reflection_threshold
            and not self.reflecting
        ):
            self.reflecting = True
            try:
                self.pause_to_reflect(now=now)
                # Hack to clear the importance from reflection
                self.aggregate_importance = 0.0
            finally:
                self.reflecting = False

    def _score_memory_importance(self, memory_content: str) -> float:
        """Score the absolute importance of the given memory.

        The LLM's 1-10 rating is divided by 10 and multiplied by
        ``importance_weight``; 0.0 is returned when the reply contains no digits.
        """
        prompt = PromptTemplate.from_template(
            "On the scale of 1 to 10, where 1 is purely mundane"
            + " (e.g., brushing teeth, making bed) and 10 is"
            + " extremely poignant (e.g., a break up, college"
            + " acceptance), rate the likely poignancy of the"
            + " following piece of memory. Respond with a single integer."
            + "\nMemory: {memory_content}"
            + "\nRating: "
        )
        score = self.chain(prompt).run(memory_content=memory_content).strip()
        if self.verbose:
            logger.info(f"Importance score: {score}")
        match = re.search(r"^\D*(\d+)", score)
        if match:
            return (float(match.group(1)) / 10) * self.importance_weight
        else:
            return 0.0

    def _score_memories_importance(self, memory_content: str) -> List[float]:
        """Score the absolute importance of several ';'-separated memories.

        Returns the numbers found in the LLM reply, in order. Numbers are
        extracted with a regular expression, so replies such as ``"5;7"``,
        ``"5, 7"`` or ``"[5, 7]"`` are all accepted instead of raising
        ``ValueError``. The list is empty when the reply contains no number.
        """
        prompt = PromptTemplate.from_template(
            "On the scale of 1 to 10, where 1 is purely mundane"
            + " (e.g., brushing teeth, making bed) and 10 is"
            + " extremely poignant (e.g., a break up, college"
            + " acceptance), rate the likely poignancy of the"
            + " following piece of memory. Always answer with only a list of numbers."
            + " If just given one memory still respond in a list."
            + " Memories are separated by semi colans (;)"
            + "\nMemories: {memory_content}"
            + "\nRating: "
        )
        scores = self.chain(prompt).run(memory_content=memory_content).strip()
        if self.verbose:
            logger.info(f"Importance scores: {scores}")
        # Pull every (possibly decimal) number out of the reply.
        return [float(x) for x in re.findall(r"\d+(?:\.\d+)?", scores)]

    def add_memories(
        self, memory_content: str, now: Optional[datetime] = None
    ) -> List[str]:
        """Add several ';'-separated observations or memories to the agent's memory.

        Empty fragments are ignored. If the LLM returns fewer scores than there
        are memories, the missing scores default to 0.0; surplus scores are
        discarded. Returns whatever the retriever's ``add_documents`` returns,
        or an empty list when there is nothing to add.
        """
        memory_list = [
            fragment.strip()
            for fragment in memory_content.split(";")
            if fragment.strip()
        ]
        if not memory_list:
            return []

        importance_scores = self._score_memories_importance(memory_content)
        # Align the scores with the memories one-to-one.
        missing = len(memory_list) - len(importance_scores)
        if missing > 0:
            importance_scores = importance_scores + [0.0] * missing
        importance_scores = importance_scores[: len(memory_list)]

        self.aggregate_importance += max(importance_scores)

        documents = [
            Document(page_content=memory, metadata={"importance": score})
            for memory, score in zip(memory_list, importance_scores)
        ]
        result = self.memory_retriever.add_documents(documents, current_time=now)
        self._reflect_if_needed(now)
        return result

    def add_memory(
        self,
        memory_content: str,
        now: Optional[datetime] = None) -> List[str]:
        """Add an observation or memory to the agent's memory.

        The memory is scored, stored with that score as metadata, and a
        reflection is triggered if the accumulated importance is high enough.
        """
        importance_score = self._score_memory_importance(memory_content)
        self.aggregate_importance += importance_score

        document = Document(
            page_content=memory_content,
            metadata={"importance": importance_score}
        )

        result = self.memory_retriever.add_documents([document], current_time=now)
        self._reflect_if_needed(now)
        return result

    def add_historical_memory(
        self, memory_content: str, created_at: datetime) -> List[str]:
        """Add a historical observation or memory to the agent's memory with a specific creation date.

        ``created_at`` is written into the document metadata as both the
        creation and the last-access time, and is also passed to the retriever
        as the current time.
        """
        importance_score = self._score_memory_importance(memory_content)
        self.aggregate_importance += importance_score

        document = Document(
            page_content=memory_content,
            metadata={
                "importance": importance_score,
                "created_at": created_at,  # Historical creation date
                "last_accessed_at": created_at  # Optional: same as created_at for historical data
            }
        )

        result = self.memory_retriever.add_documents([document], current_time=created_at)
        # Check if reflection is needed based on importance
        self._reflect_if_needed(created_at)
        return result

    def fetch_memories(
        self, observation: str, now: Optional[datetime] = None
    ) -> List[Document]:
        """Fetch related memories.

        When ``now`` is given, retrieval runs inside ``mock_now(now)`` so the
        retriever sees that moment as the current time.
        """
        if now is not None:
            with mock_now(now):
                return self.memory_retriever.invoke(observation)
        else:
            return self.memory_retriever.invoke(observation)

    def format_memories_detail(self, relevant_memories: List[Document]) -> str:
        """Format memories as a bulleted, timestamped list, one per line."""
        content = []
        for mem in relevant_memories:
            content.append(self._format_memory_detail(mem, prefix="- "))
        return "\n".join([f"{mem}" for mem in content])

    def _format_memory_detail(self, memory: Document, prefix: str = "") -> str:
        """Format one memory as ``"<prefix>[<created time>] <content>"``."""
        created_time = memory.metadata["created_at"].strftime("%B %d, %Y, %I:%M %p")
        return f"{prefix}[{created_time}] {memory.page_content.strip()}"

    def format_memories_simple(self, relevant_memories: List[Document]) -> str:
        """Join the raw contents of the memories with ``"; "``."""
        return "; ".join([f"{mem.page_content}" for mem in relevant_memories])

    def _get_memories_until_limit(self, consumed_tokens: int) -> str:
        """Reduce the number of tokens in the documents.

        Walks the memory stream from newest to oldest and keeps memories while
        the running token count (starting at ``consumed_tokens``) stays below
        ``max_tokens_limit``.
        """
        result = []
        for doc in self.memory_retriever.memory_stream[::-1]:
            if consumed_tokens >= self.max_tokens_limit:
                break
            consumed_tokens += self.llm.get_num_tokens(doc.page_content)
            if consumed_tokens < self.max_tokens_limit:
                result.append(doc)
        return self.format_memories_simple(result)

    @property
    def memory_variables(self) -> List[str]:
        """Input keys this memory class will load dynamically."""
        return []

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """Return key-value pairs given the text input to the chain.

        If ``inputs`` holds queries, the memories relevant to them are returned
        in detailed and simple form. Otherwise, if it holds a consumed-token
        count, the most recent memories that fit the token limit are returned.
        With neither, the result is empty.
        """
        queries = inputs.get(self.queries_key)
        now = inputs.get(self.now_key)
        if queries is not None:
            relevant_memories = [
                mem for query in queries for mem in self.fetch_memories(query, now=now)
            ]
            return {
                self.relevant_memories_key: self.format_memories_detail(
                    relevant_memories
                ),
                self.relevant_memories_simple_key: self.format_memories_simple(
                    relevant_memories
                ),
            }
        most_recent_memories_token = inputs.get(self.most_recent_memories_token_key)
        if most_recent_memories_token is not None:
            return {
                self.most_recent_memories_key: self._get_memories_until_limit(
                    most_recent_memories_token
                )
            }
        return {}

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, Any]) -> None:
        """Save the context of this model run to memory."""
        # TODO: fix the save memory key
        mem = outputs.get(self.add_memory_key)
        now = outputs.get(self.now_key)
        if mem:
            self.add_memory(mem, now=now)

    def clear(self) -> None:
        """Clear memory contents."""
        # TODO

## Generative Agent

In [28]:
class GenerativeAgent(BaseModel):
    """Agent as a character with memory and innate characteristics."""
    name: str
    """The character's name."""
    age: Optional[int] = None
    """The optional age of the character."""
    traits: str = "N/A"
    """Permanent traits to ascribe to the character."""
    status: str
    """The traits of the character you wish not to change."""
    memory: GenerativeAgentMemory
    """The memory object that combines relevance, recency, and 'importance'."""
    llm: BaseLanguageModel
    """The underlying language model."""
    verbose: bool = False
    summary: str = ""  #: :meta private:
    """Stateful self-summary generated via reflection on the character's memory."""
    summary_refresh_seconds: int = 3600  #: :meta private:
    """How frequently to re-generate the summary."""
    last_refreshed: datetime = Field(default_factory=datetime.now)  # : :meta private:
    """The last time the character's summary was regenerated."""
    daily_summaries: List[str] = Field(default_factory=list)  # : :meta private:
    """Summary of the events in the plan that the agent took."""
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    # LLM-related methods
    @staticmethod
    def _parse_list(text: str) -> List[str]:
        """Parse a newline-separated string into a list of strings.

        A leading "1. "-style enumeration prefix is removed from each line.
        """
        lines = re.split(r"\n", text.strip())
        return [re.sub(r"^\s*\d+\.\s*", "", line).strip() for line in lines]

    def chain(self, prompt: PromptTemplate) -> LLMChain:
        """Create a chain with the same settings as the agent."""
        return LLMChain(
            llm=self.llm, prompt=prompt, verbose=self.verbose, memory=self.memory
        )

    def _get_entity_from_observation(self, observation: str) -> str:
        """Ask the LLM which entity is being observed in ``observation``."""
        prompt = PromptTemplate.from_template(
            "What is the observed entity in the following observation? {observation}"
            + "\nEntity="
        )
        return self.chain(prompt).run(observation=observation).strip()

    def _get_entity_action(self, observation: str, entity_name: str) -> str:
        """Ask the LLM what ``entity_name`` is doing in ``observation``."""
        prompt = PromptTemplate.from_template(
            "What is the {entity} doing in the following observation? {observation}"
            + "\nThe {entity} is"
        )
        return (
            self.chain(prompt).run(entity=entity_name, observation=observation).strip()
        )

    # Summarize most relevant memories
    def summarize_related_memories(self, observation: str) -> str:
        """Summarize memories that are most relevant to an observation."""
        prompt = PromptTemplate.from_template(
            """
            {q1}?
            Context from memory:
            {relevant_memories}
            Relevant context: 
            """
        )
        entity_name = self._get_entity_from_observation(observation)
        entity_action = self._get_entity_action(observation, entity_name)
        q1 = f"What is the relationship between {self.name} and {entity_name}"
        q2 = f"{entity_name} is {entity_action}"
        # `queries` is consumed by the memory attached to the chain, which
        # fills in {relevant_memories}.
        return self.chain(prompt=prompt).run(q1=q1, queries=[q1, q2]).strip()

    # Generate summary of the agent + reaction
    def _generate_reaction(
        self, observation: str, suffix: str, now: Optional[datetime] = None
    ) -> str:
        """React to a given observation or dialogue act.

        Args:
            observation: The text the agent is reacting to.
            suffix: Call-to-action appended to the prompt template. It is
                concatenated before templating, so it may itself use the
                template placeholders (e.g. ``{agent_name}``).
            now: Timestamp to present as the current time; defaults to the
                wall-clock time.

        Returns:
            The stripped LLM output.
        """
        prompt = PromptTemplate.from_template(
            "{agent_summary_description}"
            + "\nIt is {current_time}."
            + "\n{agent_name}'s status: {agent_status}"
            + "\nSummary of relevant context from {agent_name}'s memory:"
            + "\n{relevant_memories}"
            + "\nMost recent observations: {most_recent_memories}"
            + "\nObservation: {observation}"
            + "\n\n"
            + suffix
        )
        agent_summary_description = self.get_summary(now=now)
        relevant_memories_str = self.summarize_related_memories(observation)
        current_time_str = (
            datetime.now().strftime("%B %d, %Y, %I:%M %p")
            if now is None
            else now.strftime("%B %d, %Y, %I:%M %p")
        )
        kwargs: Dict[str, Any] = dict(
            agent_summary_description=agent_summary_description,
            current_time=current_time_str,
            relevant_memories=relevant_memories_str,
            agent_name=self.name,
            observation=observation,
            agent_status=self.status,
        )
        # Token count of the prompt without recent memories; handed to the
        # memory under its token key so it can size {most_recent_memories}.
        consumed_tokens = self.llm.get_num_tokens(
            prompt.format(most_recent_memories="", **kwargs)
        )
        kwargs[self.memory.most_recent_memories_token_key] = consumed_tokens
        return self.chain(prompt=prompt).run(**kwargs).strip()

    # Clean response
    def _clean_response(self, text: str) -> str:
        """Strip a leading "<agent name> " prefix from ``text``."""
        # The name is escaped so regex metacharacters in it (e.g. the "." in
        # "Dr. Smith") are matched literally.
        return re.sub(f"^{re.escape(self.name)} ", "", text.strip()).strip()

    # Generate dialogue response
    def generate_dialogue_response(
        self, observation: str, now: Optional[datetime] = None) -> Tuple[bool, str]:
        """React to a given observation with a spoken reply.

        The exchange is also saved to the agent's memory.

        Returns:
            ``(True, "<name> said <reply>")``; the first element is always True.
        """
        call_to_action_template = (
            "You are {agent_name}, responding to the Observation.\n"
            + "Respond exactly as {agent_name} would speak, staying fully in character and addressing the observation directly"
        )

        full_result = self._generate_reaction(observation, call_to_action_template, now=now)

        response_text = self._clean_response(full_result.strip())
        self.memory.save_context(
            {},
            {
                self.memory.add_memory_key: f"{self.name} observed "
                f"{observation} and said {response_text}",
                self.memory.now_key: now,
            },
        )
        return True, f"{self.name} said {response_text}"

    # Decide if the agent wants to respond to the observation
    def decide_to_respond(self, observation: str, now: Optional[datetime] = None,
                          threshold: float = 7.0) -> bool:
        """Decide whether the agent wants to respond to the observation.

        The LLM is asked for a 1-10 relevance score; the decision is also
        saved to the agent's memory.

        Args:
            observation: The discussion text to rate.
            now: Timestamp forwarded to the prompt and to the saved memory.
            threshold: Minimum score at which the agent responds.

        Returns:
            True when the parsed score is at least ``threshold``. A reply
            that is not a number is logged and scored as 0.
        """
        # Local import: `logging` is not among the notebook's top-level imports.
        import logging

        call_to_action_template = (
            "Consider the following discussion:\n\n"
            "{observation}\n\n"
            "As {agent_name}, on a scale of 1 to 10, how relevant is this discussion to you? "
            "Provide a number between 1 (not relevant at all) and 10 (extremely relevant)."
            "Output only a number, nothing else!"
            )

        full_result = self._generate_reaction(observation, call_to_action_template, now=now)
        result = full_result.strip().lower()  # Normalize the raw reply

        try:
            relevance_score = float(result)
        except ValueError:
            relevance_score = float("nan")
        # float() accepts "nan"; treat it like any other non-numeric reply.
        if math.isnan(relevance_score):
            logging.warning(f"Unexpected non-numeric response from agent: {result}")
            relevance_score = 0.0  # Default low relevance for unexpected responses

        # Save the decision context to memory
        self.memory.save_context(
            {},
            {
                self.memory.add_memory_key: f"{self.name} observed "
                f"that the relevance of the discussion '{observation}' was scored as {result}",
                self.memory.now_key: now,
            },
        )

        return relevance_score >= threshold

    ######################################################
    # Agent stateful' summary methods.                   #
    # Each dialog or response prompt includes a header   #
    # summarizing the agent's self-description. This is  #
    # updated periodically through probing its memories  #
    ######################################################

    def _compute_agent_summary(self) -> str:
        """Ask the LLM to summarize the agent's core characteristics.

        The memories relevant to "<name>'s core characteristics" are supplied
        to the prompt by the memory attached to the chain.
        """
        prompt = PromptTemplate.from_template(
            "How would you summarize {name}'s core characteristics given the"
            + " following statements:\n"
            + "{relevant_memories}"
            + "Do not embellish."
            + "\n\nSummary: "
        )
        # The agent seeks to think about their core characteristics.
        return (
            self.chain(prompt)
            .run(name=self.name, queries=[f"{self.name}'s core characteristics"])
            .strip()
        )

    def get_summary(
        self, force_refresh: bool = False, now: Optional[datetime] = None
    ) -> str:
        """Return a descriptive summary of the agent.

        The cached summary is regenerated when it is empty, when
        ``force_refresh`` is set, or when at least
        ``summary_refresh_seconds`` have passed since the last refresh.
        """
        current_time = datetime.now() if now is None else now
        # total_seconds() covers the whole interval; `.seconds` is only the
        # sub-day component and wraps around for negative intervals.
        since_refresh = (current_time - self.last_refreshed).total_seconds()
        if (
            not self.summary
            or since_refresh >= self.summary_refresh_seconds
            or force_refresh
        ):
            self.summary = self._compute_agent_summary()
            self.last_refreshed = current_time
        age = self.age if self.age is not None else "N/A"
        return (
            f"Name: {self.name} (age: {age})"
            + f"\nInnate traits: {self.traits}"
            + f"\n{self.summary}"
        )

    def get_full_header(
        self, force_refresh: bool = False, now: Optional[datetime] = None
    ) -> str:
        """Return a full header of the agent's status, summary, and current time."""
        now = datetime.now() if now is None else now
        summary = self.get_summary(force_refresh=force_refresh, now=now)
        current_time_str = now.strftime("%B %d, %Y, %I:%M %p")
        return (
            f"{summary}\nIt is {current_time_str}.\n{self.name}'s status: {self.status}"
        )

# Create Agent
- [GenerativeAgentMemory](https://python.langchain.com/api_reference/experimental/generative_agents/langchain_experimental.generative_agents.memory.GenerativeAgentMemory.html): **Memory** for the generative agent 
   - `llm`
   - `memory_retriever` = create_new_memory_retriever()
   - `current_plan`
   - `reflection_threshold`
   - `add_memory` add observation/memory
- [GenerativeAgent](https://python.langchain.com/api_reference/experimental/generative_agents.html): Agent as a character with **memory** and innate **characteristics**,  
   - basics like `name`, `age` and `llm`
   - `memory` object that combines relevance, recency, and ‘importance’
   - `summary` and `summary_refresh_seconds` to set how frequently to re-generate the summary
   - `summarize_related_memories`: Summarize memories that are most relevant to an observation
   - `status`: fixed objectives / traits of the character that should not change
   - `traits`: permanent traits to ascribe to the character
   - `generate_dialogue_response`

In [22]:
# Relevance Score function - relevance_score_fn()
def relevance_score_fn(score: float) -> float:
    """Map a raw distance ``score`` to a similarity, where 0 maps to 1.0 and sqrt(2) maps to 0.0."""
    scaled_distance = score / math.sqrt(2)
    return 1.0 - scaled_distance

In [25]:
# Memory Retriever function - create_new_memory_retriever()
def create_new_memory_retriever():
    """Build a fresh, empty time-weighted retriever for a single agent.

    Relies on the notebook-level ``selected_embeddings_model`` and
    ``embedding_size_selectedLLM`` settings.
    """
    # Embedding callable; use embeddings_model.embed_query OR llama_embedding_function
    embed_fn = selected_embeddings_model.embed_query

    # Empty flat L2 index sized for the embeddings; use 1536 (GPT3.5) or 3072 (Llamma)
    empty_index = faiss.IndexFlatL2(embedding_size_selectedLLM)

    store = FAISS(
        embed_fn,
        empty_index,
        InMemoryDocstore({}),  # no documents yet
        {},  # no index-to-docstore-id entries yet
        relevance_score_fn=relevance_score_fn,
    )

    # Time-weighted scoring that also takes the "importance" metadata into
    # account and retrieves up to 15 relevant memories.
    retriever = TimeWeightedVectorStoreRetriever(
        vectorstore=store,
        other_score_keys=["importance"],
        k=15,
    )
    return retriever

In [24]:
# Agent Creation function - create_debate_agent()
def create_debate_agent(name, age, traits, status,
                        llm, reflection_threshold=7.5):
    """Create a ``GenerativeAgent`` backed by its own fresh memory.

    Args:
        name: The agent's name.
        age: The agent's age.
        traits: Permanent traits to ascribe to the agent.
        status: The agent's status line.
        llm: Language model shared by the agent and its memory.
        reflection_threshold: Passed to ``GenerativeAgentMemory``; adjust as
            needed for reflection frequency.

    Returns:
        The configured ``GenerativeAgent``.
    """
    memory = GenerativeAgentMemory(
        llm=llm,
        memory_retriever=create_new_memory_retriever(),
        verbose=False,
        reflection_threshold=reflection_threshold,
    )

    # The retriever belongs to the memory only. GenerativeAgent declares no
    # `memory_retriever` field, so passing one there just built a second
    # vector index that was thrown away.
    return GenerativeAgent(
        name=name,
        age=age,
        traits=traits,
        status=status,
        llm=llm,
        memory=memory,
    )

## Define Agent Traits

In [30]:
# Create debate agents (MPs) with their respective characteristics
Trott = create_debate_agent(
    name="Laura Trott",
    age=38,
    traits="highly disciplined, sharp, and pragmatic. Strategic, focus on “quiet competence” rather than loud rhetoric, detail-oriented and a stickler for facts",
    status="Conservative MP",
    llm=LLM,
)

Johnson = create_debate_agent(
    name="Boris Johnson",
    age=57,
    traits="charismatic, chaotic, opportunistic, larger-than-life personality, thrives on spectacle and Blitz-spirit optimism, mixes humor with charm and a dash of bluster, unpredictable yet captivating, a showman who values headlines over substance",
    status="Conservative MP",
    llm=LLM,
)

Farage = create_debate_agent(
    name="Nigel Farage",
    age=60,
    traits="unapologetically bold, confrontational, divisive, a provocateur, skilled at stirring public opinion with blunt populist rhetoric, political brawle, highly skilled at galvanizing crowds",
    status="Former UKIP leader, Brexit Party leader, and political commentator",
    llm=LLM,
)

Sunak = create_debate_agent(
    name="Rishi Sunak",
    age=44,
    traits="technocratic, astute, polished, financially extremely wealthy, meticulous, highly analytical, known as the Fiscal-Guardian, out of touch with the middle-class",
    status="Conservative MP, Former Prime Minister",
    llm=LLM,
)

Starmer = create_debate_agent(
    name="Sir Keir Starmer",
    age=61,
    traits="methodical, earnest, intense focus on justice and reform, calm demeanor, seeks accountability, values facts over flair, deliver points with precision rather than emotion",
    status="Leader of the Labour Party",
    llm=LLM,
)

## Define Base Memories

In [31]:
# Create a handle to each agent's memory object
Trott_memory, Johnson_memory, Farage_memory, Sunak_memory, Starmer_memory = (
    debater.memory for debater in (Trott, Johnson, Farage, Sunak, Starmer)
)

In [20]:
# Base Observations
# One list of seed statements per agent; each statement is later stored as
# a separate memory via `add_memory`.

# Laura Trott: education, current activity and economic stance
Trott_observations = [
    "Trott attended Oxted School, studied history and economics at Oxford University",
    "Trott is preparing for a debate on the economy",
    "Trott advocates for responsible budgeting and cautious government spending",
    "Trott emphasize business growth and pragmatic economic solutions",
    "Trott generally conservative but supports progressive stances on education and family policies",
    "Trott focuses on pragmatic rather than ideological approaches",
]
# Boris Johnson: education and political positions
Johnson_observations = [
    "Johnson attended Eton College, studied Classics Oxford University",
    "Johnson is Pro-Brexit and economically nationalist",
    "Johnson advocates for deregulation, minimal government intervention, and strong support for British businesses",
    "Johnson is a populist, often aligning with traditional conservative values, though flexible when politically advantageous",
    "Johnson is support strong national identity and sovereignty",
]
# Nigel Farage: education and political positions
Farage_observations = [
    "Farage attended Dulwich College but did not attend university",
    "Farage is strongly Eurosceptic, advocates for British sovereignty, deregulation, and cutting ties with EU economic policies",
    "Farage prioritizes domestic industry and independence from European influence",
    "Farage is a Nationalist, anti-globalist, and socially conservative",
    "Farage advocates for strict immigration controls and promotes traditional British values"
]
# Rishi Sunak: education and political positions
Sunak_observations = [
    "Sunak studied Philosophy, Politics, and Economics at Oxford University and later earned an MBA from Stanford University",
    "Sunak is fiscal conservative with a focus on budget balancing",
    "Sunak advocates for responsible spending and a cautious approach to government intervention",
    "Sunak prioritizes stability over drastic reforms",
    "Sunak focus on pragmatism over ideology, holds relatively conservative views on social issues, often supporting traditional family values",
]
# Keir Starmer: education and political positions
Starmer_observations = [
    "Starmer attended Reigate Grammar School, studied law at the University of Leeds and completed studies at Oxford University",
    "Starmer focuses on investment in public services, especially the NHS, and progressive taxation",
    "Starmer prioritizes worker rights and social equality, advocating for a balanced but progressive approach",
    "Starmer supports expanded public services, social justice, and inclusivity",
    "Starmer Focuses on social reform and government accountability",
]

In [24]:
# Store every agent's base observations in that agent's memory
tuples = list(zip(
    [Trott_observations, Johnson_observations, Farage_observations,
     Sunak_observations, Starmer_observations],
    [Trott_memory, Johnson_memory, Farage_memory, Sunak_memory, Starmer_memory],
))

for base_observations, agent_memory in tuples:
    for statement in base_observations:
        agent_memory.add_memory(statement)

  return LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
  score = self.chain(prompt).run(memory_content=memory_content).strip()


In [40]:
# Johnson's base observations are already stored by the loop over all agents
# above, so only add the ones that are still missing; this keeps the cell
# safe to re-run without duplicating memories.
# NOTE(review): assumes `add_memory` stores the observation text unchanged as
# the document's `page_content` - confirm against GenerativeAgentMemory.
already_stored = {
    doc.page_content for doc in Johnson_memory.memory_retriever.memory_stream
}
for observation in Johnson_observations:
    if observation not in already_stored:
        Johnson_memory.add_memory(observation)

In [None]:
# View stored memories of every agent (Starmer was previously left out)
memories_by_agent = [
    ("Trott", Trott_memory),
    ("Johnson", Johnson_memory),
    ("Farage", Farage_memory),
    ("Sunak", Sunak_memory),
    ("Starmer", Starmer_memory),
]

for position, (agent_label, agent_memory) in enumerate(memories_by_agent):
    # Blank line between agents, but not before the first one
    separator = "" if position == 0 else "\n"
    print(f"{separator}{agent_label}'s stored memories:")
    print(agent_memory.memory_retriever.memory_stream)

## Add Historical Memories

In [39]:
def process_historical_events(df, agent_memory):
    """Store every row of ``df`` as a timestamped historical memory.

    Rows are processed in order. The first speech of a date is stamped
    10:00 AM on that date; each further consecutive row with the same date is
    stamped 3 minutes after the previous one.

    Args:
        df: DataFrame with the columns ``date`` (string, ``YYYY-MM-DD``),
            ``speaker``, ``party`` and ``text``.
        agent_memory: Object exposing
            ``add_historical_memory(memory_content, timestamp)``.
    """
    previous_date = None
    current_time = None

    # Iterate the frame that was passed in, not a notebook-level global.
    for _, row in df.iterrows():
        date = datetime.strptime(row['date'], "%Y-%m-%d")
        if date != previous_date:
            current_time = datetime(date.year, date.month, date.day, 10, 0)  # 10:00 AM
            previous_date = date
        else:
            current_time += timedelta(minutes=3)  # Add 3 minutes for subsequent speeches

        memory_content = f"{row['speaker']} ({row['party']}) said: {row['text']}"
        agent_memory.add_historical_memory(memory_content, current_time)


In [40]:
# Load the rows of `boris_dialogues_2015` into Johnson's memory as historical memories.
# NOTE(review): the saved output of this cell ends in a context-length error
# from the model (18745 tokens against a 16385-token limit) - presumably an
# over-long speech text; confirm and truncate or split such rows first.
process_historical_events(boris_dialogues_2015, Johnson_memory)

  return LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
  score = self.chain(prompt).run(memory_content=memory_content).strip()


BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 18745 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}

# Create Simulation

In [30]:
# List of agents in the debate (shared by both debate frameworks below)
agents = [Trott, Johnson, Farage, Sunak, Starmer]
# Define the initial debate topic; it is the first observation the agents react to
initial_observation = "Should the UK rejoin the European Union?"

## Framework 2
Each agent gets `X`-number of speaking slots allocated **randomly**

In [None]:
def run_HoC_debate_framework_2(agents: List["GenerativeAgent"],
                               initial_observation: str,
                               max_slot_each: int = 2) -> None:
    """Run a debate in which every agent gets the same number of speaking slots, in random order.

    The initial observation is printed first. Then, until every agent has
    used all its slots, a random agent with slots left responds to the latest
    observation; the response is printed and becomes the next observation.

    Args:
        agents: The debating agents.
        initial_observation: The opening statement or topic.
        max_slot_each: Number of speaking slots per agent.
    """
    # Slots are tracked by position rather than by name, so agents that
    # happen to share a name are still counted separately.
    slots_used = [0] * len(agents)

    # Start the debate with an initial observation
    observation = initial_observation
    print(observation)

    while True:
        # Agents that still have a speaking slot left
        eligible = [
            position for position, used in enumerate(slots_used)
            if used < max_slot_each
        ]
        if not eligible:
            break

        # Randomly select one of them to respond to the latest observation
        speaker_position = random.choice(eligible)
        _, observation = agents[speaker_position].generate_dialogue_response(observation)
        print(observation)

        slots_used[speaker_position] += 1

In [None]:
# Run the Framework 2 debate: random speaking order over the agents' speaking slots
run_HoC_debate_framework_2 (agents, initial_observation)

## Framework 3
1. Each agent adds the new observation to its memory.
2. Each agent does a quick reflection on the new observation to decide whether or not to respond, depending on personal saliency (a custom method of the `GenerativeAgent` class). `decide_to_respond` returns either True or False.
3. Randomly select one agent from the agents that decided to respond to the observation.
4. Print the selected agent's `generate_dialogue_response` output, which becomes the new observation.

In [None]:
# Testing the `decide_to_respond()` function for each agent
# Alternative off-topic probe: "Should the official UK national dish be changed??"
random_observation = initial_observation
# Who would respond to the observation? Printed in the order
# Trott, Johnson, Farage, Sunak, Starmer
for debater in (Trott, Johnson, Farage, Sunak, Starmer):
    print(debater.decide_to_respond(random_observation))

In [None]:
# Testing the `generate_dialogue_response()` function:
# the cell displays the returned (True, "<name> said <reply>") tuple
Starmer.generate_dialogue_response(initial_observation)

In [None]:
def run_HoC_debate_framework_3(agents: List["GenerativeAgent"],
                               initial_observation: str,
                               max_turns: int = 10) -> None:
    """Run a debate in which only agents who find the observation relevant may speak.

    The initial observation is printed first. On each turn every agent
    stores the current observation in memory, each agent decides whether to
    respond, and one of the willing agents is picked at random; its response
    is printed and becomes the next observation.

    Args:
        agents: The debating agents.
        initial_observation: The opening statement or topic.
        max_turns: Number of turns to run.
    """
    # Start the debate with an initial observation
    observation = initial_observation
    print(observation)

    for _ in range(max_turns):
        # Step 1: Each agent adds the new observation into memory
        for agent in agents:
            agent.memory.add_memory(observation)

        # Step 2: Collect the agents that decide to respond to the observation
        responding_agents = [agent for agent in agents if agent.decide_to_respond(observation)]
        if not responding_agents:
            # Nobody wants to speak: the turn is used up and the observation
            # stays the same for the next turn.
            continue

        # Step 3: One randomly selected responder speaks
        speaker = random.choice(responding_agents)
        _, observation = speaker.generate_dialogue_response(observation)
        print(observation)

In [None]:
# Run the Framework 3 debate: only agents that decide to respond may speak
run_HoC_debate_framework_3(agents, initial_observation)