In [41]:
import ast
import json
import random
import re
import sys

from rich import print

sys.path.append("../../")
from sotopia.database import AgentProfile, EpisodeLog, EnvironmentProfile

In [4]:
# Collect the primary key of every stored AgentProfile, then fetch and print
# each profile so the available characters can be inspected.
all_character_pks = list(AgentProfile.all_pks())
for pk in all_character_pks:
    print(AgentProfile.get(pk))

In [35]:
# Pick one EnvironmentProfile at random and display it.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# `pk` (and therefore `env`) differs from run to run.
all_task_pks = list(EnvironmentProfile.all_pks())
pk = random.choice(all_task_pks)

# `env` is reused by the cell that prints the formatted scenario summary.
env = EnvironmentProfile.get(pk)
print(env)

In [43]:
# Print the units/centers summary extracted from the sampled environment's
# scenario text.
# NOTE(review): `extract_and_format_diplomacy_data` is defined in a later cell
# of this notebook, so that cell has to be executed before this one.
print(extract_and_format_diplomacy_data(env.scenario))

In [39]:
def extract_and_format_diplomacy_data(scenario):
    """Summarise the ``centers`` and ``units`` mappings embedded in a scenario.

    The scenario text is searched for ``centers: {...}`` and ``units: {...}``;
    each brace-delimited mapping is parsed into a dict of
    ``country -> list of strings`` and rendered on one line.

    Args:
        scenario: Scenario text containing both mappings.

    Returns:
        ``"units: <C>: <u1>, <u2>; ...\\ncenters: <C>: <c1>, <c2>; ..."`` on
        success, or an ``"Error: ..."`` string when either mapping is missing.

    Raises:
        json.JSONDecodeError: If a mapping is found but cannot be parsed
            either as a Python literal or as (quote-normalised) JSON.
    """
    # Extract the centers and units mappings. DOTALL lets a mapping span
    # several lines; the non-greedy body stops at the first closing brace.
    centers_match = re.search(r"centers: (\{.*?\})", scenario, re.DOTALL)
    units_match = re.search(r"units: (\{.*?\})", scenario, re.DOTALL)

    if not centers_match or not units_match:
        return "Error: Could not find centers or units data in the scenario."

    def parse_mapping(raw):
        # Prefer a Python-literal parse: it accepts single- and double-quoted
        # strings, so a value containing an apostrophe is not corrupted the
        # way a blanket quote replacement would corrupt it.
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        # Fallback: the original JSON route (handles JSON-only tokens).
        return json.loads(raw.replace("'", '"'))

    # Parse the captured text into dictionaries.
    centers = parse_mapping(centers_match.group(1))
    units = parse_mapping(units_match.group(1))

    def format_country_data(data, separator):
        return '; '.join(
            f"{country}: {separator.join(items)}" for country, items in data.items()
        )

    # Format the units data.
    units_formatted = format_country_data(units, ', ')

    # Format the centers data.
    centers_formatted = format_country_data(centers, ', ')

    return f"units: {units_formatted}\ncenters: {centers_formatted}"

### Episode

In [46]:
# Pick one EpisodeLog at random and display it; `episode` is reused by the
# cells below that inspect and reformat its messages.
# NOTE(review): unseeded random choice, so the episode differs between runs.
all_pks = list(EpisodeLog.all_pks())
pk = random.choice(all_pks)

episode = EpisodeLog.get(pk)
print(episode)

In [11]:
# Show only the entry at index 1 of `episode.messages` as the cell output.
# (To dump every entry instead, loop over `episode.messages` and print each.)
episode.messages[1]

[('Environment',
  'James Anderson',
  'Turn #0: James Anderson said: "Hello Emily, it\'s a pleasure to be playing together. I\'m James Anderson, representing Germany. I\'m looking forward to building a strong alliance with England. Have you given any thought to our opening moves? Are there any armies you\'re planning to move this turn that could benefit both our countries?"\n'),
 ('Environment',
  'Emily Carter',
  'Turn #0: James Anderson said: "Hello Emily, it\'s a pleasure to be playing together. I\'m James Anderson, representing Germany. I\'m looking forward to building a strong alliance with England. Have you given any thought to our opening moves? Are there any armies you\'re planning to move this turn that could benefit both our countries?"\n'),
 ('James Anderson', 'Environment', 'did nothing'),
 ('Emily Carter',
  'Environment',
  'said: "Ah, James, it\'s great to finally negotiate with you. I\'ve been thinking about our opening moves, and I\'m interested in moving some of my 

In [50]:
# Assuming your data is stored in a variable called 'conversation_data'
formatted_conversation = process_conversation(episode.messages)
if formatted_conversation:
    print(formatted_conversation)
else:
    print("No conversation data found.")

In [48]:
# Rewrite the formatted conversation so character names become countries.
# NOTE(review): `replace_names_with_countries` and `profiles` are both defined
# in later cells of this notebook; run those cells first.
replaced_text = replace_names_with_countries(formatted_conversation, profiles)
print(replaced_text)

In [62]:
# Print the country-labelled conversation as numbered
# "<n> SPEAKER -> RECIPIENT: message" lines.
# NOTE(review): `process_conversation_to_intent` is defined in a later cell.
print(process_conversation_to_intent(replaced_text))

In [61]:
import re

def process_conversation(data):
    """Flatten episode messages into ``Turn #N`` / ``<speaker> said: "..."`` text.

    ``data`` is an iterable of groups, each group an iterable of
    ``(sender, recipient, content)`` tuples. Only tuples sent by
    ``'Environment'`` whose content contains ``'Turn #'`` are considered, and
    only when the content reports the *recipient* as the speaker; the copy of
    the same utterance addressed to anyone else is therefore skipped, so each
    utterance appears once.

    Args:
        data: Nested message tuples as described above.

    Returns:
        The extracted turns joined by newlines; an empty string if none match.
    """
    formatted_messages = []

    for items in data:
        for item in items:
            if item[0] != 'Environment' or 'Turn #' not in item[2]:
                continue
            # Escape the name so regex metacharacters in it (".", "(", ...)
            # are matched literally and cannot alter the group structure.
            # DOTALL plus a greedy body captures up to the LAST double quote,
            # so multi-line messages and messages containing embedded quotes
            # are kept whole instead of being dropped or truncated.
            match = re.search(
                rf'Turn #(\d+): ({re.escape(item[1])}) said: "(.*)"',
                item[2],
                re.DOTALL,
            )
            if match:
                turn_number, speaker, message = match.groups()
                formatted_messages.append(
                    f"Turn #{turn_number}\n{speaker} said: \"{message}\""
                )
    return "\n".join(formatted_messages)

def process_conversation_to_intent(text):
    """Convert ``Turn #N`` formatted text into numbered sender/recipient lines.

    The input is split on ``Turn #<n>`` header lines. Every turn of the form
    ``<word> said: "<message>"`` becomes
    ``"<index> <SPEAKER> -> <RECIPIENT>: <message>"``, where ``index`` counts
    emitted lines from 0 and the recipient is the first *other* speaker in
    order of first appearance.

    Args:
        text: Conversation text whose speakers are single words.

    Returns:
        The converted lines joined by newlines (empty string if no turn
        matches). When the conversation has only one speaker, the recipient
        is rendered as ``UNKNOWN`` instead of raising ``StopIteration``.
    """
    turns = re.split(r'Turn #\d+\n', text)[1:]

    # Unique speakers in order of first appearance. An ordered list (rather
    # than a set) makes the recipient choice reproducible when more than two
    # speakers take part.
    speakers = []
    for turn in turns:
        match = re.match(r'(\w+) said:', turn.strip())
        if match and match.group(1) not in speakers:
            speakers.append(match.group(1))

    formatted_messages = []
    for turn in turns:
        match = re.match(r'(\w+) said: "(.*?)"$', turn.strip(), re.DOTALL)
        if not match:
            continue
        speaker, message = match.groups()
        # Default guards the single-speaker case, where no other party exists.
        recipient = next((name for name in speakers if name != speaker), "UNKNOWN")
        # The running index equals the number of lines emitted so far.
        formatted_messages.append(
            f"{len(formatted_messages)} {speaker.upper()} -> {recipient.upper()}: {message}"
        )

    return "\n".join(formatted_messages)

def get_country_from_name(name, profiles):
    """Return the country of the first profile that ``name`` refers to.

    A profile matches when ``name`` equals its ``first_name``, its
    ``last_name``, or ``"<first_name> <last_name>"``. If no profile matches,
    ``name`` itself is returned unchanged.
    """
    countries = (
        candidate.country
        for candidate in profiles
        if name in (
            candidate.first_name,
            candidate.last_name,
            f"{candidate.first_name} {candidate.last_name}",
        )
    )
    # Lazily take the first hit; fall back to the input when there is none.
    return next(countries, name)

def replace_names_with_countries(text, profiles):
    """Replace character names in ``text`` with the matching profile's country.

    Each profile contributes its full name (``"<first_name> <last_name>"``),
    first name and last name. Names are matched literally, as whole words and
    longest first, so a full name is replaced exactly once. Because of that,
    no pass that collapses repeated words is run, and ordinary text such as
    "this is" or "the theory" is left untouched.

    When several profiles share a name, the first profile in ``profiles``
    wins, which is the same precedence ``get_country_from_name`` applies.

    Args:
        text: Text possibly containing character names.
        profiles: Objects exposing ``first_name``, ``last_name`` and
            ``country`` attributes.

    Returns:
        ``text`` with every recognised name replaced by its country; the
        input is returned unchanged when there are no usable names.
    """
    name_to_country = {}
    for profile in profiles:
        first, last = profile.first_name, profile.last_name
        for name in (f"{first} {last}", first, last):
            # Skip empty names: an empty alternative would match everywhere.
            if name:
                name_to_country.setdefault(name, profile.country)

    if not name_to_country:
        return text

    # Longest alternative first so "James Anderson" wins over "James".
    alternatives = sorted(name_to_country, key=len, reverse=True)
    # Lookarounds act as word boundaries that also work for names that begin
    # or end with a non-word character; re.escape keeps names literal.
    name_pattern = re.compile(
        r'(?<!\w)(?:' + '|'.join(map(re.escape, alternatives)) + r')(?!\w)'
    )
    return name_pattern.sub(lambda match: name_to_country[match.group(0)], text)


In [31]:
# Build `profiles`, the list of every stored AgentProfile, which the
# name-to-country replacement cell passes to `replace_names_with_countries`.
# NOTE(review): this cell sits at the end of the notebook but its result is
# used by an earlier cell, so it must be executed before that cell.
profiles = []
# Same primary-key listing as the cell that prints all agent profiles.
all_character_pks = list(AgentProfile.all_pks())
for pk in all_character_pks:
    profiles.append(AgentProfile.get(pk))