## Agents.

Now we have agents in this format:

```json
{
        "id": 0,
        "name": "Simran Walia",
        "name_group": "north",
        "occupation": "Forestry Technician",
        "occupation_group": "Agriculture",
        "age": 26,
        "age_group": "25\u201329",
        "income": 193452,
        "income_group": "Lower-Middle Class",
        "hobbies": "trading cards and toy collecting",
        "hobbies_group": "collecting"
}
```

When performing the Schelling simulation, we need to run `calculate_similarity` between every pair of distinct `id`s. <br/>
The result is invariant between rounds, so we can pre-compute the similarities once and store them to avoid repeated computation (_and save on bills_).

In [31]:
from openai import OpenAI

# Read OPENAI_API_KEY from the local .env file. If several matching lines
# exist the last one wins; if none exists api_key stays None and is handed
# to the client unchanged.
api_key = None

with open(".env", "r") as f:
    for line in f:
        # partition() keeps any "=" that is part of the value and cannot raise
        # IndexError on a line that has no "=" at all.
        name, separator, value = line.partition("=")
        # Require the exact variable name so that e.g. OPENAI_API_KEY_OLD=...
        # is not mistaken for the key.
        if not separator or name.strip() != "OPENAI_API_KEY":
            continue
        value = value.strip()
        # .env values are often wrapped in quotes; the quotes are not part of
        # the key itself.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        api_key = value

client = OpenAI(api_key=api_key)

In [2]:
class Person:
    """Plain attribute bag describing one simulated agent.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name. ``__str__`` reads ``name``, ``occupation``,
    ``age``, ``income`` and ``hobbies``, so those must have been supplied
    for the description to render.
    """

    def __init__(self, **kwargs):
        # Mirror each keyword argument onto the instance.
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

    def __str__(self):
        # The surrounding newlines and indentation are part of the returned
        # text, so they are rebuilt explicitly here.
        body_indent = " " * 12
        sentences = (
            f"{self.name} is a {self.occupation} who is {self.age} years old.",
            f"They have an annual income of {self.income}",
            f"They are quite interested in {self.hobbies}",
        )
        rendered = "".join(f"\n{body_indent}{sentence}" for sentence in sentences)
        return rendered + "\n" + " " * 8

In [3]:
class USPerson(Person):
    """Person whose ``income`` attribute is stored as text with a " USD" suffix."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Replace the raw income value with its text form tagged with the currency.
        self.income = " ".join((str(self.income), "USD"))

In [4]:
class IndiaPerson(Person):
    """Person whose ``income`` attribute is stored as text with an " INR" suffix."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Replace the raw income value with its text form tagged with the currency.
        self.income = " ".join((str(self.income), "INR"))

In [5]:
import json

# Load the sampled India agents from disk; the next cell iterates the result
# and unpacks every element as keyword arguments.
with open("raw_agents/india_exp_agents_sampled.json") as agents_file:
    india_agents_sampled = json.load(agents_file)

In [6]:
# Promote each raw agent record to an IndiaPerson (which tags income with " INR").
india_agents_sampled = [IndiaPerson(**record) for record in india_agents_sampled]

In [7]:
# Spot-check: income of the first agent after IndiaPerson appended its " INR" suffix.
india_agents_sampled[0].income

'361034 INR'

In [8]:
def check_compatability_prepare_messages(person1, person2):
    """Build the two-message chat prompt that asks for a compatibility rating.

    ``person1`` and ``person2`` are embedded through ``str()``, so any object
    with a meaningful string form works.

    Returns:
        A list with one system message and one user message, each a dict
        with ``role`` and ``content`` keys.
    """
    system_turn = {
        "role": "system",
        "content": "You are a specialist in real estate and public policy, You are tasked with assessing the compatibility between two potential neighours.",
    }

    # The prompt text, including its whitespace, is kept exactly as written so
    # that requests stay identical to the ones already issued and logged.
    user_prompt = f"""
            Assess the compatibility between
                Person A: {str(person1)} 
                
                and
            
            Person B: {str(person2)}, Respond in this format: 
            ```json
            {{
                "CompatibilityExplanation": "string", // Explain the reasoning behind your answer.
                "CompatibilityPercentage": "number" // The percentage of compatibility between the two individuals.
            }}
            ```         
        """

    return [system_turn, {"role": "user", "content": user_prompt}]

In [9]:
import tiktoken

# Estimate how many prompt tokens all ordered agent pairs would consume under
# two tokenizers, before spending money on real API calls.
encoder_gpt35_turbo = tiktoken.encoding_for_model("gpt-3.5-turbo")
encoder_gpt4o_mini = tiktoken.encoding_for_model("gpt-4o-mini")

message_token_cost = 3  ## flat overhead per message; we dont know it for 4omini.
system_token_cost = 2   ## flat overhead added once per pair


def _prompt_token_count(encoder, messages):
    # Flat per-pair overhead plus, for each message, the per-message overhead
    # and the encoded lengths of its role and content.
    return system_token_cost + sum(
        message_token_cost
        + len(encoder.encode(message["role"]))
        + len(encoder.encode(message["content"]))
        for message in messages
    )


total_tokens_gpt35_turbo = 0
total_tokens_gpt4o_mini = 0

for ag_i in india_agents_sampled:
    for ag_j in india_agents_sampled:
        # An agent is never compared with itself.
        if ag_i.id == ag_j.id:
            continue
        messages = check_compatability_prepare_messages(ag_i, ag_j)
        total_tokens_gpt35_turbo += _prompt_token_count(encoder_gpt35_turbo, messages)
        total_tokens_gpt4o_mini += _prompt_token_count(encoder_gpt4o_mini, messages)

total_tokens_gpt35_turbo, total_tokens_gpt4o_mini

(30989532, 30505944)

In [10]:
# Tokenize one representative model reply with the gpt-4o-mini encoder to get a
# per-response output-token figure for the cost estimate.
sample_output_4omini = encoder_gpt4o_mini.encode('```json\n{\n    "CompatibilityExplanation": "Person A and Person B have some commonalities in terms of their occupations being in the agriculture sector. However, their age difference is quite significant, with Person A being 26 and Person B being 61. This age gap may lead to differences in lifestyle preferences, priorities, and interests. Person A enjoys hobbies related to collecting, while Person B\'s hobbies are more intellectually focused. Additionally, there is a notable income disparity between the two individuals, with Person B belonging to the high-income group and Person A being in the lower-middle class. These differences in age, interests, and income may impact the compatibility between Person A and Person B.",\n    "CompatibilityPercentage": "50"\n}\n```')
len(sample_output_4omini)

149

In [11]:
# print(india_agents_sampled[0])

In [12]:
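# Rough cost estimate for gpt-4o-mini (4omini):
# output: ~150 tokens per reply x 400 x 400 pairs ~= 25 mn tokens -> ~15$
# input : ~30 mn tokens x 0.15 $ per mn tokens ~= 4$
#         (this note originally read "46 mn" for the input, which does not
#          match the ~30 mn tokens counted above)

# -- this run alone costs ~19-20$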

In [13]:
# One (id_a, id_b, messages) work item per ordered pair of agents with
# different ids.
args = [
    (first.id, second.id, check_compatability_prepare_messages(first, second))
    for first in india_agents_sampled
    for second in india_agents_sampled
    if first.id != second.id
]

In [14]:
def check_compatability_parallel(id_1, id_2, messages, model):
    """Ask ``model`` to rate one agent pair and return the raw reply text.

    Sends ``messages`` through the module-level ``client`` chat-completions
    call with ``temperature=0`` and ``top_p=0.95``, printing one progress
    line before the request and one after it.

    Returns:
        ``(id_1, id_2, content)`` where ``content`` is the message content of
        the first choice in the response.
    """
    print(f"Checking compatibility between {id_1} and {id_2}")

    request_options = {
        "temperature": 0,
        "top_p": 0.95,
        "model": model,
        "messages": messages,
    }
    response = client.chat.completions.create(**request_options)
    reply_text = response.choices[0].message.content

    print(f"Compatibility between {id_1} and {id_2} is {reply_text}")
    return id_1, id_2, reply_text

In [15]:
# args[2]

### Side Note:

- The temperature has been set to 0 and top-p to 0.95.
- Of course, this means the top choice of the sample is always chosen at each step.
- This should also mean that the response is highly deterministic: it would be very unlikely that a token from the excluded tail (the lowest `5%`, i.e. `0.05`, of the probability mass) at any step would eventually lead to the most likely sequence.

In [16]:
# Cache of pair results gathered so far; later cells key it as "score_{id_a}_{id_b}".
completion_results = dict()

# Pull the whole first run log into memory for parsing.
with open("logs_run0.txt") as run_log:
    full_data = run_log.read()

In [17]:
# Peek at the first 10,000 characters of the log to see its layout.
full_data[0:10000]

'score_262_1653\n```json\n{\n    "CompatibilityExplanation": "Neha and Sunil have different professional backgrounds and interests, which may lead to a diverse neighborhood dynamic. Neha\'s interest in hosting events and group gaming suggests she enjoys social interactions and community engagement, while Sunil\'s interests in martial arts and dancing indicate a focus on physical activity and personal discipline. Their age difference is minimal, and both are in similar life stages, which can foster mutual understanding. However, their differing interests may lead to limited common ground for social activities. Overall, they could complement each other well if they are open to exploring each other\'s hobbies.",\n    "CompatibilityPercentage": 65\n}\n```score_262_2323\n```json\n{\n    "CompatibilityExplanation": "Neha and Madhavan have different interests and lifestyles that may affect their compatibility as neighbors. Neha enjoys hosting events and group gaming, which suggests she may ha

In [18]:
## score_{num0}_{num1}\n is the key, the value is whatever is in-between.
import re

pattern = re.compile(r"score_\d+_\d+\n")
# Materialise the header matches so each one can be paired with its successor.
matches = list(pattern.finditer(full_data))

for index, match in enumerate(matches):
    key = match.group().strip()
    # The value runs from the end of this header to the start of the next one
    # (or to the end of the log for the last entry). Bounding by the next
    # header, instead of splitting the remaining text on "score", keeps replies
    # that contain the word "score" intact and avoids re-scanning the whole
    # tail of the log for every entry.
    if index + 1 < len(matches):
        value_end = matches[index + 1].start()
    else:
        value_end = len(full_data)
    completion_results[key] = full_data[match.end():value_end]

In [19]:
more_logs = ["logs_run1.txt", "logs_run2.txt", "logs_run3.txt"]

# Entry headers look like "score_{id_a}_{id_b}\n"; compile once for all files.
pattern = re.compile(r"score_\d+_\d+\n")

for log_file in more_logs:
    with open(log_file, "r") as f:
        full_data = f.read()

    matches = list(pattern.finditer(full_data))
    # Each value ends where the next header starts; the last one ends at EOF.
    # Bounding by the next header, instead of splitting the remaining text on
    # "score", keeps replies that contain the word "score" intact and avoids
    # re-scanning the tail of the file for every entry.
    value_ends = [following.start() for following in matches[1:]] + [len(full_data)]

    for match, value_end in zip(matches, value_ends):
        key = match.group().strip()
        # Entries from later files overwrite earlier ones with the same key.
        completion_results[key] = full_data[match.end():value_end]

In [20]:
# Most recently inserted key and the number of cached pairs so far.
list(completion_results.keys())[-1], len(completion_results)

('score_491_1036', 10064)

In [21]:
# Inspect the value stored under the most recently inserted key.
completion_results[list(completion_results.keys())[-1]]

'```json\n{\n    "CompatibilityExplanation": "Lalit Ahuja and Anamika Mishra have different professional backgrounds and interests, which may lead to a diverse and enriching neighborhood dynamic. Lalit\'s focus on aquaculture and interests in writing and sewing contrast with Anamika\'s beekeeping and interests in debating and networking. Their age difference of 13 years may also influence their perspectives and lifestyle choices. However, both individuals have a stable income, which suggests financial stability. The compatibility is moderate due to their differing interests but could be enhanced by mutual respect and curiosity about each other\'s professions.",\n    "CompatibilityPercentage": 60\n}\n```'

In [22]:
# This log holds the interleaved progress lines printed by the parallel run.
with open("logs_run_bb.txt") as stdout_log:
    more_data = stdout_log.read()

In [23]:
# Peek at the first 10,000 characters of the log to see its layout.
more_data[0:10000]

'Checking compatibility between 491 and 885\nChecking compatibility between 491 and 2975\nChecking compatibility between 491 and 1741\nChecking compatibility between 491 and 1853\nCompatibility between 491 and 885 is ```json\n{\n    "CompatibilityExplanation": "Lalit Ahuja and Dipanita Daimari have different professional backgrounds and interests, which may lead to a diverse but potentially harmonious neighborhood dynamic. Lalit\'s focus on aquaculture and creative hobbies like writing and sewing contrasts with Dipanita\'s technical interests in coding and philosophical pursuits. Their age difference of 14 years may also influence their lifestyle preferences and social interactions. However, both individuals have a stable income, which suggests financial stability. The compatibility is moderate due to their differing interests but could foster a unique exchange of ideas and perspectives.",\n    "CompatibilityPercentage": 60\n}\n```\nChecking compatibility between 491 and 2920\nCompatib

In [24]:
## Capture regex as "Compatibility between {num1} and {num2} is ```json{content}```"
## Groups: 1 = first id, 2 = second id, 3 = explanation text (JSON escapes are
## left as written in the log), 4 = percentage digits.

log_pattern = re.compile(
    r"Compatibility between (\d+) and (\d+) is ```json\s*{\s*"
    # Accept JSON escapes such as \" inside the explanation. A bare [^"]* stops
    # at the first escaped quote, so the whole entry fails to match and is
    # silently dropped.
    r'"CompatibilityExplanation":\s*"((?:[^"\\]|\\.)*)",\s*'
    r'"CompatibilityPercentage":\s*(\d+)\s*}\s*```',
    re.DOTALL
)

matches = log_pattern.finditer(more_data)
# Materialise the iterator so it can be counted here and reused afterwards.
matches_list = list(matches)

len(matches_list)

# for match in matches:
    # print(match.groups())

20805

In [25]:
# Fold the regex captures into the cache, one structured entry per logged pair.
for logged in matches_list:
    first_id, second_id, explanation, percentage = logged.group(1, 2, 3, 4)
    completion_results[f"score_{first_id}_{second_id}"] = {
        "CompatibilityExplanation": explanation,
        "CompatibilityPercentage": percentage,
    }

In [26]:
# Inspect the value stored under the most recently inserted key.
completion_results[list(completion_results.keys())[-1]]

{'CompatibilityExplanation': 'Nivedita Puri and Tushar Noronha have significant differences in their professional backgrounds, income levels, and interests. Nivedita, as a civil engineer, has a higher income and is engaged in intellectual pursuits like chess and language learning, which may indicate a preference for structured and analytical activities. In contrast, Tushar, as a machine operator, has a much lower income and is interested in sewing and crafting, which are more hands-on and creative activities. These differences could lead to potential misunderstandings or conflicts in lifestyle and social interactions. However, both individuals are engaged in their respective fields and have hobbies that could provide opportunities for mutual respect and learning. Overall, while there are some commonalities in their work ethic, the stark differences in income and interests suggest a moderate level of compatibility.',
 'CompatibilityPercentage': '45'}

In [27]:
from tenacity import retry, stop_after_attempt, wait_random_exponential

In [29]:
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(6))
def check_compatability_with_backoff(id_1, id_2, messages, model):
    """Run ``check_compatability_parallel`` under the retry policy above.

    The decorator stops after 6 attempts and waits a randomised exponential
    interval (multiplier 1, capped at 60) between them.

    Returns:
        Whatever ``check_compatability_parallel`` returns for the same
        arguments.
    """
    outcome = check_compatability_parallel(id_1, id_2, messages, model)
    return outcome

In [None]:
import concurrent.futures

# Query every pair that is not cached yet, 16 requests at a time. The cache is
# written to disk in a `finally` block so that results already paid for survive
# an interruption or an unexpected error part-way through the run.
try:
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        results = {
            executor.submit(check_compatability_with_backoff, arg[0], arg[1], arg[2], "gpt-4o-mini"): arg
            for arg in args
            if f"score_{arg[0]}_{arg[1]}" not in completion_results
        }
        for future in concurrent.futures.as_completed(results):
            try:
                idx, idy, content = future.result()
            except Exception as exc:
                # A pair that still fails after all retries is reported and
                # skipped; it stays out of the cache, so a later run of this
                # cell will pick it up again.
                failed_id_1, failed_id_2, _ = results[future]
                print(f"Giving up on {failed_id_1} and {failed_id_2}: {exc!r}")
                continue
            # with open("logs.txt", "a") as f:
            #     f.write(f"score_{idx}_{idy}\n")
            #     f.write(content)
            # print(f"score_{idx}_{idy}")
            # print(content)
            completion_results[f"score_{idx}_{idy}"] = content
finally:
    with open("raw_agents/india_exp_mat_scores_4o_mini.json", "w") as f:
        json.dump(completion_results, f, indent=4)

In [32]:
# Load the sampled US agents from disk; the next cell iterates the result and
# unpacks every element as keyword arguments.
with open("raw_agents/us_exp_agents_sampled.json") as agents_file:
    us_agents = json.load(agents_file)

In [33]:
# Promote each raw agent record to a USPerson (which tags income with " USD").
us_agents = [USPerson(**record) for record in us_agents]

In [34]:
# One (id_a, id_b, messages) work item per ordered pair of US agents with
# different ids.
us_args = [
    (first.id, second.id, check_compatability_prepare_messages(first, second))
    for first in us_agents
    for second in us_agents
    if first.id != second.id
]

In [35]:
# Number of ordered US agent pairs queued for scoring.
len(us_args)

159600

In [36]:
# Cache of US pair results; the executor cell keys it as "score_{id_a}_{id_b}".
us_completion_results = {}

In [None]:
import concurrent.futures

# Query every US pair that is not cached yet, 16 requests at a time. The cache
# is written to disk in a `finally` block so that results already paid for
# survive an interruption or an unexpected error part-way through the run.
try:
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        results = {
            executor.submit(check_compatability_with_backoff, arg[0], arg[1], arg[2], "gpt-4o-mini"): arg
            for arg in us_args
            if f"score_{arg[0]}_{arg[1]}" not in us_completion_results
        }
        for future in concurrent.futures.as_completed(results):
            try:
                idx, idy, content = future.result()
            except Exception as exc:
                # A pair that still fails after all retries is reported and
                # skipped; it stays out of the cache, so a later run of this
                # cell will pick it up again.
                failed_id_1, failed_id_2, _ = results[future]
                print(f"Giving up on {failed_id_1} and {failed_id_2}: {exc!r}")
                continue
            us_completion_results[f"score_{idx}_{idy}"] = content
finally:
    with open("raw_agents/us_exp_mat_scores_4o_mini.json", "w") as f:
        json.dump(us_completion_results, f, indent=4)