# Code agent names as numbers

In [31]:
import pandas as pd

# Load the matchmaking log CSV file into a pandas DataFrame
file_path = '/media/alexmatthews/Alex_011/ZOOL412/week_6/Experiment_Results/matchmaking_log.csv'  # Update with your file path
df = pd.read_csv(file_path)

# Convert the agent names into unique integer IDs
agent_names = pd.concat([df['Agent1'], df['Agent2']]).unique()
agent_name_to_id = {name: idx for idx, name in enumerate(agent_names)}

# Add the integer IDs to the DataFrame
df['agent_1_id'] = df['Agent1'].map(agent_name_to_id)
df['agent_2_id'] = df['Agent2'].map(agent_name_to_id)

# Initialize new columns for the actual frequency each agent beeped at in the current round
df['Agent1_Actual_Frequency'] = pd.Series(dtype=float)
df['Agent2_Actual_Frequency'] = pd.Series(dtype=float)

# Loop through the dataframe to assign actual frequencies
for i in range(len(df) - 1):
    # Check if the next row has the same pair number and round is incremented by 1
    if df.loc[i, 'Pair'] == df.loc[i + 1, 'Pair'] and df.loc[i, 'Round'] + 1 == df.loc[i + 1, 'Round']:
        df.loc[i, 'Agent1_Actual_Frequency'] = df.loc[i + 1, 'Agent1_Last_Frequency']
        df.loc[i, 'Agent2_Actual_Frequency'] = df.loc[i + 1, 'Agent2_Last_Frequency']

# Display the updated DataFrame to check the results
df


Unnamed: 0,Round,Pair,Agent1,Agent2,Agent1_Last_Frequency,Agent2_Last_Frequency,agent_1_id,agent_2_id,Agent1_Actual_Frequency,Agent2_Actual_Frequency
0,1,1,Abigail,Fleur,,,0,18,,
1,1,2,Kai,Luz,,,1,16,,
2,1,3,Jackson,Olivia,,,2,11,,
3,1,4,Yannis,Anke,,,3,17,,
4,1,5,Lotte,Aiden,,,4,12,,
5,1,6,Ethan,Harper,,,5,14,,
6,1,7,Rangi,Mason,,,6,19,,
7,1,8,Klaus,Miguel,,,7,10,,
8,1,9,Mae,James,,,8,13,,
9,1,10,Sophia,Fritz,,,9,15,,


In [19]:
import numpy as np

# Define a function to categorize frequencies
def categorize_frequency(freq):
    if np.isnan(freq):
        return np.nan
    elif freq < 7:
        return 'low'
    elif freq > 20:
        return 'high'
    else:
        return np.nan

# Apply the function to create the new columns
df['agent_1_last_freq_category'] = df['Agent1_Last_Frequency'].apply(categorize_frequency)
df['agent_2_last_freq_category'] = df['Agent2_Last_Frequency'].apply(categorize_frequency)

# Display the DataFrame to check the results
df.head()


Unnamed: 0,Round,Pair,Agent1,Agent2,Agent1_Last_Frequency,Agent2_Last_Frequency,agent_1_id,agent_2_id,agent_1_last_freq_category,agent_2_last_freq_category
0,1,1,Abigail,Fleur,,,0,18,,
1,1,2,Kai,Luz,,,1,16,,
2,1,3,Jackson,Olivia,,,2,11,,
3,1,4,Yannis,Anke,,,3,17,,
4,1,5,Lotte,Aiden,,,4,12,,


In [20]:

# Step 1: Extract the relevant data into a new DataFrame
df_agent1 = df[['Round', 'agent_1_id', 'agent_1_last_freq_category']].rename(columns={
    'agent_1_id': 'animal_id',
    'agent_1_last_freq_category': 'category'
})

df_agent2 = df[['Round', 'agent_2_id', 'agent_2_last_freq_category']].rename(columns={
    'agent_2_id': 'animal_id',
    'agent_2_last_freq_category': 'category'
})

df_combined = pd.concat([df_agent1, df_agent2])

# Remove Round 1 data as it doesn't have previous frequency
df_combined = df_combined[df_combined['Round'] > 1]

# Step 2: Shift the 'category' column to get the previous round's category
df_combined['prev_category'] = df_combined.groupby('animal_id')['category'].shift(1)

# Remove rows where 'prev_category' is NaN
df_combined = df_combined.dropna(subset=['prev_category'])

# Step 3: Calculate the proportion of animals going from low to high and high to low
low_to_high = df_combined[(df_combined['prev_category'] == 'low') & (df_combined['category'] == 'high')].shape[0]
low_to_high_total = df_combined[df_combined['prev_category'] == 'low'].shape[0]

high_to_low = df_combined[(df_combined['prev_category'] == 'high') & (df_combined['category'] == 'low')].shape[0]
high_to_low_total = df_combined[df_combined['prev_category'] == 'high'].shape[0]

# Proportions (handle cases where total count is zero)
low_to_high_proportion = low_to_high / low_to_high_total if low_to_high_total > 0 else np.nan
high_to_low_proportion = high_to_low / high_to_low_total if high_to_low_total > 0 else np.nan

# Step 4: Calculate confidence intervals
if low_to_high_total > 0:
    low_to_high_ci_lower, low_to_high_ci_upper = stats.binom.interval(0.95, low_to_high_total, low_to_high_proportion)
    low_to_high_ci = (low_to_high_ci_lower / low_to_high_total, low_to_high_ci_upper / low_to_high_total)
else:
    low_to_high_ci = (np.nan, np.nan)

if high_to_low_total > 0:
    high_to_low_ci_lower, high_to_low_ci_upper = stats.binom.interval(0.95, high_to_low_total, high_to_low_proportion)
    high_to_low_ci = (high_to_low_ci_lower / high_to_low_total, high_to_low_ci_upper / high_to_low_total)
else:
    high_to_low_ci = (np.nan, np.nan)

# Display the results
print(f"Probability of Low to High: {low_to_high_proportion:.2f} with 95% CI: {low_to_high_ci}")
print(f"Probability of High to Low: {high_to_low_proportion:.2f} with 95% CI: {high_to_low_ci}")

Probability of Low to High: 0.55 with 95% CI: (0.2727272727272727, 0.8181818181818182)
Probability of High to Low: 0.25 with 95% CI: (0.09375, 0.40625)


# frequency laG

In [26]:
# Add a column for each agent's frequency in the previous round (lag of 1)
# Group by agent id and round, and then shift the frequency to create the lag

df['agent_1_freq_lag_1'] = df.groupby('agent_1_id')['Agent1_Last_Frequency'].shift(1)
df['agent_2_freq_lag_1'] = df.groupby('agent_2_id')['Agent2_Last_Frequency'].shift(1)

# Display the updated DataFrame to check the results
df


Unnamed: 0,Round,Pair,Agent1,Agent2,Agent1_Last_Frequency,Agent2_Last_Frequency,agent_1_id,agent_2_id,agent_1_freq_lag_1,agent_2_freq_lag_1
0,1,1,Abigail,Fleur,,,0,18,,
1,1,2,Kai,Luz,,,1,16,,
2,1,3,Jackson,Olivia,,,2,11,,
3,1,4,Yannis,Anke,,,3,17,,
4,1,5,Lotte,Aiden,,,4,12,,
5,1,6,Ethan,Harper,,,5,14,,
6,1,7,Rangi,Mason,,,6,19,,
7,1,8,Klaus,Miguel,,,7,10,,
8,1,9,Mae,James,,,8,13,,
9,1,10,Sophia,Fritz,,,9,15,,
