In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def _expected_social_welfare(payoffs, messages, message_probabilities, strategies):
    """Expected sum of both players' payoffs under per-message mixed strategies.

    ``strategies[i][m]`` is player ``i``'s action distribution after seeing
    message ``m``. Payoffs are read as ``payoffs[i][own_action][opponent_action]``,
    the same convention the learning loop uses when it hands out rewards.
    """
    welfare = 0.0
    for (m0, m1), prob_m in zip(messages, message_probabilities):
        if prob_m == 0:
            continue  # this message profile never occurs, so it contributes nothing
        p0 = strategies[0][m0]
        p1 = strategies[1][m1]
        for a0 in range(len(p0)):
            for a1 in range(len(p1)):
                joint_prob = p0[a0] * p1[a1]
                welfare += prob_m * joint_prob * (payoffs[0][a0][a1] + payoffs[1][a1][a0])
    return welfare


def q_learning_2x2_game_with_pushforward(payoffs, T, alpha, beta0, k, messages,
                                         message_probabilities, verbose=True, rng=None):
    """Simulate message-conditioned softmax Q-learning in a two-player, two-action game.

    Every round a message profile ``(m_0, m_1)`` is drawn from ``messages``;
    player ``i`` only conditions on ``m_i``. Each player samples an action from
    a softmax over the Q-values stored for the message it saw and then updates
    the Q-value of the action it played.

    Parameters
    ----------
    payoffs : indexable
        ``payoffs[i][a_own][a_opponent]`` is the reward paid to player ``i``.
    T : int
        Number of rounds to simulate.
    alpha : float
        Forgetting rate: the played action's Q-value becomes
        ``(1 - alpha) * Q + reward``.
    beta0, k : float
        Softmax inverse temperature at round ``t`` is ``beta0 + k * t``.
    messages : sequence of pairs
        Message profiles; element 0 is shown to player 0, element 1 to player 1.
    message_probabilities : sequence of float
        Non-negative weights, one per profile. They are normalised to sum to 1.
    verbose : bool, optional
        When true (default) the time-averaged strategies are printed.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, numpy's global random state is
        used, so ``np.random.seed`` still controls the run.

    Returns
    -------
    dict
        ``"strategies"``: per player and message, the list of mixed strategies
        used in the rounds where that message was seen.
        ``"action_history"``: per player, the list of sampled actions.
        ``"avg_mixed_strategy"``: per player and message, the mean of the
        recorded strategies (``[0.0, 0.0]`` if the message was never seen).
        ``"last_iterate"``: per player and message, the last recorded strategy
        (``[0.5, 0.5]`` if the message was never seen).
        ``"avg_social_welfare"`` / ``"last_social_welfare"``: expected sum of
        both players' payoffs under the averaged / last strategies.

    Raises
    ------
    ValueError
        If ``messages`` is empty, the number of weights does not match the
        number of profiles, or the weights are negative, non-finite or sum to
        a non-positive value.
    """
    num_players = 2
    num_actions = 2

    if len(messages) == 0:
        raise ValueError("At least one message profile is required.")

    probabilities = np.asarray(message_probabilities, dtype=float)
    if probabilities.ndim != 1 or len(probabilities) != len(messages):
        raise ValueError("message_probabilities must contain one weight per message profile.")
    total = probabilities.sum()
    if np.any(probabilities < 0) or not np.isfinite(total) or total <= 0:
        raise ValueError("Message probabilities must be non-negative with a positive, finite sum.")
    probabilities = probabilities / total

    sampler = np.random if rng is None else rng

    # Distinct messages each player can observe, in order of first appearance
    # (dict.fromkeys keeps the order deterministic, unlike set()).
    msgs = [
        list(dict.fromkeys(profile[i] for profile in messages))
        for i in range(num_players)
    ]

    Q = [{m: np.zeros(num_actions) for m in msgs[i]} for i in range(num_players)]
    strategies = [{m: [] for m in msgs[i]} for i in range(num_players)]
    action_history = [[] for _ in range(num_players)]

    for t in range(1, T + 1):
        beta = beta0 + k * t
        message = messages[sampler.choice(len(messages), p=probabilities)]

        # Softmax over the Q-values for the message each player saw; the max is
        # subtracted before exponentiating to avoid overflow for large beta.
        mixed_strategies = []
        for i in range(num_players):
            logits = beta * Q[i][message[i]]
            mixed_strategy = np.exp(logits - np.max(logits))
            mixed_strategy /= np.sum(mixed_strategy)
            mixed_strategies.append(mixed_strategy)
            strategies[i][message[i]].append(mixed_strategy)

        actions = [
            sampler.choice(num_actions, p=mixed_strategies[i])
            for i in range(num_players)
        ]
        for i in range(num_players):
            action_history[i].append(actions[i])

        # Reward of each player: own action first, opponent's action second.
        rewards = [payoffs[i][actions[i]][actions[1 - i]] for i in range(num_players)]

        # Only the played action's Q-value for the observed message is updated.
        # NOTE(review): the reward is added unscaled (not multiplied by alpha);
        # kept as is -- confirm this forgetting-style update is intended.
        for i in range(num_players):
            q_row = Q[i][message[i]]
            q_row[actions[i]] = (1 - alpha) * q_row[actions[i]] + rewards[i]

    # Time average of the strategies used under each message.
    avg_mixed_strategies = [
        {
            m: (np.mean(history, axis=0).tolist() if len(history) > 0
                else [0.0] * num_actions)
            for m, history in strategies[i].items()
        }
        for i in range(num_players)
    ]

    if verbose:
        print(avg_mixed_strategies)

    avg_social_welfare = _expected_social_welfare(
        payoffs, messages, probabilities, avg_mixed_strategies
    )

    # Last strategy used under each message; uniform if the message never came up.
    last_iterate_strategies = [
        {
            m: strategies[i][m][-1] if len(strategies[i][m]) > 0 else [0.5, 0.5]
            for m in msgs[i]
        }
        for i in range(num_players)
    ]

    last_social_welfare = _expected_social_welfare(
        payoffs, messages, probabilities, last_iterate_strategies
    )

    return {
        "last_iterate": last_iterate_strategies,
        "avg_mixed_strategy": avg_mixed_strategies,
        "avg_social_welfare": avg_social_welfare,
        "last_social_welfare": last_social_welfare,
        "action_history": action_history,
        "strategies": strategies,
    }

In [None]:
# Prototype 1
#
# Correlated Eq, same Beta: message profiles are drawn with weights
# [0.5, 0.25, 0.25, 0] and beta stays constant (k = 0).
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.7, 0
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]

ms = ['m1', 'm2']
message_probabilities = [0.5, 0.25, 0.25, 0]

num_players = 2
num_actions = 2
num_simulations = 10  # independent learning runs overlaid in every panel

# mixed_strategy_history[player][message] holds one strategy trajectory per run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(num_simulations):
    results = q_learning_2x2_game_with_pushforward(payoffs, T, alpha, beta0, k, messages, message_probabilities)
    for i in range(num_players):
        for m in ms:
            mixed_strategy_history[i][m].append(results['strategies'][i][m])


plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, num_simulations))

for i in range(num_players):
    for j, m in enumerate(ms):
        ax = axes[i, j]

        # One curve per simulation: probability of action 1 over the rounds in
        # which player i saw message m.
        for sim_idx, sim_data in enumerate(mixed_strategy_history[i][m]):
            sim_data = np.array(sim_data)  # one row per recorded strategy
            if sim_data.shape[0] == 0:
                # The message was never drawn in this run; indexing column 1
                # of an empty array would raise IndexError.
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            prob_action_1 = sim_data[:, 1]
            ax.plot(prob_action_1,
                    color=colors[sim_idx],
                    alpha=0.4,
                    linewidth=2,
                    label=f"Simulation {sim_idx+1}" if sim_idx == 0 else None)

        ax.set_title(f"Player {i}, Message '{m}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # Probabilities should be between 0 and 1
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

# If you want a single legend outside, you can do this:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results1.pdf", format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# Prototype 2
#
# Correlated Eq, increasing beta: message profiles are drawn with weights
# [0.5, 0.25, 0.25, 0] and beta grows from beta0 = 0.1 with slope k = 0.01.
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.1, 0.01
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]

ms = ['m1', 'm2']
message_probabilities = [0.5, 0.25, 0.25, 0]

num_players = 2
num_actions = 2
num_simulations = 10  # independent learning runs overlaid in every panel

# mixed_strategy_history[player][message] holds one strategy trajectory per run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(num_simulations):
    results = q_learning_2x2_game_with_pushforward(payoffs, T, alpha, beta0, k, messages, message_probabilities)
    for i in range(num_players):
        for m in ms:
            mixed_strategy_history[i][m].append(results['strategies'][i][m])


plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, num_simulations))

for i in range(num_players):
    for j, m in enumerate(ms):
        ax = axes[i, j]

        # One curve per simulation: probability of action 1 over the rounds in
        # which player i saw message m.
        for sim_idx, sim_data in enumerate(mixed_strategy_history[i][m]):
            sim_data = np.array(sim_data)  # one row per recorded strategy
            if sim_data.shape[0] == 0:
                # The message was never drawn in this run; indexing column 1
                # of an empty array would raise IndexError.
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            prob_action_1 = sim_data[:, 1]
            ax.plot(prob_action_1,
                    color=colors[sim_idx],
                    alpha=0.4,
                    linewidth=2,
                    label=f"Simulation {sim_idx+1}" if sim_idx == 0 else None)

        ax.set_title(f"Player {i}, Message '{m}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # Probabilities should be between 0 and 1
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

# If you want a single legend outside, you can do this:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results2.pdf", format="pdf", bbox_inches="tight")
plt.show()



In [None]:
# Prototype 3
#
# Correlated Eq, same Beta, different message probabilities: message profiles
# are drawn with weights [1/3, 1/3, 1/3, 0] and beta stays constant (k = 0).
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.7, 0
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]

ms = ['m1', 'm2']
message_probabilities = [1/3, 1/3, 1/3, 0]

num_players = 2
num_actions = 2
num_simulations = 10  # independent learning runs overlaid in every panel

# mixed_strategy_history[player][message] holds one strategy trajectory per run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(num_simulations):
    results = q_learning_2x2_game_with_pushforward(payoffs, T, alpha, beta0, k, messages, message_probabilities)
    for i in range(num_players):
        for m in ms:
            mixed_strategy_history[i][m].append(results['strategies'][i][m])


plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, num_simulations))

for i in range(num_players):
    for j, m in enumerate(ms):
        ax = axes[i, j]

        # One curve per simulation: probability of action 1 over the rounds in
        # which player i saw message m.
        for sim_idx, sim_data in enumerate(mixed_strategy_history[i][m]):
            sim_data = np.array(sim_data)  # one row per recorded strategy
            if sim_data.shape[0] == 0:
                # The message was never drawn in this run; indexing column 1
                # of an empty array would raise IndexError.
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            prob_action_1 = sim_data[:, 1]
            ax.plot(prob_action_1,
                    color=colors[sim_idx],
                    alpha=0.4,
                    linewidth=2,
                    label=f"Simulation {sim_idx+1}" if sim_idx == 0 else None)

        ax.set_title(f"Player {i}, Message '{m}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # Probabilities should be between 0 and 1
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

# If you want a single legend outside, you can do this:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results3.pdf", format="pdf", bbox_inches="tight")
plt.show()


In [None]:
# Prototype 4
#
# Correlated Eq, increasing Beta, different message probabilities: message
# profiles are drawn with weights [1/3, 1/3, 1/3, 0] and beta grows from
# beta0 = 0.1 with slope k = 0.001.
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.1, 0.001
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]

ms = ['m1', 'm2']
message_probabilities = [1/3, 1/3, 1/3, 0]

num_players = 2
num_actions = 2
num_simulations = 10  # independent learning runs overlaid in every panel

# mixed_strategy_history[player][message] holds one strategy trajectory per run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(num_simulations):
    results = q_learning_2x2_game_with_pushforward(payoffs, T, alpha, beta0, k, messages, message_probabilities)
    for i in range(num_players):
        for m in ms:
            mixed_strategy_history[i][m].append(results['strategies'][i][m])


plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, num_simulations))

for i in range(num_players):
    for j, m in enumerate(ms):
        ax = axes[i, j]

        # One curve per simulation: probability of action 1 over the rounds in
        # which player i saw message m.
        for sim_idx, sim_data in enumerate(mixed_strategy_history[i][m]):
            sim_data = np.array(sim_data)  # one row per recorded strategy
            if sim_data.shape[0] == 0:
                # The message was never drawn in this run; indexing column 1
                # of an empty array would raise IndexError.
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            prob_action_1 = sim_data[:, 1]
            ax.plot(prob_action_1,
                    color=colors[sim_idx],
                    alpha=0.4,
                    linewidth=2,
                    label=f"Simulation {sim_idx+1}" if sim_idx == 0 else None)

        ax.set_title(f"Player {i}, Message '{m}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # Probabilities should be between 0 and 1
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

# If you want a single legend outside, you can do this:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results4.pdf", format="pdf", bbox_inches="tight")
plt.show()



In [None]:
# Prototype 5
#
# NOT Correlated Eq, same Beta: the profile ('m1', 'm1') is drawn with
# probability 1 and beta stays constant (k = 0).
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.7, 0
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]
message_probabilities = [1, 0, 0, 0]
ms = ['m1', 'm2']

num_players = 2
num_actions = 2

# Per player and message: one recorded strategy trajectory per simulation run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(10):
    results = q_learning_2x2_game_with_pushforward(
        payoffs, T, alpha, beta0, k, messages, message_probabilities
    )
    for player in range(num_players):
        for msg in ms:
            mixed_strategy_history[player][msg].append(results['strategies'][player][msg])

plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, 10))

for player in range(num_players):
    for col, msg in enumerate(ms):
        ax = axes[player, col]

        # Draw the probability of action 1 for every run. 'm2' has zero
        # probability here, so its trajectories are empty and get skipped.
        for sim_idx, trajectory in enumerate(mixed_strategy_history[player][msg]):
            trajectory = np.array(trajectory)
            if trajectory.shape[0] == 0:
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            curve_label = f"Simulation {sim_idx+1}" if sim_idx == 0 else None
            ax.plot(trajectory[:, 1], color=colors[sim_idx], alpha=0.4,
                    linewidth=2, label=curve_label)

        ax.set_title(f"Player {player}, Message '{msg}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # probabilities live in [0, 1]
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)

# Optional single legend outside the panels:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results5.pdf", format="pdf", bbox_inches="tight")
plt.show()




In [None]:
# Prototype 6
#
# NOT Correlated Eq, increasing beta: the profile ('m1', 'm1') is drawn with
# probability 1 and beta grows from beta0 = 0.1 with slope k = 0.001.
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.1, 0.001
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]
message_probabilities = [1, 0, 0, 0]
ms = ['m1', 'm2']

num_players = 2
num_actions = 2

# Per player and message: one recorded strategy trajectory per simulation run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(10):
    results = q_learning_2x2_game_with_pushforward(
        payoffs, T, alpha, beta0, k, messages, message_probabilities
    )
    for player in range(num_players):
        for msg in ms:
            mixed_strategy_history[player][msg].append(results['strategies'][player][msg])

plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, 10))

for player in range(num_players):
    for col, msg in enumerate(ms):
        ax = axes[player, col]

        # Draw the probability of action 1 for every run. 'm2' has zero
        # probability here, so its trajectories are empty and get skipped.
        for sim_idx, trajectory in enumerate(mixed_strategy_history[player][msg]):
            trajectory = np.array(trajectory)
            if trajectory.shape[0] == 0:
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            curve_label = f"Simulation {sim_idx+1}" if sim_idx == 0 else None
            ax.plot(trajectory[:, 1], color=colors[sim_idx], alpha=0.4,
                    linewidth=2, label=curve_label)

        ax.set_title(f"Player {player}, Message '{msg}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # probabilities live in [0, 1]
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)

# Optional single legend outside the panels:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results6.pdf", format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# Prototype 7
#
# NOT Correlated Eq, increasing beta, higher initial beta: the profile
# ('m1', 'm1') is drawn with probability 1 and beta grows from beta0 = 0.7
# with slope k = 0.01.
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.7, 0.01
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]
message_probabilities = [1, 0, 0, 0]
ms = ['m1', 'm2']

num_players = 2
num_actions = 2

# Per player and message: one recorded strategy trajectory per simulation run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(10):
    results = q_learning_2x2_game_with_pushforward(
        payoffs, T, alpha, beta0, k, messages, message_probabilities
    )
    for player in range(num_players):
        for msg in ms:
            mixed_strategy_history[player][msg].append(results['strategies'][player][msg])

plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, 10))

for player in range(num_players):
    for col, msg in enumerate(ms):
        ax = axes[player, col]

        # Draw the probability of action 1 for every run. 'm2' has zero
        # probability here, so its trajectories are empty and get skipped.
        for sim_idx, trajectory in enumerate(mixed_strategy_history[player][msg]):
            trajectory = np.array(trajectory)
            if trajectory.shape[0] == 0:
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            curve_label = f"Simulation {sim_idx+1}" if sim_idx == 0 else None
            ax.plot(trajectory[:, 1], color=colors[sim_idx], alpha=0.4,
                    linewidth=2, label=curve_label)

        ax.set_title(f"Player {player}, Message '{msg}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # probabilities live in [0, 1]
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)

# Optional single legend outside the panels:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results7.pdf", format="pdf", bbox_inches="tight")
plt.show()


In [None]:
# Prototype 8
#
# The profile ('m1', 'm1') is drawn with probability 1 and beta grows from
# beta0 = 0.1 with slope k = 0.01.
#
# NOTE: `payoffs` is not assigned in this cell; it must already exist in the
# kernel (presumably defined in an earlier cell -- confirm).

T, alpha, beta0, k = 5000, 0.5, 0.1, 0.01
messages = [['m1', 'm1'], ['m1', 'm2'], ['m2', 'm1'], ['m2', 'm2']]
message_probabilities = [1, 0, 0, 0]
ms = ['m1', 'm2']

num_players = 2
num_actions = 2

# Per player and message: one recorded strategy trajectory per simulation run.
mixed_strategy_history = [{'m1': [], 'm2': []}, {'m1': [], 'm2': []}]

for run in range(10):
    results = q_learning_2x2_game_with_pushforward(
        payoffs, T, alpha, beta0, k, messages, message_probabilities
    )
    for player in range(num_players):
        for msg in ms:
            mixed_strategy_history[player][msg].append(results['strategies'][player][msg])

plt.style.use('seaborn-v0_8-darkgrid')
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
colors = plt.cm.viridis(np.linspace(0, 1, 10))

for player in range(num_players):
    for col, msg in enumerate(ms):
        ax = axes[player, col]

        # Draw the probability of action 1 for every run. 'm2' has zero
        # probability here, so its trajectories are empty and get skipped.
        for sim_idx, trajectory in enumerate(mixed_strategy_history[player][msg]):
            trajectory = np.array(trajectory)
            if trajectory.shape[0] == 0:
                print(f"Skipping Simulation {sim_idx} due to empty data.")
                continue
            curve_label = f"Simulation {sim_idx+1}" if sim_idx == 0 else None
            ax.plot(trajectory[:, 1], color=colors[sim_idx], alpha=0.4,
                    linewidth=2, label=curve_label)

        ax.set_title(f"Player {player}, Message '{msg}'", fontsize=14, fontweight='bold')
        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Probability of Action 1", fontsize=12)
        ax.set_ylim(0, 1)  # probabilities live in [0, 1]
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)

# Optional single legend outside the panels:
# fig.legend(*axes[0,0].get_legend_handles_labels(), loc="upper center",
#            bbox_to_anchor=(0.5, 1.05), ncol=5, fontsize=10)

plt.tight_layout()
fig.savefig("qlearning_results8.pdf", format="pdf", bbox_inches="tight")
plt.show()
