In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import itertools

from typing import NamedTuple

In [None]:
# Castles are worth 1 through 12 points.
castle_values = [value for value in range(1, 13)]

# Starting strategy: equal weight on each of the 12 castles, normalised to sum to 1.
distribution = np.array(12 * [1.0])
distribution /= distribution.sum()

In [None]:
def draw_allocations(distribution, rng, num_allocations=10, allocation_sum=1000):
    """Sample random troop allocations from a per-castle probability distribution.

    Parameters
    ----------
    distribution : array-like of float
        Probability that a single troop is sent to each castle; must sum to 1.
    rng : numpy.random.Generator
        Source of randomness, e.g. ``np.random.default_rng(seed)``.
    num_allocations : int, default 10
        Number of independent allocations to draw.
    allocation_sum : int, default 1000
        Total number of troops distributed in each allocation.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(num_allocations, len(distribution))`` in which
        every row sums to ``allocation_sum``.

    Raises
    ------
    AssertionError
        If ``distribution`` does not sum to 1 within 1e-6.
    """
    assert np.abs(np.sum(distribution) - 1.0) < 1e-6
    # The troop total used to be hard-coded to 1000, silently ignoring
    # ``allocation_sum``; honour the parameter instead.
    return rng.multinomial(allocation_sum, distribution, num_allocations)

In [None]:
# Seeded generator so the sampled allocations are reproducible.
NUM_ALLOCATIONS = 100
rng = np.random.default_rng(12345)
# Both players start from the same normalised distribution
# (the list holds the very same array object twice).
players = 2 * [distribution / distribution.sum()]
# Draw player 0 first, then player 1, from the shared generator.
result_0, result_1 = (
    draw_allocations(player, rng, num_allocations=NUM_ALLOCATIONS)
    for player in players
)




In [None]:
class FightResult(NamedTuple):
    """Outcome of playing every allocation of one player against every allocation of the other."""
    # (player_1 allocation index, player_2 allocation index) for each pairing player 1 won
    player_1_wins: list[tuple[int, int]]
    # (player_2 allocation index, player_1 allocation index) for each pairing player 2 won
    player_2_wins: list[tuple[int, int]]
    # total number of pairings played, ties included
    num_fights: int


def fight(alloc_1, alloc_2, battlefield_values) -> FightResult:
    """Play every row of ``alloc_1`` against every row of ``alloc_2``.

    A player takes a castle by sending strictly more troops to it than the
    opponent; equal troop counts award the castle to nobody. A pairing is won
    by the player whose captured castles have the larger total value, and a
    pairing with equal totals is recorded for neither player.

    Parameters
    ----------
    alloc_1, alloc_2 : numpy.ndarray
        2-D arrays with one allocation per row; both must have the same
        number of columns (castles).
    battlefield_values : sequence of numbers
        Value of each castle, one entry per column.

    Returns
    -------
    FightResult
        Winning index pairs for each player (own index first) and the total
        number of pairings played.

    Raises
    ------
    AssertionError
        If the two allocation arrays have a different number of columns.
    """
    assert alloc_1.shape[1] == alloc_2.shape[1]
    # Convert once instead of on every loop iteration.
    values = np.asarray(battlefield_values)
    player_1_wins = []
    player_2_wins = []
    for i in range(alloc_1.shape[0]):
        # Broadcast one player-1 allocation against all of player 2's:
        # shape (alloc_2.shape[0], num_castles).
        delta = alloc_1[i] - alloc_2
        # Sum the value of the castles each side captured, per opponent row.
        player_1_score = np.sum((delta > 0) * values, axis=1)
        player_2_score = np.sum((delta < 0) * values, axis=1)

        # .tolist() yields plain Python ints so the stored pairs really are
        # tuple[int, int] as FightResult declares.
        for j in np.nonzero(player_1_score > player_2_score)[0].tolist():
            player_1_wins.append((i, j))
        for j in np.nonzero(player_2_score > player_1_score)[0].tolist():
            player_2_wins.append((j, i))

    return FightResult(
        player_1_wins=player_1_wins,
        player_2_wins=player_2_wins,
        num_fights=alloc_1.shape[0] * alloc_2.shape[0],
    )
        
    

In [None]:
# Play every allocation in result_0 against every allocation in result_1.
fight_result = fight(result_0, result_1, battlefield_values=castle_values)

In [None]:
# Distinct player-1 allocation indices that won at least one pairing,
# together with how many pairings each of them won.
indices, counts = np.unique(
    [winner for winner, _loser in fight_result.player_1_wins],
    return_counts=True,
)

In [None]:
# Inspect which player-1 allocation indices won at least once.
indices

In [None]:
# Inspect how many pairings each of those allocations won.
counts

In [None]:
# Sanity check: one row per sampled allocation, one column per castle.
result_0.shape

In [None]:
# Keep only the player-1 allocations that won at least one pairing.
winners = result_0[indices]

In [None]:
# Histogram of the troops that winning player-1 allocations placed on the
# first and last castle (columns 0 and 11 of the allocation matrix).
plt.figure()
plt.hist(winners[:,[0, 11]], label=[0, 11])
plt.legend()


In [None]:
# Win-count-weighted mean of the winning allocations, rescaled so it sums to 1.
new_dist = np.average(result_0[indices], weights=counts, axis=0)
new_dist /= new_dist.sum()

In [None]:
def update_distribution(initial_dist, opponent_dist, num_allocations, castle_values, rng):
    """Run one learning step: sample, fight, and re-estimate the player's distribution.

    ``num_allocations`` allocations are drawn for the player and for the
    opponent, every player allocation fights every opponent allocation, and
    the new distribution is the mean of the player's winning allocations
    weighted by how many pairings each one won, normalised to sum to 1.

    Parameters
    ----------
    initial_dist : array-like of float
        The player's current per-castle distribution (must sum to 1).
    opponent_dist : array-like of float
        The opponent's per-castle distribution (must sum to 1).
    num_allocations : int
        Number of allocations to sample for each side.
    castle_values : sequence of numbers
        Value of each castle.
    rng : numpy.random.Generator
        Source of randomness; the player is sampled before the opponent.

    Returns
    -------
    tuple
        ``(new_dist, win_fraction)`` where ``win_fraction`` is the share of
        all pairings that the player won.
    """
    player_alloc = draw_allocations(initial_dist, rng, num_allocations)
    oppo_alloc = draw_allocations(opponent_dist, rng, num_allocations)

    fight_result = fight(player_alloc, oppo_alloc, battlefield_values=castle_values)

    if not fight_result.player_1_wins:
        # Nothing to learn from: with no wins np.unique([]) produces a float
        # array that cannot be used as an index (and num_fights may be 0), so
        # this case used to crash. Keep the current strategy instead.
        return np.array(initial_dist, dtype=float), 0.0

    player_1_winners, player_1_counts = np.unique(
        [i for i, _ in fight_result.player_1_wins], return_counts=True
    )

    new_dist = np.average(player_alloc[player_1_winners], axis=0, weights=player_1_counts)
    new_dist = new_dist / np.sum(new_dist)
    return new_dist, float(len(fight_result.player_1_wins)) / fight_result.num_fights


In [None]:
# update_distribution returns a (distribution, win_fraction) pair; unpack it so
# new_dist really is a distribution rather than a 2-tuple.
new_dist, new_dist_win_fraction = update_distribution(distribution, distribution, 100, castle_values, rng)

In [None]:

def find_equilibrium(initial_distribution, num_iterations=10000):
    """Repeatedly update a distribution by playing it against its own history.

    Each iteration calls ``update_distribution`` with the most recent
    distribution as the player and an earlier one as the opponent, sampling
    100 allocations per side. For the first 100 iterations the opponent is the
    initial distribution; afterwards the opponent trails the current iteration
    by ``int(100 * exp(-(i - 100) / 2000))`` steps, a lag that shrinks as the
    iteration count grows. Progress is printed every 1000 iterations.

    Note: reads the notebook-level globals ``castle_values`` and ``rng``.

    Returns
    -------
    tuple
        ``(distributions, win_fractions)``: the list of distributions
        (starting with ``initial_distribution``, so ``num_iterations + 1``
        entries) and the per-iteration win fraction of the player.
    """
    history = [initial_distribution]
    win_fractions = []
    for step in range(num_iterations):
        if step >= 100:
            # Opponent lags behind the current step by an exponentially decaying amount.
            lag = int(100 * np.exp(-(step - 100)/2000))
            opponent_index = step - lag
        else:
            # Warm-up phase: always face the initial distribution.
            opponent_index = 0

        if step % 1000 == 0:
            print(step, len(history) - opponent_index)

        updated, fraction = update_distribution(
            history[-1], history[opponent_index], 100, castle_values, rng
        )
        history.append(updated)
        win_fractions.append(fraction)
    return history, win_fractions



In [None]:
# Restart from the uniform distribution over the 12 castles and run a long search.
distribution = np.array(12 * [1.0])
distribution /= distribution.sum()
new_dists, win_percentage = find_equilibrium(distribution, num_iterations=50000)

In [None]:
def plot_dists(dists, win_pct, stride=2000):
    """Plot snapshots of the distribution history and the win fraction per iteration.

    Parameters
    ----------
    dists : sequence of array-like
        Distribution history, one entry per iteration.
    win_pct : sequence of float
        Win fraction recorded at each iteration.
    stride : int, default 2000
        Plot every ``stride``-th distribution from ``dists``.

    Two new matplotlib figures are created; nothing is returned.
    """
    plt.figure()
    # Label each curve with its real iteration number rather than its
    # position in the strided list, so the legend can be read directly.
    for iteration in range(0, len(dists), stride):
        plt.plot(dists[iteration], label=iteration)
    plt.legend(title='Iteration')
    plt.title('Distribution Snapshots')
    plt.xlabel('Castle Index')
    plt.ylabel('Allocation Weight')

    plt.figure()
    plt.plot(win_pct)
    plt.title('Win Fraction over Time')
    plt.xlabel('Iteration')
    plt.ylabel('Win Fraction')

In [None]:
# Visualise the run that started from the uniform distribution.
plot_dists(new_dists, win_percentage)

In [None]:
# Start with almost no weight on the two cheapest and three most valuable
# castles, and equal weight on the seven in between.
low_val_dist = np.array([0.01] * 2 + [1] * 7 + [0.01] * 3)
low_val_dist /= low_val_dist.sum()

low_val_dists, low_val_win_percentage = find_equilibrium(low_val_dist)

In [None]:
# Visualise the run that started from the middle-heavy distribution.
plot_dists(low_val_dists, low_val_win_percentage)

In [None]:
# Start with full weight on the two cheapest and three most valuable castles
# and a quarter of that weight on the seven in between.
weird_dist = np.array([1.0] * 2 + [0.25] * 7 + [1.0] * 3)
weird_dist /= weird_dist.sum()

weird_dists, weird_win_percentage = find_equilibrium(weird_dist)

In [None]:
# Visualise the run that started from the edge-heavy distribution.
plot_dists(weird_dists, weird_win_percentage)

In [None]:
# NOTE: a stray bare expression `p` used to be the only content of this cell.
# `p` is never defined in the notebook, so the cell raised NameError on
# Restart & Run All; the expression has been removed.

In [None]:
# Take a late-stage learned distribution, nearly abandon the six cheapest
# castles, and renormalise so the weights sum to 1 again.
adversarial_dist = new_dists[-2].copy()
adversarial_dist[:6] = 0.001
adversarial_dist /= adversarial_dist.sum()

In [None]:
# Sample 100 allocations from the learned distribution and then 100 from the
# adversarial one (in that order), and play them against each other.
f_1, f_2 = (
    draw_allocations(dist, rng, 100)
    for dist in (new_dists[-2], adversarial_dist)
)

result = fight(f_1, f_2, battlefield_values=castle_values)

In [None]:
# Re-run the equilibrium search starting from the adversarial distribution
# (default number of iterations).
new_adversarial, new_adversarial_pct = find_equilibrium(adversarial_dist)

In [None]:
# Visualise the run that started from the adversarial distribution.
plot_dists(new_adversarial, new_adversarial_pct)