In [59]:
import itertools
import matplotlib.pyplot as plt
import json
import seaborn
from pathlib import Path
import numpy as np
from statistics import mean, median
import pandas as pd

In [60]:
def split_simulations_with_and_without_expansion(simulations):
    """Flatten nested per-move simulation pairs into two parallel lists.

    Args:
        simulations: Iterable nested three levels deep (model -> game -> move),
            where every innermost element is a pair
            ``(with_expansion, without_expansion)``.

    Returns:
        A tuple ``(with_expansions, without_expansions)`` of two lists that
        hold, in iteration order, the first and second element of every
        per-move pair.
    """
    # Collapse the model and game levels so only the per-move pairs remain.
    per_game = itertools.chain.from_iterable(simulations)
    per_move = itertools.chain.from_iterable(per_game)

    expanded_counts = []
    unexpanded_counts = []
    for expanded, unexpanded in per_move:
        expanded_counts.append(expanded)
        unexpanded_counts.append(unexpanded)
    return expanded_counts, unexpanded_counts
            
game = "othello"
with_state_evaluator = False

# Directory that is scanned for JSON result files (one entry of `results` per file).
results_dir = Path(f"{game}/complex_rollouts/{'with' if with_state_evaluator else 'without'}_state_evaluator/")

# Path.iterdir() yields entries in arbitrary order. Sort them so that the
# positional model ids assigned below are reproducible between runs.
# Note: ids are positions in sorted-filename order, they are not parsed from
# the file names.
result_files = sorted(results_dir.iterdir())
if not result_files:
    raise FileNotFoundError(f"no result files found in {results_dir}")

results = []
complex_simulations = []
simple_simulations = []
for result_file in result_files:
    # Only the parsing needs the open handle; close it before collecting.
    with open(result_file) as f:
        result = json.load(f)
    results.append(result["results"])
    complex_simulations.append(result["complex_simulations"])
    simple_simulations.append(result["simple_simulations"])
results = np.array(results)

# Derive the model count from the data instead of hard-coding it.
n_models = len(results)

# Rows 0 and 1 of every per-model result are flattened into four columns.
# NOTE(review): the column names assume row 0 holds the win ratios and row 1
# the draw ratios, each ordered (first player, second player) - confirm
# against the code that writes the result files.
flat_results = results[:, 0:2, :].reshape((n_models, 4))
wins = (
    pd.DataFrame(flat_results, columns=["first_wins", "second_wins", "first_draws", "second_draws"])
    [["first_wins", "first_draws", "second_wins", "second_draws"]]
    .assign(
        # "overall" is the plain average of the first- and second-player values.
        overall_wins=lambda frame: (frame["first_wins"] + frame["second_wins"]) / 2,
        overall_draws=lambda frame: (frame["first_draws"] + frame["second_draws"]) / 2,
    )
    .set_index(pd.Index(np.arange(n_models), name="model"))
)

In [61]:
# This cell reshapes `wins` in place. Fail with a clear message if it is run a
# second time on the already-converted frame instead of corrupting it.
if isinstance(wins.columns, pd.MultiIndex):
    raise RuntimeError("`wins` already has value/sem columns; re-run the loading cell before this one")

players = ["first", "second", "overall"]
outcomes = ["win", "draw"]

# Sample size behind each ratio, used in the binomial standard error.
# NOTE(review): presumably 120 games per model per seat (hence 240 overall) -
# confirm against the experiment configuration.
games_played = {"first": 120, "second": 120, "overall": 240}

# Number of per-model rows, taken before the "Total" row is appended.
n_models = len(wins)

# Interleave a placeholder SEM column directly after every value column so the
# flat columns line up with the (player, outcome, value/sem) MultiIndex.
index = pd.MultiIndex.from_product([players, outcomes, ["value", "sem"]])
for position, value_column in enumerate(list(wins.columns)):
    wins.insert(2 * position + 1, f"{value_column}_sem", np.nan)
wins.columns = index

# Per-model standard error of a proportion: sqrt(p * (1 - p) / n).
# The loop variable is deliberately not called `type`: a module-level loop
# variable of that name would replace the builtin for all later cells.
for player, outcome in itertools.product(players, outcomes):
    ratio = wins[player, outcome, "value"]
    wins[player, outcome, "sem"] = np.sqrt(ratio * (1 - ratio) / games_played[player])

# "Total" row: mean ratio over all models, with the SEM computed from the
# pooled sample size (models * games).
totals = []
for player, outcome in itertools.product(players, outcomes):
    total = np.mean(wins[player, outcome, "value"])
    totals.append(total)
    totals.append(np.sqrt(total * (1 - total) / (n_models * games_played[player])))
wins.loc["Total", :] = totals
wins.index = wins.index.rename("Model")

In [62]:
players = ["first", "second", "overall"]
outcomes = ["win", "draw"]
# Multiplier applied to the SEM to obtain the half-width printed after \pm
# (1.96 is the normal quantile for a 95% interval).
z_95 = 1.96
state_evaluator_tag = 'with' if with_state_evaluator else 'without'


def format_estimate(value, sem):
    """Return ``$value \\pm half_width$`` as a LaTeX math string, two decimals each.

    The backslash is escaped explicitly; a bare ``\\p`` in a normal string
    literal is an invalid escape sequence.
    """
    return f'${value:.2f} \\pm {sem * z_95:.2f}$'


def format_row(row):
    """Format one row of `wins` into six strings ordered by (player, outcome)."""
    return [
        format_estimate(row[player, outcome, "value"], row[player, outcome, "sem"])
        for player in players
        for outcome in outcomes
    ]


# One formatted cell per (player, outcome) for every row of `wins`.
latex_wins = wins.apply(func=format_row, axis="columns", result_type="expand")
latex_wins.columns = pd.MultiIndex.from_product([players, outcomes])
latex_wins.index = latex_wins.index.rename("Model")
with open(f"{game}_complex_rollouts_{state_evaluator_tag}_state_evaluator.txt", "w", encoding="utf-8") as latex_file:
    latex_wins.to_latex(latex_file, escape=False)

# Overall summary for the "Total" row, extended with the loss ratio.
# Copy the slice so adding "loss" never writes through to `latex_wins`.
total_overall_latex_wins = latex_wins.loc["Total", "overall"].copy()
total_overall_loss = 1 - (wins.loc["Total", ("overall", "win", "value")] + wins.loc["Total", ("overall", "draw", "value")])
# Pooled sample size: number of per-model rows (everything except "Total")
# times the games behind each overall ratio.
# NOTE(review): 240 is presumably the number of games per model - confirm.
n_models = len(wins.index.drop("Total"))
total_overall_loss_sem = np.sqrt(total_overall_loss * (1 - total_overall_loss) / (n_models * 240))
total_overall_latex_wins["loss"] = format_estimate(total_overall_loss, total_overall_loss_sem)
with open(f"{game}_complex_rollouts_overall_total_{state_evaluator_tag}_state_evaluator.txt", "w", encoding="utf-8") as latex_file:
    total_overall_latex_wins.to_latex(latex_file, escape=False, header=False)
total_overall_latex_wins


win     $0.42 \pm 0.01$
draw    $0.05 \pm 0.01$
loss    $0.53 \pm 0.01$
Name: Total, dtype: object

In [63]:
# Flatten the nested per-move pairs of both rollout variants into flat lists.
(
    complex_simulations_with_expansion,
    complex_simulations_without_expansion,
) = split_simulations_with_and_without_expansion(complex_simulations)
(
    simple_simulations_with_expansion,
    simple_simulations_without_expansion,
) = split_simulations_with_and_without_expansion(simple_simulations)

# Each tuple is (mean with expansion, mean without expansion).
average_complex_simulations = (
    mean(complex_simulations_with_expansion),
    mean(complex_simulations_without_expansion),
)
average_simple_simulations = (
    mean(simple_simulations_with_expansion),
    mean(simple_simulations_without_expansion),
)
print(average_complex_simulations)
print(average_simple_simulations)

# Table rows: first cell is the with-expansion mean, second cell is the sum of
# both means. The means computed above are reused instead of being recomputed.
simulation_rows = {
    "Random rollouts": [average_simple_simulations[0], sum(average_simple_simulations)],
    "Policy network rollouts": [average_complex_simulations[0], sum(average_complex_simulations)],
}
simulations = pd.DataFrame.from_dict(simulation_rows, orient="index", columns=["Rollouts", "Simulations"])

output_name = f"{game}_complex_rollouts_simulations_{'with' if with_state_evaluator else 'without'}_state_evaluator.txt"
with open(output_name, "w", encoding="utf-8") as f:
    simulations.to_latex(f, float_format="{:.0f}".format)

(59.00146599528332, 16578.349544266683)
(344.4308202154101, 19529.9628959276)
