In [None]:
import os
import sys
import json
import time
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from numba import cuda

In [None]:
# Make the bundled project sources (mcts_numba_cuda-main/src) importable.
root = os.path.abspath(os.curdir)
src_path = os.path.join(root, 'mcts_numba_cuda-main', 'src')

# Prepend at most once, so re-running this cell never stacks duplicate entries.
sys.path[:0] = [] if src_path in sys.path else [src_path]

In [None]:
from c4 import C4
from mctsnc import MCTSNC

In [None]:
# Report which CUDA device (if any) numba sees before launching any kernels.
if not cuda.is_available():
    print('CUDA is not available.')
else:
    dev = cuda.get_current_device()
    print('CUDA device name:', dev.name)

In [None]:
# Output locations: per-trial/summary data files and rendered figures.
results_dir, plots_dir = Path("results"), Path("plots")
for out_dir in (results_dir, plots_dir):
    # exist_ok keeps the cell safe to re-run.
    out_dir.mkdir(exist_ok=True)

In [None]:
# Helpers
def _to_jsonable(obj):
    """Recursively convert numpy / nested containers into JSON-serializable Python types.

    Conversion rules:
      * dict          -> dict with converted values; numpy scalar *keys* are also
                         unwrapped, because ``json.dump`` rejects e.g. ``np.int64`` keys.
      * list / tuple  -> list with converted items.
      * numpy scalar  -> the matching Python scalar (``.item()``).
      * numpy array   -> (nested) list (``.tolist()``).
      * anything else -> returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            (k.item() if isinstance(k, np.generic) else k): _to_jsonable(v)
            for k, v in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_to_jsonable(v) for v in obj]
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj


def run_trials(n_trees, n_playouts, trials=5, forced_search_steps_limit=5, seed_base=0, verbose=False):
    """Run `trials` independent experiments for given configuration and save per-trial JSONs.

    Each trial builds a fresh ``MCTSNC`` (variant ``'ocp_thrifty'``) seeded with
    ``seed_base + t``, runs one search from a freshly constructed ``C4()`` state,
    and writes a JSON file into ``results_dir``.

    Args:
        n_trees: value passed to ``MCTSNC`` as ``n_trees``.
        n_playouts: value passed to ``MCTSNC`` as ``n_playouts``.
        trials: number of independent runs for this configuration.
        forced_search_steps_limit: forwarded unchanged to ``MCTSNC.run``.
        seed_base: seed of trial 0; trial ``t`` uses ``seed_base + t``.
        verbose: when true, print a one-line summary after each saved trial.

    Returns:
        list of per-trial dict summaries (one flat dict per trial).
    """
    # Make the function usable even if the directory-setup cell was skipped.
    results_dir.mkdir(parents=True, exist_ok=True)

    trial_results = []
    for t in range(trials):
        seed = seed_base + t
        ai = MCTSNC(C4.get_board_shape(), C4.get_extra_info_memory(), C4.get_max_actions(),
                    variant='ocp_thrifty', n_trees=n_trees, n_playouts=n_playouts, seed=seed,
                    verbose_info=False, verbose_debug=False, action_index_to_name_function=C4.action_index_to_name)
        ai.init_device_side_arrays()

        # Build the root state once and read board / extra info / turn from the same object.
        state = C4()
        best_action = ai.run(state.get_board(), state.get_extra_info(), state.get_turn(),
                             forced_search_steps_limit=forced_search_steps_limit)
        # Timing comes from the searcher itself rather than a wall clock around the call.
        elapsed = ai.time_total

        perf = ai._make_performance_info()
        actions = ai._make_actions_info_thrifty()
        best_entry = actions.get('best', None)

        # int(nan) raises ValueError, so a missing playout count is stored as None instead.
        playouts = perf.get('playouts')

        trial = {
            'n_trees': int(n_trees),
            'n_playouts': int(n_playouts),
            'trial': int(t),
            'seed': int(seed),
            'best_action': int(best_action),
            'best_q': float(best_entry['q']) if best_entry else None,
            'best_n': int(best_entry['n']) if best_entry else None,
            'best_n_wins': int(best_entry['n_wins']) if best_entry else None,
            'time_total': float(elapsed),
            'steps': int(ai.steps),
            'playouts': int(playouts) if playouts is not None else None,
            'playouts_per_second': float(perf.get('playouts_per_second', np.nan)),
            **{f"times_{k}": float(v) for k, v in perf.get('times_[ms]', {}).items()}
        }

        fname = results_dir / f"ocp_thrifty_ntrees-{n_trees}_nplayouts-{n_playouts}_trial-{t}.json"
        payload = {
            'config': {'n_trees': n_trees, 'n_playouts': n_playouts, 'trial': t, 'seed': seed},
            'trial': trial,
        }
        with open(fname, 'w', encoding='utf-8') as f:
            json.dump(_to_jsonable(payload), f, indent=2)

        trial_results.append(trial)
        if verbose:
            print(f"Saved {fname}: best_action={trial['best_action']} best_q={trial['best_q']} playouts/s={trial['playouts_per_second']}")
    return trial_results


In [None]:
# Grid search: run every (n_trees, n_playouts) combination and collect all trial rows.
grid_n_trees = [1, 4, 8]
grid_n_playouts = [32, 64, 128, 256]
trials_per_config = 5
forced_steps = 5

# Same ordering as nested loops: n_trees varies slowest, n_playouts fastest.
grid = [(nt, np_) for nt in grid_n_trees for np_ in grid_n_playouts]

all_rows = []
for nt, np_ in grid:
    print(f"Running config n_trees={nt}, n_playouts={np_} ({trials_per_config} trials)")
    all_rows += run_trials(
        nt,
        np_,
        trials=trials_per_config,
        forced_search_steps_limit=forced_steps,
        seed_base=1000,
    )

In [None]:
# One row per trial across the whole grid, persisted next to the per-trial JSON files.
summary_csv = results_dir.joinpath("ocp_thrifty_summary.csv")
df = pd.DataFrame(all_rows)
df.to_csv(path_or_buf=summary_csv, index=False)
print(f"Saved summary: {summary_csv}")

In [None]:
# Plotting
sns.set_theme(style="whitegrid")

# Per-configuration mean / std over trials.
summary = (
    df.groupby(["n_trees", "n_playouts"])
    .agg(
        mean_playouts_per_second=("playouts_per_second", "mean"),
        std_playouts_per_second=("playouts_per_second", "std"),
        mean_best_q=("best_q", "mean"),
        std_best_q=("best_q", "std"),
    )
    .reset_index()
)


def _save_summary_lineplot(data, y, title, ylabel, filename):
    """Plot column `y` of `data` against n_playouts (one line per n_trees) and save it to plots_dir."""
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.lineplot(data=data, x="n_playouts", y=y, hue="n_trees", marker="o", ax=ax)
    ax.set_title(title)
    ax.set_xlabel("n_playouts")
    ax.set_ylabel(ylabel)
    # n_playouts values are powers of two, so a base-2 log axis spaces them evenly.
    ax.set_xscale("log", base=2)
    fig.tight_layout()
    fig.savefig(plots_dir / filename)
    # Figures are only written to disk; close them so they do not accumulate in memory.
    plt.close(fig)


# playouts/sec
_save_summary_lineplot(
    summary,
    y="mean_playouts_per_second",
    title="Mean playouts/sec vs n_playouts (ocp_thrifty)",
    ylabel="playouts/sec",
    filename="playouts_per_second_vs_n_playouts.png",
)

# best_q
_save_summary_lineplot(
    summary,
    y="mean_best_q",
    title="Mean best_q vs n_playouts (ocp_thrifty)",
    ylabel="best_q",
    filename="best_q_vs_n_playouts.png",
)

# boxplot
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x="n_playouts", y="best_q", hue="n_trees", ax=ax)
ax.set_title("Best q distribution per n_playouts and n_trees")
ax.set_xlabel("n_playouts")
ax.set_ylabel("best_q")
# No log x-scale here: boxplot places categories at positions 0..k-1, and position 0
# is invalid on a log axis, which distorts the plot.
fig.tight_layout()
fig.savefig(plots_dir / "best_q_boxplot.png")
plt.close(fig)

Running config n_trees=1, n_playouts=32 (5 trials)
MCTSNC RUN... [MCTSNC(search_time_limit=5.0, search_steps_limit=inf, n_trees=1, n_playouts=32, variant='ocp_thrifty', device_memory=2.0, ucb_c=2.0, seed: 1000)]
MCTSNC RUN DONE. [time: 0.011096715927124023 s; best action: 2 (2), best win_flag: False, best n: 32, best n_wins: 19, best q: 0.59375]
MCTSNC RUN... [MCTSNC(search_time_limit=5.0, search_steps_limit=inf, n_trees=1, n_playouts=32, variant='ocp_thrifty', device_memory=2.0, ucb_c=2.0, seed: 1001)]
MCTSNC RUN DONE. [time: 0.007336139678955078 s; best action: 2 (2), best win_flag: False, best n: 32, best n_wins: 23, best q: 0.71875]
MCTSNC RUN... [MCTSNC(search_time_limit=5.0, search_steps_limit=inf, n_trees=1, n_playouts=32, variant='ocp_thrifty', device_memory=2.0, ucb_c=2.0, seed: 1002)]
MCTSNC RUN DONE. [time: 0.007035970687866211 s; best action: 2 (2), best win_flag: False, best n: 32, best n_wins: 23, best q: 0.71875]
MCTSNC RUN... [MCTSNC(search_time_limit=5.0, search_steps_