In [None]:
### The goal of this notebook is to get a better handle on how slow it is to run
### many games in the current setup, and to decide whether there is a better way of doing it.


""" 
todo

1. make it so that we explore via a bunch of random moves first and then start playing,
    BUT we don't want to save any games that are WON or LOST from random play...
2. keep working on efficiency gains that will make the whole loop work well
3. think about how to work tournaments in, with several different networks
4. find a way to save down lots of different iterations and generations of games, to learn from
    
"""

In [1]:
import random
import matplotlib.pyplot as plt
import datetime
import os
from pathlib import Path

import numpy as np
import copy
from importlib import reload
import torch
import int_to_board
import pickle

reload(int_to_board)

import game
reload(game)

import plotting
reload(plotting)

import mcts.mcts
reload(mcts.mcts)

import mcts.networks
reload(mcts.networks)

import mcts.agent
reload(mcts.agent)

import augmentor
reload(augmentor)

from game import Patterns
from plotting import PatternPlotter

from mcts.mcts import Tree, Node
from mcts.networks import PatternsNet
from mcts.agent import Agent

from augmentor import StateAugmentor

# Seed every random number generator the notebook uses from one shared value.
rseed = 12387623
for _seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    _seed_fn(rseed)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
my_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# Load the previous best network to help play the games (let's see how it does!).
# In time, have the best networks of a generation play against each other.
network_path = Path.cwd() / 'saved_networks' / 'v4' / '2025_07_11_15_25.pt'
my_network = PatternsNet(in_channels=47, out_channels=128)

# map_location remaps the stored tensors onto the device chosen in the setup cell,
# so a checkpoint written on a GPU machine still loads on a CPU-only one.
# weights_only=True restricts unpickling to tensor data.
# The call stays last so the cell still shows the key-matching result.
my_network.load_state_dict(
    torch.load(network_path, map_location=my_device, weights_only=True)
)


<All keys matched successfully>

In [6]:
### Agent parameters (each one except NUM_IT is passed to the Agent constructor in the next cell):
NUM_TREES = 500  # passed as num_trees
TARGET_GAMES = 500  # passed as target_games
NUM_IT = 1 # 1 validation, 15*30k games -- NOTE(review): not referenced by any visible cell
SCHEDULE = [
    # NOTE(review): entries look like (depth, explore_steps) pairs -- confirm against Agent.
    (0, 0), # explore randomly for the first moves:
    (1, 250), # explore with search tree after depth 1:
]

SAVE_DEPTH = 5  # passed as save_depth

TOPN = 10  # passed as topn
RANDN = 5  # passed as randn
SELECTION_TEMP = None  # passed as selection_temperature

In [7]:
# Build the agent from the loaded network, the selected device and the parameter constants.
my_agent = Agent(
        agent_id='1', # not relevant for a single machine:
        network = my_network,
        device = my_device,
        num_trees = NUM_TREES, # number of in series trees to use:
        target_games = TARGET_GAMES, # total number of games in the data set
        selection_temperature = SELECTION_TEMP,
        topn=TOPN, # only explore the TOPN best moves (TOPN is 10, not 5)
        randn=RANDN, # explore an additional RANDN random moves
        save_depth=SAVE_DEPTH, # original note: "only save the final state and move" -- TODO confirm this still holds for the current SAVE_DEPTH
        explore_steps_schedule=SCHEDULE,  
        debug=False,
    )


Generating initial games:
Evaluating tensor states...
Provisioning inference to root nodes...


In [8]:
# run the games until target amount reached:
# %prun runs the call under the Python profiler and prints a per-function timing table,
# which is what this notebook uses to find the slow spots.
%prun my_agent.run_games()

1 games have been completed!
2 games have been completed!
3 games have been completed!
4 games have been completed!
5 games have been completed!
6 games have been completed!
7 games have been completed!
8 games have been completed!
9 games have been completed!
10 games have been completed!
11 games have been completed!
12 games have been completed!
13 games have been completed!
14 games have been completed!
15 games have been completed!
16 games have been completed!
17 games have been completed!
18 games have been completed!
19 games have been completed!
20 games have been completed!
21 games have been completed!
22 games have been completed!
24 games have been completed!
25 games have been completed!
26 games have been completed!
27 games have been completed!
28 games have been completed!
29 games have been completed!
30 games have been completed!
31 games have been completed!
32 games have been completed!
33 games have been completed!
34 games have been completed!
35 games have been 

         170210416 function calls (169390016 primitive calls) in 524.197 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  6179853   57.757    0.000   57.757    0.000 mcts.py:222(calculate_child_exploration_scores)
  6179853   38.098    0.000   97.235    0.000 mcts.py:874(calculate_child_puct_scores)
  1131306   31.315    0.000   31.315    0.000 {built-in method torch.tensor}
  2054619   30.930    0.000   30.930    0.000 mcts.py:92(__init__)
  1131306   27.395    0.000  101.507    0.000 mcts.py:504(create_tensor_state_from_parent)
  6383613   24.702    0.000  157.695    0.000 mcts.py:843(next_node)
  1465000   19.311    0.000   38.169    0.000 mcts.py:905(back_propagate)
  7634256   15.902    0.000   18.246    0.000 mcts.py:888(update_parent_child_scores)
  7598680   14.877    0.000   19.681    0.000 game.py:292(_easy_win_placing)
  1465000   13.061    0.000  172.178    0.000 mcts.py:808(get_leaf_node)
  6194815   12.409   

In [None]:
# Toy example: pick the highest-scoring action while excluding known-losing ones.
# num_actions is the single source of truth for the array sizes, so the score
# vector and the mask can never get out of step.
num_actions = 20
puct_scores = np.random.rand(num_actions)

losing_action_arguments = [4, 6, 7, 10, 15, 18]

# determine which arguments are losing (True at every losing action index):
losing_mask = np.zeros(num_actions, dtype=bool)
losing_mask[losing_action_arguments] = True

not_losing_mask = ~losing_mask

# Losing actions are scored -inf so argmax never selects them.
# NOTE: if every action were losing, all scores would be -inf and argmax would return index 0.
masked_scores = np.where(not_losing_mask, puct_scores, -np.inf)
best_arg = np.argmax(masked_scores)

print(puct_scores, best_arg)



In [None]:
# NOTE(review): this cell held an unfinished tab-completion stub (`jim = np.`).
# That is a SyntaxError and stops "Restart & Run All", so the dangling statement is dropped.

In [None]:
# Display the first entry of my_agent.trees.
# The original line ended in a dangling "." (unfinished tab-completion), which is a
# SyntaxError; the trailing dot is removed so the cell simply shows the object.
my_agent.trees[0]

In [None]:
# Walk from a starting node up through its .parent links, rebuilding and plotting
# the game at every node on the way.
# NOTE(review): copy_leaf is not defined in any visible cell -- it must already exist in the kernel.
_leaf = copy_leaf

while _leaf is not None:
    tensor_state = _leaf.tensor_state
    # Rebuild a game object from this node's tensor state.
    aug = StateAugmentor(state=tensor_state)
    new_game = aug.create_game_from_state()
    print(new_game.calculate_score())
    print(sorted(new_game.get_actions()))
    pplotter = PatternPlotter(new_game)
    pplotter.plot()
    # Step to the parent; the loop stops once a node's parent is None.
    # NOTE(review): when the loop runs to completion _leaf is None afterwards, so the later
    # cells that read _leaf.game / _leaf.parent would fail -- confirm the intended run order.
    _leaf = _leaf.parent


In [None]:
# Keep an independent deep copy of the most recently rebuilt game for the cells below.
# NOTE(review): this rebinds the name `game`, which the import cell binds to the imported
# `game` module -- after this cell runs, `game` no longer refers to the module.
game = copy.deepcopy(new_game)

In [None]:
# Player 1 receives the "active" attributes when active_player == 1 and player 2 receives
# them when active_player == -1; in every other case a player gets the "passive" ones.
if game.active_player == 1:
    p1_order = game.active_color_order
else:
    p1_order = game.passive_color_order

if game.active_player == -1:
    p2_order = game.active_color_order
else:
    p2_order = game.passive_color_order

# index 0 is player 1, index 1 is player 2:
player_orders = [p1_order, p2_order]

# assign the color groups to each player with the same rule:
if game.active_player == 1:
    p1_color_groups = game.active_color_groups
else:
    p1_color_groups = game.passive_color_groups

if game.active_player == -1:
    p2_color_groups = game.active_color_groups
else:
    p2_color_groups = game.passive_color_groups

player_color_groups = [p1_color_groups, p2_color_groups]

# Per-player drawing settings, listed in the same player order as the lists above.
token_points = [8, 4]
y_offsets = [[0.0, -1.2], [-1.2, 0.0]]
face_colors = [(0.8, 0.1, 0.8), (0.8, 0.8, 0.9)]


In [None]:
from matplotlib import patches

In [None]:
# Show passive_color_order of the game attached to the current _leaf node.
_leaf.game.passive_color_order

In [None]:
# Take independent deep copies of three game objects so they can be compared side by side:
# the one on _leaf, the one on its parent, and `game` (itself a copy of new_game).
leaf_game = copy.deepcopy(_leaf.game)
parent_game = copy.deepcopy(_leaf.parent.game)
tensor_game = copy.deepcopy(game)

In [None]:
# Compare color orders: passive order of the leaf game and of the tensor-rebuilt game,
# next to the active order of the parent game.
leaf_game.passive_color_order, parent_game.active_color_order, tensor_game.passive_color_order

In [None]:
# Show the parent game's active bowl token, the action argument stored on _leaf,
# and the parent node's possible actions.
parent_game.active_bowl_token, _leaf.parent_action_arg, _leaf.parent.possible_actions

In [None]:
# Compare color groups: passive groups of the leaf game and of the tensor-rebuilt game,
# next to the active groups of the parent game.
leaf_game.passive_color_groups, parent_game.active_color_groups, tensor_game.passive_color_groups

In [None]:
# Show the color order and color groups that were assigned to player 2.
p2_order, p2_color_groups

In [None]:
# Show the loop variables left behind by the token-row plotting loop.
# NOTE(review): pcgroup and porder are only bound by that loop, which sits in a LATER cell;
# on a fresh kernel run top-to-bottom this cell raises NameError.
pcgroup, porder

In [None]:
fig, axis = plt.subplots()

# One entry per player: color order, color groups, token-row start, star points,
# star face color and the pair of y offsets.
per_player_settings = zip(player_orders, player_color_groups,
                          pplotter.token_starts, token_points,
                          face_colors, y_offsets)

for porder, pcgroup, row_start, star_points, star_color, row_offsets in per_player_settings:
    # untaken colors are numbered from 6 downwards as they are encountered:
    untaken = 5

    for _color, _order in enumerate(porder):
        if _order > 0:
            # the color has an order: draw its color token in slot `_order` of the token row
            slot_center = (row_start[0] + 1.2 * (_order - 1), row_start[1] + row_offsets[0])
            color_token = patches.Circle(slot_center,
                                         radius=0.35,
                                         alpha=0.8,
                                         facecolor=pplotter.token_colors[_color],
                                         edgecolor='black',
                                         linewidth=0.5)
            axis.add_patch(color_token)

            # the star token goes over the first entry of this color's group;
            # that entry is indexed [0] -> y (as 7 - value) and [1] -> x.
            first_cell = pcgroup[_color][0]
            token_coords = (first_cell[1], 7 - first_cell[0])
            token_string = str(_order)
        else:
            # the color has no order yet: park its star token in the next free slot of the row
            token_coords = (row_start[0] + 1.2 * untaken, row_start[1] + row_offsets[1])
            token_string = str(untaken + 1)
            untaken -= 1

        # draw the star token and print its number on top of it:
        star = pplotter.get_star_token(token_coords,
                                       num_points=star_points,
                                       face_color=star_color)
        axis.add_patch(star)
        axis.text(token_coords[0], token_coords[1],
                  token_string,
                  fontsize=12.0,
                  horizontalalignment='center',
                  verticalalignment='center')

In [None]:
# Draw the plot again using the most recently created PatternPlotter.
pplotter.plot()

In [None]:
# list(...) builds a new list object (same result as the full slice james[:]),
# so edits to one list are not visible through the other.
james = [1, 2, 3]
jim = list(james)
jim[0] = 5
james[1] = 20
jim, james