In [1]:
import numpy as np
import random as r
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F 

# String constants naming the move-selection policies; Agent.take_action
# compares its policy_string argument against these.
RANDOM_POLICY = "random_policy"
GREEDY_POLICY = "greedy_policy"
GREEDY_BUT_TYTHING_POLICY = "greedy_but_tything_policy"
# Multiplier applied (together with a sign flip) to the bootstrapped value of
# the next position when get_label builds a training target.
DISCOUNT_FACTOR = 0.8

In [2]:
# Creating the environment for our agent to interact with.  A gameboard for tic tac toe.
class GameBoard:
    """Square tic-tac-toe board storing 1 for "X", -1 for "O" and 0 for empty.

    Args:
        dim: Side length of a freshly created board. Ignored when ``grid`` is given.
        grid: Optional existing square NumPy array to wrap. It is stored as-is
            (not copied), so moves made through this object modify the caller's
            array; the board size is then taken from ``grid.shape[0]``.

    Attributes (refreshed by ``evaluate_position`` / ``determine_winner``):
        game_over: True once a line is completed or no empty cell remains.
        game_has_winner: True when the game ended with a completed line.
        outcome: 1 if "X" won, -1 if "O" won, 0 otherwise.
    """

    def __init__(self, dim=3, grid = None):
        # Identity test instead of the element-wise `grid == None`, and the
        # builtin `int` instead of the `np.int` alias removed in NumPy 1.24.
        if grid is None:
            self.grid = np.zeros((dim, dim), dtype=int)
            self.dim = dim
        else:
            self.grid = grid
            self.dim = grid.shape[0]
        self.game_over = False
        self.game_has_winner = False
        self.outcome = 0 #default value to be replaced by 1 for "X" or -1 for "O"

    def available_moves(self):
        """Return the (row, col) coordinates of every empty cell, in row-major order."""
        rows, cols = np.where(self.grid == 0)
        return list(zip(rows, cols))

    def update_state(self, coords, val):
        """Write ``val`` (1 for "X", -1 for "O", 0 to clear) at ``coords``."""
        self.grid[coords] = val

    def get_state(self):
        """Return the board as a new torch tensor."""
        return torch.tensor(self.grid)

    def view_state(self):
        """Print the raw grid."""
        print(self.grid)

    def _line_sums(self):
        """Return the sums of every row, every column and both diagonals as one flat array."""
        return np.concatenate((
            np.sum(self.grid, axis=1),
            np.sum(self.grid, axis=0),
            # flipud turns the anti-diagonal into the main diagonal
            [np.trace(self.grid), np.trace(np.flipud(self.grid))],
        ))

    def evaluate_position(self):
        """Set ``game_over`` / ``game_has_winner`` if the position is terminal.

        A line summing to ``dim`` or ``-dim`` means "X" or "O" completed it.
        A full board without such a line is a draw. Non-terminal positions
        leave both flags untouched.
        """
        if np.any(np.abs(self._line_sums()) == self.dim):
            self.game_over = True
            self.game_has_winner = True
        elif not np.any(self.grid == 0):
            self.game_over = True
            self.game_has_winner = False

    def determine_winner(self):
        """Re-evaluate the position and, if there is a winner, record it in ``outcome``."""
        self.evaluate_position()
        if self.game_has_winner:
            # A winning line exists; if none of them sums to +dim it belongs to "O".
            self.outcome = 1 if np.any(self._line_sums() == self.dim) else -1

    def result(self):
        """Print a one-line, human-readable status of the game."""
        self.determine_winner()
        if self.game_over and self.game_has_winner:
            if self.outcome == 1:
                print("X won!")
            else:
                print("O won!")
        elif self.game_over:
            print("It's a draw")
        else:
            print("Game in progress")

In [4]:
def valuate_move_bellman(move, grid, nn):
    """Score every legal placement of ``move`` on ``grid`` with ``nn``.

    Each empty cell is filled with ``move`` in turn, the resulting position is
    passed to ``nn``, and the cell is cleared again. ``grid`` is written to in
    place while this runs and is back in its original state on return.

    Returns:
        A pair ``(best_score, best_positions)``: the highest value ``nn``
        produced, and the indices into the list of available moves that
        reached it.
    """
    board = GameBoard(dim = grid.shape[0], grid=grid)
    scores = []
    for square in board.available_moves():
        board.update_state(square, move)         # try the move
        scores.append(nn(board.get_state()))     # value of the resulting position
        board.update_state(square, 0)            # undo it
    best_score = max(scores)
    best_positions = [pos for pos, score in enumerate(scores) if score == best_score]
    return best_score, best_positions

def whose_move(i):
    """Return 1 when ``i`` is even and -1 when it is odd."""
    return 1 if i % 2 == 0 else -1

def get_label(prediction, outcome, learning_rate, move_number, grid, nn, is_last_move = False):
    """Build the training target for the position reached after move ``move_number``.

    The target moves ``prediction`` a fraction ``learning_rate`` of the way
    towards a new estimate of the position's value.

    Args:
        prediction: Current network output for the position.
        outcome: Final game result: 1 if "X" won, -1 if "O" won, 0 for a draw.
        learning_rate: Step size of the update towards the new estimate.
        move_number: Zero-based index of the move that produced ``grid``
            (even indices are "X", odd indices are "O", as in ``whose_move``).
        grid: Board array for the position being labelled.
        nn: Value network used to bootstrap from the follow-up positions.
        is_last_move: True when ``grid`` is the final position of the game.

    Returns:
        The target value. Tensor results are detached from the autograd graph.
    """
    old_val = prediction
    if is_last_move:
        # Positions are valued from the side of the player who just moved: the
        # greedy policy maximises the network output for both players, and the
        # bootstrap below negates the opponent's best value. So convert the
        # X-centric outcome to the last mover's perspective; otherwise a win
        # by "O" would be labelled as bad for "O".
        new_val = outcome * whose_move(move_number)
    else:
        # In the current position it is the opponent's turn; their best
        # continuation, negated and discounted, is the value for the mover.
        new_val, _ = valuate_move_bellman(whose_move(move_number+1), grid, nn)
        new_val = (-1) * DISCOUNT_FACTOR * new_val
    target = old_val + learning_rate * (new_val - old_val)
    # A label is a fixed regression target: detach it so the loss does not
    # back-propagate through the target itself.
    return target.detach() if torch.is_tensor(target) else target

In [18]:
# defining our agent that will be playing the game
# to simplify things, the agent's move can be specified at the time the move is made
class Agent:
    """Plays tic-tac-toe moves for either side and records the positions reached.

    Args:
        tythe_rate: Probability with which ``greedy_but_tything_policy`` plays
            a random move instead of the greedy one.

    Attributes:
        game_list: Copies of the board after each move of the most recent game.
    """

    def __init__(self, tythe_rate = 0.5):
        self.tythe_rate = tythe_rate
        self.game_list = []

    def reset(self):
        """Forget the positions recorded for the previous game."""
        self.game_list = []

    def random_policy(self, move, game,  nn):
        """Pick a uniformly random empty cell."""
        return r.choice(game.available_moves())

    def greedy_policy(self, move, game, nn):
        """Pick a cell whose resulting position ``nn`` values highest, breaking ties at random."""
        _, best_indices = valuate_move_bellman(move, game.grid, nn)
        return game.available_moves()[r.choice(best_indices)]

    def greedy_but_tything_policy(self, move, game, nn):
        """Play randomly with probability ``self.tythe_rate``, greedily otherwise."""
        if r.random() < self.tythe_rate:
            return self.random_policy(move, game, nn)
        return self.greedy_policy(move, game, nn)

    def take_action(self, move, game, nn, policy_string):
        """Choose a cell with the named policy and place ``move`` (1 or -1) there.

        Raises:
            ValueError: If ``policy_string`` is not one of the policy constants.
        """
        if policy_string == RANDOM_POLICY:
            policy = self.random_policy
        elif policy_string == GREEDY_POLICY:
            policy = self.greedy_policy
        elif policy_string == GREEDY_BUT_TYTHING_POLICY:
            policy = self.greedy_but_tything_policy
        else:
            # Previously fell through to an UnboundLocalError on `policy`.
            raise ValueError(f"Unknown policy: {policy_string!r}")
        action = policy(move, game, nn)
        game.update_state(action, move)

    def _play_game(self, dim, nn, policy_strings):
        """Play one full game; "X" uses policy_strings[0], "O" uses policy_strings[-1]."""
        self.reset()
        game = GameBoard(dim=dim)
        move = 1  # "X" always starts
        turn = 0
        while not game.game_over:
            policy_string = policy_strings[0] if turn % 2 == 0 else policy_strings[-1]
            self.take_action(move, game, nn, policy_string)
            self.game_list.append(game.grid.copy())
            move *= -1
            turn += 1
            game.evaluate_position()
        game.determine_winner()
        # Return this agent's own record rather than that of a global `agent`.
        return game.outcome, self.game_list

    def play_training_game(self, dim, nn, policy_string):
        """Play a self-play game in which both sides use ``policy_string``.

        Returns:
            ``(outcome, positions)``: the game outcome (1, -1 or 0) and the
            list of board copies recorded after every move.
        """
        return self._play_game(dim, nn, [policy_string])

    def play_validation_game(self, dim, nn, policy_strings):
        """Play a game with per-side policies.

        Args:
            policy_strings: Sequence of policy names. The first entry is used
                for the first player ("X") and the last entry for the second
                player ("O").

        Returns:
            ``(outcome, positions)``, as for ``play_training_game``.
        """
        return self._play_game(dim, nn, policy_strings)

In [20]:
class NeuralNetworkClass(torch.nn.Module):
    """Three-layer fully connected network mapping a board to one scalar value.

    Layer widths are ``dim**2 -> dim**3 -> dim**2 -> 1`` with tanh after the
    first two layers and a linear output.

    Args:
        dim: Side length of the board; inputs are flattened to ``dim**2`` features.
    """

    def __init__(self, dim):
        super().__init__()    # initialise parent module
        self.layer1 = torch.nn.Linear(dim**2, dim**3)
        self.layer2 = torch.nn.Linear(dim**3, dim**2)
        self.layer3 = torch.nn.Linear(dim**2, 1)
        self.dim = dim

    def forward(self, x):
        """Return the value estimate(s) for one board or a batch of boards.

        Args:
            x: Tensor holding ``dim**2`` elements per board, of any numeric dtype.

        Returns:
            Float tensor of shape ``(number_of_boards, 1)``.
        """
        # reshape (unlike view) also accepts non-contiguous input, and .float()
        # casts without forcing the tensor onto the CPU the way
        # .type(torch.FloatTensor) does.
        x = x.reshape(-1, self.dim**2).float()
        x = torch.tanh(self.layer1(x))
        x = torch.tanh(self.layer2(x))
        return self.layer3(x)

In [28]:
# Board side length: the single source of truth for both the network's input
# size and the games played during training.
dim = 3
# Used twice: as the Adam step size below and as the update fraction passed to
# get_label inside train().
learning_rate = 0.1
myNeuralNetwork = NeuralNetworkClass(dim)

# CREATE OUR OPTIMISER
optimiser = torch.optim.Adam(              # what optimiser should we use?
    myNeuralNetwork.parameters(),          # what should it optimise?
    lr=learning_rate                       # using what learning rate?
)

# CREATE OUR CRITERION
criterion = torch.nn.MSELoss()            # callable class that compares our predictions to our labels and returns our loss

# SET UP TRAINING VISUALISATION
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()                            # we will use this to show our models performance on a graph

# The self-play agent that train() uses to generate games.
agent = Agent()

# TRAINING LOOP
def train(model, epochs):
    """Train ``model`` on ``epochs`` self-play games, one optimiser step per position.

    Each epoch plays one game with the module-level ``agent`` and then walks
    the recorded positions from last to first, fitting the model's prediction
    for each position to the target returned by ``get_label``. Uses the
    module-level ``dim``, ``learning_rate``, ``criterion``, ``optimiser`` and
    ``writer``.

    Args:
        model: The value network to train; also used to pick the moves.
        epochs: Number of self-play games to play.
    """
    model.train()                                  # put the model into training mode
    global_step = 0                                # one TensorBoard step per optimiser update
    for epoch in range(epochs):
        # Play with the model being trained, not with a global network.
        outcome, inputs = agent.play_training_game(dim=dim, nn=model, policy_string=GREEDY_BUT_TYTHING_POLICY)
        last_index = len(inputs) - 1
        for i in range(last_index, -1, -1):        # final position first
            mem = inputs[i]
            prediction = model(torch.tensor(mem))  # pass the data forward through the model
            label = get_label(prediction, outcome, learning_rate, i, mem, model, i == last_index)
            loss = criterion(prediction, label)    # compute the loss
            print('Epoch:', epoch, '\tMove:', i, '\tLoss:', loss)
            optimiser.zero_grad()                  # reset the gradients of each of the model's params to zero
            loss.backward()                        # backward pass to compute and set all of the model param's gradients
            optimiser.step()                       # update the model's parameters
            # `epoch + i` produced the same step for different (epoch, move)
            # pairs; a running counter gives every update a distinct step.
            writer.add_scalar('Loss/Train', loss.item(), global_step)
            global_step += 1
            
            
# Run 1000 self-play training games (one game per epoch) on the network defined above.
train(myNeuralNetwork, 1000)

Epoch: 0 	Move: 8 	Loss: tensor(0.0053, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 7 	Loss: tensor(0.2164, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 6 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 5 	Loss: tensor(0.0301, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 4 	Loss: tensor(1.5747e-05, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 3 	Loss: tensor(0.0023, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 2 	Loss: tensor(0.0023, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 1 	Loss: tensor(0.0011, grad_fn=<MeanBackward0>)
Epoch: 0 	Move: 0 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 7 	Loss: tensor(0.0098, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 6 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 5 	Loss: tensor(0.0030, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 4 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 3 	Loss: tensor(3.9745e-05, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 2 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 1 	Move: 1

Epoch: 30 	Move: 2 	Loss: tensor(0.0058, grad_fn=<MeanBackward0>)
Epoch: 30 	Move: 1 	Loss: tensor(0.0029, grad_fn=<MeanBackward0>)
Epoch: 30 	Move: 0 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 8 	Loss: tensor(0.0216, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 7 	Loss: tensor(2.5549e-05, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 6 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 5 	Loss: tensor(0.0020, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 4 	Loss: tensor(7.9331e-05, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 3 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 2 	Loss: tensor(1.1547e-05, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 1 	Loss: tensor(2.0179e-06, grad_fn=<MeanBackward0>)
Epoch: 31 	Move: 0 	Loss: tensor(5.0393e-05, grad_fn=<MeanBackward0>)
Epoch: 32 	Move: 6 	Loss: tensor(0.0139, grad_fn=<MeanBackward0>)
Epoch: 32 	Move: 5 	Loss: tensor(0.0053, grad_fn=<MeanBackward0>)
Epoch: 32 	Move: 4 	Loss: tensor(0.0238, grad_fn=<MeanBa

Epoch: 64 	Move: 5 	Loss: tensor(0.0037, grad_fn=<MeanBackward0>)
Epoch: 64 	Move: 4 	Loss: tensor(0.0033, grad_fn=<MeanBackward0>)
Epoch: 64 	Move: 3 	Loss: tensor(0.0066, grad_fn=<MeanBackward0>)
Epoch: 64 	Move: 2 	Loss: tensor(0.0087, grad_fn=<MeanBackward0>)
Epoch: 64 	Move: 1 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 64 	Move: 0 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 8 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 7 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 6 	Loss: tensor(0.0026, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 5 	Loss: tensor(0.0021, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 4 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 3 	Loss: tensor(6.3320e-05, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 2 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 1 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 65 	Move: 0 	Loss: tensor(1.9029e-05, grad_fn=<MeanBackward0>)
Ep

Epoch: 95 	Move: 3 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 95 	Move: 2 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 95 	Move: 1 	Loss: tensor(1.2047e-05, grad_fn=<MeanBackward0>)
Epoch: 95 	Move: 0 	Loss: tensor(4.8633e-11, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 8 	Loss: tensor(0.0060, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 7 	Loss: tensor(0.0018, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 6 	Loss: tensor(2.0309e-06, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 5 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 4 	Loss: tensor(0.0016, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 3 	Loss: tensor(0.0014, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 2 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 1 	Loss: tensor(9.1023e-05, grad_fn=<MeanBackward0>)
Epoch: 96 	Move: 0 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 97 	Move: 5 	Loss: tensor(0.0124, grad_fn=<MeanBackward0>)
Epoch: 97 	Move: 4 	Loss: tensor(0.0001, grad_fn=<MeanBackwa

Epoch: 127 	Move: 1 	Loss: tensor(0.0087, grad_fn=<MeanBackward0>)
Epoch: 127 	Move: 0 	Loss: tensor(0.0011, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 6 	Loss: tensor(0.0038, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 5 	Loss: tensor(0.0047, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 4 	Loss: tensor(0.0022, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 3 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 2 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 1 	Loss: tensor(0.0076, grad_fn=<MeanBackward0>)
Epoch: 128 	Move: 0 	Loss: tensor(0.0088, grad_fn=<MeanBackward0>)
Epoch: 129 	Move: 4 	Loss: tensor(0.0136, grad_fn=<MeanBackward0>)
Epoch: 129 	Move: 3 	Loss: tensor(9.8586e-06, grad_fn=<MeanBackward0>)
Epoch: 129 	Move: 2 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 129 	Move: 1 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 129 	Move: 0 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 130 	Move: 7 	Loss: tensor(0.0064, grad_fn=<MeanBac

Epoch: 160 	Move: 2 	Loss: tensor(0.0960, grad_fn=<MeanBackward0>)
Epoch: 160 	Move: 1 	Loss: tensor(0.0511, grad_fn=<MeanBackward0>)
Epoch: 160 	Move: 0 	Loss: tensor(0.0070, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 8 	Loss: tensor(0.0351, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 7 	Loss: tensor(0.1701, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 6 	Loss: tensor(0.1567, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 5 	Loss: tensor(0.0527, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 4 	Loss: tensor(0.0209, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 3 	Loss: tensor(0.0033, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 2 	Loss: tensor(0.0318, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 1 	Loss: tensor(0.0514, grad_fn=<MeanBackward0>)
Epoch: 161 	Move: 0 	Loss: tensor(0.1085, grad_fn=<MeanBackward0>)
Epoch: 162 	Move: 7 	Loss: tensor(0.0087, grad_fn=<MeanBackward0>)
Epoch: 162 	Move: 6 	Loss: tensor(0.0214, grad_fn=<MeanBackward0>)
Epoch: 162 	Move: 5 	Loss: tensor(0.0010, grad_fn=<MeanBackwar

Epoch: 195 	Move: 0 	Loss: tensor(5.1340e-05, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 6 	Loss: tensor(0.0112, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 5 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 4 	Loss: tensor(0.0028, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 3 	Loss: tensor(0.0062, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 2 	Loss: tensor(0.0074, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 1 	Loss: tensor(0.0059, grad_fn=<MeanBackward0>)
Epoch: 196 	Move: 0 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 8 	Loss: tensor(0.0234, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 7 	Loss: tensor(4.4856e-05, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 6 	Loss: tensor(0.0093, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 5 	Loss: tensor(0.0099, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 4 	Loss: tensor(0.0079, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 3 	Loss: tensor(0.0050, grad_fn=<MeanBackward0>)
Epoch: 197 	Move: 2 	Loss: tensor(0.0015, grad_fn=<Mea

Epoch: 229 	Move: 4 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 229 	Move: 3 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 229 	Move: 2 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 229 	Move: 1 	Loss: tensor(2.5734e-08, grad_fn=<MeanBackward0>)
Epoch: 229 	Move: 0 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 7 	Loss: tensor(0.0120, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 6 	Loss: tensor(3.7822e-06, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 5 	Loss: tensor(5.9900e-05, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 4 	Loss: tensor(5.2388e-06, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 3 	Loss: tensor(9.4093e-06, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 2 	Loss: tensor(5.3230e-05, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 1 	Loss: tensor(9.3745e-05, grad_fn=<MeanBackward0>)
Epoch: 230 	Move: 0 	Loss: tensor(9.8957e-05, grad_fn=<MeanBackward0>)
Epoch: 231 	Move: 4 	Loss: tensor(0.0222, grad_fn=<MeanBackward0>)
Epoch: 231 	Move: 3 	Loss: ten

Epoch: 265 	Move: 4 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 265 	Move: 3 	Loss: tensor(4.4309e-05, grad_fn=<MeanBackward0>)
Epoch: 265 	Move: 2 	Loss: tensor(1.3562e-09, grad_fn=<MeanBackward0>)
Epoch: 265 	Move: 1 	Loss: tensor(3.4585e-05, grad_fn=<MeanBackward0>)
Epoch: 265 	Move: 0 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 266 	Move: 4 	Loss: tensor(0.0083, grad_fn=<MeanBackward0>)
Epoch: 266 	Move: 3 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 266 	Move: 2 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 266 	Move: 1 	Loss: tensor(5.3692e-05, grad_fn=<MeanBackward0>)
Epoch: 266 	Move: 0 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 267 	Move: 4 	Loss: tensor(0.0078, grad_fn=<MeanBackward0>)
Epoch: 267 	Move: 3 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 267 	Move: 2 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 267 	Move: 1 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 267 	Move: 0 	Loss: tensor(1.9690e-05, 

Epoch: 302 	Move: 4 	Loss: tensor(0.0074, grad_fn=<MeanBackward0>)
Epoch: 302 	Move: 3 	Loss: tensor(0.0025, grad_fn=<MeanBackward0>)
Epoch: 302 	Move: 2 Epoch: 303 	Move: 3 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 303 	Move: 2 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 303 	Move: 1 	Loss: tensor(0.0019, grad_fn=<MeanBackward0>)
Epoch: 303 	Move: 0 	Loss: tensor(3.7343e-06, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 6 	Loss: tensor(0.0043, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 5 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 4 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 3 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 2 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 1 	Loss: tensor(0.0107, grad_fn=<MeanBackward0>)
Epoch: 304 	Move: 0 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 305 	Move: 7 	Loss: tensor(0.0225, grad_fn=<MeanBackward0>)
Epoch: 305 	Move: 6 	Loss: tensor(0.00

Epoch: 337 	Move: 0 	Loss: tensor(0.0062, grad_fn=<MeanBackward0>)
Epoch: 338 	Move: 5 	Loss: tensor(0.0354, grad_fn=<MeanBackward0>)
Epoch: 338 	Move: 4 	Loss: tensor(0.0172, grad_fn=<MeanBackward0>)
Epoch: 338 	Move: 3 	Loss: tensor(0.0053, grad_fn=<MeanBackward0>)
Epoch: 338 	Move: 2 	Loss: tensor(3.6743e-06, grad_fn=<MeanBackward0>)
Epoch: 338 	Move: 1 	Loss: tensor(0.0039, grad_fn=<MeanBackward0>)
Epoch: 338 	Move: 0 	Loss: tensor(0.0092, grad_fn=<MeanBackward0>)
Epoch: 339 	Move: 8 	Loss: tensor(0.0034, grad_fn=<MeanBackward0>)
Epoch: 339 	Move: 7 	Loss: tensor(0.0088, grad_fn=<MeanBackward0>)
Epoch: 339 	Move: 6 	Loss: tensor(0.0058, grad_fn=<MeanBackward0>)
Epoch: 339 	Move: 5 Epoch: 340 	Move: 3 	Loss: tensor(0.0164, grad_fn=<MeanBackward0>)
Epoch: 340 	Move: 2 	Loss: tensor(0.0328, grad_fn=<MeanBackward0>)
Epoch: 340 	Move: 1 	Loss: tensor(0.0814, grad_fn=<MeanBackward0>)
Epoch: 340 	Move: 0 	Loss: tensor(0.0479, grad_fn=<MeanBackward0>)
Epoch: 341 	Move: 5 	Loss: tensor(0.01

Epoch: 378 	Move: 6 	Loss: tensor(0.0020, grad_fn=<MeanBackward0>)
Epoch: 378 	Move: 5 	Loss: tensor(0.0258, grad_fn=<MeanBackward0>)
Epoch: 378 	Move: 4 	Loss: tensor(0.0215, grad_fn=<MeanBackward0>)
Epoch: 378 	Move: 3 	Loss: tensor(0.0363, grad_fn=<MeanBackward0>)
Epoch: 378 	Move: 2 	Loss: tensor(0.0257, grad_fn=<MeanBackward0>)
Epoch: 378 	Move: 1 	Loss: tensor(0.0112, grad_fn=<MeanBackward0>)
Epoch: 378 	Move: 0 	Loss: tensor(0.0015, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 8 	Loss: tensor(0.0125, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 7 	Loss: tensor(0.0028, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 6 	Loss: tensor(0.0052, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 5 	Loss: tensor(0.0114, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 4 	Loss: tensor(0.0168, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 3 	Loss: tensor(0.0014, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 2 	Loss: tensor(3.8977e-05, grad_fn=<MeanBackward0>)
Epoch: 379 	Move: 1 	Loss: tensor(0.0005, grad_fn=<MeanBac

Epoch: 413 	Move: 6 	Loss: tensor(0.0196, grad_fn=<MeanBackward0>)
Epoch: 413 	Move: 5 	Loss: tensor(0.0020, grad_fn=<MeanBackward0>)
Epoch: 413 	Move: 4 	Loss: tensor(0.0053, grad_fn=<MeanBackward0>)
Epoch: 413 	Move: 3 	Loss: tensor(3.4331e-06, grad_fn=<MeanBackward0>)
Epoch: 413 	Move: 2 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 413 	Move: 1 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 413 	Move: 0 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 6 	Loss: tensor(0.0058, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 5 	Loss: tensor(0.0030, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 4 	Loss: tensor(0.0032, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 3 	Loss: tensor(0.0025, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 2 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 1 	Loss: tensor(1.1321e-05, grad_fn=<MeanBackward0>)
Epoch: 414 	Move: 0 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 415 	Move: 6 	Loss: tensor(0.0136, grad_fn=<Mea

Epoch: 446 	Move: 2 	Loss: tensor(3.2216e-05, grad_fn=<MeanBackward0>)
Epoch: 446 	Move: 1 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 446 	Move: 0 	Loss: tensor(6.8032e-05, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 8 	Loss: tensor(0.0095, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 7 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 6 	Loss: tensor(1.5422e-06, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 5 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 4 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 3 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 2 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 1 	Loss: tensor(1.1955e-05, grad_fn=<MeanBackward0>)
Epoch: 447 	Move: 0 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 448 	Move: 5 	Loss: tensor(0.0022, grad_fn=<MeanBackward0>)
Epoch: 448 	Move: 4 	Loss: tensor(0.0060, grad_fn=<MeanBackward0>)
Epoch: 448 	Move: 3 	Loss: tensor(0.0085, grad

Epoch: 482 	Move: 7 	Loss: tensor(0.0123, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 6 	Loss: tensor(9.5382e-06, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 5 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 4 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 3 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 2 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 1 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 482 	Move: 0 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 7 	Loss: tensor(0.0104, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 6 	Loss: tensor(3.5825e-07, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 5 	Loss: tensor(8.8992e-06, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 4 	Loss: tensor(6.1310e-06, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 3 	Loss: tensor(1.3152e-06, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 2 	Loss: tensor(3.0834e-07, grad_fn=<MeanBackward0>)
Epoch: 483 	Move: 1 	Loss: tensor(0.00

Epoch: 516 	Move: 4 	Loss: tensor(7.5513e-05, grad_fn=<MeanBackward0>)
Epoch: 516 	Move: 3 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 516 	Move: 2 	Loss: tensor(1.9581e-05, grad_fn=<MeanBackward0>)
Epoch: 516 	Move: 1 	Loss: tensor(1.1583e-05, grad_fn=<MeanBackward0>)
Epoch: 516 	Move: 0 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 7 	Loss: tensor(0.0085, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 6 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 5 	Loss: tensor(0.0011, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 4 	Loss: Epoch: 517 	Move: 3 	Loss: tensor(9.6899e-05, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 2 	Loss: tensor(2.5024e-06, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 1 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 517 	Move: 0 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 518 	Move: 8 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 518 	Move: 7 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 518 	Mov

Epoch: 551 	Move: 1 	Loss: tensor(4.8416e-05, grad_fn=<MeanBackward0>)
Epoch: 551 	Move: 0 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 8 	Loss: tensor(0.0164, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 7 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 6 	Loss: tensor(7.5155e-05, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 5 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 4 	Loss: tensor(0.0024, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 3 	Loss: tensor(0.0026, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 2 	Loss: tensor(0.0016, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 1 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 552 	Move: 0 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 553 	Move: 7 	Loss: tensor(0.0067, grad_fn=<MeanBackward0>)
Epoch: 553 	Move: 6 	Loss: tensor(0.0031, grad_fn=<MeanBackward0>)
Epoch: 553 	Move: 5 	Loss: tensor(0.0009, grad_fn=<MeanBackward0>)
Epoch: 553 	Move: 4 	Loss: tensor(0.0002, grad_fn=<Mea

Epoch: 586 	Move: 2 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 586 	Move: 1 	Loss: tensor(0.0491, grad_fn=<MeanBackward0>)
Epoch: 586 	Move: 0 	Loss: tensor(0.0937, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 6 	Loss: tensor(0.0418, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 5 	Loss: tensor(0.0049, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 4 	Loss: tensor(2.9198e-05, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 3 	Loss: tensor(0.0012, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 2 	Loss: tensor(0.0108, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 1 	Loss: tensor(0.0142, grad_fn=<MeanBackward0>)
Epoch: 587 	Move: 0 	Loss: tensor(0.0171, grad_fn=<MeanBackward0>)
Epoch: 588 	Move: 8 	Loss: tensor(0.1494, grad_fn=<MeanBackward0>)
Epoch: 588 	Move: 7 	Loss: tensor(0.0930, grad_fn=<MeanBackward0>)
Epoch: 588 	Move: 6 	Loss: tensor(0.0674, grad_fn=<MeanBackward0>)
Epoch: 588 	Move: 5 	Loss: tensor(0.0237, grad_fn=<MeanBackward0>)
Epoch: 588 	Move: 4 	Loss: tensor(0.0134, grad_fn=<MeanBac

Epoch: 622 	Move: 2 	Loss: tensor(2.5218e-05, grad_fn=<MeanBackward0>)
Epoch: 622 	Move: 1 	Loss: tensor(0.0047, grad_fn=<MeanBackward0>)
Epoch: 622 	Move: 0 	Loss: tensor(0.0092, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 8 	Loss: tensor(0.0269, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 7 	Loss: tensor(0.0092, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 6 	Loss: tensor(0.0034, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 5 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 4 	Loss: tensor(3.8553e-05, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 3 	Loss: tensor(5.5757e-05, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 2 	Loss: tensor(0.0054, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 1 	Loss: tensor(0.0073, grad_fn=<MeanBackward0>)
Epoch: 623 	Move: 0 	Loss: tensor(0.0076, grad_fn=<MeanBackward0>)
Epoch: 624 	Move: 6 	Loss: tensor(0.0006, grad_fn=<MeanBackward0>)
Epoch: 624 	Move: 5 	Loss: tensor(0.0180, grad_fn=<MeanBackward0>)
Epoch: 624 	Move: 4 	Loss: tensor(0.0109, grad_fn=

Epoch: 661 	Move: 6 	Loss: tensor(2.3454e-05, grad_fn=<MeanBackward0>)
Epoch: 661 	Move: 5 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 661 	Move: 4 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 661 	Move: 3 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 661 	Move: 2 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 661 	Move: 1 	Loss: tensor(0.0024, grad_fn=<MeanBackward0>)
Epoch: 661 	Move: 0 	Loss: tensor(0.0021, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 8 	Loss: tensor(0.0001, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 7 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 6 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 5 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 4 	Loss: tensor(0.0023, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 3 	Loss: tensor(6.7923e-10, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 2 	Loss: tensor(0.0016, grad_fn=<MeanBackward0>)
Epoch: 662 	Move: 1 	Loss: tensor(0.0005, grad_fn=<Mea

Epoch: 696 	Move: 2 	Loss: tensor(0.0068, grad_fn=<MeanBackward0>)
Epoch: 696 	Move: 1 	Loss: tensor(0.0031, grad_fn=<MeanBackward0>)
Epoch: 696 	Move: 0 	Loss: tensor(0.0222, grad_fn=<MeanBackward0>)
Epoch: 697 	Move: 6 Epoch: 698 	Move: 7 	Loss: tensor(0.0130, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 6 	Loss: tensor(0.0034, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 5 	Loss: tensor(0.0024, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 4 	Loss: tensor(0.0053, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 3 	Loss: tensor(0.0036, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 2 	Loss: tensor(0.0376, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 1 	Loss: tensor(0.0121, grad_fn=<MeanBackward0>)
Epoch: 698 	Move: 0 	Loss: tensor(0.0158, grad_fn=<MeanBackward0>)
Epoch: 699 	Move: 8 	Loss: tensor(0.0954, grad_fn=<MeanBackward0>)
Epoch: 699 	Move: 7 	Loss: tensor(0.1237, grad_fn=<MeanBackward0>)
Epoch: 699 	Move: 6 	Loss: tensor(2.4569e-07, grad_fn=<MeanBackward0>)
Epoch: 699 	Move: 5 	Loss: tensor(0.03

Epoch: 734 	Move: 4 	Loss: tensor(0.0282, grad_fn=<MeanBackward0>)
Epoch: 734 	Move: 3 	Loss: tensor(0.0101, grad_fn=<MeanBackward0>)
Epoch: 734 	Move: 2 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 734 	Move: 1 	Loss: tensor(0.0087, grad_fn=<MeanBackward0>)
Epoch: 734 	Move: 0 	Loss: tensor(0.0243, grad_fn=<MeanBackward0>)
Epoch: 735 	Move: 6 	Loss: tensor(0.0264, grad_fn=<MeanBackward0>)
Epoch: 735 	Move: 5 	Loss: tensor(0.0263, grad_fn=<MeanBackward0>)
Epoch: 735 	Move: 4 	Loss: tensor(0.0132, grad_fn=<MeanBackward0>)
Epoch: 735 	Move: 3 	Loss: tensor(0.0006, grad_fn=<MeanBackward0>)
Epoch: 735 	Move: 2 	Loss: tensor(5.2322e-05, grad_fn=<MeanBackward0>)
Epoch: 735 Epoch: 736 	Move: 3 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 736 	Move: 2 	Loss: tensor(0.0122, grad_fn=<MeanBackward0>)
Epoch: 736 	Move: 1 	Loss: tensor(0.0275, grad_fn=<MeanBackward0>)
Epoch: 736 	Move: 0 	Loss: tensor(0.0309, grad_fn=<MeanBackward0>)
Epoch: 737 	Move: 8 	Loss: tensor(0.0095, grad_

Epoch: 771 	Move: 6 	Loss: tensor(0.0016, grad_fn=<MeanBackward0>)
Epoch: 771 	Move: 5 	Loss: tensor(0.0018, grad_fn=<MeanBackward0>)
Epoch: 771 	Move: 4 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 771 	Move: 3 	Loss: tensor(0.0019, grad_fn=<MeanBackward0>)
Epoch: 771 	Move: 2 	Loss: tensor(9.3947e-06, grad_fn=<MeanBackward0>)
Epoch: 771 	Move: 1 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 771 	Move: 0 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 8 	Loss: tensor(0.0147, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 7 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 6 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 5 	Loss: tensor(0.0057, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 4 	Loss: tensor(0.0048, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 3 	Loss: tensor(0.0037, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 2 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 772 	Move: 1 	Loss: tensor(1.3331e-05, grad_fn=<Mea

Epoch: 807 	Move: 5 	Loss: tensor(0.0014, grad_fn=<MeanBackward0>)
Epoch: 807 	Move: 4 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 807 	Move: 3 	Loss: tensor(2.9431e-05, grad_fn=<MeanBackward0>)
Epoch: 807 	Move: 2 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 807 	Move: 1 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 807 	Move: 0 	Loss: tensor(0.0034, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 7 	Loss: tensor(0.0038, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 6 	Loss: tensor(0.0046, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 5 	Loss: tensor(0.0027, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 4 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 3 	Loss: tensor(4.0640e-06, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 2 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 1 	Loss: tensor(0.0020, grad_fn=<MeanBackward0>)
Epoch: 808 	Move: 0 	Loss: tensor(3.6728e-05, grad_fn=<MeanBackward0>)
Epoch: 809 	Move: 8 	Loss: tensor(0.0030, grad_fn=

Epoch: 840 	Move: 1 	Loss: tensor(0.0014, grad_fn=<MeanBackward0>)
Epoch: 840 	Move: 0 	Loss: tensor(0.0312, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 7 	Loss: tensor(0.1089, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 6 	Loss: tensor(0.1429, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 5 	Loss: tensor(0.0668, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 4 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 3 	Loss: tensor(0.1051, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 2 	Loss: tensor(0.0407, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 1 	Loss: tensor(0.1027, grad_fn=<MeanBackward0>)
Epoch: 841 	Move: 0 	Loss: tensor(0.0928, grad_fn=<MeanBackward0>)
Epoch: 842 	Move: 4 	Loss: tensor(0.0156, grad_fn=<MeanBackward0>)
Epoch: 842 	Move: 3 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 842 	Move: 2 	Loss: tensor(0.0086, grad_fn=<MeanBackward0>)
Epoch: 842 	Move: 1 	Loss: tensor(0.0733, grad_fn=<MeanBackward0>)
Epoch: 842 	Move: 0 	Loss: tensor(0.0799, grad_fn=<MeanBackwar

Epoch: 873 	Move: 1 	Loss: tensor(0.0048, grad_fn=<MeanBackward0>)
Epoch: 873 	Move: 0 	Loss: Epoch: 874 	Move: 3 	Loss: tensor(0.0022, grad_fn=<MeanBackward0>)
Epoch: 874 	Move: 2 	Loss: tensor(1.0891e-05, grad_fn=<MeanBackward0>)
Epoch: 874 	Move: 1 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 874 	Move: 0 	Loss: tensor(0.0023, grad_fn=<MeanBackward0>)
Epoch: 875 	Move: 5 	Loss: tensor(0.0091, grad_fn=<MeanBackward0>)
Epoch: 875 	Move: 4 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 875 	Move: 3 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 875 	Move: 2 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 875 	Move: 1 	Loss: tensor(0.0224, grad_fn=<MeanBackward0>)
Epoch: 875 	Move: 0 	Loss: tensor(0.0127, grad_fn=<MeanBackward0>)
Epoch: 876 	Move: 7 	Loss: tensor(0.0207, grad_fn=<MeanBackward0>)
Epoch: 876 	Move: 6 	Loss: tensor(0.0124, grad_fn=<MeanBackward0>)
Epoch: 876 	Move: 5 	Loss: tensor(0.0130, grad_fn=<MeanBackward0>)
Epoch: 876 	Move: 4 	Loss: tens

Epoch: 907 	Move: 0 	Loss: tensor(0.0036, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 6 	Loss: tensor(0.0109, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 5 	Loss: tensor(4.6264e-05, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 4 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 3 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 2 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 1 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 908 	Move: 0 	Loss: tensor(0.0005, grad_fn=<MeanBackward0>)
Epoch: 909 	Loss: tensor(0.0133, grad_fn=<MeanBackward0>)
Epoch: 909 	Move: 7 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 909 	Move: 6 	Loss: tensor(1.1607e-05, grad_fn=<MeanBackward0>)
Epoch: 909 	Move: 5 	Loss: tensor(0.0020, grad_fn=<MeanBackward0>)
Epoch: 909 	Move: 4 	Loss: tensor(0.0046, grad_fn=<MeanBackward0>)
Epoch: 909 	Move: 3 	Loss: tensor(0.0029, grad_fn=<MeanBackward0>)
Epoch: 909 	Move: 2 	Loss: tensor(0.0008, grad_fn=<MeanBackward

Epoch: 942 	Move: 8 	Loss: tensor(0.0115, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 7 	Loss: tensor(3.3833e-05, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 6 	Loss: tensor(0.0003, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 5 	Loss: tensor(0.0002, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 4 	Loss: tensor(0.0019, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 3 	Loss: tensor(0.0016, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 2 	Loss: tensor(0.0007, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 1 	Loss: tensor(0.0019, grad_fn=<MeanBackward0>)
Epoch: 942 	Move: 0 	Loss: tensor(0.0024, grad_fn=<MeanBackward0>)
Epoch: 943 	Move: 8 	Loss: tensor(0.0213, grad_fn=<MeanBackward0>)
Epoch: 943 	Move: 7 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 943 	Move: 6 	Loss: tensor(0.0020, grad_fn=<MeanBackward0>)
Epoch: 943 	Move: 5 	Loss: tensor(0.0028, grad_fn=<MeanBackward0>)
Epoch: 943 	Move: 4 	Loss: tensor(0.0027, grad_fn=<MeanBackward0>)
Epoch: 943 	Move: 3 	Loss: tensor(0.0012, grad_fn=<MeanBac

Epoch: 977 	Move: 6 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 977 	Move: 5 	Loss: tensor(4.5962e-05, grad_fn=<MeanBackward0>)
Epoch: 977 	Move: 4 	Loss: tensor(0.0013, grad_fn=<MeanBackward0>)
Epoch: 977 	Move: 3 	Loss: tensor(0.0035, grad_fn=<MeanBackward0>)
Epoch: 977 	Move: 2 	Loss: tensor(0.0026, grad_fn=<MeanBackward0>)
Epoch: 977 	Move: 1 	Loss: tensor(0.0010, grad_fn=<MeanBackward0>)
Epoch: 977 	Move: 0 	Loss: tensor(2.7034e-05, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 6 	Loss: tensor(0.0086, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 5 	Loss: tensor(0.0019, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 4 	Loss: tensor(0.0027, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 3 	Loss: tensor(0.0008, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 2 	Loss: tensor(0.0053, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 1 	Loss: tensor(0.0022, grad_fn=<MeanBackward0>)
Epoch: 978 	Move: 0 	Loss: tensor(0.0004, grad_fn=<MeanBackward0>)
Epoch: 979 	Move: 4 	Loss: tensor(0.0099, grad_fn=<Mea

In [19]:
# Play one game on a 3x3 board with a freshly constructed network choosing
# moves under GREEDY_BUT_TYTHING_POLICY, then dump the board snapshots that
# ended up in agent.game_list.
agent = Agent()
# NOTE(review): the argument 3 presumably is the board dimension (it matches
# dim=3 below) -- confirm against NeuralNetworkClass.
mynn = NeuralNetworkClass(3)
agent.play_single_game(dim=3, nn=mynn, policy_string=GREEDY_BUT_TYTHING_POLICY)
# Prints the whole list: one 3x3 integer array per recorded position.
print(agent.game_list)

[array([[0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]]), array([[ 0,  1,  0],
       [ 0,  0,  0],
       [ 0, -1,  0]]), array([[ 0,  1,  0],
       [ 1,  0,  0],
       [ 0, -1,  0]]), array([[ 0,  1,  0],
       [ 1,  0,  0],
       [ 0, -1, -1]]), array([[ 0,  1,  0],
       [ 1,  0,  0],
       [ 1, -1, -1]]), array([[ 0,  1, -1],
       [ 1,  0,  0],
       [ 1, -1, -1]]), array([[ 1,  1, -1],
       [ 1,  0,  0],
       [ 1, -1, -1]])]


In [33]:
# Run the network on x and display the result (one output row per row of x).
# NOTE(review): my_nn is created in cell In [32] and x in cell In [21], and both
# cells appear *below* this one in the notebook, so a top-to-bottom
# "Restart & Run All" would reach this line before either name exists --
# move this cell after them.
my_nn(x)

tensor([[0.4013],
        [0.3578],
        [0.3356],
        [0.2358],
        [0.2059],
        [0.2987],
        [0.3139],
        [0.3303],
        [0.2632]], grad_fn=<AddmmBackward>)

In [23]:
# Collapse every dimension after the first, so each entry along dim 0 of X
# becomes a single flat row.
# NOTE(review): X is not defined in this cell; the conversion below is commented
# out, so X must already be a tensor left over from another cell -- presumably a
# stack of board snapshots; confirm where it comes from.
# X = torch.tensor(X)
# NOTE(review): `asdf` is a placeholder name; consider a descriptive one if this
# cell is kept.
asdf = torch.flatten(X,start_dim=1)
asdf

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  1,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  1,  0,  0,  0,  0,  0, -1],
        [ 0,  0,  1,  0,  0,  0,  0,  1, -1],
        [ 0,  0,  1,  0,  0, -1,  0,  1, -1],
        [ 1,  0,  1,  0,  0, -1,  0,  1, -1],
        [ 1,  0,  1,  0,  0, -1, -1,  1, -1],
        [ 1,  1,  1,  0,  0, -1, -1,  1, -1]], dtype=torch.int32)

In [21]:
# Turn the recorded game into a 2-D tensor: one row per board snapshot, one
# column per cell of the 3x3 grid (hence 9 columns).
#
# agent.game_list is a Python list of ndarrays. Stack it into a single ndarray
# first: calling torch.tensor() directly on a list of ndarrays goes through a
# slow per-element conversion and triggers a UserWarning in recent PyTorch
# releases. The resulting values and dtype are unchanged.
x = torch.tensor(np.array(agent.game_list)).view(-1, 9)
x

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0,  1,  0,  0,  0],
        [ 0,  0,  0,  0, -1,  1,  0,  0,  0],
        [ 0,  0,  0,  0, -1,  1,  1,  0,  0],
        [ 0, -1,  0,  0, -1,  1,  1,  0,  0],
        [ 0, -1,  1,  0, -1,  1,  1,  0,  0],
        [-1, -1,  1,  0, -1,  1,  1,  0,  0],
        [-1, -1,  1,  0, -1,  1,  1,  1,  0],
        [-1, -1,  1,  0, -1,  1,  1,  1, -1]], dtype=torch.int32)

In [4]:
# Making a random game list
# Plays one game on a fresh 3x3 board, alternating the two players, and stores a
# copy of the grid after every move in agent.game_list.
# NOTE(review): the moves are presumably random (per the title above);
# Agent.take_action is defined elsewhere -- confirm.
agent = Agent()
game = GameBoard(3)
# 1 is "X", -1 is "O"; X moves first.
move = 1
# Record the empty starting position. copy() is needed because the board
# updates its grid in place; without it every stored entry would alias the
# same array.
agent.game_list.append(game.grid.copy())
while not game.game_over:
    agent.take_action(game, move)
    agent.game_list.append(game.grid.copy())
    # Flip the sign to hand the turn to the other player.
    move *=-1
    # Refresh game.game_over before the loop condition is tested again.
    game.evaluate_position()
    # Print the grid after each move.
    game.view_state()

# Once the game is over, settle the outcome and report it, then dump all
# recorded positions.
game.determine_winner()
game.result()
print(agent.game_list)

[[0 0 0]
 [0 0 1]
 [0 0 0]]
[[ 0  0  0]
 [ 0 -1  1]
 [ 0  0  0]]
[[ 0  0  0]
 [ 0 -1  1]
 [ 1  0  0]]
[[ 0 -1  0]
 [ 0 -1  1]
 [ 1  0  0]]
[[ 0 -1  1]
 [ 0 -1  1]
 [ 1  0  0]]
[[-1 -1  1]
 [ 0 -1  1]
 [ 1  0  0]]
[[-1 -1  1]
 [ 0 -1  1]
 [ 1  1  0]]
[[-1 -1  1]
 [ 0 -1  1]
 [ 1  1 -1]]
O won!
[array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), array([[0, 0, 0],
       [0, 0, 1],
       [0, 0, 0]]), array([[ 0,  0,  0],
       [ 0, -1,  1],
       [ 0,  0,  0]]), array([[ 0,  0,  0],
       [ 0, -1,  1],
       [ 1,  0,  0]]), array([[ 0, -1,  0],
       [ 0, -1,  1],
       [ 1,  0,  0]]), array([[ 0, -1,  1],
       [ 0, -1,  1],
       [ 1,  0,  0]]), array([[-1, -1,  1],
       [ 0, -1,  1],
       [ 1,  0,  0]]), array([[-1, -1,  1],
       [ 0, -1,  1],
       [ 1,  1,  0]]), array([[-1, -1,  1],
       [ 0, -1,  1],
       [ 1,  1, -1]])]


In [32]:
my_nn = NeuralNetworkClass(3)