<a href="https://colab.research.google.com/github/jal9o3/OLA/blob/dev/OLA_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load the data

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

INPUT_SIZE = 147
OUTPUT_SIZE = 254

class CustomDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        self.inputs = torch.tensor(self.data.iloc[:, :INPUT_SIZE].values, dtype=torch.float32)
        self.outputs = torch.tensor(self.data.iloc[:, INPUT_SIZE:INPUT_SIZE+OUTPUT_SIZE].values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.inputs[idx], self.outputs[idx]

# Usage
dataset = CustomDataset('training_data.csv')
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


# Define the neural network

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(INPUT_SIZE, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, OUTPUT_SIZE)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = F.softmax(x, dim=1)  # Apply softmax along the appropriate dimension
        return x

# Initialize the model
model = NeuralNet()

# Train the model

In [3]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for inputs, targets in dataloader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/100], Loss: 0.0002
Epoch [2/100], Loss: 0.0002
Epoch [3/100], Loss: 0.0001
Epoch [4/100], Loss: 0.0001
Epoch [5/100], Loss: 0.0001
Epoch [6/100], Loss: 0.0001
Epoch [7/100], Loss: 0.0001
Epoch [8/100], Loss: 0.0001
Epoch [9/100], Loss: 0.0001
Epoch [10/100], Loss: 0.0001
Epoch [11/100], Loss: 0.0001
Epoch [12/100], Loss: 0.0001
Epoch [13/100], Loss: 0.0001
Epoch [14/100], Loss: 0.0001
Epoch [15/100], Loss: 0.0001
Epoch [16/100], Loss: 0.0001
Epoch [17/100], Loss: 0.0001
Epoch [18/100], Loss: 0.0001
Epoch [19/100], Loss: 0.0001
Epoch [20/100], Loss: 0.0001
Epoch [21/100], Loss: 0.0001
Epoch [22/100], Loss: 0.0001
Epoch [23/100], Loss: 0.0001
Epoch [24/100], Loss: 0.0001
Epoch [25/100], Loss: 0.0001
Epoch [26/100], Loss: 0.0000
Epoch [27/100], Loss: 0.0001
Epoch [28/100], Loss: 0.0001
Epoch [29/100], Loss: 0.0001
Epoch [30/100], Loss: 0.0001
Epoch [31/100], Loss: 0.0001
Epoch [32/100], Loss: 0.0001
Epoch [33/100], Loss: 0.0001
Epoch [34/100], Loss: 0.0000
Epoch [35/100], Loss: 0

# Save the model

In [4]:
torch.save(model.state_dict(), 'model.pth')

# Load model from file

In [5]:
model = NeuralNet()
model.load_state_dict(torch.load('model.pth'))
model.eval()

  model.load_state_dict(torch.load('model.pth'))


NeuralNet(
  (fc1): Linear(in_features=147, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=254, bias=True)
)

# Clone the OLA engine repository

In [6]:
!git clone https://github.com/jal9o3/OLA.git

Cloning into 'OLA'...
remote: Enumerating objects: 860, done.[K
remote: Counting objects: 100% (102/102), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 860 (delta 61), reused 77 (delta 40), pack-reused 758 (from 1)[K
Receiving objects: 100% (860/860), 200.67 KiB | 5.02 MiB/s, done.
Resolving deltas: 100% (522/522), done.


# Define new training simulator

In [10]:
import os
os.chdir('./OLA')

In [41]:
import random

from core import Board, Infostate, Player
from constants import Controller
from training import TimelessBoard
from simulation import MatchSimulator

def get_model_decision(model, board: Board, infostate: Infostate):
    """
    This is for obtaining the model's chosen action.
    """
    input_infostate = list(map(int, str(infostate).split(" ")))
    # Convert input_infostate to a PyTorch Tensor
    input_infostate = torch.tensor(input_infostate, dtype=torch.float32)
    # Reshape the input to have an extra dimension
    input_infostate = input_infostate.unsqueeze(0)  # Add a batch dimension
    full_strategy = model(input_infostate)
    # Get the probabilities for each action from the model output
    full_strategy = full_strategy.squeeze(0).tolist()  # Remove batch dimension and convert to list

    fullgame_actions = TimelessBoard.actions()
    valid_actions = board.actions()
    strategy = [0.0 for _ in range(len(valid_actions))]
    for action in fullgame_actions:
        if action not in valid_actions:
            full_strategy[fullgame_actions.index(action)] = 0.0
    if sum(full_strategy) > 0:
        full_strategy = [x / sum(full_strategy) for x in full_strategy]

    for i, action in enumerate(fullgame_actions):
        if action in valid_actions:
          strategy[valid_actions.index(action)] = full_strategy[i]
    if sum(strategy) <= 0:
        strategy = [1/len(board.actions) for _ in range(len(valid_actions))]

    action = random.choices(valid_actions, weights=strategy, k=1)[0]
    return action


class TestingController(Controller):
  """
  This adds the model as a new possible controller.
  """
  MODEL = 3


class ModelTestingSimulator(MatchSimulator):
  """
  This simulates a match that utilizes a trained OLA model.
  """

  def get_controller_input(self, arbiter_board: Board, infostate: Infostate):
      """
      This is for obtaining the controller's chosen action, be it human or
      bot.
      """
      valid_actions = arbiter_board.actions()
      if self.get_current_controller(arbiter_board) == Controller.RANDOM:
          action = random.choice(valid_actions)
      elif self.get_current_controller(arbiter_board) == Controller.HUMAN:
          while action not in valid_actions:
              action = input("Choose a move: ")
      elif self.get_current_controller(arbiter_board) == TestingController.MODEL:
          model = NeuralNet()
          model.load_state_dict(torch.load('../model.pth'))
          model.eval()
          action = get_model_decision(model, arbiter_board, infostate)

      return action

  def start(self, iterations: int = 1, target: int = None):
    """
    This is the starting method for the match simulation.
    """
    arbiter_board = Board(self.setup_arbiter_matrix(),
                                  player_to_move=Player.BLUE,
                                  blue_anticipating=False, red_anticipating=False)
    blue_infostate, red_infostate = MatchSimulator._starting_infostates(
                arbiter_board)
    turn_number = 1
    branches_encountered = 0
    while not arbiter_board.is_terminal():
        self.manage_pov_switching(arbiter_board)

        MatchSimulator._print_game_status(turn_number, arbiter_board,
                                          infostates=[
                                              blue_infostate,
                                              red_infostate],
                                          pov=self.pov)
        valid_actions = arbiter_board.actions()
        branches_encountered += len(valid_actions)

        action = ""  # Initialize variable for storing chosen action
        relevant_infostate = (blue_infostate if arbiter_board.player_to_move == Player.BLUE
                              else red_infostate)
        action = self.get_controller_input(arbiter_board, relevant_infostate)
        print(f"Chosen Move: {action}")
        if self.save_data:
            self.game_history.append(action)

        new_arbiter_board = arbiter_board.transition(action)
        result = arbiter_board.classify_action_result(action,
                                                      new_arbiter_board)
        blue_infostate, red_infostate = MatchSimulator._update_infostates(
            blue_infostate, red_infostate, action=action, result=result
        )
        arbiter_board = new_arbiter_board
        turn_number += 1

    MatchSimulator._print_result(arbiter_board)
    model_color = "BLUE" if self.player_two_color == Player.BLUE else "RED"
    print(f"Model: {model_color}")

In [47]:
from constants import Ranking, Controller, POV

# Sample random formations
blue_formation = list(
    Player.get_sensible_random_formation(
        piece_list=Ranking.SORTED_FORMATION)
)
red_formation = list(
    Player.get_sensible_random_formation(
        piece_list=Ranking.SORTED_FORMATION)
)

match_simulator = ModelTestingSimulator(formations=[blue_formation, red_formation],
                                  controllers=[
                                      Controller.RANDOM, TestingController.MODEL],
                                  save_data=False,
                                  pov=POV.WORLD)
match_simulator.start()

Turn Number: 1

 0  bF bA b8 b2 bB [34mb1[0m b2  - b4 
 1  bD b7 bE b3  -  - b2 b2 b2 
 2  b5 b6 b9  -  - bF b2  - bC 
 3   -  -  -  -  -  -  -  -  - 
 4   -  -  -  -  -  -  -  -  - 
 5  r6 r5  - r2 r2 rA rE r3 rF 
 6  r2 [31mr1[0m  - r4  - r9 r2 r2 rF 
 7   - r7 r2 r8 rB rD  -  - rC 

     0  1  2  3  4  5  6  7  8 
Player to move: 1
Chosen Move: 2131
Turn Number: 2

 0  bF bA b8 b2 bB [34mb1[0m b2  - b4 
 1  bD b7 bE b3  -  - b2 b2 b2 
 2  b5  - b9  -  - bF b2  - bC 
 3   - b6  -  -  -  -  -  -  - 
 4   -  -  -  -  -  -  -  -  - 
 5  r6 r5  - r2 r2 rA rE r3 rF 
 6  r2 [31mr1[0m  - r4  - r9 r2 r2 rF 
 7   - r7 r2 r8 rB rD  -  - rC 

     0  1  2  3  4  5  6  7  8 
Player to move: 2
Chosen Move: 5040
Turn Number: 3

 0  bF bA b8 b2 bB [34mb1[0m b2  - b4 
 1  bD b7 bE b3  -  - b2 b2 b2 
 2  b5  - b9  -  - bF b2  - bC 
 3   - b6  -  -  -  -  -  -  - 
 4  r6  -  -  -  -  -  -  -  - 
 5   - r5  - r2 r2 rA rE r3 rF 
 6  r2 [31mr1[0m  - r4  - r9 r2 r2 rF 
 7   - r7 r2 r8 rB rD  -

  model.load_state_dict(torch.load('../model.pth'))


Chosen Move: 3747
Turn Number: 62

 0   - bA b8 b2  - [34mb1[0m b2  - b4 
 1   - bF  - bB b2  - b2  -  - 
 2  bD  - bE  -  -  -  -  - b2 
 3  b5 b7 b9 r2 b3  - b2  -  - 
 4  b6  - r6  -  -  -  - bC rF 
 5   -  -  - r5  -  - rE r3  - 
 6   - r2 [31mr1[0m r4 r9 rA rD  - rC 
 7   - r7 r2 r8 rB  - r2 rF r2 

     0  1  2  3  4  5  6  7  8 
Player to move: 2
Chosen Move: 4243
Turn Number: 63

 0   - bA b8 b2  - [34mb1[0m b2  - b4 
 1   - bF  - bB b2  - b2  -  - 
 2  bD  - bE  -  -  -  -  - b2 
 3  b5 b7 b9 r2 b3  - b2  -  - 
 4  b6  -  - r6  -  -  - bC rF 
 5   -  -  - r5  -  - rE r3  - 
 6   - r2 [31mr1[0m r4 r9 rA rD  - rC 
 7   - r7 r2 r8 rB  - r2 rF r2 

     0  1  2  3  4  5  6  7  8 
Player to move: 1
Chosen Move: 3444
Turn Number: 64

 0   - bA b8 b2  - [34mb1[0m b2  - b4 
 1   - bF  - bB b2  - b2  -  - 
 2  bD  - bE  -  -  -  -  - b2 
 3  b5 b7 b9 r2  -  - b2  -  - 
 4  b6  -  - r6 b3  -  - bC rF 
 5   -  -  - r5  -  - rE r3  - 
 6   - r2 [31mr1[0m r4 r9 rA rD  - rC 
 7 