<a href="https://colab.research.google.com/github/jal9o3/OLA/blob/dev/OLA_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load the data

In [2]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

INPUT_SIZE = 147
OUTPUT_SIZE = 254

class CustomDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        self.inputs = torch.tensor(self.data.iloc[:, :INPUT_SIZE].values, dtype=torch.float32)
        self.outputs = torch.tensor(self.data.iloc[:, INPUT_SIZE:INPUT_SIZE+OUTPUT_SIZE].values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.inputs[idx], self.outputs[idx]

# Usage
dataset = CustomDataset('training_data.csv')
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


FileNotFoundError: [Errno 2] No such file or directory: 'training_data.csv'

# Define the neural network

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

INPUT_SIZE = 147
OUTPUT_SIZE = 254

class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(INPUT_SIZE, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, OUTPUT_SIZE)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = F.softmax(x, dim=1)  # Apply softmax along the appropriate dimension
        return x

# Initialize the model
model = NeuralNet()

# Train the model

In [None]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for inputs, targets in dataloader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/100], Loss: 0.0002
Epoch [2/100], Loss: 0.0001
Epoch [3/100], Loss: 0.0001
Epoch [4/100], Loss: 0.0001
Epoch [5/100], Loss: 0.0001
Epoch [6/100], Loss: 0.0001
Epoch [7/100], Loss: 0.0001
Epoch [8/100], Loss: 0.0001
Epoch [9/100], Loss: 0.0001
Epoch [10/100], Loss: 0.0001
Epoch [11/100], Loss: 0.0001
Epoch [12/100], Loss: 0.0001
Epoch [13/100], Loss: 0.0001
Epoch [14/100], Loss: 0.0001
Epoch [15/100], Loss: 0.0001
Epoch [16/100], Loss: 0.0001
Epoch [17/100], Loss: 0.0001
Epoch [18/100], Loss: 0.0001
Epoch [19/100], Loss: 0.0001
Epoch [20/100], Loss: 0.0001
Epoch [21/100], Loss: 0.0001
Epoch [22/100], Loss: 0.0001
Epoch [23/100], Loss: 0.0001
Epoch [24/100], Loss: 0.0001
Epoch [25/100], Loss: 0.0001
Epoch [26/100], Loss: 0.0001
Epoch [27/100], Loss: 0.0001
Epoch [28/100], Loss: 0.0001
Epoch [29/100], Loss: 0.0001
Epoch [30/100], Loss: 0.0001
Epoch [31/100], Loss: 0.0001
Epoch [32/100], Loss: 0.0001
Epoch [33/100], Loss: 0.0001
Epoch [34/100], Loss: 0.0001
Epoch [35/100], Loss: 0

# Save the model

In [None]:
torch.save(model.state_dict(), 'model.pth')

# Clone the OLA engine repository

In [4]:
!git clone https://github.com/jal9o3/OLA.git

Cloning into 'OLA'...
remote: Enumerating objects: 869, done.[K
remote: Counting objects: 100% (111/111), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 869 (delta 67), reused 81 (delta 43), pack-reused 758 (from 1)[K
Receiving objects: 100% (869/869), 216.37 KiB | 14.42 MiB/s, done.
Resolving deltas: 100% (528/528), done.


In [8]:
!cd OLA && git checkout dev

Branch 'dev' set up to track remote branch 'dev' from 'origin'.
Switched to a new branch 'dev'


# Define new match simulator

In [9]:
import os
os.chdir('./OLA')

In [15]:
import random

from core import Board, Infostate, Player
from constants import Controller
from training import (TimelessBoard, CFRTrainingSimulator, DirectionFilter,
                      ActionsFilter)
from simulation import MatchSimulator

class TestingController(Controller):
  """
  This adds the model as a new possible controller.
  """
  MODEL = 3


class ModelTestingSimulator(CFRTrainingSimulator):
  """
  This simulates a match that utilizes a trained OLA model.
  """

  def __init__(self, formations: list[list[int]], controllers: list[int],
                 save_data: bool, pov: int):
        super().__init__(formations, controllers, save_data, pov)
        self.controllers = controllers # Undo the nullification in superclass

  @staticmethod
  def get_model_decision(model, board: Board, infostate: Infostate,
                         actions_filter: ActionsFilter):
    """
    This is for obtaining the model's chosen action.
    """
    input_infostate = list(map(int, str(infostate).split(" ")))
    # Convert input_infostate to a PyTorch Tensor
    input_infostate = torch.tensor(input_infostate, dtype=torch.float32)
    # Reshape the input to have an extra dimension
    input_infostate = input_infostate.unsqueeze(0)  # Add a batch dimension
    full_strategy = model(input_infostate)
    # Get the probabilities for each action from the model output
    full_strategy = full_strategy.squeeze(0).tolist()

    fullgame_actions = TimelessBoard.actions()
    valid_actions = board.actions()
    strategy = [0.0 for _ in range(len(valid_actions))]
    for action in fullgame_actions:
        if action not in valid_actions:
            full_strategy[fullgame_actions.index(action)] = 0.0
    if sum(full_strategy) > 0:
        full_strategy = [x / sum(full_strategy) for x in full_strategy]

    for i, action in enumerate(fullgame_actions):
        if action in valid_actions:
          strategy[valid_actions.index(action)] = full_strategy[i]
    if sum(strategy) <= 0:
        strategy = [1/len(valid_actions) for _ in range(len(valid_actions))]

    filtered_actions = actions_filter.filter()
    filtered_strategy = []
    for a, action in enumerate(valid_actions):
        if action in filtered_actions:
            filtered_strategy.append(strategy[a])
    normalizing_sum = sum(filtered_strategy)
    if normalizing_sum > 0:
        filtered_strategy = [
            p/normalizing_sum for p in filtered_strategy]
    else:
        # Reset options if all evaluated actions seem bad
        filtered_actions, filtered_strategy = valid_actions, strategy
    action = random.choices(
        filtered_actions, weights=filtered_strategy, k=1)[0]

    return action

  def get_controller_input(self, arbiter_board: Board, infostate: Infostate,
                           actions_filter: ActionsFilter):
      """
      This is for obtaining the controller's chosen action, be it human or
      bot.
      """
      valid_actions = arbiter_board.actions()
      if self.get_current_controller(arbiter_board) == Controller.RANDOM:
          action = random.choice(valid_actions)
      elif self.get_current_controller(arbiter_board) == Controller.HUMAN:
          while action not in valid_actions:
              action = input("Choose a move: ")
      elif self.get_current_controller(arbiter_board) == TestingController.MODEL:
          model = NeuralNet()
          model.load_state_dict(torch.load('../model.pth'))
          model.eval()
          action = self.get_model_decision(model, arbiter_board, infostate,
                                           actions_filter)

      return action

  def start(self, iterations: int = 1, target: int = None):
    """
    This is the starting method for the match simulation.
    """
    model_wins = 0
    for iteration in range(iterations):
      self.blue_formation = list(
                Player.get_sensible_random_formation(
                    piece_list=Ranking.SORTED_FORMATION)
            )
      self.red_formation = self._place_in_red_range(list(
          Player.get_sensible_random_formation(
              piece_list=Ranking.SORTED_FORMATION))
      )
      arbiter_board = Board(self.setup_arbiter_matrix(),
                                    player_to_move=Player.BLUE,
                                    blue_anticipating=False, red_anticipating=False)
      blue_infostate, red_infostate = MatchSimulator._starting_infostates(
                  arbiter_board)
      action, result, previous_action, previous_result, attack_location = (
                  "", "", "", "", None)  # Initialize needed values
      turn_number = 1
      branches_encountered = 0
      while not arbiter_board.is_terminal():
          self.manage_pov_switching(arbiter_board)

          MatchSimulator._print_game_status(turn_number, arbiter_board,
                                            infostates=[
                                                blue_infostate,
                                                red_infostate],
                                            pov=self.pov)
          valid_actions = arbiter_board.actions()
          branches_encountered += len(valid_actions)

          action = ""  # Initialize variable for storing chosen action
          relevant_infostate = (blue_infostate if arbiter_board.player_to_move == Player.BLUE
                                else red_infostate)

          # For the first turns of each player, choose a forward move
          if turn_number in [1, 2]:
              actions_filter = ActionsFilter(state=arbiter_board, directions=DirectionFilter(
                  back=False, right=False, left=False),
                  square_whitelist=[(x, y) for y in range(Board.COLUMNS)
                                    for x in range(Board.ROWS)])
          else:
              actions_filter = CFRTrainingSimulator._get_actions_filter(
                  arbiter_board, previous_action, previous_result, attack_location)

          action = self.get_controller_input(arbiter_board, relevant_infostate,
                                            actions_filter)
          print(f"Chosen Move: {action}")
          previous_action = action  # Store for the next iteration

          new_arbiter_board = arbiter_board.transition(action)
          arbiter_board, result, attack_location = self._process_action(
                      arbiter_board, action)
          previous_result = result  # Store for the next iteration
          blue_infostate, red_infostate = MatchSimulator._update_infostates(
              blue_infostate, red_infostate, action=action, result=result
          )
          arbiter_board = new_arbiter_board
          turn_number += 1
          print(f"Model Wins: {model_wins}")
          print(f"Iteration: {iteration}")

      MatchSimulator._print_result(arbiter_board)
      match_result = MatchSimulator._get_match_result(arbiter_board)
      if match_result == self.player_two_color: # Model controller is Player 2
        model_wins += 1
      model_color = "BLUE" if self.player_two_color == Player.BLUE else "RED"
      print(f"Model: {model_color}")

    print(f"Model Winrate: {(model_wins/iterations)*100: .2f}")

In [18]:
from constants import Ranking, Controller, POV

# Sample random formations
blue_formation = list(
    Player.get_sensible_random_formation(
        piece_list=Ranking.SORTED_FORMATION)
)
red_formation = list(
    Player.get_sensible_random_formation(
        piece_list=Ranking.SORTED_FORMATION)
)

match_simulator = ModelTestingSimulator(formations=[blue_formation, red_formation],
                                  controllers=[
                                      Controller.RANDOM, TestingController.MODEL],
                                  save_data=False,
                                  pov=POV.WORLD)
match_simulator.start(iterations=100)

Turn Number: 1

 0  b7  - bE [34mb1[0m b2  - b2  -  - 
 1  b3 b9 bD b4 b6 b8 bF b2  - 
 2  b2 bB bF b2 b5 bA bC  - b2 
 3   -  -  -  -  -  -  -  -  - 
 4   -  -  -  -  -  -  -  -  - 
 5   - r3 r7 r8 rF  - r2 r9 rB 
 6  r2 r6 r4  -  - rF rC rD  - 
 7  rA rE r2 r2 r2  - [31mr1[0m r2 r5 

     0  1  2  3  4  5  6  7  8 
Player to move: 1
Chosen Move: 2535
Model Wins: 0
Iteration: 0
Turn Number: 2

 0  b7  - bE [34mb1[0m b2  - b2  -  - 
 1  b3 b9 bD b4 b6 b8 bF b2  - 
 2  b2 bB bF b2 b5  - bC  - b2 
 3   -  -  -  -  - bA  -  -  - 
 4   -  -  -  -  -  -  -  -  - 
 5   - r3 r7 r8 rF  - r2 r9 rB 
 6  r2 r6 r4  -  - rF rC rD  - 
 7  rA rE r2 r2 r2  - [31mr1[0m r2 r5 

     0  1  2  3  4  5  6  7  8 
Player to move: 2
Chosen Move: 5242
Model Wins: 0
Iteration: 0
Turn Number: 3

 0  b7  - bE [34mb1[0m b2  - b2  -  - 
 1  b3 b9 bD b4 b6 b8 bF b2  - 
 2  b2 bB bF b2 b5  - bC  - b2 
 3   -  -  -  -  - bA  -  -  - 
 4   -  - r7  -  -  -  -  -  - 
 5   - r3  - r8 rF  - r2 r9 rB 
 6  r2 r6 r

  model.load_state_dict(torch.load('../model.pth'))


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

 0   - b2 b3 bB [34mb1[0m bE b2  - b2 
 1  bF  -  -  - b9 b6  - bD  - 
 2   - bA bC  - b5  - b7 rA  - 
 3   - r2  -  -  -  - b2  -  - 
 4   -  -  -  -  -  -  -  - r6 
 5   -  -  -  -  -  -  - r8  - 
 6  [31mr1[0m r7 r3 r2 rD rE  -  - rF 
 7   - rB r9 rC rF  - r2 r2 r2 

     0  1  2  3  4  5  6  7  8 
Player to move: 1
Chosen Move: 1718
Model Wins: 68
Iteration: 98
Turn Number: 54

 0   - b2 b3 bB [34mb1[0m bE b2  - b2 
 1  bF  -  -  - b9 b6  -  - bD 
 2   - bA bC  - b5  - b7 rA  - 
 3   - r2  -  -  -  - b2  -  - 
 4   -  -  -  -  -  -  -  - r6 
 5   -  -  -  -  -  -  - r8  - 
 6  [31mr1[0m r7 r3 r2 rD rE  -  - rF 
 7   - rB r9 rC rF  - r2 r2 r2 

     0  1  2  3  4  5  6  7  8 
Player to move: 2
Chosen Move: 2728
Model Wins: 68
Iteration: 98
Turn Number: 55

 0   - b2 b3 bB [34mb1[0m bE b2  - b2 
 1  bF  -  -  - b9 b6  -  - bD 
 2   - bA bC  - b5  - b7  - rA 
 3   - r2  -  -  -  - b2  -  - 
 4   -  -  -  -  - 