In [4]:
import subprocess
import numpy as np

def train_sarsa_agent(n_rounds=100):
    """
    Launch a training run for ``sarsa_agent`` through ``main.py play`` and
    echo the captured output.

    The command is started in the current working directory; no directory
    change is performed here, so ``main.py`` must be reachable from wherever
    the notebook kernel is running.

    :param n_rounds: Number of rounds, forwarded to ``--n-rounds``.
    """
    command = [
        "python", "main.py", "play",
        "--my-agent", "sarsa_agent",
        "--train", "1",  # Train one agent
        "--n-rounds", str(n_rounds)
    ]

    # Capture stdout/stderr as text so they can be shown once the run is over.
    result = subprocess.run(command, capture_output=True, text=True)

    print(result.stdout)
    print(result.stderr)

    # The exit status is the only reliable failure signal when the child
    # process dies without writing a traceback, so surface it explicitly.
    if result.returncode != 0:
        print(f"Training command exited with return code {result.returncode}")

In [5]:
def get_game_states_from_replay():
    """
    Simulate loading some game states for testing purposes.
    You can replace this with real game state extraction.

    Nothing is read from disk: five identical example states are built in
    memory. Each state is an independent dict with its own arrays and lists,
    so modifying one returned state never affects the others.

    :return: List of five game-state dicts with the keys 'field',
             'explosion_map', 'coins', 'bombs', 'self' and 'others'.
    """
    def _make_example_state():
        # Built fresh on every call so no arrays or lists are shared.
        return {
            'field': np.zeros((17, 17)),
            'explosion_map': np.zeros((17, 17)),
            'coins': [(5, 5), (10, 10)],
            'bombs': [((3, 3), 4), ((6, 6), 3)],
            'self': ("player", 0, True, (8, 8)),
            'others': [("rule_based_agent", 0, True, (2, 2)), ("rule_based_agent", 0, False, (14, 14))]
        }

    # Simulate a few game states
    return [_make_example_state() for _ in range(5)]

In [6]:
# Define the symmetry augmentation functions
def flip_horizontal(game_state):
    """
    Flip the game state horizontally.

    The 'field' and 'explosion_map' grids are reversed along axis 1
    (``np.fliplr``) and every x coordinate is mirrored to ``max_x - x``,
    where ``max_x`` is taken from the field's column count instead of being
    fixed to a 17-wide board.

    The dict passed in is modified in place (its keys are rebound to the
    flipped values) and the same dict is returned; pass a copy to keep the
    original.

    :param game_state: Dict with the keys 'field', 'explosion_map', 'coins',
                       'bombs', 'self' and 'others'.
    :return: The same ``game_state`` object, flipped.
    """
    # NOTE(review): reversing axis 1 while mirroring x means x is treated as
    # the column index (grid[y, x]). If the producer of game_state indexes the
    # grids as grid[x, y], the array flip and the coordinate mirror would act
    # on different axes -- confirm against the environment.
    field = game_state['field']
    max_x = np.shape(field)[1] - 1

    def mirror(pos):
        x, y = pos
        return (max_x - x, y)

    def mirror_agent(agent):
        # Agent tuples carry their position at index 3.
        return (agent[0], agent[1], agent[2], (max_x - agent[3][0], agent[3][1]))

    game_state['field'] = np.fliplr(field)
    game_state['explosion_map'] = np.fliplr(game_state['explosion_map'])
    game_state['coins'] = [mirror(coin) for coin in game_state['coins']]
    game_state['bombs'] = [(mirror(pos), t) for pos, t in game_state['bombs']]
    game_state['self'] = mirror_agent(game_state['self'])
    game_state['others'] = [mirror_agent(agent) for agent in game_state['others']]
    return game_state

def flip_vertical(game_state):
    """
    Flip the game state vertically.

    The 'field' and 'explosion_map' grids are reversed along axis 0
    (``np.flipud``) and every y coordinate is mirrored to ``max_y - y``,
    where ``max_y`` is taken from the field's row count instead of being
    fixed to a 17-high board.

    The dict passed in is modified in place (its keys are rebound to the
    flipped values) and the same dict is returned; pass a copy to keep the
    original.

    :param game_state: Dict with the keys 'field', 'explosion_map', 'coins',
                       'bombs', 'self' and 'others'.
    :return: The same ``game_state`` object, flipped.
    """
    # NOTE(review): reversing axis 0 while mirroring y means y is treated as
    # the row index (grid[y, x]). If the producer of game_state indexes the
    # grids as grid[x, y], the array flip and the coordinate mirror would act
    # on different axes -- confirm against the environment.
    field = game_state['field']
    max_y = np.shape(field)[0] - 1

    def mirror(pos):
        x, y = pos
        return (x, max_y - y)

    def mirror_agent(agent):
        # Agent tuples carry their position at index 3.
        return (agent[0], agent[1], agent[2], (agent[3][0], max_y - agent[3][1]))

    game_state['field'] = np.flipud(field)
    game_state['explosion_map'] = np.flipud(game_state['explosion_map'])
    game_state['coins'] = [mirror(coin) for coin in game_state['coins']]
    game_state['bombs'] = [(mirror(pos), t) for pos, t in game_state['bombs']]
    game_state['self'] = mirror_agent(game_state['self'])
    game_state['others'] = [mirror_agent(agent) for agent in game_state['others']]
    return game_state

def transpose(game_state):
    """
    Transpose the game state: both grids are transposed and the two
    components of every position are exchanged.

    The dict passed in is updated in place and also returned.
    """
    def with_swapped_position(agent):
        # Agent tuples carry their position at index 3.
        position = agent[3]
        return (agent[0], agent[1], agent[2], (position[1], position[0]))

    for grid_key in ('field', 'explosion_map'):
        game_state[grid_key] = np.transpose(game_state[grid_key])

    game_state['coins'] = [(cy, cx) for cx, cy in game_state['coins']]
    game_state['bombs'] = [
        ((by, bx), timer) for (bx, by), timer in game_state['bombs']
    ]
    game_state['self'] = with_swapped_position(game_state['self'])
    game_state['others'] = [
        with_swapped_position(other) for other in game_state['others']
    ]
    return game_state

# Augment a game state with three symmetry transforms (horizontal flip, vertical flip, transpose)
def augment_game_state(game_state):
    """
    Build the list of symmetry variants of ``game_state``.

    The result holds, in order: the original state object itself, then the
    outputs of ``flip_horizontal``, ``flip_vertical`` and ``transpose``, each
    applied to its own shallow copy of the original dict.
    """
    symmetry_transforms = (flip_horizontal, flip_vertical, transpose)
    variants = [
        apply_symmetry(game_state.copy()) for apply_symmetry in symmetry_transforms
    ]
    return [game_state] + variants

# Define the training command with subprocess
def run_training(agent_name, n_rounds=10, train_mode=1, augment=False):
    """
    Run training of the agent, with an option to augment data using symmetries.

    The game is started as a subprocess (``python main.py play ...``) in the
    current working directory; its captured stdout and stderr are printed
    once it finishes, followed by a notice if it exited with a non-zero
    return code.

    When ``augment`` is true, the example states from
    ``get_game_states_from_replay`` are expanded with ``augment_game_state``
    and each resulting state is printed. Nothing is stored or fed back into
    training yet.

    :param agent_name: The name of the agent to be trained.
    :param n_rounds: Number of training rounds.
    :param train_mode: Whether to train (1) or not (0).
    :param augment: Whether to augment the training data.
    """

    # Define the training command
    command = [
        "python", "main.py", "play",
        "--agents", agent_name,
        "--train", str(train_mode),
        "--n-rounds", str(n_rounds),
        "--save-replay"  # Option to save replay
    ]

    # Run the training process
    result = subprocess.run(command, capture_output=True, text=True)

    # Print the output
    print(result.stdout)
    print(result.stderr)

    # The exit status is the only reliable failure signal when the child
    # process dies without writing a traceback, so surface it explicitly.
    if result.returncode != 0:
        print(f"Training command exited with return code {result.returncode}")

    # Augment the training data if requested
    if augment:
        # Assume we get some training data (game states)
        game_states = get_game_states_from_replay()  # Custom function to get game states

        for state in game_states:
            augmented_states = augment_game_state(state)

            # Here we would save or process augmented states for training
            # e.g., saving them in a buffer, or training models directly
            for aug_state in augmented_states:
                # Save or train with augmented state
                print("Augmented state:", aug_state)


# Demo run: 5 rounds with "tpl_agent" in training mode, then print the
# symmetry-augmented example states.
run_training("tpl_agent", n_rounds=5, train_mode=1, augment=True)




  0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/5 [00:02<?, ?it/s]

Augmented state: {'field': array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 