In [1]:
from tpg.trainer import Trainer
from tpg.agent import Agent
from tpg.gridworld import GridWorld
from tpg.memory import get_memory
from tpg.configurations import DefaultConfiguration
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

In [2]:
width = 5
height = 5
targetCell = (4, 4)
walls = [(2, 2), (3, 2), (1, 4), (4, 3)]  # Define wall positions
epsilon = 0.25
env = GridWorld(width, height, targetCell, walls)

In [3]:
customConfig = DefaultConfiguration()
customConfig.teamPopSize = 20

numGenerations = 100

In [4]:
trainer = Trainer(actions=range(5), config=customConfig)

rewardStats = []
numStepsStats= []

for generation in range(numGenerations):
    
    rewards = [] # new list every gen
    numSteps = [] # new list every gen
    
    agents = trainer.getAgents()
    
    get_memory().buffer_reset()
    
    while True:     
        teamNum = len(agents)
        agent = agents.pop()
                
        if agent is None:
            break # no more agents, proceed to next gen
        
        env.reset()
        score = 0
        
        
        i = 0
        
        print(f"Gen #{generation}, Team #{teamNum}, Score: {score}")
            
        while not env.isTerminal() and i < 500:
            
            i += 1
            
            #env.display()      
            
            agent.act(env.getState())[1]
            
            #if tpg_response is None:
             #   action = 0
            #else:
            #    action = np.argmax(tpg_response)
            
            action = 0
            
            nextState, reward = env.step(action)
            
            
            
            score += reward
        
        if i == 500:
            print("Ran out of turns... giving up")
        
        get_memory().display()
            
        agent.reward(score)
        
        rewards.append(score)
        numSteps.append(i)
        
        print(f"Finished after {i} steps with cumulative reward {score}...")
        env.display()
        
        if len(agents) == 0:
            break
    
    rewardStats.append((min(rewards), max(rewards), sum(rewards)/len(rewards)))
    numStepsStats.append((min(numSteps), max(numSteps), sum(numSteps)/len(numSteps)))
    trainer.evolve()
            
        

Gen #0, Team #20, Score: 0
Ran out of turns... giving up
Buffer for program 333
[-10.19858376   0.          -2.03108758   0.49641458   1.
   0.12180357   0.5          7.          -0.19263143   0.
   3.           0.           0.           0.           0.
   0.           0.25         0.           0.5         14.
   0.35403671   0.           3.           0.12180357  -0.41614684]
Buffer for program 215
[-10.19858376   0.          -2.03108758   0.49641458   1.
   0.12180357   0.5          7.          -0.19263143   0.
   3.           0.           0.           0.           0.
   0.           0.25         0.           0.5         14.
   0.35403671   0.           3.           0.12180357  -0.41614684]
Buffer for program 73
[-10.19858376   0.          -2.03108758   0.49641458   1.
   0.12180357   0.5          7.          -0.19263143   0.
   3.           0.           0.           0.           0.
   0.           0.25         0.           0.5         14.
   0.35403671   0.           3.           0.1

  regs[dest] = x*2
  regs[dest] = x+y


Ran out of turns... giving up
Buffer for program 333
[3.00000000e+000 0.00000000e+000 0.00000000e+000 2.00000000e+000
 3.00000000e+000 0.00000000e+000 2.00000000e+000 3.00000000e+000
 1.79769313e+308 0.00000000e+000 3.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 2.00000000e+000 2.00000000e+000
 3.00000000e+000 3.00000000e+000 0.00000000e+000 3.00000000e+000
 0.00000000e+000 3.00000000e+000 3.00000000e+000 1.79769313e+308
 3.00000000e+000]
Buffer for program 215
[3.00000000e+000 0.00000000e+000 0.00000000e+000 2.00000000e+000
 3.00000000e+000 0.00000000e+000 2.00000000e+000 3.00000000e+000
 1.79769313e+308 0.00000000e+000 3.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 2.00000000e+000 2.00000000e+000
 3.00000000e+000 3.00000000e+000 0.00000000e+000 3.00000000e+000
 0.00000000e+000 3.00000000e+000 3.00000000e+000 1.79769313e+308
 3.00000000e+000]
Buffer for program 73
[3.00000000e+000 0.00000000e+000 0.00000000e+000 2.00000000e+000
 3.00000000e+000 0.00

  regs[dest] = x-y


Ran out of turns... giving up
Buffer for program 635
[ 5.02269660e+299  0.00000000e+000  5.40302306e-001  0.00000000e+000
  0.00000000e+000  3.15862545e+000  0.00000000e+000  3.00000000e+000
  3.00000000e+000  0.00000000e+000  2.00000000e+000  3.00000000e+000
 -2.25000000e+000  0.00000000e+000  1.00000000e+000  0.00000000e+000
  0.00000000e+000  0.00000000e+000  0.00000000e+000  5.40302306e-001
  0.00000000e+000  2.00000000e+000  3.48539752e+000  0.00000000e+000
  1.00000000e+000]
Buffer for program 166
[ 5.02269660e+299  0.00000000e+000  5.40302306e-001  0.00000000e+000
  0.00000000e+000  3.15862545e+000  0.00000000e+000  3.00000000e+000
  3.00000000e+000  0.00000000e+000  2.00000000e+000  3.00000000e+000
 -2.25000000e+000  0.00000000e+000  1.00000000e+000  0.00000000e+000
  0.00000000e+000  0.00000000e+000  0.00000000e+000  5.40302306e-001
  0.00000000e+000  2.00000000e+000  3.48539752e+000  0.00000000e+000
  1.00000000e+000]
Buffer for program 267
[ 5.02269660e+299  0.00000000e+000 

KeyboardInterrupt: 

In [None]:
stepInfo = np.array(numStepsStats)
rewardInfo = np.array(rewardStats)

stepInfo.shape, rewardInfo.shape

min_rewards = rewardInfo[:, 0]
max_rewards = rewardInfo[:, 1]
avg_rewards = rewardInfo[:, 2]

min_steps = stepInfo[:, 0]
max_steps = stepInfo[:, 1]
avg_steps = stepInfo[:, 2]

# Create a figure with subplots
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 6))


# Plot rewards over time
episodes = range(1, len(min_rewards) + 1)
axes[0].plot(episodes, min_rewards, label='Min Rewards', color='#1f78b4', alpha=0.8)
axes[0].plot(episodes, max_rewards, label='Max Rewards', color='#33a02c', alpha=0.8)
axes[0].plot(episodes, avg_rewards, label='Avg Rewards', color='#e31a1c', alpha=0.8)
axes[0].set_title('Rewards over Time')
axes[0].set_xlabel('Generation')
axes[0].set_ylabel('Reward')
axes[0].legend(loc='upper right')
axes[0].grid(True, linestyle='--', alpha=0.6)

# Plot steps over time
axes[1].plot(episodes, min_steps, label='Min Steps', color='#ff7f00', alpha=0.8)
axes[1].plot(episodes, max_steps, label='Max Steps', color='#6a3d9a', alpha=0.8)
axes[1].plot(episodes, avg_steps, label='Avg Steps', color='#fdbf6f', alpha=0.8)
axes[1].set_title('Steps over Time')
axes[1].set_xlabel('Generation')
axes[1].set_ylabel('Steps')
axes[1].legend(loc='upper right')
axes[1].grid(True, linestyle='--', alpha=0.6)

# Adjust layout
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Combine the data into a DataFrame for Seaborn
reward_data = np.concatenate([min_rewards, max_rewards, avg_rewards])
reward_type = np.repeat(['Min', 'Max', 'Avg'], len(min_rewards))
episode_numbers_reward = np.tile(range(1, len(min_rewards) + 1), 3)

steps_data = np.concatenate([min_steps, max_steps, avg_steps])
steps_type = np.repeat(['Min', 'Max', 'Avg'], len(min_steps))
episode_numbers_steps = np.tile(range(1, len(min_steps) + 1), 3)

df_reward = pd.DataFrame({'Episode': episode_numbers_reward, 'Reward': reward_data, 'Type': reward_type})
df_steps = pd.DataFrame({'Episode': episode_numbers_steps, 'Steps': steps_data, 'Type': steps_type})

# Create a figure with subplots
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 6))

# Create violin plots for rewards
sns.violinplot(x='Type', y='Reward', data=df_reward, color='skyblue', inner='quartile', ax=axes[0])
axes[0].set_title('Distribution of Rewards')
axes[0].set_xlabel('Reward Type')
axes[0].set_ylabel('Reward')
axes[0].grid(True, linestyle='--', alpha=0.6)

# Create violin plots for steps
sns.violinplot(x='Type', y='Steps', data=df_steps, palette="Blues", inner='quartile', ax=axes[1])
axes[1].set_title('Distribution of Steps')
axes[1].set_xlabel('Step Type')
axes[1].set_ylabel('Steps')
axes[1].grid(True, linestyle='--', alpha=0.6)

# Adjust layout
plt.tight_layout()

# Show the plot
plt.show()