### Imports

In [1]:
from ma_v03 import MA_PARTY
import supersuit as ss
from stable_baselines3 import PPO
import time
import csv
import pandas as pd

# Helper classes
import track_time
import analyse

In [None]:
# Track CPU/RAM
import threading
import psutil
import time
from datetime import datetime

# Event used by stop() to ask the sampling thread launched by start() to finish.
_stop_event = None
# Handle of the most recently started sampling thread (None until start() runs).
t = None


def display_cpu(stop_event=None):
    """Accumulate CPU and RAM utilisation samples until asked to stop.

    This is the body of the background thread created by ``start()``. On
    entry it resets the module-level accumulators (``cpu_total``,
    ``ram_total``, ``sample_count``) and records the start time; each loop
    iteration then adds one CPU-percent and one RAM-percent reading.

    Args:
        stop_event: optional ``threading.Event``. When it is set the loop
            ends. Unlike the ``running`` flag alone, the event cannot be
            overwritten by the ``running = True`` initialisation below, so a
            ``stop()`` issued before this thread got scheduled is not lost.
            When omitted, only the ``running`` flag ends the loop.
    """
    global running
    global cpu_total
    global ram_total
    global start_time
    global sample_count
    global _started_at

    running = True
    cpu_total = 0
    ram_total = 0
    sample_count = 0
    start_time = time.strftime('%H%M%S')
    # Monotonic timestamp: immune to midnight roll-over and clock changes.
    _started_at = time.monotonic()

    # start loop
    while running:
        if stop_event is not None and stop_event.is_set():
            break

        # CPU utilisation in percent, measured over a one-second window
        # (the call blocks for that second, which also paces the loop).
        cpu_total += psutil.cpu_percent(interval=1)

        # RAM utilisation: percentage of main memory in use at this moment.
        ram_total += psutil.virtual_memory().percent

        sample_count += 1

    running = False


def start():
    """Start sampling CPU/RAM usage in a background thread.

    Does nothing if a sampling thread is already alive, so the accumulators
    are never fed by two threads at once.
    """
    global t
    global _stop_event

    if t is not None and t.is_alive():
        return

    # create thread and start it; daemon so a forgotten stop() cannot keep
    # the interpreter alive at shutdown
    _stop_event = threading.Event()
    t = threading.Thread(target=display_cpu, args=(_stop_event,), daemon=True)
    t.start()


def stop():
    """Stop the sampling thread and print the usage summary.

    Prints the accumulated totals, then (if at least one sample was taken)
    the average CPU %, average RAM % with its equivalent in GB of the
    machine's installed memory, and the elapsed time in seconds.
    """
    global running
    global end_time
    global t

    if t is None:
        print("Resource tracking was never started; nothing to report.")
        return

    end_time = time.strftime('%H%M%S')
    stopped_at = time.monotonic()

    # ask the loop in the thread to end: the flag for backward compatibility,
    # the event so the request survives the thread's own initialisation
    running = False
    _stop_event.set()

    # wait for thread's end
    t.join()

    print("############################################")
    print("############################################")
    print("############################################")
    print("############################################")
    cpu_temp = round(cpu_total, 2)
    ram_temp = round(ram_total, 2)
    print(f"cpu_total: {cpu_temp}")
    print(f"ram_total: {ram_temp}")

    # max() guards the corner case where stop() ran before the thread had
    # recorded its start timestamp.
    seconds = round(max(0.0, stopped_at - _started_at), 2)

    if sample_count > 0:
        # Average over the samples actually taken, not over wall-clock time.
        cpu_usage = round(cpu_total / sample_count, 2)
        ram_usage = round(ram_total / sample_count, 2)
        # Convert the percentage to GB using the installed memory size
        # instead of assuming a 32 GB machine.
        total_ram_gb = psutil.virtual_memory().total / 1024 ** 3
        ram = round(ram_usage / 100 * total_ram_gb, 2)

        print(f"cpu: {cpu_usage}\nram: {ram_usage} % {ram} GB\ntime: {seconds} seconds")

### Train MARL Model

In [4]:
def train(env_fn, steps: int = 10_000, seed: int = 0, **env_kwargs):
    """Train a PPO policy on the environment built by ``env_fn`` and save it.

    Args:
        env_fn: callable that returns the multi-agent environment; it is
            called as ``env_fn(**env_kwargs)``.
        steps: value passed to ``model.learn`` as ``total_timesteps``.
        seed: value passed to the environment's ``reset`` before wrapping.
        **env_kwargs: forwarded unchanged to ``env_fn``.

    Returns:
        str: the path given to ``model.save`` (environment name, step count
        and a timestamp under ``./ma_models/``), so the caller can load the
        model again.

    The environment is closed even when training or saving raises.
    """
    env = env_fn(**env_kwargs)

    try:
        print(f"Preparing training on {str(env.metadata['name'])}.")

        # Seed the raw environment before it gets wrapped.
        env.reset(seed)

        # Convert to a vectorised environment for Stable-Baselines3.
        # Other SuperSuit wrappers tried earlier and left disabled:
        # ss.black_death_v3, ss.agent_indicator_v0.
        env = ss.pettingzoo_env_to_vec_env_v1(env)

        model = PPO(
            "MlpPolicy",
            env,
            verbose=3,
            batch_size=256,
            tensorboard_log="./ppo_ma_party_tensorboard/",
        )

        model.learn(total_timesteps=steps)
        model_name = f"./ma_models/{env.unwrapped.metadata.get('name')}_{steps}_{time.strftime('%Y%m%d-%H%M%S')}"
        model.save(model_name)

        print("Model has been saved.")
        print(f"Finished training on {str(env.unwrapped.metadata['name'])}.")
        print(model_name)
    finally:
        # `env` is the wrapped environment if wrapping succeeded, otherwise
        # the raw one; either way release it.
        env.close()

    return model_name
    
# Run the training while the tracker samples CPU/RAM usage. The tracker is
# stopped in `finally` so its background thread does not keep running when
# training raises.
model_name = None
track_time.start()
try:
    # Stays None if train() does not return the saved model path.
    model_name = train(env_fn=MA_PARTY, steps=100_000)
finally:
    track_time.stop()

NameError: name 'threading' is not defined

### Test MARL Model

In [14]:
def _write_csv(path, fieldnames, rows):
    """Write a list of dict rows to `path` as CSV, preceded by a header line."""
    with open(path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


# Evaluate a saved PPO model: play `episodes` episodes, recording the action
# each hero takes on every step and one reward value per episode.
model = PPO.load("./ma_models/marl_heroes_vs_goblins_v01_20240731-121106")
env = MA_PARTY(render_mode="human") # , debug_mode=True)
episode_data = []
step_data = []

reward_total = 0
all_heroes_alive = 0
episodes = 100000

try:
    # Episodes are numbered 1..episodes inclusive, so exactly `episodes` run.
    for episode in range(1, episodes + 1):
        observations, infos = env.reset()
        while env.agents:

            actions = {agent: model.predict(observations[agent])[0].item() for agent in env.agents}
            # actions = {agent: env.action_space(agent).sample(infos[agent]["action_mask"]) for agent in env.agents} # action_mask
            # actions = {agent: env.action_space(agent).sample() for agent in env.agents} # random

            observations, rewards, terminations, truncations, infos = env.step(actions)

            step_data.append({"rogue": actions["rogue"], "fighter": actions["fighter"], "wizard": actions["wizard"], "cleric": actions["cleric"]})

        # The rogue's reward from the final step is stored as the episode
        # reward -- presumably the reward is shared by the team; TODO confirm.
        episode_data.append({"episode": episode, "reward": rewards["rogue"]})
finally:
    # Close once, after all episodes, instead of after every single episode.
    env.close()

# write reward per episode data
_write_csv('data/marl_100_000.csv', ['episode', 'reward'], episode_data)

# write action per step data
_write_csv('data/marl_100_000_actions.csv', ['rogue', 'fighter', 'wizard', 'cleric'], step_data)

Reward per round: 88.03
Success rate: 81.8 %


### Analyse the data

In [57]:
# Read the per-episode rewards CSV into a DataFrame
rewards_csv = 'data/marl_100_000.csv'
data = pd.read_csv(rewards_csv)

# Hand the frame to the project's analysis helper, which prints the results
analyse.analyze_data(data)

############### RESULTS ###############
Mean: 88.12
Q1: 100.0
Median: 100.0
Q3: 100.0
Min: -100
Max: 100
IQR (Interquartile Range): 0.0

All heroes survive: 81.88 %
3 heroes survive: 12.51 %
2 heroes survive: 2.9 %
1 heroes survive: 0.1 %
Heroes die: 2.3 %
Heros run away: 0.3 %


In [2]:
# Read the per-step action log CSV into a DataFrame
actions_csv = 'data/marl_100_000_actions.csv'
data = pd.read_csv(actions_csv)

# Hand the frame to the project's helper that reports the action spread
analyse.analyze_action_spread(data)


rogue:
Counts:
  Action 0: 46
  Action 1: 386
  Action 2: 5589
  Action 3: 49
  Action 4: 9
  Action 5: 24
  Action 6: 11
  Action 7: 11
  Action 8: 21
Frequencies (%):
  Action 0: 0.75%
  Action 1: 6.28%
  Action 2: 90.94%
  Action 3: 0.80%
  Action 4: 0.15%
  Action 5: 0.39%
  Action 6: 0.18%
  Action 7: 0.18%
  Action 8: 0.34%

fighter:
Counts:
  Action 0: 45
  Action 1: 398
  Action 2: 5564
  Action 3: 53
  Action 4: 11
  Action 5: 27
  Action 6: 21
  Action 7: 8
  Action 8: 19
Frequencies (%):
  Action 0: 0.73%
  Action 1: 6.48%
  Action 2: 90.53%
  Action 3: 0.86%
  Action 4: 0.18%
  Action 5: 0.44%
  Action 6: 0.34%
  Action 7: 0.13%
  Action 8: 0.31%

wizard:
Counts:
  Action 0: 46
  Action 1: 414
  Action 2: 5564
  Action 3: 47
  Action 4: 7
  Action 5: 19
  Action 6: 19
  Action 7: 8
  Action 8: 22
Frequencies (%):
  Action 0: 0.75%
  Action 1: 6.74%
  Action 2: 90.53%
  Action 3: 0.76%
  Action 4: 0.11%
  Action 5: 0.31%
  Action 6: 0.31%
  Action 7: 0.13%
  Action 8: 0.36%