### Imports

In [1]:
from ma_v03 import MA_PARTY
import supersuit as ss
from stable_baselines3 import PPO
import time
import csv
import pandas as pd

# Helper classes
import track_time
import analyse

In [2]:
# Track CPU/RAM
import threading
import psutil
import time
from datetime import datetime

def display_cpu():
    global running
    global cpu_total
    global ram_total
    global start_time

    running = True
    cpu_total = 0
    ram_total = 0
    start_time = time.strftime('%H%M%S')

    # start loop
    while running:
        "CPU-Auslastung oder -Nutzung bezeichnet die Zeit, die ein Computer benötigt, um bestimmte Informationen zu verarbeiten."
        cpu_temp = psutil.cpu_percent(interval=1)
        # print('Die CPU-Auslastung beträgt:', cpu_temp)
        cpu_total += cpu_temp

        "RAM-Auslastung oder HAUPTSPEICHER-AUSLASTUNG bezeichnet dagegen die Zeit, die RAM von einem bestimmten System zu einem bestimmten Zeitpunkt genutzt wird."
        ram_temp = psutil.virtual_memory()[2]
        # print('RAM memory % used:', ram_temp)
        ram_total += ram_temp

def start():
    global t

    # create thread and start it
    t = threading.Thread(target=display_cpu)
    t.start()
    
def stop():
    global running
    global end_time
    global t

    end_time = time.strftime('%H%M%S')

    # use `running` to stop loop in thread so thread will end
    running = False

    # wait for thread's end
    t.join()

    print("############################################")
    print("############################################")
    print("############################################")
    print("############################################")
    cpu_temp = round(cpu_total, 2)
    ram_temp = round(ram_total, 2)
    print(f"cpu_total: {cpu_temp}")
    print(f"ram_total: {ram_temp}")

    t1 = datetime.strptime(start_time, '%H%M%S')
    t2 = datetime.strptime(end_time, '%H%M%S')
    duration = t2 - t1
    seconds = duration.total_seconds()

    if seconds > 0:
        cpu_usage = round(cpu_total / seconds, 2)
        ram_usage = round(ram_total / seconds, 2)
        ram = round(ram_usage / 100 * 32, 2)

        print(f"cpu: {cpu_usage}\nram: {ram_usage} % {ram} GB\ntime: {seconds} seconds")

### Train MARL Model

In [6]:
def train(env_fn, steps: int = 10_000, seed: int = 0, **env_kwargs):
    env = env_fn(**env_kwargs)
    
    # env = parallel_to_aec(env)
    # parallel_env = parallel_wrapper_fn(env)

    print(f"Preparing training on {str(env.metadata['name'])}.")

    # env = ss.black_death_v3(env)
    
    obs, _ = env.reset(seed)
    
    # env = ss.agent_indicator_v0(env)
    env = ss.pettingzoo_env_to_vec_env_v1(env)
    
    model = PPO(
        "MlpPolicy",
        env,
        verbose=3,
        batch_size=256,
        tensorboard_log="./ppo_ma_party_tensorboard/",
    )
    
    model.learn(total_timesteps=steps)
    model_name = f"./ma_models/{env.unwrapped.metadata.get('name')}_{steps}_{time.strftime('%Y%m%d-%H%M%S')}"
    model.save(model_name)

    print("Model has been saved.")
    print(f"Finished training on {str(env.unwrapped.metadata['name'])}.")
    print(model_name)

    env.close()
    
model_name = None
start()
train(env_fn=MA_PARTY, steps=100_000)
stop()

Preparing training on marl_heroes_vs_goblins_v01.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./ppo_ma_party_tensorboard/PPO_81
-----------------------------
| time/              |      |
|    fps             | 832  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
Model has been saved.
Finished training on marl_heroes_vs_goblins_v01.
./ma_models/marl_heroes_vs_goblins_v01_100_20240803-124016
############################################
############################################
############################################
############################################
cpu_total: 240.1
ram_total: 175.2
cpu: 60.02
ram: 43.8 % 14.02 GB
time: 4.0 seconds


### Test MARL MODEL

In [11]:
model = PPO.load("marl_heroes_vs_goblins_v01_1000_20240803-123913")
env = MA_PARTY(render_mode="human") # , debug_mode=True)
episode_data = []
step_data = []

reward_total = 0
all_heroes_alive = 0
episodes = 100_000

for episode in range(1, episodes):
    # print(f"Run {run}")
    print(episode)
    observations, infos = env.reset()
    while env.agents:
        actions = {agent: model.predict(observations[agent])[0].item() for agent in env.agents}       
        # actions = {agent: env.action_space(agent).sample(infos[agent]["action_mask"]) for agent in env.agents} # action_mask
        # actions = {agent: env.action_space(agent).sample() for agent in env.agents} # random

        observations, rewards, terminations, truncations, infos = env.step(actions)
        
        step_data.append({"rogue": actions["rogue"], "fighter": actions["fighter"], "wizard": actions["wizard"], "cleric": actions["cleric"]})

    episode_data.append({"episode": episode, "reward": rewards["rogue"]})
    env.close()

# write reward per episode data
with open('data/marl_1_000.csv', 'w', newline='') as csvfile:
    fieldnames = ['episode', 'reward']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(episode_data)

# write action per step data
with open('data/marl_1_000_actions.csv', 'w', newline='') as csvfile:
    fieldnames = ['rogue', 'fighter', 'wizard', 'cleric']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(step_data)

# 
# 193 min 10_000
# 22 min 100_000

FileNotFoundError: [Errno 2] No such file or directory: 'marl_heroes_vs_goblins_v01_1000_20240803-123913.zip'

### Analyse the data

In [6]:
# Load the data from the CSV file
data = pd.read_csv('data/marl_10_000.csv')

# Analyse the data
analyse.analyze_data(data)

############### RESULTS ###############
Mean: 47.29
Q1: 40.0
Median: 60.0
Q3: 100.0
Min: -100
Max: 100
IQR (Interquartile Range): 60.0

All heroes survive: 45.69 %
3 heroes survive: 23.03 %
2 heroes survive: 9.37 %
1 heroes survive: 3.27 %
Heroes die: 14.59 %
Heros run away: 4.05 %


In [8]:
# Load the data
data = pd.read_csv('data/marl_10_000_actions.csv')

# Analyse the data
analyse.analyze_action_spread(data)


rogue:
Counts:
  Action 0: 371751
  Action 1: 892021
  Action 2: 943178
  Action 3: 774523
  Action 4: 199008
  Action 5: 307287
  Action 6: 499587
  Action 7: 304509
  Action 8: 225435
Frequencies (%):
  Action 0: 8.23%
  Action 1: 19.75%
  Action 2: 20.88%
  Action 3: 17.15%
  Action 4: 4.41%
  Action 5: 6.80%
  Action 6: 11.06%
  Action 7: 6.74%
  Action 8: 4.99%

fighter:
Counts:
  Action 0: 371447
  Action 1: 892961
  Action 2: 943052
  Action 3: 772241
  Action 4: 199902
  Action 5: 306621
  Action 6: 500960
  Action 7: 303366
  Action 8: 226749
Frequencies (%):
  Action 0: 8.22%
  Action 1: 19.77%
  Action 2: 20.88%
  Action 3: 17.10%
  Action 4: 4.43%
  Action 5: 6.79%
  Action 6: 11.09%
  Action 7: 6.72%
  Action 8: 5.02%

wizard:
Counts:
  Action 0: 372291
  Action 1: 891654
  Action 2: 944124
  Action 3: 773108
  Action 4: 198978
  Action 5: 306918
  Action 6: 500591
  Action 7: 303230
  Action 8: 226405
Frequencies (%):
  Action 0: 8.24%
  Action 1: 19.74%
  Action 2: 20.9