In [None]:
# Required for importing modules from parent directory
import os
import sys

# `__file__` is not defined inside a notebook kernel, so resolve the notebook
# location from the working directory instead of routing the literal string
# "__file__" through abspath/dirname (which yields the same directory).
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
print(parent_dir)

# Guarded so that re-running this cell does not stack duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
import ast
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Parent of the kernel's working directory; the data paths used further down
# are built relative to it. `cwd` is a classmethod, so no instance is needed.
ROOT = Path.cwd().parent
ROOT

In [None]:
def read_log(path: Path) -> list:
    """Read a JSON-lines log file into a list of decoded entries.

    Every non-blank line of ``path`` is decoded with ``json.loads``. The
    function is best-effort and never raises for bad input: a line that is
    not valid JSON is reported on stdout and skipped, and a failure to open
    or read the file is reported on stdout, after which whatever was decoded
    so far (possibly nothing) is returned.

    Args:
        path: Location of the log file, one JSON document per line.

    Returns:
        The decoded entries, in file order.
    """
    entries = []
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(path, "r", encoding="utf-8") as file:
            for line in file:
                payload = line.strip()
                if not payload:
                    # A blank line carries no entry and is not a decode error.
                    continue
                try:
                    entries.append(json.loads(payload))
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON line: {e}")
    except Exception as e:
        # Kept broad on purpose: callers rely on getting a list back even
        # when the file is missing or unreadable.
        print(f"Error reading JSON log file: {e}")
    return entries


def add_rewards(info, player_id: int = 0, distance_scale: float = 16.5) -> float:
    """Compute a distance-based bonus for one player from a logged ``info`` entry.

    The bonus is ``1 - d / distance_scale`` where ``d`` is the Euclidean
    distance between the player's position and the ball position stored under
    ``info[player_id]``. It is 1 when the two positions coincide, 0 when they
    are ``distance_scale`` apart, and negative beyond that.

    Args:
        info: Indexable by ``player_id``; each item provides
            ``["player_info"]["position"]`` and ``["ball_info"]["position"]``.
        player_id: Which player's entry in ``info`` to evaluate.
        distance_scale: Distance at which the bonus reaches zero. The default
            16.5 is the value that used to be hard-coded here (presumably
            tied to the pitch dimensions -- TODO confirm).

    Returns:
        The bonus as a plain Python ``float``.

    Raises:
        ValueError: If ``distance_scale`` is not strictly positive.
    """
    if distance_scale <= 0:
        raise ValueError("distance_scale must be positive")

    player_state = info[player_id]
    player_pos = np.array(player_state["player_info"]["position"])
    ball_pos = np.array(player_state["ball_info"]["position"])
    distance = np.linalg.norm(player_pos - ball_pos)

    # Cast so the result is a builtin float rather than a numpy scalar.
    return float(1 - distance / distance_scale)


def load_log(path: Path, calculate_reward: bool = False, num_players: int = 4):
    """Load a log file and parse its literal-encoded fields.

    Entries are read with ``read_log``; the ``"scores"``, ``"reward"`` and
    ``"info"`` values of every entry are then replaced by the result of
    ``ast.literal_eval`` on them. The entries are modified in place.

    Args:
        path: Log file to load.
        calculate_reward: When true, the bonus returned by ``add_rewards`` is
            added to ``scores[player]`` for each player index.
        num_players: Number of player indices (``0 .. num_players - 1``) that
            receive the bonus. Defaults to 4, the count that used to be
            hard-coded here.

    Returns:
        The list of parsed entries.
    """
    entries = read_log(path)

    for element in entries:
        # Same parse order as before: scores, reward, info.
        for field in ("scores", "reward", "info"):
            element[field] = ast.literal_eval(element[field])

        if calculate_reward:
            for player in range(num_players):
                element["scores"][player] += add_rewards(element["info"], player)

    return entries

In [None]:
# Log file of each run, resolved against ROOT.
a2c_log_path = ROOT / "src/visualization/data/a2c_v1/logs_a2c.json"
ddpg_log_path = ROOT / "src/visualization/data/ddpg_v1/logs_ddpg.json"
dqn_log_path = ROOT / "src/visualization/data/dqn_v1/logs_dqn.json"
ppo_log_path = ROOT / "src/visualization/data/ppo_v1/logs_ppo.json"
baseline_log_path = ROOT / "src/visualization/data/baseline/logs.json"

a2c = load_log(a2c_log_path)
# NOTE: `ddpq` holds the DDPG run; the name is kept because later cells read it.
ddpq = load_log(ddpg_log_path)
dqn = load_log(dqn_log_path)
ppo = load_log(ppo_log_path)
# Only the baseline log gets the extra distance bonus added to its scores.
baseline = load_log(baseline_log_path, calculate_reward=True)

# Number of entries per run, in load order (shown as the cell output).
tuple(len(run) for run in (a2c, ddpq, dqn, ppo, baseline))

### Visualize reward over time

In [None]:
# Legend labels and the runs they describe; the two lists are index-aligned,
# so any change to one must be mirrored in the other.
labels = ["A2C", "DDPG", "DQN", "PPO", "Baseline"]
datasets = [a2c, ddpq, dqn, ppo, baseline]


def plot_with_confidence_interval(episodes, rewards, label, color, window_size: int = 10):
    """Plot the rolling mean of ``rewards`` with a shaded variability band.

    The curve is the rolling mean over ``window_size`` consecutive values.
    The shaded band spans one rolling standard deviation either side of that
    mean (it is a +/- 1 std band, not a statistical confidence interval).
    No band is drawn for the series labelled ``"Baseline"``.

    Drawing goes to the current pyplot axes; nothing is returned.

    Args:
        episodes: X values, one per reward; must support slicing.
        rewards: Y values to smooth.
        label: Legend label of the curve.
        color: Matplotlib colour used for both the curve and the band.
        window_size: Length of the rolling window. Defaults to 10, the value
            that used to be hard-coded here.
    """
    rolling = pd.Series(rewards).rolling(window=window_size)

    # With pandas' default min_periods, the first window_size - 1 rolling
    # values are NaN, so drop them. `.iloc` makes the slice explicitly
    # positional instead of relying on how Series[...] treats integer slices.
    start = window_size - 1
    mean = rolling.mean().iloc[start:]
    std = rolling.std().iloc[start:]
    truncated_episodes = episodes[start:]

    plt.plot(truncated_episodes, mean, label=label, color=color)

    if label != "Baseline":
        plt.fill_between(
            truncated_episodes,
            mean - std,
            mean + std,
            color=color,
            alpha=0.1,
        )


# One colour per run, index-aligned with `datasets` and `labels`.
colors = ["blue", "orange", "green", "red", "gray"]

for dataset, label, color in zip(datasets, labels, colors):
    # Index 0 of each entry's scores, and that entry's episode counter.
    rewards = [log["scores"][0] for log in dataset]
    episodes = [log["episode"] for log in dataset]

    plot_with_confidence_interval(episodes, rewards, label=label, color=color)

# Axis range and annotations for the combined figure.
plt.xlim(0, 200)
plt.xlabel("k Episodes")
plt.ylabel("Reward")
plt.title("Rewards per Episode with Confidence Intervals")
plt.legend()
plt.show()

In [None]:
# How many of the most recent log entries of each run feed the violin plot.
# The slice below used 25000 while the title claimed 10,000; the title is now
# derived from this constant so the label always matches the plotted data.
N_LAST_EPISODES = 25000

recent_rewards = [
    [log["scores"][0] for log in dataset][-N_LAST_EPISODES:] for dataset in datasets
]
# Old (misleading) name kept as an alias for any later cell that still reads it.
last_10000_rewards = recent_rewards

violin_parts = plt.violinplot(recent_rewards, showmeans=False, showmedians=True)
plt.xticks(np.arange(1, len(labels) + 1), labels)

# Colour each violin like its curve in the reward plot; the baseline body is
# drawn opaque, the others semi-transparent.
for body, body_color, body_label in zip(violin_parts["bodies"], colors, labels):
    body.set_facecolor(body_color)
    body.set_edgecolor("black")
    body.set_alpha(0.3 if body_label != "Baseline" else 1.0)

for partname in ("cbars", "cmins", "cmaxes", "cmedians"):
    part = violin_parts[partname]
    part.set_edgecolor("black")
    part.set_linewidth(1)

# Dashed reference line at the median of the last run in `datasets`
# (the baseline).
baseline_median = np.median(recent_rewards[-1])
plt.axhline(y=baseline_median, color="black", linestyle="--")

plt.xlabel("Algorithm")
plt.ylabel("Reward")
plt.title(f"Violin Plot of Rewards for the Last {N_LAST_EPISODES:,} Episodes")

plt.show()

### Visualize heatmap

In [None]:
# Collect, for every PPO log entry, the positions recorded under index 0 of
# `info`, together with the first two coordinates of the player position.
player_position, ball_position = [], []
x, y = [], []
for entry in ppo:
    first_player = entry["info"][0]
    position = first_player["player_info"]["position"]

    player_position.append(position)
    ball_position.append(first_player["ball_info"]["position"])
    x.append(position[0])
    y.append(position[1])

# Hexagonal 2-D histogram of where the player was located.
fig, ax = plt.subplots(figsize=(8, 6))
hexes = ax.hexbin(x, y, gridsize=30, cmap="inferno")
fig.colorbar(hexes, ax=ax, label="count")

ax.set_xlim(-20, 20)
ax.set_ylim(-10, 10)

ax.set_xlabel("X Coordinate (here is one wall)")
ax.set_ylabel("Y Coordinate (here is one goal)")
ax.set_title("Player Position Heatmap")

plt.show()