In [None]:
# Required for importing modules from parent directory
import os
import sys

# Switch consulted by the export steps later in the notebook.
EXPORT = True

# The quoted "__file__" is resolved against the working directory, so the
# head of the split is the directory the notebook is running from.
current_dir = os.path.split(os.path.abspath("__file__"))[0]
# One level up from there; appended to sys.path so imports can reach it.
parent_dir = os.path.split(current_dir)[0]
print(parent_dir)
sys.path.append(parent_dir)

In [None]:
from path import Path

from src.config import LOG_DIR
from src.utils import read_json_log_file

# Log directory used by this notebook; this assignment replaces the LOG_DIR
# value imported from src.config above.
LOG_DIR = "notebooks/data/kaggle_single_out_1000/logs"

# Location of the JSON log file below the parent directory.
file_path = Path(parent_dir) / LOG_DIR / "plain/logs.json"

logs = read_json_log_file(file_path)

# Preview the first ten entries.
for log in logs[:10]:
    print(log)

In [None]:
import ast

import pandas as pd


def convert_log_entry(log_entry):
    """Return a copy of ``log_entry`` with stringified literal fields parsed.

    The values stored under ``"reward"``, ``"info"``, ``"observations"`` and
    ``"scores"`` are passed through ``ast.literal_eval`` when they are strings.
    A value that cannot be parsed is kept as the original string.

    Args:
        log_entry: Mapping holding one log record.

    Returns:
        dict: A shallow copy of ``log_entry`` carrying the parsed values. The
        argument itself is left untouched, so the raw records stay available
        and converting them again (e.g. on a cell re-run) gives the same result.
    """
    keys_to_convert = ("reward", "info", "observations", "scores")
    # literal_eval is documented to raise any of these on malformed input;
    # e.g. "{[1]: 2}" is valid syntax but fails with TypeError (unhashable key).
    parse_errors = (ValueError, TypeError, SyntaxError, MemoryError, RecursionError)

    converted = dict(log_entry)
    for key in keys_to_convert:
        value = converted.get(key)
        if not isinstance(value, str):
            continue
        try:
            converted[key] = ast.literal_eval(value)
        except parse_errors:
            # Conversion failed: keep the original string.
            pass

    return converted


# Parse the stringified fields of every log record.
converted_logs = list(map(convert_log_entry, logs))

# Tabulate the records, one row per log entry.
df = pd.DataFrame(converted_logs)

# Show which columns the logs produced.
print(df.columns)

# Peek at the first 'scores' value when that column is present.
if "scores" not in df.columns:
    print("No 'scores' column found in the DataFrame")
else:
    print(df["scores"].iloc[0])

In [None]:
# Rows logged under the "ddpg" message. NOTE(review): a negative count keeps
# every row except the final five — confirm that head(5) was not intended.
df.loc[df["message"] == "ddpg"].iloc[:-5]

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Box plots of agent 0's scores and rewards per algorithm, built from the `df`
# created in the cell above.
# Serif typography applied to every matplotlib figure drawn from here on.
plt.rcParams.update(
    {
        "font.family": "serif",
        "font.serif": ["Times New Roman"],
        "font.size": 10,
        "axes.labelsize": 12,
        "axes.titlesize": 14,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "legend.fontsize": 10,
        "figure.titlesize": 16,
    }
)

# Remove rows where message contains "Connected". The explicit copy makes the
# result an independent frame, so the column assignments below do not write
# into a slice of the previous DataFrame (pandas' chained-assignment warning).
df = df[~df["message"].str.contains("Connected", na=False)].copy()

# Extract the element stored under key 0 from scores and rewards; rows whose
# value is not a dict become NaN.
df["agent_0_score"] = df["scores"].apply(
    lambda x: x[0] if isinstance(x, dict) else np.nan
)
df["agent_0_reward"] = df["reward"].apply(
    lambda x: x[0] if isinstance(x, dict) else np.nan
)

# Group by message (algorithm type) and remove duplicates within each group
grouped = df.groupby("message")
data_by_algo_score = {}
data_by_algo_reward = {}
for name, group in grouped:
    # Keep only the last row logged per episode, ordered by episode.
    group = group.drop_duplicates(subset=["episode"], keep="last").sort_values(
        "episode"
    )
    # boxplot does not skip NaN: a single NaN turns the quartiles into NaN and
    # leaves the box empty. The mean/std used for the caption already skip NaN
    # on a Series, so dropping them here keeps plot and caption consistent.
    data_by_algo_score[name] = group["agent_0_score"].dropna()
    data_by_algo_reward[name] = group["agent_0_reward"].dropna()

# Prepare data for box plots; entries labelled "maddpg" are left out.
score_data = [
    data for algo, data in data_by_algo_score.items() if algo.lower() != "maddpg"
]
reward_data = [
    data for algo, data in data_by_algo_reward.items() if algo.lower() != "maddpg"
]
score_labels = [
    algo.upper() for algo in data_by_algo_score if algo.lower() != "maddpg"
]
reward_labels = [
    algo.upper() for algo in data_by_algo_reward if algo.lower() != "maddpg"
]

# Set up the plot: scores on the left axis, rewards on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

# Plot scores
bp1 = ax1.boxplot(score_data)
ax1.set_title("Distribution of Scores by Algorithm (Single Agent Run)")
ax1.set_xlabel("Algorithm")
ax1.set_ylabel("Score")
ax1.set_xticklabels(score_labels, rotation=45, ha="right")

# Plot rewards
bp2 = ax2.boxplot(reward_data)
ax2.set_title("Distribution of Rewards by Algorithm (Single Agent Run)")
ax2.set_xlabel("Algorithm")
ax2.set_ylabel("Reward")
ax2.set_xticklabels(reward_labels, rotation=45, ha="right")

# Calculate mean and std for caption, sorted by mean (highest first)
score_stats = sorted(
    [
        (algo, np.mean(data), np.std(data))
        for algo, data in zip(score_labels, score_data)
    ],
    key=lambda x: x[1],
    reverse=True,
)
reward_stats = sorted(
    [
        (algo, np.mean(data), np.std(data))
        for algo, data in zip(reward_labels, reward_data)
    ],
    key=lambda x: x[1],
    reverse=True,
)

# Create caption
score_caption = "Scores (mean ± std): " + ", ".join(
    f"{algo}: {mean:.2f} ± {std:.2f}" for algo, mean, std in score_stats
)
reward_caption = "Rewards (mean ± std): " + ", ".join(
    f"{algo}: {mean:.2f} ± {std:.2f}" for algo, mean, std in reward_stats
)

caption = score_caption + "\n" + reward_caption

# Add caption with stats, centred near the bottom edge of the figure
fig.text(0.5, 0.01, caption, ha="center", va="center", fontsize=10, wrap=True)

# Adjust layout
plt.subplots_adjust(bottom=0.2)  # Make room for caption

# Save the plot to disk (pathlib.Path is imported here because the export helper below relies on it)

from pathlib import Path


def export_plot(format):
    """Save the current matplotlib figure below ``docs/Latex/img``.

    Args:
        format: Either ``"pdf"`` or ``"pgf"``; selects both the sub-directory
            and the file name of the export.

    Raises:
        ValueError: If ``format`` is neither ``"pdf"`` nor ``"pgf"``.

    Errors raised while saving are caught and printed instead of propagated.
    """
    # format -> (sub-directory, file name)
    targets = {
        "pdf": ("pdf", "boxplot_single.pdf"),
        "pgf": ("pgf", "boxplot_single.pgf"),
    }
    if format not in targets:
        raise ValueError("Unsupported format. Use 'pdf' or 'pgf'.")
    subdir, filename = targets[format]

    # Make sure the destination directory exists before writing into it.
    path = Path(parent_dir) / "docs" / "Latex" / "img" / subdir
    path.mkdir(parents=True, exist_ok=True)

    full_path = path / filename

    try:
        plt.savefig(str(full_path), format=format, dpi=300, bbox_inches="tight")
        print(f"Plot saved as {format.upper()} at {full_path}")
    except Exception as e:
        print(f"Error saving plot: {e}")


# Write both export formats when exporting is switched on.
if EXPORT:
    for fmt in ("pdf", "pgf"):
        export_plot(fmt)

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# Scatter of agent 0's score per algorithm with a fitted linear trend, built
# from the `df` created earlier in the notebook.
# Serif typography applied to every matplotlib figure drawn from here on.
plt.rcParams.update(
    {
        "font.family": "serif",
        "font.serif": ["Times New Roman"],
        "font.size": 10,
        "axes.labelsize": 12,
        "axes.titlesize": 14,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "legend.fontsize": 10,
        "figure.titlesize": 16,
    }
)

# Remove rows where message contains "Connected". The explicit copy makes the
# result an independent frame, so the column assignment below does not write
# into a slice of the previous DataFrame (pandas' chained-assignment warning).
df = df[~df["message"].str.contains("Connected", na=False)].copy()

# Extract the element stored under key 0 from scores; rows whose value is not
# a dict become NaN.
df["agent_0_score"] = df["scores"].apply(
    lambda x: x[0] if isinstance(x, dict) else np.nan
)

# Group by message (algorithm type) and remove duplicates within each group
grouped = df.groupby("message")
data_by_algo = {}
for name, group in grouped:
    # Keep only the last row logged per episode, ordered by episode.
    group = group.drop_duplicates(subset=["episode"], keep="last").sort_values(
        "episode"
    )
    data_by_algo[name] = group["agent_0_score"]

# Set up the plot
plt.figure(figsize=(12, 6))

# Colors for each algorithm (extend this dictionary if needed)
colors = {
    "ddqn": "blue",
    "ddpg": "red",
    "ppo": "orange",
    "sac": "green",
    "random": "lightgrey",
    "baseline": "grey",
}

# Plot each algorithm and store gradients
gradients = {}
mse_values = {}
min_x, max_x = float("inf"), float("-inf")

for algo, scores in data_by_algo.items():
    if algo.lower() in ["maddpg"]:
        continue
    if len(scores) == 0:
        # Nothing to draw, and x[0] / x[-1] below would raise on an empty group.
        continue

    color = colors.get(
        algo.lower(), "gray"
    )  # Use 'gray' if algorithm not in colors dict

    # Positional index of each retained row serves as the x coordinate.
    x = np.arange(len(scores))
    min_x = min(min_x, x[0])
    max_x = max(max_x, x[-1])

    # Plot raw data in light color
    plt.scatter(x, scores, color=color, alpha=0.3, s=1)

    # Fit only on rows that actually carry a score: NaN in the input would
    # make the slope, intercept and MSE NaN as well.
    y = scores.to_numpy(dtype=float)
    valid = ~np.isnan(y)
    if valid.sum() < 2:
        # A line needs at least two points; keep the scatter, skip the fit.
        continue

    # Calculate and plot linear regression
    fit = stats.linregress(x[valid], y[valid])
    slope, intercept = fit.slope, fit.intercept
    line = slope * x + intercept
    plt.plot(x, line, label=f"{algo.upper()}", color=color, linewidth=2)

    gradients[algo] = slope * 1000  # Multiply by 1000
    mse = np.mean((y[valid] - line[valid]) ** 2)
    mse_values[algo] = mse

# Customize the plot
plt.title("Performance over 1000 Time Steps (Single Agent Run)")
plt.xlabel("Time Step")
plt.ylabel("Score [0, 0.5]")
plt.legend(loc="upper right")  # Move legend to upper right
plt.grid(True, linestyle="--", alpha=0.7)
plt.ylim(0, 0.5)  # Set y-axis limits from 0 to 0.5
if min_x <= max_x:
    # Only false when no algorithm was plotted and the limits are still ±inf.
    plt.xlim(min_x, max_x)  # Set x-axis limits to match data range

# Prepare gradient and MSE information for caption (both dicts share keys)
info = ", ".join(
    f"{algo.upper()}: ({grad:.2f}, {mse_values[algo]:.2f})"
    for algo, grad in gradients.items()
)

# Add caption with gradient and MSE information, placed just below the figure
plt.figtext(
    0.5,
    -0.05,
    f"Linear Regression (Gradient×10³, MSE): {info}",
    ha="center",
    fontsize=10,
    wrap=True,
)


# Export helper for the trend plot
def export_plot(format):
    """Save the current matplotlib figure below ``docs/Latex/img``.

    Args:
        format: Either ``"pdf"`` or ``"pgf"``; selects both the sub-directory
            and the file name of the export.

    Raises:
        ValueError: If ``format`` is neither ``"pdf"`` nor ``"pgf"``.

    Errors raised while saving are caught and printed instead of propagated.
    """
    # format -> (sub-directory, file name)
    targets = {
        "pdf": ("pdf", "performance_trend_single.pdf"),
        "pgf": ("pgf", "performance_trend_single.pgf"),
    }
    if format not in targets:
        raise ValueError("Unsupported format. Use 'pdf' or 'pgf'.")
    subdir, filename = targets[format]

    # Make sure the destination directory exists before writing into it.
    path = Path(parent_dir) / "docs" / "Latex" / "img" / subdir
    path.mkdir(parents=True, exist_ok=True)

    full_path = path / filename

    try:
        plt.savefig(str(full_path), format=format, dpi=300, bbox_inches="tight")
        print(f"Plot saved as {format.upper()} at {full_path}")
    except Exception as e:
        print(f"Error saving plot: {e}")


# Export the plot in both formats when the EXPORT flag from the first cell is on
if EXPORT:
    for fmt in ("pdf", "pgf"):
        export_plot(fmt)