In [21]:
import os
import pandas as pd
import numpy as np

In [22]:
# Define the parameters
algos = ['DQN', 'BCQ', 'CQL']
nr_trajs = [100, 1000, 10000, 50000, 100000]
opts = [0, 25, 50, 75, 100]
seeds = range(1, 4)

base_path = 'maintenance_offlineRL/d3rlpy_logs'

results = []

In [23]:
# Function to find the best configuration for each statistic
def find_best_for_statistic(statistic, value):
    if statistic == 'algo':
        best = results_df[results_df['algo'] == value].sort_values(by='avg_reward', ascending=False).iloc[0]
    elif statistic == 'opt':
        best = results_df[results_df['opt'] == value].sort_values(by='avg_reward', ascending=False).iloc[0]
    elif statistic == 'nr_traj':
        best = results_df[results_df['nr_traj'] == value].sort_values(by='avg_reward', ascending=False).iloc[0]
    return best

In [24]:
# Load the data and calculate averages and standard errors
for algo in algos:
    for opt in opts:
        for nr_traj in nr_trajs:
            timestep_rewards = {}
            for seed in seeds:
                file_path = f'{base_path}/{algo}_nr_traj_{nr_traj}_opt_{opt}_seed_{seed}/evaluation_env.csv'
                if os.path.exists(file_path):
                    df = pd.read_csv(file_path, header=None, sep=',', names=['Row', 'Timestep', 'Return', 'Variance', 'StandardError'])
                    for index, row in df.iterrows():
                        timestep = row['Timestep']
                        reward = row['Return']
                        if timestep not in timestep_rewards:
                            timestep_rewards[timestep] = []
                        timestep_rewards[timestep].append(reward)
            
            # Calculate average reward for each timestep across seeds
            avg_rewards = {t: np.mean(r) for t, r in timestep_rewards.items() if len(r) == len(seeds)}
            
            if avg_rewards:
                best_timestep = max(avg_rewards, key=avg_rewards.get)
                rewards_at_best_timestep = timestep_rewards[best_timestep]
                avg_reward = np.mean(rewards_at_best_timestep)
                std_error = np.std(rewards_at_best_timestep) / np.sqrt(len(rewards_at_best_timestep))
                results.append({
                    'algo': algo,
                    'opt': opt,
                    'nr_traj': nr_traj,
                    'avg_reward': avg_reward,
                    'std_error': std_error,
                    'best_timestep': best_timestep
                })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Find the best configuration for each optimality rate
for opt in opts:
    best_config = find_best_for_statistic('opt', opt)
    print(f"Optimality {opt}: Best Algo: {best_config['algo']}, Dataset Size: {best_config['nr_traj']}, "
          f"Avg Reward: {best_config['avg_reward']:.2f}, Std Error: {best_config['std_error']:.2f}, "
          f"Best Timestep: {best_config['best_timestep']}")

# Find the best configuration for each algorithm
for algo in algos:
    best_config = find_best_for_statistic('algo', algo)
    print(f"Algorithm {algo}: Best Optimality: {best_config['opt']}, Dataset Size: {best_config['nr_traj']}, "
          f"Avg Reward: {best_config['avg_reward']:.2f}, Std Error: {best_config['std_error']:.2f}, "
          f"Best Timestep: {best_config['best_timestep']}")

# Find the best configuration for each dataset size
for nr_traj in nr_trajs:
    best_config = find_best_for_statistic('nr_traj', nr_traj)
    print(f"Dataset Size {nr_traj}: Best Algo: {best_config['algo']}, Optimality: {best_config['opt']}, "
          f"Avg Reward: {best_config['avg_reward']:.2f}, Std Error: {best_config['std_error']:.2f}, "
          f"Best Timestep: {best_config['best_timestep']}")

Optimality 0: Best Algo: CQL, Dataset Size: 1000, Avg Reward: -14405.35, Std Error: 144.68, Best Timestep: 40000.0
Optimality 25: Best Algo: CQL, Dataset Size: 100, Avg Reward: -14972.62, Std Error: 215.49, Best Timestep: 60000.0
Optimality 50: Best Algo: CQL, Dataset Size: 100, Avg Reward: -15490.48, Std Error: 155.10, Best Timestep: 60000.0
Optimality 75: Best Algo: CQL, Dataset Size: 10000, Avg Reward: -15306.57, Std Error: 422.60, Best Timestep: 70000.0
Optimality 100: Best Algo: BCQ, Dataset Size: 100000, Avg Reward: -15419.07, Std Error: 352.74, Best Timestep: 100000.0
Algorithm DQN: Best Optimality: 0, Dataset Size: 10000, Avg Reward: -16942.37, Std Error: 491.28, Best Timestep: 100000.0
Algorithm BCQ: Best Optimality: 100, Dataset Size: 100000, Avg Reward: -15419.07, Std Error: 352.74, Best Timestep: 100000.0
Algorithm CQL: Best Optimality: 0, Dataset Size: 1000, Avg Reward: -14405.35, Std Error: 144.68, Best Timestep: 40000.0
Dataset Size 100: Best Algo: CQL, Optimality: 25, A

In [25]:
import os
import pandas as pd
import numpy as np

# Define the parameters
algos = ['DQN', 'BCQ', 'CQL']
nr_trajs = [100, 1000, 10000, 50000, 100000]
opts = [0, 25, 50, 75, 100]
seeds = range(1, 4)

base_path = 'maintenance_offlineRL/d3rlpy_logs'

# Initialize a dictionary to store results
results = []

# Load the data and calculate averages and standard errors
for algo in algos:
    for opt in opts:
        for nr_traj in nr_trajs:
            seed_averages = []
            for seed in seeds:
                file_path = f'{base_path}/{algo}_nr_traj_{nr_traj}_opt_{opt}_seed_{seed}/evaluation_env.csv'
                if os.path.exists(file_path):
                    df = pd.read_csv(file_path, header=None, sep=',', names=['Row', 'Timestep', 'Return', 'Variance', 'StandardError'])
                    avg_return = df['Return'].mean()  # Average return across all timesteps
                    seed_averages.append(avg_return)
            if seed_averages:
                avg_reward = np.mean(seed_averages)
                std_error = np.std(seed_averages) / np.sqrt(len(seed_averages))
                results.append({
                    'algo': algo,
                    'opt': opt,
                    'nr_traj': nr_traj,
                    'avg_reward': avg_reward,
                    'std_error': std_error
                })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Find the best configuration for each optimality rate
for opt in opts:
    best_config = find_best_for_statistic('opt', opt)
    print(f"Optimality {opt}: Best Algo: {best_config['algo']}, Dataset Size: {best_config['nr_traj']}, "
          f"Avg Reward: {best_config['avg_reward']:.2f}, Std Error: {best_config['std_error']:.2f}")

# Find the best configuration for each algorithm
for algo in algos:
    best_config = find_best_for_statistic('algo', algo)
    print(f"Algorithm {algo}: Best Optimality: {best_config['opt']}, Dataset Size: {best_config['nr_traj']}, "
          f"Avg Reward: {best_config['avg_reward']:.2f}, Std Error: {best_config['std_error']:.2f}")

# Find the best configuration for each dataset size
for nr_traj in nr_trajs:
    best_config = find_best_for_statistic('nr_traj', nr_traj)
    print(f"Dataset Size {nr_traj}: Best Algo: {best_config['algo']}, Optimality: {best_config['opt']}, "
          f"Avg Reward: {best_config['avg_reward']:.2f}, Std Error: {best_config['std_error']:.2f}")

Optimality 0: Best Algo: CQL, Dataset Size: 100000, Avg Reward: -43096.87, Std Error: 6940.26
Optimality 25: Best Algo: CQL, Dataset Size: 50000, Avg Reward: -29644.84, Std Error: 3695.05
Optimality 50: Best Algo: CQL, Dataset Size: 10000, Avg Reward: -37861.74, Std Error: 4636.49
Optimality 75: Best Algo: CQL, Dataset Size: 100, Avg Reward: -19852.71, Std Error: 1499.97
Optimality 100: Best Algo: BCQ, Dataset Size: 100000, Avg Reward: -16909.75, Std Error: 95.39
Algorithm DQN: Best Optimality: 0, Dataset Size: 10000, Avg Reward: -103094.31, Std Error: 2669.72
Algorithm BCQ: Best Optimality: 100, Dataset Size: 100000, Avg Reward: -16909.75, Std Error: 95.39
Algorithm CQL: Best Optimality: 100, Dataset Size: 50000, Avg Reward: -19310.15, Std Error: 1201.18
Dataset Size 100: Best Algo: BCQ, Optimality: 100, Avg Reward: -18718.97, Std Error: 1010.66
Dataset Size 1000: Best Algo: BCQ, Optimality: 100, Avg Reward: -16985.62, Std Error: 115.11
Dataset Size 10000: Best Algo: BCQ, Optimality: 