In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import torch
from cost_func import compute_acp_cost, overall_map_cost, compute_map_air, unstable_states, compute_pulsatility_air, overall_pulsatility_cost, overall_hr_cost, compute_hr_air
from reward_func import compute_reward_smooth
from model import WorldModel

DATA_PATH = "/abiomed/downsampled/10min_1hr_window.pkl"
#this is just for the visualization so it isn't messy
MAX_STEPS_TO_PLOT = 2000

In [2]:
with open(DATA_PATH, 'rb') as f:
    data = pickle.load(f)
#this is a check to see what is included in the pkl file and hoow many episodes
print(f"there are {list(data.keys())} keys in dataset")
episodes = data['test']
print(f"episodes: {len(episodes)}")

there are ['train', 'val', 'test', 'mean', 'std'] keys in dataset
episodes: 1266


In [3]:
#to numpy if tensor
if isinstance(episodes, torch.Tensor):    
    episodes_np = episodes.numpy()  
else:
    episodes_np = np.array(episodes)

In [4]:
actions_all = episodes_np[:, :, -1]

#in order to account for the actions between episodes i am just treating the entire time series like an episode
flattened_actions = actions_all.flatten()
flattened_acp = compute_acp_cost(flattened_actions)

#this is averaged across all actions since the acp function called sums acp per episode
acp_per_timestep = flattened_acp/(len(flattened_actions)-1)
print(f"Mean ACP per timestep accounting for between episodes: {acp_per_timestep:.6f}")


Mean ACP per timestep accounting for between episodes: 0.051478


In [5]:
flattened_actions = episodes_np[:, :, -1].flatten()
#i am just using all of the steps instead of the split up episodes
flattened_map = episodes_np[:, :, 0].flatten()
overall_air_score = compute_map_air(flattened_map, flattened_actions)
if overall_air_score is not None:
    print(f"Overall AIR across all timesteps accounting for between episodes for MAP: {overall_air_score:.6f}")

Overall AIR across all timesteps accounting for between episodes for MAP: 0.013110


In [6]:
flattened_actions = episodes_np[:, :, -1].flatten()
#i am just using all of the steps instead of the split up episodes
flattened_map = episodes_np[:, :, 5].flatten()
overall_air_score = compute_map_air(flattened_map, flattened_actions)
if overall_air_score is not None:
    print(f"Overall AIR across all timesteps accounting for between episodes for HR: {overall_air_score:.6f}")

Overall AIR across all timesteps accounting for between episodes for HR: 0.015680


In [7]:
flattened_actions = episodes_np[:, :, -1].flatten()
#i am just using all of the steps instead of the split up episodes
flattened_map = episodes_np[:, :, 7].flatten()
overall_air_score = compute_map_air(flattened_map, flattened_actions)
if overall_air_score is not None:
    print(f"Overall AIR across all timesteps accounting for between episodes for pulsatility: {overall_air_score:.6f}")

Overall AIR across all timesteps accounting for between episodes for pulsatility: 0.014478


In [8]:
states_all = episodes_np[:, :, :-1]
flattened_states = states_all.reshape(-1, states_all.shape[-1])
unstable_percentage = unstable_states(flattened_states)
print(f"Percentage of unstable states: {unstable_percentage}")

Percentage of unstable states: 37.13796735123749


In [9]:
all_steps_np = np.concatenate(episodes_np, axis=0)
total_steps_available = all_steps_np.shape[0]
print(f"Total timesteps: {total_steps_available}")

# Group the data in bins with 24 steps
episode_length = 24
num_bins = total_steps_available // episode_length
#creates bins and fits the samples into 3D
binned_steps = all_steps_np[:num_bins * episode_length].reshape(num_bins, episode_length, -1)
print(f"Created {num_bins} bins with {episode_length} steps")
# Randomly select 100 of these bins
num_episodes_to_sample = 633
#no replacement
sampled_bin_indices = np.random.choice(num_bins, num_episodes_to_sample, replace=False)

all_episode_rewards = []

# Loop through the 100 randomly selected bins
for bin_idx in sampled_bin_indices:
    # Get one bin
    episode_bin = binned_steps[bin_idx]
    
    normalized_rewards_for_episode = []
    # Loop through each of the 24 steps in the bin
    for step_data in episode_bin:
        step_tensor = torch.tensor(step_data, dtype=torch.float32)
        raw_reward = compute_reward_smooth(step_tensor.unsqueeze(0))
        
        # Apply normalization from rl_env
        norm_reward = (raw_reward + 4) / 5
        clipped_reward = np.clip(norm_reward, -1.0, 1.0)
        normalized_rewards_for_episode.append(clipped_reward)
    
    # Find the total reward for this simulated episode
    total_episode_reward = sum(normalized_rewards_for_episode)
    all_episode_rewards.append(total_episode_reward)

mean_total_episode_reward = np.mean(all_episode_rewards)

print(f"Mean total normalized reward per episode from all bins: {mean_total_episode_reward:.6f}")


Total timesteps: 15192
Created 633 bins with 24 steps
Mean total normalized reward per episode from all bins: -1.770329
