In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import torch
from cost_func import compute_acp_cost, compute_map_physician_air, is_stable, compute_hr_physician_air, compute_pulsatility_physician_air, unstable_percentage, aggregate_air_physician, weaning_score_physician, super_metric
from reward_func import compute_reward_smooth
from model import WorldModel

# Pickle file that the loading cell opens; it is read as a mapping with
# 'train', 'val' and 'test' entries.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
DATA_PATH = "/abiomed/downsampled/10min_1hr_all_data.pkl"
# Cap on the number of steps drawn in plots, so the figures stay readable.
MAX_STEPS_TO_PLOT = 2000

In [2]:
# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only point DATA_PATH at a trusted source.
with open(DATA_PATH, 'rb') as pkl_file:
    data = pickle.load(pkl_file)

# Stack the three splits along the first axis into one combined set.
episodes = np.concatenate(
    [data[split] for split in ('train', 'val', 'test')],
    axis=0,
)
print(f"Keys in dataset: {list(data.keys())}")
# Despite the label, this reports the length of the first axis only.
print(f"Shape of combined set: {len(episodes)}")

Keys in dataset: ['train', 'val', 'test', 'mean', 'std']
Shape of combined set: 17865


In [3]:
# Make sure downstream cells always work with a NumPy array.
# `episodes` comes out of np.concatenate in the loading cell, so it is normally
# an ndarray already; the tensor branch is kept as a defensive fallback in case
# the loading step changes.
if isinstance(episodes, torch.Tensor):
    # detach()/cpu() make the conversion safe for tensors that track gradients
    # or live on an accelerator; a plain .numpy() raises for both.
    episodes_np = episodes.detach().cpu().numpy()
else:
    # asarray avoids duplicating the whole dataset when it is already an ndarray.
    episodes_np = np.asarray(episodes)

In [4]:
# The last column of every step is used as the action.
actions_all = episodes_np[:, :, -1]

# Every episode is laid end to end and scored as one long sequence, so action
# changes that fall between consecutive episodes are counted as well.
flattened_actions = actions_all.flatten()
flattened_acp = compute_acp_cost(flattened_actions)

# Per the original note, compute_acp_cost returns a summed cost; dividing by
# the number of transitions (n actions -> n - 1 changes) gives a per-step mean.
num_transitions = len(flattened_actions) - 1
acp_per_timestep = flattened_acp / num_transitions
print(f"Mean ACP per timestep accounting for between episodes: {acp_per_timestep:.6f}")


Mean ACP per timestep accounting for between episodes: 0.073368


In [5]:
# All steps are scored as a single continuous series instead of per episode.
# Column 0 is used as MAP, the last column as the action.
flattened_map = episodes_np[:, :, 0].flatten()
flattened_actions = episodes_np[:, :, -1].flatten()
overall_map_score = compute_map_physician_air(
    flattened_map,
    flattened_actions,
)

# Quick look at the (truncated) MAP values that went into the score.
print(flattened_map)
# The helper may return None, in which case nothing is reported.
if overall_map_score is not None:
    print(f"Overall AIR across all timesteps accounting for between episodes for MAP: {overall_map_score:.6f}")

[95.51750488 97.64806684 99.48813273 ... 55.35730673 58.03034655
 52.48650665]
Overall AIR across all timesteps accounting for between episodes for MAP: 0.069607


In [6]:
# Same whole-series treatment as for MAP; column 9 is used as heart rate and
# the last column as the action.
flattened_hr = episodes_np[:, :, 9].flatten()
flattened_actions = episodes_np[:, :, -1].flatten()
overall_hr_score = compute_hr_physician_air(
    flattened_hr,
    flattened_actions,
)
# The helper may return None, in which case nothing is reported.
if overall_hr_score is not None:
    print(f"Overall AIR across all timesteps accounting for between episodes for HR: {overall_hr_score:.6f}")

Overall AIR across all timesteps accounting for between episodes for HR: 0.071517


In [7]:
# Same whole-series treatment as for MAP and HR; column 7 is used as
# pulsatility and the last column as the action.
flattened_pulsatility = episodes_np[:, :, 7].flatten()
flattened_actions = episodes_np[:, :, -1].flatten()
overall_pulsatility_score = compute_pulsatility_physician_air(
    flattened_pulsatility,
    flattened_actions,
)
# The helper may return None, in which case nothing is reported.
if overall_pulsatility_score is not None:
    print(f"Overall AIR across all timesteps accounting for between episodes for pulsatility: {overall_pulsatility_score:.6f}")

Overall AIR across all timesteps accounting for between episodes for pulsatility: 0.074155


In [8]:
# Everything except the last column is treated as the state vector.
states_all = episodes_np[:, :, :-1]
# Collapse (episodes, steps, features) into (total_steps, features).
flattened_states = states_all.reshape(-1, states_all.shape[-1])
# Keep the result under its own name: assigning it to `unstable_percentage`
# would overwrite the imported function, and re-running this cell would then
# fail with "'float' object is not callable".
unstable_pct = unstable_percentage(flattened_states)
print(f"Percentage of unstable states: {unstable_pct}")

Percentage of unstable states: 17.822558074447244


In [9]:
# Weaning score over the whole dataset treated as one sequence.
# Relies on `flattened_states` and `flattened_actions` left in the kernel by
# earlier cells, so those cells must have been run first.
overall_weaning_score = weaning_score_physician(flattened_states, flattened_actions)
print(f"Overall weaning score {overall_weaning_score}")

Overall weaning score -0.09487457555058966


In [10]:
# Aggregate AIR over the whole dataset treated as one sequence.
# Relies on `flattened_states` and `flattened_actions` left in the kernel by
# earlier cells, so those cells must have been run first.
total_air = aggregate_air_physician(flattened_states, flattened_actions)
print(f"Aggregate AIR: {total_air}")

Aggregate AIR: 0.07374862702024164


In [11]:
# Show the first state vector as a sanity check on the feature layout.
print(flattened_states[0])

# `flattened_states` / `flattened_actions` come from earlier cells.
super_acp = super_metric(flattened_states, flattened_actions)
# n actions give n - 1 transitions; normalise the total by that count.
transition_count = len(flattened_actions) - 1
acp_timestep = super_acp / transition_count
print(f"Super Metric ACP: {super_acp}")
print(f"Super Metric ACP per timestep: {acp_timestep}")

[9.55175049e+01 3.89947638e+03 2.37867920e+02 3.83526803e+01
 1.39765734e+01 1.16824373e+02 8.12071603e+01 3.56172133e+01
 5.34618863e+02 6.60000000e+01 7.08239484e-01 6.19862945e+01]
Super Metric ACP: 6809.057172816964
Super Metric ACP per timestep: 0.03176177318122094


In [12]:
# Reward over randomly sampled 24-step bins.
# NOTE(review): the 36-step reward cell that follows uses a different
# normalisation offset/scale and calls compute_reward_smooth without the extra
# leading dimension — confirm which variant is the intended one.

# Collapse (episodes, steps, features) into one long (total_steps, features)
# array; equivalent to concatenating the episodes along the first axis.
all_steps_np = episodes_np.reshape(-1, episodes_np.shape[-1])
total_steps_available = all_steps_np.shape[0]
print(f"Total timesteps: {total_steps_available}")

# Re-group the steps into consecutive bins of 24; leftover steps at the end
# that do not fill a complete bin are dropped.
episode_length = 24
num_bins = total_steps_available // episode_length
binned_steps = all_steps_np[:num_bins * episode_length].reshape(num_bins, episode_length, -1)
print(f"Created {num_bins} bins with {episode_length} steps")

# Sample bins without replacement. The request is clamped to the number of
# bins (sampling without replacement raises otherwise) and the generator is
# seeded so that the reported mean is reproducible across kernel restarts.
sampling_seed = 0
num_episodes_to_sample = min(633, num_bins)
rng = np.random.default_rng(sampling_seed)
sampled_bin_indices = rng.choice(num_bins, num_episodes_to_sample, replace=False)

all_episode_rewards = []

# Score every sampled bin as if it were one episode.
for bin_idx in sampled_bin_indices:
    episode_bin = binned_steps[bin_idx]

    normalized_rewards_for_episode = []
    # Each step is scored on its own, passed with a leading dimension of 1.
    for step_data in episode_bin:
        step_tensor = torch.tensor(step_data, dtype=torch.float32)
        raw_reward = compute_reward_smooth(step_tensor.unsqueeze(0))

        # Shift/scale the raw reward, then clip it into [-1, 1].
        norm_reward = (raw_reward + 4) / 5
        clipped_reward = np.clip(norm_reward, -1.0, 1.0)
        normalized_rewards_for_episode.append(clipped_reward)

    # Total normalised reward for this bin.
    total_episode_reward = sum(normalized_rewards_for_episode)
    all_episode_rewards.append(total_episode_reward)

mean_total_episode_reward = np.mean(all_episode_rewards)

# Report the sample size: the mean covers the sampled bins, not all of them.
print(f"Mean total normalized reward per episode from {num_episodes_to_sample} sampled bins: {mean_total_episode_reward:.6f}")


Total timesteps: 214380
Created 8932 bins with 24 steps


Mean total normalized reward per episode from all bins: -13.496964


In [13]:
# Reward over fixed-length 36-step episodes cut from the flattened step array
# (`all_steps_np` is created by the preceding 24-step bin cell).
# Presumably 36 steps x 10 min = 6 hours, matching the message below — TODO confirm.
EPISODE_LENGTH_STEPS = 36
# Offset and scale used to normalise the raw reward before clipping.
# NOTE(review): these differ from the (+4, /5) used in the 24-step bin cell;
# confirm which pair is intended.
REWARD_OFFSET = 8.93
REWARD_SCALE = 4.41

num_episodes = len(all_steps_np) // EPISODE_LENGTH_STEPS
# Drop trailing steps that do not fill a complete episode.
episodes_6hr = all_steps_np[:num_episodes * EPISODE_LENGTH_STEPS].reshape(
    num_episodes, EPISODE_LENGTH_STEPS, -1
)

# Evaluate the first 100 episodes, or fewer if the data does not have that many.
num_to_evaluate = min(100, len(episodes_6hr))
episodes_to_evaluate = episodes_6hr[:num_to_evaluate]
all_normalized_rewards = []

for episode in episodes_to_evaluate:
    total_normalized_reward = 0
    for step_data in episode:
        step_tensor = torch.tensor(step_data, dtype=torch.float32)
        reward = compute_reward_smooth(step_tensor)
        # Shift/scale the raw reward, then clip it into [-1, 1].
        norm_reward = (reward + REWARD_OFFSET) / REWARD_SCALE
        clipped_reward = np.clip(norm_reward, -1.0, 1.0)
        total_normalized_reward += clipped_reward.item()

    all_normalized_rewards.append(total_normalized_reward)
mean_normalized_reward = np.mean(all_normalized_rewards)

# Report the number of episodes actually evaluated rather than a fixed "100".
print(f"\nMean normalized reward per 6hour episode for {num_to_evaluate} episodes: {mean_normalized_reward:.4f}")


Mean normalized reward per 6hour episode for 100 episodes: 5.6654
