In [1]:
import os
from typing import Dict, Optional

import numpy as np

from mrl.inference.posterior import normalize

In [2]:
# Switch into the directory of this experiment run; the np.load calls that
# follow use file names relative to the current working directory.
experiment_dir = '/home/joschnei/multimodal-reward-learning/data/miner/near-original-reward/7'
os.chdir(experiment_dir)
# Bare last expression so the notebook echoes the directory we ended up in.
os.getcwd()

'/home/joschnei/multimodal-reward-learning/data/miner/near-original-reward/7'

In [3]:
# Load the three saved ARS arrays (one per feedback modality) from the
# current working directory.
state = np.load('ars.state.npy')
traj = np.load('ars.traj.npy')
mixed = np.load('ars.mixed.npy')

# Ground-truth reward vector saved alongside them.
reward = np.load('reward.npy')

# Report what was loaded.
print("state ARS shape={}".format(state.shape))
print("traj ARS shape={}".format(traj.shape))
print("mixed ARS shape={}".format(mixed.shape))
print("reward=" + repr(reward))

# Sanity check that all ARS agree with gt reward: every row of every array
# must have a strictly positive dot product with the ground-truth reward.
assert (state @ reward > 0).all()
assert (traj @ reward > 0).all()
assert (mixed @ reward > 0).all()

state ARS shape=(42, 4)
traj ARS shape=(154, 4)
mixed ARS shape=(84, 4)
reward=array([ 0.70359754, -0.70359754, -0.07035975, -0.07035975])


In [4]:
# Fixed seed so the Monte Carlo estimates computed from `samples` are
# reproducible across kernel restarts. Change SEED to draw a different set.
SEED = 0
rng = np.random.default_rng(SEED)

# uniform dataset
# Draw i.i.d. standard-normal vectors with one column per ARS column and
# pass them through `normalize`.
# NOTE(review): presumably `normalize` rescales every row to unit length
# (the assert below checks this for row 0), which would make the rows
# uniformly distributed directions -- confirm against mrl.inference.posterior.
n_samples = 100_000_000
samples = rng.standard_normal(size=(n_samples, state.shape[1]))
# Append the ground-truth reward as one extra (last) row so that it is
# always part of the sample set.
samples = np.concatenate((samples, [reward]))
samples = normalize(samples)
assert np.abs(np.linalg.norm(samples[0]) - 1) < 1e-3

# Make "epsilon ball" around gt reward
def make_near_reward(reward: np.ndarray, n_random: int = 100, epsilon: float = 1e-8, rng: Optional[np.random.Generator] = None) -> np.ndarray:
	"""Build a set of reward vectors clustered around `reward`.

	The returned rows are, in order:

	1. For each coordinate i: `reward` with coordinate i increased by
	   `epsilon`, followed by `reward` with coordinate i decreased by
	   `epsilon` (2 * d rows in total, d = len(reward)).
	2. `n_random` rows of `reward` plus independent Gaussian noise with mean 0
	   and standard deviation `epsilon` on every coordinate.

	The rows are NOT renormalized; `reward` itself is not modified.

	Args:
		reward: 1-D reward vector of length d.
		n_random: Number of Gaussian-perturbed rows to append.
		epsilon: Size of the per-coordinate step and standard deviation of
			the Gaussian noise.
		rng: Random generator used for the Gaussian rows. When None, a new
			unseeded generator is created on every call. (A generator built
			in the signature would be created once at definition time and
			silently shared, and advanced, by every call.)

	Returns:
		Array of shape (2 * d + n_random, d).
	"""
	if rng is None:
		rng = np.random.default_rng()

	reward = np.asarray(reward, dtype=float)
	dim = reward.shape[0]

	# Row i of `steps` is epsilon along coordinate i and zero elsewhere.
	steps = epsilon * np.eye(dim)
	# Stack the +epsilon and -epsilon variants on a new middle axis and
	# flatten, which interleaves them as (+0, -0, +1, -1, ...).
	axis_points = np.stack((reward + steps, reward - steps), axis=1).reshape(2 * dim, dim)

	perturbations = rng.normal(loc=0, scale=epsilon, size=(n_random, dim))
	return np.concatenate((axis_points, reward + perturbations))

In [5]:
def volume(diffs: np.ndarray, samples: np.ndarray, chunk_size: int = 1_000_000) -> float:
	"""Fraction of `samples` that satisfy every constraint in `diffs`.

	A sample s counts as inside when `d @ s > 0` (strictly) for every row d
	of `diffs`.

	The samples are processed `chunk_size` rows at a time, so the largest
	temporary is (len(diffs), chunk_size) rather than (len(diffs),
	len(samples)). The result is the same count divided by the same total.

	Args:
		diffs: 2-D array with one constraint vector per row.
		samples: 2-D array with one sample per row (a single 1-D sample is
			also accepted), with the same number of columns as `diffs`.
		chunk_size: Maximum number of samples multiplied at once; must be
			at least 1.

	Returns:
		The fraction of samples inside, as a numpy float; NaN when there
		are no samples.

	Raises:
		ValueError: If `chunk_size` is smaller than 1.
	"""
	if chunk_size < 1:
		raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

	samples = np.atleast_2d(samples)
	n_samples = samples.shape[0]
	if n_samples == 0:
		return np.float64("nan")

	n_inside = 0
	for start in range(0, n_samples, chunk_size):
		chunk = samples[start:start + chunk_size]
		# Column j of the product holds all constraint values for sample j.
		inside = np.all((diffs @ chunk.T) > 0, axis=0)
		n_inside += int(np.count_nonzero(inside))

	return np.float64(n_inside) / n_samples

In [6]:
# Monte Carlo estimate, per ARS, of the fraction of `samples` that satisfy
# all of its constraints.
print("Volume of samples in state:", volume(state, samples))
print("Volume of samples in traj:", volume(traj, samples))
print("Volume of samples in mixed:", volume(mixed, samples))

Volume of samples in state: 1.2399999876e-06
Volume of samples in traj: 9.9999999e-09
Volume of samples in mixed: 2.9999999700000005e-08


In [7]:
# For each epsilon from 1e-8 up to 1 (nine log-spaced values), build points
# around the ground-truth reward, normalize them, and report what fraction of
# those points satisfies each ARS.
for epsilon in np.logspace(-8, 0, num=9, base=10):
	near_reward = normalize(make_near_reward(reward, epsilon=epsilon))
	print(
		f"epsilon={epsilon}, "
		f"state volume={volume(state, near_reward)}, "
		f"traj volume={volume(traj, near_reward)}, "
		f"mixed volume={volume(mixed, near_reward)}"
	)

epsilon=1e-08, state volume=1.0, traj volume=1.0, mixed volume=1.0
epsilon=1e-07, state volume=1.0, traj volume=1.0, mixed volume=1.0
epsilon=1e-06, state volume=1.0, traj volume=1.0, mixed volume=1.0
epsilon=1e-05, state volume=1.0, traj volume=0.9814814814814815, mixed volume=1.0
epsilon=0.0001, state volume=1.0, traj volume=0.21296296296296297, mixed volume=0.8611111111111112
epsilon=0.001, state volume=1.0, traj volume=0.05555555555555555, mixed volume=0.26851851851851855
epsilon=0.01, state volume=0.49074074074074076, traj volume=0.037037037037037035, mixed volume=0.046296296296296294
epsilon=0.1, state volume=0.009259259259259259, traj volume=0.0, mixed volume=0.0
epsilon=1.0, state volume=0.0, traj volume=0.0, mixed volume=0.0


In [8]:
def _satisfies_all(diffs: np.ndarray, samples: np.ndarray, chunk_size: int) -> np.ndarray:
	"""Boolean mask over the rows of `samples`: True where `d @ s > 0` for every row d of `diffs`."""
	n_samples = samples.shape[0]
	mask = np.empty(n_samples, dtype=bool)
	for start in range(0, n_samples, chunk_size):
		stop = start + chunk_size
		# Only a (len(diffs), chunk_size) product is alive at any time.
		mask[start:stop] = np.all((diffs @ samples[start:stop].T) > 0, axis=0)
	return mask


def set_op_stats(first: np.ndarray, second: np.ndarray, samples: np.ndarray, chunk_size: int = 1_000_000) -> Dict[str, float]:
	"""Compare the regions described by two constraint sets using `samples`.

	A sample s belongs to a region when `d @ s > 0` (strictly) for every row
	d of that region's constraint array. Membership is evaluated
	`chunk_size` samples at a time so that the full (constraints x samples)
	product is never held in memory; the resulting masks, and therefore the
	statistics, are the same as for a single large product.

	Args:
		first: 2-D array of constraint rows for the first region.
		second: 2-D array of constraint rows for the second region.
		samples: 2-D array with one sample per row (a single 1-D sample is
			also accepted).
		chunk_size: Maximum number of samples multiplied at once; must be
			at least 1.

	Returns:
		Dict with the fraction of samples in each of: "intersection",
		"union", "first_minus_second", "second_minus_first", "neither".

	Raises:
		ValueError: If `chunk_size` is smaller than 1.
	"""
	if chunk_size < 1:
		raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

	samples = np.atleast_2d(samples)
	in_first = _satisfies_all(first, samples, chunk_size)
	in_second = _satisfies_all(second, samples, chunk_size)

	intersection = np.logical_and(in_first, in_second)
	union = np.logical_or(in_first, in_second)
	first_minus_second = np.logical_and(in_first, np.logical_not(in_second))
	second_minus_first = np.logical_and(in_second, np.logical_not(in_first))
	in_neither = np.logical_not(union)
	return {
		"intersection": np.mean(intersection),
		"union": np.mean(union),
		"first_minus_second": np.mean(first_minus_second),
		"second_minus_first": np.mean(second_minus_first),
		"neither": np.mean(in_neither)
	}

In [11]:
# Pairwise overlap statistics between the three ARS regions, estimated on
# the large sample set.
print("Set ops state traj:", set_op_stats(state, traj, samples))
print("Set ops state mixed:", set_op_stats(state, mixed, samples))
print("Set ops traj mixed:", set_op_stats(traj, mixed, samples))

Set ops state traj: {'intersection': 9.9999999e-09, 'union': 1.2399999876e-06, 'first_minus_second': 1.2299999877e-06, 'second_minus_first': 0.0, 'neither': 0.9999987600000124}
Set ops state mixed: {'intersection': 2.9999999700000005e-08, 'union': 1.2399999876e-06, 'first_minus_second': 1.2099999879000002e-06, 'second_minus_first': 0.0, 'neither': 0.9999987600000124}
Set ops traj mixed: {'intersection': 9.9999999e-09, 'union': 2.9999999700000005e-08, 'first_minus_second': 0.0, 'second_minus_first': 1.99999998e-08, 'neither': 0.9999999700000003}


In [12]:
# Repeat the pairwise comparison on points close to the ground-truth reward,
# for five log-spaced epsilon values from 1e-5 to 1e-1.
for epsilon in np.logspace(-5, -1, num=5, base=10):
	print(f"{epsilon=}")
	near_reward = normalize(make_near_reward(reward, epsilon=epsilon))
	print("Set ops state traj:", set_op_stats(state, traj, near_reward))
	print("Set ops state mixed:", set_op_stats(state, mixed, near_reward))
	print("Set ops traj mixed:", set_op_stats(traj, mixed, near_reward))

epsilon=1e-05
Set ops state traj: {'intersection': 0.9444444444444444, 'union': 1.0, 'first_minus_second': 0.05555555555555555, 'second_minus_first': 0.0, 'neither': 0.0}
Set ops state mixed: {'intersection': 1.0, 'union': 1.0, 'first_minus_second': 0.0, 'second_minus_first': 0.0, 'neither': 0.0}
Set ops traj mixed: {'intersection': 0.9444444444444444, 'union': 1.0, 'first_minus_second': 0.0, 'second_minus_first': 0.05555555555555555, 'neither': 0.0}
epsilon=0.0001
Set ops state traj: {'intersection': 0.17592592592592593, 'union': 1.0, 'first_minus_second': 0.8240740740740741, 'second_minus_first': 0.0, 'neither': 0.0}
Set ops state mixed: {'intersection': 0.9629629629629629, 'union': 1.0, 'first_minus_second': 0.037037037037037035, 'second_minus_first': 0.0, 'neither': 0.0}
Set ops traj mixed: {'intersection': 0.17592592592592593, 'union': 0.9629629629629629, 'first_minus_second': 0.0, 'second_minus_first': 0.7870370370370371, 'neither': 0.037037037037037035}
epsilon=0.001
Set ops sta