-
Notifications
You must be signed in to change notification settings - Fork 0
/
ad_eval.py
107 lines (102 loc) · 5.39 KB
/
ad_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import torch
import numpy as np
import argparse
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from train.bc_transformer import BC_transformer
from evaluation.eval_tools import behavaiour_features, boundary_model, evaluation
# Sliding-window lengths used by the behaviour-feature extractor
# (evaluation.eval_tools.behavaiour_features): one window length for the
# Q-style features and one for the V-style features.
win_len_q = 20
win_len_v = 60
# Stride between consecutive windows.
step_size = 1
# Action/state dimensionality — 4 discrete actions and 8-dim observations,
# presumably LunarLander (data paths below are ./Data/LunarLander/).
act_dim = 4
state_dim = 8
def get_actions(actions, num_actions=None):
    """One-hot encode per-trajectory discrete action indices.

    Args:
        actions: iterable of trajectories, each an iterable of integer
            action indices in ``[0, num_actions)``.
        num_actions: length of each one-hot vector; defaults to the
            module-level ``act_dim`` for backward compatibility.

    Returns:
        A list of lists of float ``np.ndarray`` one-hot vectors,
        mirroring the nesting of ``actions``.
    """
    if num_actions is None:
        num_actions = act_dim  # module-level constant
    eye = np.eye(num_actions)
    # .copy() so each returned vector is an independent array, matching
    # the original per-action np.zeros allocation.
    return [[eye[a].copy() for a in traj] for traj in actions]
# ---- Load data -------------------------------------------------------------
# Expert demonstrations: ragged per-trajectory arrays of states and discrete
# action indices (hence allow_pickle=True); indices are one-hot encoded.
expert_observations = np.load('./Data/LunarLander/new_states.npy', allow_pickle=True)
expert_actions = np.load('./Data/LunarLander/new_actions.npy', allow_pickle=True)
expert_actions = get_actions(expert_actions)
# Trajectories produced by an anomalous policy — the positive class at
# evaluation time.
policy_anomaly_observations = np.load('./Data/LunarLander/policy_anomaly_states.npy', allow_pickle=True)
policy_anomaly_actions = np.load('./Data/LunarLander/policy_anomaly_actions.npy', allow_pickle=True)
policy_anomaly_actions = get_actions(policy_anomaly_actions)
trajs_state = [torch.tensor(i) for i in expert_observations]
trajs_actions = [torch.tensor(i) for i in expert_actions]
# Split whole trajectories (not individual steps) into train/test;
# indices are carried through so the split can be traced back to the data.
indices = np.arange(len(trajs_state))
X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(trajs_state, trajs_actions, indices, test_size=0.2, random_state=42)
# Normalise states with statistics computed on the training split only;
# the +1e-6 guards against division by zero for constant state dimensions.
states = np.concatenate(X_train, axis=0)
state_mean, state_std = np.mean(states, axis=0), np.std(states, axis=0) + 1e-6
X_train_norm = [(traj - state_mean) / state_std for traj in X_train]
X_test_norm = [(traj - state_mean) / state_std for traj in X_test]
X_policy_anomaly = [torch.tensor(i) for i in policy_anomaly_observations]
y_policy_anomaly = [torch.tensor(i) for i in policy_anomaly_actions]
X_policy_anomaly_norm = [(traj - state_mean) / state_std for traj in X_policy_anomaly]
trajs_len = [len(traj) for traj in X_train]
# Perturbed-anomaly generation: inject Gaussian noise N(mu, sigma) into the
# raw states of the first 2000 training trajectories, then normalise with
# the same training statistics. NOTE(review): traj is a torch tensor mixed
# with numpy arrays here — relies on torch/numpy interop; confirm dtypes.
mu, sigma = 0, 0.1
X_random_anomaly_norm = [(traj.reshape(-1, state_dim)+np.random.normal(mu, sigma, [len(traj),state_dim]) - state_mean) / state_std for traj in X_train[:2000]]
y_random_anomaly = y_train[:2000]
# load model
# ---- Build model, fit anomaly boundary, evaluate ---------------------------
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='gym-lunarlander')
    parser.add_argument('--dataset', type=str, default='expert')  # medium, medium-replay, medium-expert, expert
    parser.add_argument('--mode', type=str, default='normal')  # normal for standard setting, delayed for sparse
    parser.add_argument('--K', type=int, default=50)  # transformer context length
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--model_type', type=str, default='dt')  # dt for decision transformer, bc for behavior cloning
    parser.add_argument('--embed_dim', type=int, default=256)
    parser.add_argument('--n_layer', type=int, default=1)
    parser.add_argument('--n_head', type=int, default=1)
    parser.add_argument('--activation_function', type=str, default='relu')
    parser.add_argument('--dropout', type=float, default=0.1)
    parser.add_argument('--action_learning_rate', type=float, default=1e-3*5)
    parser.add_argument('--weight_decay', '-wd', type=float, default=1e-5)
    parser.add_argument('--monotonicity_lr', '-m_lr', type=float, default=1e-4)
    parser.add_argument('--max_iters', type=int, default=10)
    parser.add_argument('--num_steps_per_iter', type=int, default=300)
    parser.add_argument('--device', type=str, default='cpu')
    # BUG FIX: the original used type=bool, which treats ANY non-empty
    # string — including "False" — as True. Parse false-y spellings
    # explicitly while keeping the same value-taking CLI shape.
    parser.add_argument('--log_to_wandb', '-w',
                        type=lambda v: str(v).strip().lower() not in ('false', '0', 'no', ''),
                        default=True)
    parser.add_argument('--step_size_up', type=int, default=400)
    args = parser.parse_args()
    variant = vars(args)
    device = variant.get('device')

    # The positional-embedding table must cover the longest training trajectory.
    max_ep_len = max(len(i) for i in X_train)
    model = BC_transformer(
        state_dim=state_dim,
        act_dim=act_dim,
        max_length=variant['K'],
        max_ep_len=max_ep_len,
        hidden_size=variant['embed_dim'],
        n_layer=variant['n_layer'],
        n_head=variant['n_head'],
        n_inner=4 * variant['embed_dim'],
        activation_function=variant['activation_function'],
        n_positions=1024,
        resid_pdrop=variant['dropout'],
        attn_pdrop=variant['dropout'],
    )
    model = model.to(device=device)
    # Load pretrained behaviour-cloning weights; eval mode disables dropout.
    model.load_state_dict(torch.load('./train/trained_models/model_lunarlander'))
    model.eval()

    # Extract behaviour features from the trained model, then fit an
    # Isolation Forest decision boundary on the (normal) training split.
    trained_detector = behavaiour_features(model, act_dim, state_dim, win_len_q, win_len_v, step_size)
    anomaly_algorithm = (
        "Isolation Forest",
        IsolationForest(contamination=0.00035, random_state=33),
    )
    boundary = boundary_model(anomaly_algorithm, trained_detector, X_train_norm, y_train)
    # Evaluate detection of policy anomalies.
    evaluation(boundary, trained_detector, 'policy', X_test_norm, y_test, X_random_anomaly_norm, y_random_anomaly, X_policy_anomaly_norm, y_policy_anomaly, q_anomaly_score=None, v_anomaly_score=None)
    # Evaluate detection of perturbed (noise-injected) anomalies.
    evaluation(boundary, trained_detector, 'perturbed', X_test_norm, y_test, X_random_anomaly_norm, y_random_anomaly, X_policy_anomaly_norm, y_policy_anomaly, q_anomaly_score=None, v_anomaly_score=None)