---
# ######################### #
# Shared configuration file #
# ######################### #
########
# MCTS #
########
# Monte Carlo Tree Search settings; every parameter below is nested under
# the `mcts` key.
mcts:
  # Number of iterations to be performed. The faster the CPU, the higher
  # it should be.
  iterations: 300
  # Maximum depth when performing roll-outs in case no terminal state is
  # found. Recommended: number of rows/columns (if square).
  maximum rollout depth: 5
  # Upper confidence bound exploration parameter Cp.
  Cp: 1.41
  # Discount factor for simulate().
  discount factor: 0.95
###############
# Exploration #
###############
# Name of the exploration policy to use.
exploration policy: epsilon-greedy
# NOTE(review): presumably the number of steps gathered before learning
# starts (0 disables the warm-up) - confirm against the consumer.
initial collect steps: 0
# NOTE(review): presumably the exploration rate is annealed from the start
# value to the end value, reaching it at `final exploration timestep` -
# confirm the schedule shape against the consumer.
start exploration rate: 0.5
end exploration rate: 0.005
final exploration timestep: 5000
##########
# Models #
##########
# Global parameters shared between models
# Learning rate used when training the models.
learning rate: 0.001
# NOTE(review): presumably the minimum number of stored samples (or the
# batch size) required before a replay update - confirm against the consumer.
replay min batch: 32
# NOTE(review): presumably the capacity of the replay memory, in samples -
# confirm against the consumer.
replay memory size: 15000
# Model-specific parameters
# Teammates model: its layers are listed in order under `layers`.
# NOTE(review): each entry looks like "<size>, <activation>" - confirm the
# exact format against the code that parses it.
teammates model:
  layers:
    - 48, relu
    - 48, relu
# DQN model: its layers are listed in order under `layers`.
# NOTE(review): each entry looks like "<size>, <activation>" - confirm the
# exact format against the code that parses it.
dqn:
  layers:
    - 64, relu
    - 64, relu
  # NOTE(review): assumed to be the DQN-specific discount factor (the MCTS
  # section defines its own) - confirm the nesting against the consumer.
  discount factor: 0.95
###########
# PLASTIC #
###########
# Maximum loss for PLASTIC belief updates. Kept as in the original work.
eta: 0.25