In [1]:
# Toggle for tensorboard logging: when True the training cell builds a
# tensorboardX.SummaryWriter and passes it to runner.train(); when False it
# passes None instead.
use_tensorboard = True

In [2]:
# Gym environment id: handed to gym.make() in the training cell and also used
# as a path component of the tensorboard log directory.
env_name = "Pendulum-v0"

In [3]:
from datetime import datetime
from pathlib import Path
import tensorboardX
import gym
from evofuzzy import GymRunner

# Timestamp identifying this run; it names the per-run output directory.
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
print("Run starting at", timestamp)

# The run directory is created unconditionally because runner.save() below
# writes into it whether or not tensorboard logging is enabled. Binding
# `logdir` only inside the `use_tensorboard` branch would raise NameError at
# save time (i.e. after the whole training run) when the flag is False.
tensorboard_dir = f"../tb_logs/gym/{env_name}"
logdir = Path(f"{tensorboard_dir}/{timestamp}").absolute()
logdir.mkdir(parents=True, exist_ok=True)

# Only the writer itself depends on the flag; train() receives None when
# tensorboard logging is switched off.
if use_tensorboard:
    tensorboard_writer = tensorboardX.SummaryWriter(str(logdir))
else:
    tensorboard_writer = None

# NOTE: `env` is intentionally left open; a later cell calls runner.play(env)
# on the same environment object.
env = gym.make(env_name)

# Hyperparameters for the evofuzzy GymRunner. Their exact semantics are
# defined by evofuzzy and are not visible in this notebook.
runner = GymRunner(
    population_size=200,
    hall_of_fame_size=5,
    max_generation=500,
    mutation_prob=0.5,
    crossover_prob=0.5,
    min_tree_height=2,
    max_tree_height=5,
    min_rules=4,
    max_rules=8,
    whole_rule_prob=0.2,
    tree_height_limit=5,
    forgetting=0.5,
)

# Train on the environment, then report and persist the result.
runner.train(env, tensorboard_writer)
# presumably a printable description of the best individual found -- confirm
# against the evofuzzy documentation
print(runner.best_str)
runner.save(logdir / "runner.pkl")

# Run the trained runner in the environment once and show the value returned
# by play().
reward = runner.play(env)
print("Reward:", reward)



Run starting at 20210830-143114

   	        fitness         	        size        
   	------------------------	--------------------
gen	max     	avg     	min	avg  	best
0  	-763.027	-1374.08	18 	38.27	49  

1  	-763.027	-1295.45	18 	34.775	49  

2  	-754.705	-1251.46	18 	34.065	36  

3  	-729.887	-1211.72	18 	32.29 	42  

4  	-729.887	-1212.77	16 	31.77 	42  

5  	-729.887	-1237.24	19 	30.595	42  

6  	-729.887	-1199.97	19 	29.325	42  

7  	-722.677	-1211.85	19 	28.9  	23  

8  	-722.677	-1227.89	19 	28.385	23  

9  	-722.677	-1229.51	18 	27.2  	23  

10 	-715.262	-1196.54	19 	26.53 	21  

11 	-715.262	-1225.34	20 	25.34 	21  

12 	-715.262	-1228.43	19 	25.45 	21  

13 	-635.367	-1245.46	18 	25.325	24  

14 	-634.785	-1212.34	18 	25.13 	25  

15 	-634.785	-1185.03	16 	24.865	25  

16 	-626.545	-1231.7 	15 	24.42 	34  

17 	-624.649	-1236.75	18 	24.43 	34  

18 	-623.433	-1217.74	18 	24.67 	25  

19 	-623.433	-1207.77	18 	24.36 	25  

20 	-623.433	-1233.35	19 	24.29 	25  

21 	-621.412

In [55]:
# Replay the runner in the existing `env` and print the value returned by
# play(). Relies on `runner` and `env` created by the training cell.
# NOTE(review): the recorded output for this cell ("Finished with reward of
# ...") does not match the "Reward: ..." format printed here, and the
# execution count (55) is out of sequence -- the output was probably produced
# by a different version of this cell. Re-run after Restart & Run All to
# refresh it.
reward = runner.play(env)
print("Reward:", reward)

Finished with reward of -1178.9902864177272
