In [1]:
import numpy as np
import pandas as pd
import scipy.stats as sps

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm

# Draw matplotlib figures inline in the notebook output, using the SVG figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Global seaborn settings for every later plot: font scale 1.3, 'darkgrid' style, 'Set2' palette.
sns.set(font_scale=1.3, style='darkgrid', palette='Set2')

In [3]:
from enum import Enum
from collections import defaultdict

import brian2 as b2
import gym
from scipy.optimize import differential_evolution

from Walker import *

from warnings import filterwarnings
filterwarnings(action='ignore', category=DeprecationWarning, module='.*brian2.*')
filterwarnings(action='ignore', category=UserWarning, 
               message='.*WARN: We recommend you to use a symmetric and normalized Box action space.*')

In [None]:
# Starting point for the optimiser (passed as x0 below): 13 controller parameters,
# grouped as labelled on each row.
params0 = [-0.607, -0.311, -1.649, -1.934, # w1-w8
           0.285, 0.143, 0.302, 0.151,     # tau
           0.124, 0.770,                   # a
           0.805, 3.078, -2.120]           # u0, b, w

# (low, high) search interval for every parameter, in the same order as params0.
bounds = (
    [(-4, 0) for _ in range(4)]          # w1-w8
    + [(0.025, 1) for _ in range(4)]     # tau
    + [(0.025, 1.5) for _ in range(2)]   # a
    + [(0, 2), (0, 4), (-4, 0)]          # u0, b, w
)

# Upper limit on optimiser generations; also the length of the progress bar.
n_iter = 10


def callback(x, convergence):
    """Report the optimiser's current best parameters and advance the progress bar.

    Passed to differential_evolution as its per-iteration callback. The second
    parameter must stay named ``convergence``: with this legacy two-argument
    form SciPy supplies that value by keyword.

    Parameters
    ----------
    x : the parameter vector handed over by the optimiser.
    convergence : the convergence value handed over by the optimiser.
    """
    print(f'params = {x}; convergence = {convergence}\n')
    pbar.update(1)


# NOTE(review): differential_evolution *minimises* its objective -- confirm that
# calc_reward (defined outside this notebook) returns a cost, e.g. a negated reward.
# NOTE(review): no seed is passed, so repeated runs will generally differ.
#
# The progress bar is used as a context manager so it is closed even when the
# optimisation raises or the kernel is interrupted; the bar may stop short of
# n_iter if the optimiser finishes early.
with tqdm(total=n_iter) as pbar:
    res = differential_evolution(calc_reward, bounds, maxiter=n_iter, callback=callback,
                                 disp=True, x0=params0, updating='deferred', workers=-1)

In [None]:
# Roll out the optimised controller in the BipedalWalker environment and render it.
env = gym.make('BipedalWalker-v3')
try:
    # Build the controller from the best parameter vector found by the optimiser.
    c = make_controller(res.x)
    observation = env.reset()

    for _ in range(250):
        y = c.step(observation)
        # Action layout: left hip, left knee, right hip, right knee.
        action = np.array([y[Side.LEFT][Joint.HIP], y[Side.LEFT][Joint.KNEE],
                           y[Side.RIGHT][Joint.HIP], y[Side.RIGHT][Joint.KNEE]])
        # Keep every component inside [-1, 1] before handing it to the environment.
        action = np.clip(action, -1, 1)
        observation, reward, done, _ = env.step(action)
        env.render()
        # Stop once the episode has ended: stepping a finished episode without
        # calling reset() is not supported by gym.
        if done:
            break
finally:
    # Always release the environment (and its render window), even on error.
    env.close()

In [None]:
# Pass the controller `c` built for the rollout to plot_all.
# NOTE(review): plot_all is not defined in this notebook -- presumably it comes
# from the `from Walker import *` star import; confirm what it plots.
plot_all(c)