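- data  -> equal number of samples from each saved best individual (generations 0 -> 950, every 50th)
- data2 -> all samples from random actions
- data3 -> 1/25 from random actions, the rest from the best individual with action noise
- data4 -> 1/10 from random actions, the rest from the best individual with action noise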

In [0]:
# Install a pinned pybullet release.
!pip install pybullet==2.5.5

# Clone pybullet-gym next to the installed packages and install it in
# editable mode from inside its checkout.
%cd /usr/local/lib/python3.6/dist-packages
!git clone https://github.com/benelot/pybullet-gym.git
%cd pybullet-gym
!pip install -e .
# A bare %cd switches to the home directory; ../content is then presumably
# the Colab working directory (/content) - TODO confirm on the target runtime.
%cd
%cd ../content

Collecting pybullet==2.5.5
  Using cached https://files.pythonhosted.org/packages/d4/6c/6b14ae6d1d8f10f16ea82c2c194394564b02c80b88b6e391470046968c7b/pybullet-2.5.5.tar.gz
Building wheels for collected packages: pybullet
  Building wheel for pybullet (setup.py) ... [?25l[?25hdone
  Created wheel for pybullet: filename=pybullet-2.5.5-cp36-cp36m-linux_x86_64.whl size=71822342 sha256=34dcfa2b4b23654149ed470ba8c8cc00b7060ca69d9e76cca5f71e2b0ad0bd4a
  Stored in directory: /root/.cache/pip/wheels/1d/e4/cc/7b50d6689e1bc6ba07d2df04946a0eabc89deca7caed5f52d1
Successfully built pybullet
Installing collected packages: pybullet
Successfully installed pybullet-2.5.5
/usr/local/lib/python3.6/dist-packages
Cloning into 'pybullet-gym'...
remote: Enumerating objects: 735, done.[K
remote: Total 735 (delta 0), reused 0 (delta 0), pack-reused 735[K
Receiving objects: 100% (735/735), 19.29 MiB | 19.11 MiB/s, done.
Resolving deltas: 100% (405/405), done.
/usr/local/lib/python3.6/dist-packages/pybullet-gy

In [0]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from copy import deepcopy

import gym
import pybulletgym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only

import numpy as np
from tqdm import trange
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay
import pickle

In [0]:
# True when PyTorch can see a CUDA device; to_tensor() reads this flag to
# decide whether freshly created tensors are moved to the GPU.
CUDA = torch.cuda.is_available()

def to_np(x):
    """Convert tensor ``x`` to a NumPy array, detached from autograd and on the CPU."""
    detached = x.detach()
    on_host = detached.cpu()
    return on_host.numpy()

def to_tensor(x, requires_grad=False):
    """Build a detached torch tensor from the NumPy array ``x``.

    The tensor is moved to the GPU when the module-level ``CUDA`` flag is set
    and is always cloned, so it does not share memory with ``x``.

    Args:
        x: NumPy array to convert.
        requires_grad: when True, the returned tensor tracks gradients.

    Returns:
        A contiguous, detached copy of ``x`` as a torch tensor.
    """
    tensor = torch.from_numpy(x)
    if CUDA:
        tensor = tensor.cuda()

    result = tensor.clone().contiguous().detach()
    return result.requires_grad_(True) if requires_grad else result

In [0]:
class AgentNetwork(nn.Module):
    """Two-layer tanh policy whose weights can be read and written as one flat vector.

    The default sizes (111 inputs, 100 hidden units, 8 outputs) reproduce the
    architecture that used to be hard-coded, so ``AgentNetwork()`` is unchanged.

    Args:
        obs_dim: number of input features per sample.
        hidden_dim: width of the hidden layer.
        act_dim: number of outputs; each output lies in [-1, 1] (final Tanh).
    """

    def __init__(self, obs_dim=111, hidden_dim=100, act_dim=8):
        super(AgentNetwork, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(obs_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, act_dim),
            nn.Tanh()
        )

    def forward(self, X):
        """Flatten every sample in the batch ``X`` and run it through the layers."""
        # reshape() also accepts non-contiguous input, where view() would raise.
        X = X.reshape(X.size(0), -1)
        # Call the module itself (not .forward) so registered hooks still run.
        return self.layers(X)

    def set_params(self, params):
        """Overwrite all weights from the flat vector ``params``.

        Values are consumed in ``self.parameters()`` order, i.e. the layout
        produced by ``get_params``. Entries beyond the number of network
        parameters are ignored.

        Args:
            params: 1-D array-like of numbers.

        Raises:
            ValueError: if ``params`` holds fewer values than the network has
                parameters. Nothing is modified in that case.
        """
        flat = np.asarray(params).ravel()
        expected = sum(p.numel() for p in self.parameters())
        if flat.size < expected:
            raise ValueError(
                f'expected at least {expected} parameter values, got {flat.size}')

        offset = 0
        with torch.no_grad():
            for param in self.parameters():
                # numel() replaces np.product, which NumPy 2.0 removed.
                count = param.numel()
                chunk = torch.from_numpy(flat[offset:offset + count])
                # copy_ converts dtype and device, so no CUDA branch is needed.
                param.copy_(chunk.view_as(param))
                offset += count

    def get_params(self):
        """Return a fresh flat NumPy copy of all weights in ``self.parameters()`` order."""
        # concatenate allocates a new array, so the result never aliases the
        # live parameters and no extra deepcopy is required.
        return np.concatenate([p.detach().cpu().numpy().ravel()
                               for p in self.parameters()])

In [0]:
from tqdm import tqdm, trange

In [0]:
def collect_examples(goal=2500000, max_episode_steps=10000):
    """Collect reward samples from uniformly random actions in 'AntMuJoCoEnv-v0'.

    Episodes are run until at least ``goal`` samples exist; the last episode
    is not cut short, so slightly more than ``goal`` rows may be returned.

    Args:
        goal: minimum number of samples to gather.
        max_episode_steps: upper bound on the steps taken in a single episode.

    Returns:
        ``(X, Y)`` as NumPy arrays. Each row of ``X`` is the observation
        followed by the action taken from it; ``Y`` holds the reward returned
        by that step.
    """
    X, Y = [], []

    pbar = tqdm(total=goal, position=0, leave=True)
    env = gym.make('AntMuJoCoEnv-v0')

    try:
        while len(Y) < goal:
            steps = 0
            try:
                observation = env.reset()

                for _ in range(max_episode_steps):
                    env.render()

                    action = env.action_space.sample()
                    row = list(observation) + list(action)

                    observation, reward, done, info = env.step(action)

                    # Store the pair only once the step has succeeded, so X
                    # and Y cannot drift apart when the simulator raises.
                    X.append(row)
                    Y.append(reward)
                    steps += 1

                    if done:
                        break
            except Exception as exc:
                # Best effort: report the failure and continue with a fresh
                # environment. KeyboardInterrupt is deliberately not caught,
                # so the cell can still be stopped.
                tqdm.write(f'episode aborted, recreating environment: {exc!r}')
                env.close()
                env = gym.make('AntMuJoCoEnv-v0')
            finally:
                # Advance by the number of samples actually stored; the loop
                # index would under-count every episode by one.
                pbar.update(steps)
    finally:
        pbar.close()
        env.close()

    return np.array(X), np.array(Y)

In [0]:
def collect_from_agents(goal=100000, generations=None,
                        history_dir='drive/My Drive/project_evo/history',
                        max_episode_steps=10000):
    """Collect reward samples by running saved individuals in 'AntMuJoCoEnv-v0'.

    For every entry ``i`` of ``generations`` the pickle
    ``{history_dir}/{i}.pkl`` is loaded, its ``'best'`` parameter vector is put
    into an ``AgentNetwork`` and episodes are run until at least ``goal``
    samples were gathered for that individual.

    Args:
        goal: minimum number of samples per individual.
        generations: iterable of checkpoint ids; defaults to
            ``np.arange(0, 1000, 50)``.
        history_dir: directory holding the ``{i}.pkl`` checkpoints.
            NOTE(review): collect_with_noise reads from a ``history/try1``
            sub-directory - confirm which location is the right one here.
        max_episode_steps: upper bound on the steps taken in a single episode.

    Returns:
        ``(X, Y)`` as NumPy arrays. Each row of ``X`` is the observation
        followed by the action chosen for it; ``Y`` holds the reward returned
        by that step.
    """
    if generations is None:
        generations = np.arange(0, 1000, 50)

    X, Y = [], []

    env = gym.make('AntMuJoCoEnv-v0')

    try:
        for i in generations:
            # NOTE: pickle can execute arbitrary code while loading; only
            # read checkpoint files that come from a trusted source.
            with open(f'{history_dir}/{i}.pkl', 'rb') as fh:
                ind = pickle.load(fh)['best']
            actor = AgentNetwork()
            actor.set_params(ind)

            tmpx, tmpy = [], []

            pbar = tqdm(total=goal, desc=f'{i}', position=0, leave=True)
            try:
                while len(tmpy) < goal:
                    steps = 0
                    try:
                        l_obs = env.reset()

                        for _ in range(max_episode_steps):
                            env.render()

                            # Pure inference: no autograd graph is needed.
                            with torch.no_grad():
                                observation = to_tensor(
                                    l_obs.reshape(1, -1).astype(np.float32))
                                action = to_np(actor(observation))[0]

                            row = list(l_obs) + list(action)

                            l_obs, reward, done, info = env.step(action)

                            # Store the pair only once the step has succeeded,
                            # so inputs and rewards stay aligned.
                            tmpx.append(row)
                            tmpy.append(reward)
                            steps += 1

                            if done:
                                break
                    except Exception as exc:
                        # Best effort: report the failure and continue with a
                        # fresh environment. KeyboardInterrupt is not caught.
                        tqdm.write(
                            f'episode aborted, recreating environment: {exc!r}')
                        env.close()
                        env = gym.make('AntMuJoCoEnv-v0')
                    finally:
                        # Count stored samples, not the last loop index.
                        pbar.update(steps)
            finally:
                # Every individual gets its own bar, so each one is closed.
                pbar.close()

            X.extend(tmpx)
            Y.extend(tmpy)
    finally:
        env.close()

    return np.array(X), np.array(Y)

In [0]:
def collect_with_noise(goal=2500000, random_fraction=0.1,
                       best_path='drive/My Drive/project_evo/history/try1/950.pkl'):
    """Collect reward samples from random actions and from a noisy saved policy.

    The first ``random_fraction`` of ``goal`` is gathered with uniformly
    random actions (episodes of at most 1000 steps). The remainder comes from
    the ``'best'`` individual stored in ``best_path`` with Gaussian noise
    added to its actions (episodes of at most 10000 steps). Episodes are not
    cut short, so each phase may slightly exceed its target.

    Args:
        goal: minimum total number of samples.
        random_fraction: share of ``goal`` taken from random actions.
        best_path: pickle file whose ``'best'`` entry is the policy's flat
            parameter vector.

    Returns:
        ``(X, Y)`` as NumPy arrays. Each row of ``X`` is the observation
        followed by the action taken from it; ``Y`` holds the reward returned
        by that step.
    """
    X, Y = [], []
    random_goal = int(round(goal * random_fraction))

    # Load the policy before any simulation so a missing file fails at once.
    # NOTE: pickle can execute arbitrary code while loading; only read
    # checkpoint files that come from a trusted source.
    with open(best_path, 'rb') as fh:
        ind = pickle.load(fh)['best']
    actor = AgentNetwork()
    actor.set_params(ind)

    def random_action(env, obs):
        """Sample uniformly from the environment's action space."""
        return env.action_space.sample()

    def noisy_best_action(env, obs):
        """Action of the saved policy plus per-dimension Gaussian noise."""
        with torch.no_grad():
            action = to_np(actor(to_tensor(obs.reshape(1, -1).astype(np.float32))))[0]
        # A fresh standard deviation in [0, 0.1) is drawn for every dimension.
        noise = np.random.normal(0, np.random.random(action.shape) / 10)
        # Clip the resulting action, not the noise: clipping noise this small
        # to [-1, 1] did nothing and let actions leave the [-1, 1] range.
        return np.clip(action + noise, -1, 1).astype(action.dtype)

    pbar = tqdm(total=goal, position=0, leave=True)
    env = gym.make('AntMuJoCoEnv-v0')

    try:
        # (cumulative sample target, max steps per episode, action chooser)
        phases = (
            (random_goal, 1000, random_action),
            (goal, 10000, noisy_best_action),
        )
        for target, max_steps, choose_action in phases:
            while len(Y) < target:
                steps = 0
                try:
                    obs = env.reset()

                    for _ in range(max_steps):
                        env.render()

                        action = choose_action(env, obs)
                        row = list(obs) + list(action)

                        obs, reward, done, info = env.step(action)

                        # Store the pair only once the step has succeeded, so
                        # X and Y cannot drift apart when the simulator raises.
                        X.append(row)
                        Y.append(reward)
                        steps += 1

                        if done:
                            break
                except Exception as exc:
                    # Best effort: report the failure and continue with a
                    # fresh environment. KeyboardInterrupt is not caught.
                    tqdm.write(f'episode aborted, recreating environment: {exc!r}')
                    env.close()
                    env = gym.make('AntMuJoCoEnv-v0')
                finally:
                    # Count stored samples, not the last loop index.
                    pbar.update(steps)
    finally:
        pbar.close()
        env.close()

    return np.array(X), np.array(Y)

In [0]:
X, Y = collect_from_agents()

# Write through a context manager so the file is flushed and closed when the
# cell finishes instead of whenever the handle happens to be collected.
with open('drive/My Drive/project_evo/data.pkl', 'w+b') as fh:
    pickle.dump({'X': X, 'Y': Y}, fh, pickle.HIGHEST_PROTOCOL)

current_dir=/usr/local/lib/python3.6/dist-packages/pybullet_envs/bullet
WalkerBase::__init__


FileNotFoundError: ignored

In [0]:
X, Y = collect_examples()

# Write through a context manager so the file is flushed and closed when the
# cell finishes instead of whenever the handle happens to be collected.
with open('drive/My Drive/project_evo/data2.pkl', 'w+b') as fh:
    pickle.dump({'X': X, 'Y': Y}, fh, pickle.HIGHEST_PROTOCOL)

  0%|          | 0/2500000 [00:00<?, ?it/s]

current_dir=/usr/local/lib/python3.6/dist-packages/pybullet_envs/bullet
WalkerBase::__init__
options= 


100%|█████████▉| 2497500/2500000 [1:00:02<00:03, 693.32it/s]


In [0]:
X, Y = collect_with_noise()

# Write through a context manager so the file is flushed and closed when the
# cell finishes instead of whenever the handle happens to be collected.
with open('drive/My Drive/project_evo/data4.pkl', 'w+b') as fh:
    pickle.dump({'X': X, 'Y': Y}, fh, pickle.HIGHEST_PROTOCOL)

  0%|          | 0/2500000 [00:00<?, ?it/s]

current_dir=/usr/local/lib/python3.6/dist-packages/pybullet_envs/bullet
WalkerBase::__init__
options= 


100%|█████████▉| 2497500/2500000 [1:30:35<00:05, 459.45it/s]


In [0]:
# Reload the noisy-policy dataset. The context manager closes the file handle.
# NOTE: pickle can execute arbitrary code while loading; only open files that
# this notebook wrote itself.
with open('drive/My Drive/project_evo/data4.pkl', 'rb') as fh:
    d = pickle.load(fh)
X, Y = d['X'], d['Y']

In [0]:
# Reload the random-action dataset. The context manager closes the file handle.
# NOTE(review): when the notebook is run top to bottom this replaces the X, Y
# loaded from data4.pkl in the previous cell, while the batching cell below
# writes into the data4/ folder - run only the load cell that matches.
with open('drive/My Drive/project_evo/data2.pkl', 'rb') as fh:
    data = pickle.load(fh)
X, Y = data['X'], data['Y']

In [0]:
# Split the first 2,000,000 samples of X / Y into B_NO equally sized batches
# and store each batch as a dict of tensors. Rows past 2,000,000 are not written.
B_NO = 10
in_b = 2000000 // B_NO  # samples per batch
for b in trange(B_NO, position=0):
    bx, by = X[b*in_b:(b+1)*in_b], Y[b*in_b:(b+1)*in_b]
    # X is stored as float32; Y keeps its dtype and becomes a column vector.
    batch = {'X': to_tensor(bx.astype(np.float32)), 'Y': to_tensor(by.reshape(-1, 1))}
    # The context manager closes every batch file as soon as it is written.
    with open(f'drive/My Drive/project_evo/data4/b{b}.pkl', 'w+b') as fh:
        pickle.dump(batch, fh, pickle.HIGHEST_PROTOCOL)

100%|██████████| 10/10 [00:07<00:00,  1.42it/s]


In [0]:
# Sanity check: shape of the third batch when 2,000,000 samples are split
# into 100 batches.
B_NO = 100
in_b = 2000000 // B_NO
X[2 * in_b:3 * in_b].shape

(20000, 119)