In [1]:
from utils import SimulationEnvironment
from basic_walk.utils import BaseAgent
import sys
import time

from multiprocessing import Pool
import pickle
import matplotlib.pyplot as plt
import numpy as np

from tqc import structures, DEVICE
from tqc.trainer import Trainer
from tqc.structures import Actor, Critic, RescaleAction
from tqc.functions import eval_policy
from tqdm import tqdm
import copy

import warnings
warnings.filterwarnings('ignore')

## Заполняем буффер реплеев

In [2]:
def generate_correct_replays(name, goal_len, episode_length, seed=42):
    print(f"{name} started with goal_len: {goal_len}")
    np.random.seed(seed)
    agent = BaseAgent(random_mode=True, foot_only_mode=True)
    replay_buffer = []
    with SimulationEnvironment('scenes/basic_scene.ttt', headless_mode=True, foot_only_mode=True) as env:
        env = RescaleAction(env, -1., 1.)
        low_action, high_action = env.simulation.get_action_limits()

        state, done = env.reset(), False
        episode_timesteps = 0
        last_replay = []
        percentage_to_achive = 0.05

        while len(replay_buffer) < goal_len:
            episode_timesteps += 1
            action = agent.act(state)
            action = np.array(action)
            action = 2 / (high_action - low_action) * action + 1 - 2 * high_action / (high_action - low_action)
            
            if np.any(action < -1) or np.any(action > 1):
                done = True
            else:
                next_state, reward, done, _ = env.step(action)
                last_replay.append((state, action, next_state, reward, done))
                state = next_state
            
            
            if done or episode_timesteps >= episode_length:
                if not done:
                    replay_buffer += last_replay
#                     print(f"{name}: {len(replay_buffer)} ({float(len(replay_buffer)) / goal_len})")
                    if float(len(replay_buffer)) / goal_len >= percentage_to_achive:
                        print(f"{name}: {int(percentage_to_achive*100)}%")
                        percentage_to_achive += 0.05
                    
                agent = BaseAgent(random_mode=True, foot_only_mode=True)
                state, done = env.reset(), False
                episode_timesteps = 0
                last_replay = []
            

    return replay_buffer

In [3]:
def generate_correct_replays_in_parallel(goal_len=1200, episode_length=600, n_proc=2, seed=42):
    np.random.seed(seed)
    rng = np.random.default_rng()
    seeds = rng.choice(100000, size=n_proc, replace=False)
    
    assert goal_len % (episode_length * n_proc) == 0
    sub_process_goal_len = goal_len / n_proc
    replay_buffer = []
    
    with Pool(processes=n_proc) as pool:
        multiple_results = []
        for i, seed in enumerate(seeds):
            res = pool.apply_async(generate_correct_replays, (i, sub_process_goal_len, episode_length, seed))
            multiple_results.append(res)
        for i, res in enumerate(multiple_results):
            res.wait()
            print("finish", i)

        for res in multiple_results:
            subreplay = res.get()
            replay_buffer += subreplay
    return replay_buffer

In [4]:
%%time
goal_len = 200000
# goal_len = 40000
episode_length = 100
seed = 42

replay_buffer_arr = generate_correct_replays_in_parallel(
    goal_len=goal_len,
    episode_length=episode_length,
    seed=seed,
    n_proc=5
)
buffer_name = "replay_buffer_leg_only"

file_name = f"data/replay_buffers/{buffer_name}_array_{goal_len}_{episode_length}.pickle"
with open(file_name, 'wb') as f:
    pickle.dump(replay_buffer_arr, f)

0 started with goal_len: 40000.01 started with goal_len: 40000.02 started with goal_len: 40000.0

3 started with goal_len: 40000.0

4 started with goal_len: 40000.0
2: 5%
4: 5%
3: 5%
0: 5%
1: 5%
1: 10%
3: 10%
2: 10%
4: 10%
0: 10%
1: 15%
2: 15%
4: 15%
0: 15%
3: 15%
1: 20%
2: 20%
3: 20%
0: 20%
4: 20%
1: 25%
3: 25%
2: 25%
0: 25%
4: 25%
3: 30%
1: 30%
2: 30%
0: 30%
3: 35%
4: 30%
1: 35%
2: 35%
0: 35%
3: 40%
4: 35%
1: 40%
0: 40%
2: 40%
3: 44%
1: 44%
4: 40%
0: 44%
2: 44%
3: 49%
1: 49%
4: 44%
0: 49%
2: 49%
3: 54%
1: 54%
4: 49%
0: 54%
3: 60%
4: 54%
1: 60%
2: 54%
0: 60%
3: 65%
2: 60%
4: 60%
0: 65%
1: 65%
3: 70%
4: 65%
2: 65%
0: 70%
1: 70%
3: 75%
2: 70%
4: 70%
0: 75%
3: 80%
1: 75%
2: 75%
4: 75%
3: 85%
0: 80%
1: 80%
2: 80%
4: 80%
0: 85%
1: 85%
3: 90%
4: 85%
1: 90%
0: 90%
2: 85%
3: 95%
0: 95%
4: 90%
2: 90%
1: 95%
4: 95%
2: 95%
finish 0
finish 1
finish 2
finish 3
finish 4


NameError: name 'SurrogatPyRepEnvironment' is not defined

In [5]:
with SimulationEnvironment('scenes/basic_scene.ttt', headless_mode=True, foot_only_mode=True) as env:
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    
    replay_buffer = structures.ReplayBuffer(state_dim, action_dim)
    
    for q in replay_buffer_arr:
        replay_buffer.add(*q)

    file_name = f"data/replay_buffers/{buffer_name}_{goal_len}_{episode_length}.pickle"
    with open(file_name, 'wb') as f:
        pickle.dump(replay_buffer, f)

In [6]:
replay_buffer.size

160000