Gait 및 필요 패키지 삽입

In [1]:
import mujoco
import gymnasium as gym
import horcrux_terrain_v2
import os
from scipy.io import savemat

import pkg_resources

import time
import numpy as np
import itertools
import gait

from scipy.spatial.transform import Rotation
from multiprocessing import Process, Queue, shared_memory
from collections import deque

  import pkg_resources


+ 필요 함수 정의

In [2]:
def get_unique_filename(base_path, ext=".mp4"):
    """Find a non-existing "<stem>[N]-episode-0<ext>" path for a recording.

    ``ext`` is appended to ``base_path`` when it is not already its suffix.
    The first candidate is "<stem>-episode-0<ext>"; while the candidate exists
    on disk, "<stem>0-episode-0<ext>", "<stem>1-episode-0<ext>", ... are tried.

    Returns:
        A ``(prefix, path)`` tuple where ``path`` is the free candidate and
        ``prefix`` is ``"rl-video<k>"`` with ``k`` one less than the number of
        numbered candidates tried.
    """
    full_path = base_path if base_path.endswith(ext) else base_path + ext
    stem, suffix = os.path.splitext(full_path)

    candidate = stem + "-episode-0" + suffix
    attempts = 0
    while os.path.exists(candidate):
        candidate = f"{stem}{attempts}-episode-0{suffix}"
        attempts += 1

    # NOTE(review): with no collision at all, attempts is 0 and the prefix
    # becomes "rl-video-1" — confirm this is what callers expect.
    return f"rl-video{attempts - 1}", candidate


def default_plot(x, y, f_name='default_plot', legends=['acc_x', 'acc_y', 'acc_z'], title=''):
    """Plot every column of ``y`` against ``x``, save the figure and show it.

    Args:
        x: Values for the horizontal axis, shared by all series.
        y: A 1D series or a 2D array holding one series per column.
        f_name: File stem; the figure is written to ``./figs/{f_name}.png``.
        legends: Legend label per column (only read, never modified). Columns
            without a matching entry are labelled ``series_<index>``.
        title: Title drawn above the axes.

    Returns:
        ``-1`` when ``y`` has more than two dimensions, otherwise ``None``.
    """
    # Validate before touching matplotlib so a rejected input does not leave
    # an empty figure behind.
    series = np.asarray(y)
    if series.ndim > 2:
        print("The dimmension of data must be less than 3. (1D or 2D)")
        return -1
    if series.ndim < 2:
        # Treat a 1D input as a single column.
        series = series.reshape(-1, 1)

    colors = plt.get_cmap("tab10").colors
    fig, ax = plt.subplots(figsize=(15/2.54, 10/2.54))
    ax.set_facecolor((0.95, 0.95, 0.95))

    for i in range(series.shape[1]):
        label = legends[i] if i < len(legends) else f"series_{i}"
        # Wrap around the palette instead of failing on wide inputs.
        ax.plot(x, series[:, i], linewidth=1.5, linestyle="-",
                color=colors[i % len(colors)], label=label)

    # Grid: major and minor lines
    ax.grid(True, linestyle="--", linewidth=1, color="#202020", alpha=0.7)
    ax.minorticks_on()
    ax.grid(True, which="minor", linestyle=":", linewidth=0.5, color="#404040", alpha=0.5)

    # Axis styling
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_linewidth(1.0)

    ax.tick_params(axis="both", labelsize=11, width=1.0)
    ax.xaxis.label.set_size(12)
    ax.yaxis.label.set_size(12)

    # Font and titles
    plt.rcParams["font.family"] = "Arial"
    ax.set_xlabel("X-Axis", fontsize=12, fontweight="bold")
    ax.set_ylabel("Y-Axis", fontsize=12, fontweight="bold")
    ax.set_title(title, fontsize=14, fontweight="bold")

    # Legend (MATLAB-like layout)
    ax.legend(loc="upper right", ncol=3, fontsize=10, frameon=True)

    # 2.1:1 aspect ratio at a height of 5 inches (similar to MATLAB's
    # `pbaspect([2.1 1 1])`).
    fig.set_size_inches(2.1 * 5, 5)

    # Make sure the output directory exists before saving.
    os.makedirs("./figs", exist_ok=True)
    plt.savefig(f"./figs/{f_name}.png", dpi=600, bbox_inches="tight")

    plt.show()

def moving_average(data, window_size):
    """Smooth ``data`` with a uniform window of ``window_size`` samples.

    The convolution uses ``mode='same'``; ``'valid'`` would instead drop the
    boundary samples.
    """
    weights = np.divide(np.ones(window_size), window_size)
    smoothed = np.convolve(data, weights, mode='same')
    return smoothed


def get_data_from_info(info):
    """Collect a sequence of per-step ``info`` dicts into one array per quantity.

    Every output entry is ``np.array`` of the corresponding ``info`` value
    taken step by step; ``stat_xy_vel`` pairs ``x_velocity`` and ``y_velocity``
    of each step.

    Args:
        info: Sequence of dicts (iterated once per extracted quantity), each
            holding all the keys read below.

    Returns:
        Dict mapping output names (``action``, ``stat_*``, reward/cost terms,
        ``input_joy``) to the stacked arrays.
    """
    def stack(key):
        return np.array([step[key] for step in info])

    # Action and the first status entries
    data_dict = {
        'action': stack('action'),
        'stat_init_rpy': stack('init_rpy'),
        'stat_init_com': stack('init_com'),
        'stat_xy_vel': np.array([[step['x_velocity'], step['y_velocity']] for step in info]),
    }

    # Remaining status entries that map one-to-one
    for out_key, src_key in (
        ('stat_yaw_vel', 'yaw_velocity'),
        ('stat_quat', 'head_quat'),
        ('stat_ang_vel', 'head_ang_vel'),
        ('stat_lin_acc', 'head_lin_acc'),
        ('stat_motion_vector', 'motion_vector'),
        ('stat_com_pos', 'com_pos'),
        ('stat_com_ypr', 'com_ypr'),
    ):
        data_dict[out_key] = stack(src_key)

    # 'step_ypr' is read first but stored after 'stat_com_r_ypr'.
    step_ypr = stack('step_ypr')
    data_dict['stat_com_r_ypr'] = stack('reward_func_orientation')
    data_dict['stat_step_ypr'] = step_ypr

    # Reward / cost terms
    for out_key, src_key in (
        ('rew_linear_movement', 'reward_linear_movement'),
        ('reward_angular_movement', 'reward_angular_movement'),
        ('reward_efficiency', 'reward_efficiency'),
        ('reward_healthy', 'reward_healthy'),
        ('cost_ctrl', 'cost_ctrl'),
        ('cost_unhealthy', 'cost_unhealthy'),
        ('cost_orientation', 'cost_orientation'),
        ('cost_yaw_vel', 'cost_yaw_vel'),
        ('direction_similarity', 'direction_similarity'),
        ('rotation_alignment', 'rotation_alignment'),
        ('vel_orientation', 'velocity_theta'),
    ):
        data_dict[out_key] = stack(src_key)

    # Input info
    data_dict['input_joy'] = stack('joy_input')

    return data_dict


+ Env 기본설정값

In [3]:
# Keyword arguments passed to gym.make(...) when the sweep builds its
# environment; "gait_params" is replaced for every sweep point.
env_config = dict(
    forward_reward_weight=100.0,
    rotation_reward_weight=100.0,
    unhealthy_max_steps=80.0,
    healthy_reward=3.0,
    healthy_roll_range=(-40, 40),
    terminating_roll_range=(-80, 80),
    rotation_norm_cost_weight=8.5,
    termination_reward=0,
    gait_params=(30, 30, 40, 40, 0),
    use_friction_chg=False,
    joy_input_random=True,
    use_imu_window=True,
    use_vels_window=True,
    ctrl_cost_weight=0.05,
)

+ Gait 파라미터 세트 지정하기

In [None]:
# Parameter set definition
# P1 : Dorsal Spatial, Range : [25, 65], step 1
# P2 : Lateral Spatial, Range : [25, 65]
# P3 : Dorsal Temporal, Range : [10, 65]
# P4 : Lateral Temporal, Range : [10, 65]
# P5 : Phase, Range : [-3, 3]

# p1 = np.arange(25, 27, 1)
# p2 = np.arange(10, 15, 1)
# p3 = np.arange(-3, 0, 1)

# Serpentine gait param set
# p1 = np.arange(25, 66, 1)
# p2 = np.arange(10, 66, 1)
# p3 = np.arange(-3, 4, 1)

# SW gait param set
p1 = np.arange(25, 66, 1)
p2 = np.arange(10, 66, 1)
p3 = np.arange(42, 49, 1)

# Roll gait param set
# p1 = np.arange(25, 66, 1)
# p2 = np.arange(10, 66, 1)
# p3 = np.arange(87, 94, 1)

# Other gait param set

n1 = len(p1)
n2 = len(p2)
n3 = len(p3)

gait_name = "SW"

# Result grid: one cell per (p1, p2, p3) combination with 10 values each.
# Channel labels: reward, x_vel, y_vel, yaw_vel, x_pos, y_pos, yaw_pos,
# com_y, com_p, com_r.
# NOTE(review): iter_J fills channels 4-6 with the mean per-step change of
# info['reward_func_orientation'], not with positions — confirm the labels.
# vel_map itself is only used for its shape and byte size; the values live in
# the shared-memory backed data_map.
vel_map = np.empty((n1,n2,n3,10), dtype=np.float64)

# A previous run that was interrupted before unlink() leaves the named segment
# behind, which would make create=True fail; drop the stale segment and retry.
# NOTE(review): this assumes no other sweep is using "shared_vel_map" right now.
try:
    shm = shared_memory.SharedMemory(name="shared_vel_map", create=True, size=vel_map.nbytes)
except FileExistsError:
    _stale = shared_memory.SharedMemory(name="shared_vel_map")
    _stale.close()
    _stale.unlink()
    shm = shared_memory.SharedMemory(name="shared_vel_map", create=True, size=vel_map.nbytes)

data_map = np.ndarray(vel_map.shape, dtype=vel_map.dtype, buffer=shm.buf)
combinations = list(itertools.product(p1, p2, p3))

+ 멀티프로세싱 세그먼트 준비

In [5]:
# Split the combination list into 12 equally sized chunks; the integer
# division remainder ends up in the last chunk when the list is sliced.
print(f'Number of Combinations : {len(combinations)}')
ea = len(combinations) // 12
start_idx = [chunk * ea for chunk in range(12)]
print(f'For 12 processes start indices : {start_idx}')

Number of Combinations : 16072
For 12 processes start indices : [0, 1339, 2678, 4017, 5356, 6695, 8034, 9373, 10712, 12051, 13390, 14729]


+ 멀티프로세싱 작업 준비 (gym 기반)

In [6]:
class _MovingAverageFilter3D:
    """Sliding-window mean over three scalar channels (x, y, z)."""

    def __init__(self, window_size=20):
        self.window_size = window_size
        self.x_queue = deque(maxlen=window_size)
        self.y_queue = deque(maxlen=window_size)
        self.z_queue = deque(maxlen=window_size)

    @staticmethod
    def _mean(samples):
        # A zero-length window keeps the deque empty even after append().
        return np.mean(samples) if samples else 0.0

    def update(self, new_x, new_y, new_z):
        """Push one sample per channel and return the current window means."""
        self.x_queue.append(new_x)
        self.y_queue.append(new_y)
        self.z_queue.append(new_z)
        return np.array([
            self._mean(self.x_queue),
            self._mean(self.y_queue),
            self._mean(self.z_queue),
        ])


def _grid_index(axis_values, value):
    """Return the position of ``value`` on a sweep axis, whatever its step."""
    hits = np.flatnonzero(np.asarray(axis_values) == value)
    if hits.size == 0:
        raise ValueError(f"parameter value {value!r} is not on the sweep axis")
    return int(hits[0])


def iter_J(param_iter, shd_name:str, shd_shape) -> None:
    """Simulate every gait parameter triple and store the results in shared memory.

    For each ``(spatial, temporal, phase)`` triple the environment
    ``horcrux_terrain_v2/plane-v2`` is created with
    ``gait_params=(spatial, spatial, temporal, temporal, phase)``, stepped
    2000 times with a constant action, and the grid cell addressed by the
    positions of the triple on the module-level axes ``p1``, ``p2``, ``p3``
    is filled with:

    * channel 0: sum of the step rewards,
    * channels 1-3: mean of the moving-average filtered
      ``x_velocity`` / ``y_velocity`` / ``yaw_velocity``,
    * channels 4-6: mean of the per-step difference (previous minus current)
      of ``info['reward_func_orientation']``,
    * channels 7-9: mean of ``info['reward_func_orientation']``.

    Reads the module globals ``env_config``, ``p1``, ``p2`` and ``p3``; they
    must exist in the process running this function.

    Args:
        param_iter: Sized sequence of parameter triples handled by this call.
        shd_name: Name of the existing shared-memory segment holding the grid.
        shd_shape: Shape of the float64 grid stored in that segment.
    """
    t_start = time.time()
    exist_shm = shared_memory.SharedMemory(name=shd_name)
    try:
        d_map = np.ndarray(shd_shape, dtype=np.float64, buffer=exist_shm.buf)
        n_total = len(param_iter)

        # Optional per-step random torque (disabled): draw
        # action = np.random.normal(m, s), clip it to [0, 2700] and scale by
        # 0.001, using the measured statistics below.
        # Serpentine gait
        # m = [157.9434,  236.2089,  374.3404,  242.5976,  354.4613,  391.8927,  334.5015,  356.9765,  403.4059,  170.1290,  238.0919,  132.8187,  124.3184,  116.8671]
        # s = [148.4869,  216.0080,  251.9615,  180.9668,  219.4103,  300.9064,  208.0863,  274.3248,  278.9040,  161.4832,  246.2632,  105.2138,  94.5821,  66.5773]
        # SW gait
        # m = [226.9149,  314.8914,  188.9860,  363.9839,  318.0925,  686.8243,  340.8768,  734.6390,  266.1755,  524.5500,  146.2553,  211.9047,   93.3026,   95.9254]
        # s = [139.1565,  251.8106,  145.2850,  243.7314,  226.2398,  306.4975,  280.9143,  360.4109,  231.0070,  303.3806,  143.9727,  181.9718,   74.8189,   65.7750]

        for progress, params in enumerate(param_iter):
            spatial, temporal, phase = params[0], params[1], params[2]

            # Work on a copy so the shared module-level config is not mutated.
            run_config = dict(env_config)
            run_config["gait_params"] = (spatial, spatial, temporal, temporal, phase)

            # Filter window: two periods derived from the temporal parameter.
            period = int(((2 * np.pi) / (temporal / 10)) * (1 / (0.005 * 10)))
            mov_filter = _MovingAverageFilter3D(period * 2)

            env = gym.make("horcrux_terrain_v2/plane-v2", **run_config)
            try:
                env.reset()

                rew_return = 0
                xyr_vels = []
                rpy_vels = []
                prev_orientation = np.zeros(3,)
                com_ypr = []

                # NOTE(review): the episode is stepped for a fixed 2000 steps
                # and the terminated/truncated flags are ignored — confirm the
                # environment tolerates stepping past termination.
                for _ in range(2000):
                    # Constant torque on all 14 actuators.
                    action = np.array([0.75] * 14)

                    _obs, step_rew, _terminated, _truncated, env_info = env.step(action)

                    xyr_vels.append(mov_filter.update(
                        env_info['x_velocity'],
                        env_info['y_velocity'],
                        env_info['yaw_velocity'],
                    ))

                    orientation = env_info['reward_func_orientation']
                    # NOTE(review): previous minus current, and the first
                    # sample is measured against zeros — confirm intended.
                    rpy_vels.append(prev_orientation - orientation)
                    prev_orientation = orientation

                    com_ypr.append(orientation)
                    rew_return += step_rew
            finally:
                # Release the simulator even when a step raises.
                env.close()

            mean_xyr_vels = np.mean(xyr_vels, axis=0)
            mean_orientation_delta = np.mean(rpy_vels, axis=0)
            mean_com_ypr = np.mean(com_ypr, axis=0)

            # Address the cell by the position of each parameter on its axis.
            cell = d_map[
                _grid_index(p1, spatial),
                _grid_index(p2, temporal),
                _grid_index(p3, phase),
            ]
            cell[0] = rew_return
            cell[1:4] = mean_xyr_vels
            cell[4:7] = mean_orientation_delta[:3]
            cell[7:10] = mean_com_ypr[:3]

            print(f"[PID {os.getpid()}] 진행률 {progress / n_total * 100:.2f} %")

        elapsed = time.time() - t_start
        print(f"[PID {os.getpid()}] 총 실행 시간: {elapsed:.3f}초")
    finally:
        # Always detach from the segment; the creating process unlinks it.
        exist_shm.close()

+ Mujoco 기반

In [7]:
# __mjcf_model_path__ = pkg_resources.resource_filename("horcrux_terrain_v2", "resources/horcrux_plane.xml")
# snake = mujoco.MjModel.from_xml_path(__mjcf_model_path__)
# data = mujoco.MjData(snake)
# mujoco.mj_forward(snake, data)

+ 멀티프로세싱 시작

In [None]:
# One worker per start index: worker k handles
# combinations[start_idx[k]:start_idx[k + 1]] and the last one runs to the end.
_bounds = list(start_idx) + [len(combinations)]
workers = [
    Process(target=iter_J, args=(combinations[lo:hi], shm.name, vel_map.shape))
    for lo, hi in zip(_bounds[:-1], _bounds[1:])
]

try:
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # A crashed worker leaves its cells of the grid unfilled; make that visible.
    failed_pids = [worker.pid for worker in workers if worker.exitcode != 0]
    if failed_pids:
        print(f"Warning: worker processes {failed_pids} exited abnormally; "
              "part of vel_map may be unfilled")

    # Copy the grid out of shared memory so data_dict stays valid after the
    # segment is closed below.
    data_dict = {'vel_map': data_map.copy(), 'param_p1': p1, 'param_p2': p2, 'param_p3': p3}

    # Save the data to a .mat file
    savemat("Side_mov.mat", data_dict)
finally:
    # Release the segment even if a worker or the save failed, so the next run
    # can create it again. data_map must not be used after this point.
    shm.close()
    shm.unlink()

print('done')

[PID 31970] 진행률 0.00 %
[PID 31974] 진행률 0.00 %
[PID 31969] 진행률 0.00 %
[PID 31971] 진행률 0.00 %[PID 31968] 진행률 0.00 %

[PID 31967] 진행률 0.00 %
[PID 31973] 진행률 0.00 %
[PID 31972] 진행률 0.00 %
[PID 31975] 진행률 0.00 %
[PID 31977] 진행률 0.00 %
[PID 31966] 진행률 0.00 %
[PID 31976] 진행률 0.00 %
[PID 31970] 진행률 0.07 %
[PID 31974] 진행률 0.07 %
[PID 31968] 진행률 0.07 %
[PID 31969] 진행률 0.07 %
[PID 31973] 진행률 0.07 %[PID 31971] 진행률 0.07 %

[PID 31975] 진행률 0.07 %
[PID 31977] 진행률 0.07 %
[PID 31972] 진행률 0.07 %
[PID 31967] 진행률 0.07 %
[PID 31976] 진행률 0.07 %
[PID 31966] 진행률 0.07 %
[PID 31970] 진행률 0.15 %
[PID 31974] 진행률 0.15 %
[PID 31968] 진행률 0.15 %
[PID 31977] 진행률 0.15 %
[PID 31973] 진행률 0.15 %
[PID 31975] 진행률 0.15 %
[PID 31969] 진행률 0.15 %
[PID 31972] 진행률 0.15 %
[PID 31971] 진행률 0.15 %
[PID 31967] 진행률 0.15 %
[PID 31976] 진행률 0.15 %
[PID 31966] 진행률 0.15 %
[PID 31970] 진행률 0.22 %
[PID 31974] 진행률 0.22 %
[PID 31968] 진행률 0.22 %
[PID 31977] 진행률 0.22 %
[PID 31975] 진행률 0.22 %
[PID 31973] 진행률 0.22 %
[PID 31971] 진행률 0.22 %
[PID 31969]