In [None]:
!apt-get update && apt-get install -y ffmpeg freeglut3-dev xvfb  # For visualization
!pip install "stable-baselines3[extra]>=2.0.0a4"

In [None]:
import stable_baselines3

print(f"{stable_baselines3.__version__=}")

## Imports

Stable-Baselines3 works on environments that follow the [gym interface](https://stable-baselines.readthedocs.io/en/master/guide/custom_env.html).
You can find a list of available environments [here](https://gym.openai.com/envs/#classic_control).

It is also recommended to check the [source code](https://github.com/openai/gym) to learn more about the observation and action spaces of each environment, as gym does not have proper documentation.
Not all algorithms work with all action spaces; you can find more details in this [recap table](https://stable-baselines.readthedocs.io/en/master/guide/algos.html).

In [None]:
# !pip install moviepy

In [None]:
# import moviepy
# print(f"{moviepy.__version__=}")

In [None]:
# !sudo apt -y update
# !sudo apt -y install ffmpeg
# !pip install decorator
# !pip install moviepy --upgrade
# !pip install ffmpeg --upgrade

In [None]:
# from IPython.core.display import HTML
# HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [None]:
import gymnasium as gym
import numpy as np

print(f"{gym.__version__=}")

The first thing you need to import is the RL model. Check the documentation to see which algorithm can be used for which kind of problem.

In [None]:
# from stable_baselines3 import PPO

In [None]:
from stable_baselines3.ppo import MlpPolicy

In [None]:
# import gym
from gymnasium import logger, spaces
import math
import numpy as np
import random
import matplotlib.pyplot as plt
from gymnasium.spaces.discrete import Discrete
import io
import matplotlib
matplotlib.use('agg')  # turn off interactive backend
# matplotlib.use( 'plt' )
import numpy as np

from typing import Optional
from matplotlib.patches import Wedge

class EversionRobot(gym.Env):
    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 15,
    }
    def __init__(self, obs_use : Optional[bool] = False,render_mode : Optional[str]='human'):
        """Set up the spaces, robot geometry parameters and optional obstacles.

        Args:
            obs_use: When truthy, three fixed circular obstacles are created and
                the observation gains a fifth component (the near-obstacle flag
                written by ``step``).
            render_mode: Stored on the instance; ``render`` returns an image
                array only when this equals ``'rgb_array'``.
        """
        super(EversionRobot, self).__init__()
        self.MAX_EPISODE = 100  # initial value of the per-episode step budget
        self.x_threshold = 5  # workspace half-width; also the observation bound
        self.use_obstacle = obs_use

        # Observation layout: [x_tip, y_tip, x_target, y_target] plus, with
        # obstacles enabled, one extra flag component bounded by +/-2.0.
        bounds = [self.x_threshold] * 4
        if self.use_obstacle:
            bounds.append(2.0)
        high = np.array(bounds, dtype=np.float32)

        # ``step`` reacts to actions 0/1 (extend/retract) and 3/4 (bend +/-);
        # action 2 leaves length and bending untouched.
        self.action_space = Discrete(5)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.steps_left = self.MAX_EPISODE
        # Rectangle [low, high] from which the target point is sampled.
        self.low = [-1.5, 0.5]
        self.high = [1.5, 3.0]
        self.x_target = [random.uniform(self.low[0],self.high[0]), random.uniform(self.low[1],self.high[1])]
        # Keep the initial state the same size as the observation space
        # (previously it always had five entries, unlike ``reset``).
        self.state = [0, 0, self.x_target[0], self.x_target[1]]
        if self.use_obstacle:
            self.state.append(0)

        self.init_length = 0.1
        self.length = self.init_length  # length of the segment currently being grown
        self.bending = 0  # bending angle (radians) of the current segment
        self.delta_length = 0.1  # length change per extend/retract action
        self.delta_bending = 0.1  # bending change per bend action
        self.segment_num = 1  # NOTE(review): not read anywhere in the visible class
        self.segment_num_max = 20
        self.length_max = 0.5  # a segment longer than this spills into a new segment
        self.length_array = [self.length]
        self.bending_array = [self.bending]
        self.T_static = np.eye(3)
        self.safety_param = 2  # safety zone = safety_param * obstacle radius
        self.safety_penalty = 10  # NOTE(review): not read anywhere in the visible class
        self.render_mode = render_mode
        self.ang_deg = 0
        # Look-ahead sector drawn by ``render``; filled in by ``obstacle_vector``.
        self.center = None
        self.radius_area = 0.3
        self.start_angle = None
        self.end_angle = None
        self.flag_collision = False
        # Obstacle's centre position and radius
        if self.use_obstacle:
            self.obs_center = [
                np.array([0.0, 2.0]),
                np.array([-1.1, 1.2]),
                np.array([0.7, 1.0]),
            ]
            self.radius = [0.1] * len(self.obs_center)

    def constant_curvature(self, bending, length):
        """Tip position of one constant-curvature arc, expressed in its own base frame.

        Args:
            bending: Total bending angle of the arc in radians.
            length: Arc length.

        Returns:
            ``(x_tip, y_tip)``. A zero bending angle is treated as a straight
            segment along +y, i.e. ``(0, length)``.
        """
        if bending == 0:
            return 0, length
        arc_x = (1 - math.cos(bending)) * length / bending
        arc_y = math.sin(bending) * length / bending
        return arc_x, arc_y

    def static_segment(self, bending_array, length_array, segment_index):
        T_multi = np.eye(3)
        for i in range(0, segment_index):
            bending = self.bending_array[i]
            length = self.length_array[i]
            if bending != 0:
                x_tip = (1-math.cos(bending))*length/bending
                y_tip = math.sin(bending)*length/bending
            else:
                x_tip = 0
                y_tip = length
            T_single = np.array([[math.cos(bending), math.sin(bending), x_tip],
                                [-math.sin(bending), math.cos(bending), y_tip],
                                [0, 0, 1]])
            T_multi = T_multi@T_single

        return T_multi

    def pose_segment(self, segment_index):
        T_prior_segment = self.static_segment(self.bending_array, self.length_array, segment_index)
        indeks_maks = math.floor(self.length_array[segment_index]/self.delta_length)
        if indeks_maks!=0:
            bending_increment = self.bending_array[segment_index]/indeks_maks
        else:
            bending_increment = 0
        x_array = []
        y_array = []
        for i in range(0,indeks_maks):
            x_tip, y_tip = self.constant_curvature(bending_increment*i, self.delta_length*i)
            T_single = np.array([[math.cos(bending_increment*i), math.sin(bending_increment*i), x_tip],
                                [-math.sin(bending_increment*i), math.cos(bending_increment*i), y_tip],
                                [0, 0, 1]])
            transform_xy = T_prior_segment@T_single
            x_tip, y_tip = transform_xy[0,2], transform_xy[1,2]
            x_array.append(x_tip)
            y_array.append(y_tip)
        return x_array, y_array

    def check_collision(self):
        collision = False
        x_array, y_array = self.pose_segment(len(self.length_array)-1)
        for i in range(0, len(self.obs_center)):
            for j in range(0, len(x_array)):
                distance_vect = np.array([[x_array[j]-self.obs_center[i][0]],
                                            [y_array[j]-self.obs_center[i][1]]])
                distance_scalar = np.linalg.norm(distance_vect)
                if(distance_scalar<=self.radius[i]):
                    collision = True
                    break

            if(collision == True):
                break

        return collision

    def check_safety(self):
        danger = False
        x_array, y_array = self.pose_segment(len(self.length_array)-1)
        for i in range(0, len(self.obs_center)):
            for j in range(0, len(x_array)):
                distance_vect = np.array([[x_array[j]-self.obs_center[i][0]],
                                            [y_array[j]-self.obs_center[i][1]]])
                distance_scalar = np.linalg.norm(distance_vect)
                if(distance_scalar<=self.safety_param*self.radius[i]):
                    danger = True
                    break

            if(danger == True):
                break

        return danger
    def calculate_angle(self,point1, point2):
        """Direction of the vector from ``point1`` to ``point2`` in degrees.

        Both points are ``(x, y)`` pairs. The angle is measured from the +x
        axis with ``atan2``, so it lies in [-180, 180].
        """
        (start_x, start_y), (end_x, end_y) = point1, point2
        return math.degrees(math.atan2(end_y - start_y, end_x - start_x))
    def is_point_inside_sector(self, point, center, start_angle, end_angle, radius=0.5):
        """Test whether ``point`` lies inside a circular sector around ``center``.

        Args:
            point: ``(x, y)`` of the point to test.
            center: ``(x, y)`` of the sector apex.
            start_angle: Sector start in degrees, counter-clockwise from +x.
            end_angle: Sector end in degrees; the sector sweeps from
                ``start_angle`` up to ``end_angle``.
            radius: Sector radius (defaults to the previously hard-coded 0.5).

        Returns:
            ``True`` when the point is within ``radius`` of the centre and its
            bearing falls inside the sweep (both boundaries inclusive).

        The bearing is compared modulo 360 degrees, so sectors that straddle
        the +/-180 degree cut (e.g. 170..215) are handled; a plain
        ``start <= angle <= end`` comparison misses points there because the
        bearing is reported in (-180, 180].
        """
        dx = point[0] - center[0]
        dy = point[1] - center[1]
        if np.sqrt(dx**2 + dy**2) > radius:
            return False

        sweep = end_angle - start_angle
        if sweep >= 360:
            # The sector covers the whole disc.
            return True
        angle = np.angle(complex(dx, dy), deg=True)
        # Offset of the bearing from the sector start, folded into [0, 360).
        # A negative sweep (end before start) never matches, as before.
        return bool((angle - start_angle) % 360 <= sweep)

    def obstacle_vector(self, dist_to_goal):
        limit_obs = self.radius_area
        # gain_obs = 3.0
        gain_obs = 3

        min_distance = 100
        min_index_i = 0
        # min_index_j = 0
        x_1 = None
        x_array, y_array = self.pose_segment(len(self.length_array)-1)
        if(len(x_array)>=2):
          x_array = x_array[-2:]
          y_array = y_array[-2:]
          x_1,x_2 = x_array
          y_1,y_2 = y_array
        elif(len(self.length_array)>1):
          x_2 = x_array[0]
          y_2 = y_array[0]
          x_array, y_array = self.pose_segment(len(self.length_array)-2)
          x_1 = x_array[-1:]
          y_1 = y_array[-1:]
        else:
          x_1 = None

        if(x_1 is not None):
          angle = self.calculate_angle((x_1,y_1),(x_2,y_2))
          total_angle = 45
          start_angle = angle-(total_angle/2)
          end_angle = start_angle + total_angle
          forward_reward = 0

          center = (x_2,y_2)
          self.center = center
          self.start_angle = start_angle
          self.end_angle = end_angle

          distance_vect_min = np.array([[min_distance],[min_distance]])

          for i in range(0, len(self.obs_center)):
              if(self.is_point_inside_sector(self.obs_center[i],(x_2,y_2),start_angle,end_angle)):
                distance_vect = np.array([[x_2-self.obs_center[i][0]],
                                            [y_2-self.obs_center[i][1]]])
                distance_scalar = np.linalg.norm(distance_vect)
                if(distance_scalar<min_distance):
                    min_distance = distance_scalar
                    distance_vect_min = distance_vect
                    min_index_i = i
                  # min_index_j = j
          if(self.is_point_inside_sector(self.obs_center[min_index_i],(x_2,y_2),start_angle,end_angle)):
            direction_obs = distance_vect_min/min_distance
            if(np.linalg.norm(dist_to_goal)>0):
                direction_goal = dist_to_goal/np.linalg.norm(dist_to_goal)
            else:
                direction_goal = dist_to_goal
            cos_angle = np.dot(direction_goal, direction_obs)
            if((min_distance-self.radius[min_index_i])<=limit_obs and abs(cos_angle)>0.3):
                distance_to_surface = min_distance - self.radius[min_index_i]
                avoidance_term = (1.0/distance_to_surface - 1.0/limit_obs)*gain_obs
            elif((min_distance-self.radius[min_index_i])<=limit_obs):
                avoidance_term = 100000
            else:
                avoidance_term = 0
            return -avoidance_term , forward_reward
        return 0,0

        # if((min_distance-self.radius[min_index_i])<=limit_obs and abs(cos_angle>0.3)):
        #     distance_to_surface = min_distance - self.radius[min_index_i]
        #     avoidance_term = (1.0/distance_to_surface - 1.0/limit_obs)*gain_obs
        #     if(avoidance_term > 100000):
        #       avoidance_term = 10000
        # elif((min_distance-self.radius[min_index_i])<=limit_obs):
        #     avoidance_term = 100000
        # else:
        #     avoidance_term = 0



        # if((min_distance-self.radius[min_index_i])<=limit_obs and abs(cos_angle)>0.2):
        #     distance_to_surface = min_distance - self.radius[min_index_i]
        #     avoidance_term = (1.0/distance_to_surface - 1.0/limit_obs)*gain_obs
        # elif((min_distance-self.radius[min_index_i])<=limit_obs):
        #     avoidance_term = -100000
        # else:
        #     avoidance_term = 0

    def step(self, action):
        self.act = action
        save_state = self.state
        if action == 0:
            self.length = self.length+self.delta_length
        elif action == 1:
            self.length = self.length-self.delta_length
        elif action == 3:
            self.bending = self.bending+self.delta_bending
        elif action == 4:
            self.bending = self.bending-self.delta_bending
        if(self.bending>math.pi):
            self.bending = -math.pi
        elif(self.bending<-math.pi):
            self.bending = math.pi
        self.length = round(self.length,1)
        # Update length_array and bending_array
        if(self.length<=self.length_max and self.length>0):
            self.length_array[len(self.length_array)-1] = self.length
            self.bending_array[len(self.bending_array)-1] = self.bending
            # print(len(self.length_array))
        elif self.length>self.length_max:
            if(len(self.length_array)<=self.segment_num_max):
                self.length =  round(self.length - self.length_max,1)
                self.bending = 0
                self.length_array.append(self.length)
                self.bending_array.append(self.bending)
            else:
                self.length = self.length_max
        else:
            if(len(self.length_array)>1):
                self.length_array.pop()
                self.bending_array.pop()
                self.length = self.length_array[len(self.length_array)-1]
                self.bending = self.bending_array[len(self.bending_array)-1]
            else:
                self.length = 0

        self.T_static = self.static_segment(self.bending_array, self.length_array, len(self.bending_array))
        x_tip, y_tip = self.T_static[0,2], self.T_static[1,2]

        if self.use_obstacle:
            safety_flag = self.check_safety()
            if(safety_flag):
                safety_obs = 1
            else:
                safety_obs = 0

            self.state = [x_tip, y_tip, self.x_target[0], self.x_target[1], safety_obs]
        else:
            self.state = [x_tip, y_tip, self.x_target[0], self.x_target[1]]

        boundary= x_tip < -self.x_threshold or x_tip > self.x_threshold or y_tip < -self.x_threshold or y_tip > self.x_threshold

        error = np.array(self.state[0:2]) - np.array(self.x_target)

        if self.use_obstacle:
            self.flag_collision = self.check_collision()
            reward_safety ,f_reward= self.obstacle_vector(error)
            # reward_safety = 0
        done = bool(
            boundary
            or self.steps_left<0
            or self.length<0
        )
        if self.use_obstacle:
            done = done or self.flag_collision

        if not done:
            reward = - np.linalg.norm(error)**2
#             if(y_tip<2.25):
#               reward += - (2.25 - y_tip)*3.5
            if self.use_obstacle:
                reward = reward + reward_safety +f_reward
        else:
            if self.length<0:
                reward = -100000
            elif self.use_obstacle:
                if self.flag_collision:
                    reward = -100000
                elif self.length<0:
                    reward = -100000
                else:
                    reward = 0
            else:
                reward = 0
        if not done:
            self.steps_left = self.steps_left-1
        self.cur_reward = reward
        self.cur_done = done
        return np.array(self.state, dtype=np.float32), reward, done,False, {}

    def reset(self,seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode with a freshly sampled target.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Accepted for compatibility with the Gymnasium ``reset``
                signature; not used.

        Returns:
            ``(observation, info)`` with the tip at the origin and an empty
            info dict.

        The target is drawn from ``self.np_random`` so that ``seed`` actually
        makes episodes reproducible (the global ``random`` module used before
        ignored it).
        """
        super().reset(seed=seed)
        self.x_target = [float(self.np_random.uniform(self.low[0], self.high[0])),
                         float(self.np_random.uniform(self.low[1], self.high[1]))]
        # Observation layout matches ``step``: an extra flag with obstacles.
        self.state = [0, 0, self.x_target[0], self.x_target[1]]
        if self.use_obstacle:
            self.state.append(0)
        self.steps_left = self.MAX_EPISODE
        self.length = self.init_length
        self.bending = 0
        self.length_array = [self.length]
        self.bending_array = [self.bending]
        self.T_static = np.eye(3)
        # Drop the look-ahead sector of the previous episode so ``render``
        # does not draw a stale wedge.
        self.center = None
        self.start_angle = None
        self.end_angle = None
        return np.array(self.state, dtype=np.float32) , {}

    def draw_segment(self, segment_index):
        """Scatter-plot the sampled points of one segment on the current axes.

        Even-indexed segments are drawn in red and odd-indexed ones in blue so
        that neighbouring segments can be told apart.
        """
        color_plot = 'blue' if segment_index % 2 else 'red'
        xs, ys = self.pose_segment(segment_index)
        for px, py in zip(xs, ys):
            plt.scatter(px, py, color=color_plot)

    def draw_obs(self):
        """Draw every obstacle as a filled black circle on the current axes."""
        for idx, centre in enumerate(self.obs_center):
            disc = plt.Circle((centre[0], centre[1]), self.radius[idx], color='k')
            plt.gca().add_patch(disc)
    def render(self, mode='human'):
        """Draw the robot, target, obstacles and look-ahead sector.

        Args:
            mode: Not used; the behaviour is selected by ``self.render_mode``.

        Returns:
            With ``render_mode == 'rgb_array'``: a ``uint8`` array of shape
            ``(height, width, channels)`` built from the figure's raw buffer
            (channel count is inferred by the reshape; presumably RGBA -
            verify against the matplotlib backend in use). Otherwise ``0``.

        The current pyplot figure is cleared at the end of every call.
        """
        plt.axis('equal')
        for i in range(0, len(self.length_array)):
            self.draw_segment(i)
        plt.scatter(self.x_target[0], self.x_target[1], color='black')
        if self.use_obstacle:
            self.draw_obs()

        # Sector last evaluated by obstacle_vector, if any.
        if self.center is not None:
            wedge = Wedge(self.center, self.radius_area, self.start_angle, self.end_angle, width=self.radius_area, facecolor='orange', alpha=0.6)
            plt.gca().add_patch(wedge)
        if self.render_mode == 'human':
            plt.draw()
            plt.pause(0.01)

        im = None
        if self.render_mode == 'rgb_array':
            # Convert the plot to a NumPy array (only when it is returned).
            fig = plt.gcf()
            with io.BytesIO() as buff:
                fig.savefig(buff, format='raw')
                data = np.frombuffer(buff.getvalue(), dtype=np.uint8)
            w, h = fig.canvas.get_width_height()
            im = data.reshape((int(h), int(w), -1))

        plt.clf()
        if self.render_mode == 'rgb_array':
            return im
        # The leftover debug print of self.ang_deg was removed; the historical
        # integer return value for non-array modes is kept.
        return 0

In [None]:
# from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv


# def record_video(env_id, model, video_length=500, prefix="", video_folder="videos/"):
#     """
#     :param env_id: (str)
#     :param model: (RL model)
#     :param video_length: (int)
#     :param prefix: (str)
#     :param video_folder: (str)
#     """
#     eval_env = DummyVecEnv([lambda: EversionRobot(True,'rgb_array')])
#     # Start the video at step=0 and record 500 steps
#     eval_env = VecVideoRecorder(
#         eval_env,
#         video_folder=video_folder,
#         record_video_trigger=lambda step: step == 0,
#         video_length=video_length,
#         name_prefix=prefix,
#     )

#     obs = eval_env.reset()
#     for le in range(video_length):
#         action= eval_env.action_space.sample()
#         if(le < 18):
#           action = 0
#         else:
#           action = 3
#         obs, rewards, _, _ = eval_env.step([action])
#         print(rewards,action)
#     # Close the video recorder
#     eval_env.close()

# Wandb Init

In [None]:
!pip install wandb

In [None]:
import wandb
# from google.colab import userdata
from kaggle_secrets import UserSecretsClient
# Read the Weights & Biases API key from the secret store instead of
# hard-coding it in the notebook.
user_secrets = UserSecretsClient()
secret_value = user_secrets.get_secret("wandb_api")
# The API token is stored under the label "wandb_api".
# If you use a different label, change it in the get_secret call above.
wandb_api = secret_value

# relogin=True forces a fresh login with this key.
wandb.login(key=wandb_api,relogin=True)

In [None]:
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
import wandb
from wandb.integration.sb3 import WandbCallback


# Run configuration logged to Weights & Biases; "total_timesteps" is also read
# by the training cell.
config = {
    "policy_type": "MlpPolicy",
    "total_timesteps": 8000000,
    "env_name": "DQN-avoidance-test-discrete",
}
# Start the W&B run; run.id is used later for the tensorboard and model paths.
run = wandb.init(
    project="Eversion-robot_DQN",
    config=config,
    sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
    monitor_gym=True,  # auto-upload the videos of agents playing the game
    save_code=True,  # optional
)


def make_env():
    """Create the obstacle-enabled environment in 'rgb_array' mode, wrapped in Monitor.

    Monitor records episode statistics such as returns.
    """
    return Monitor(EversionRobot(True,'rgb_array'))


env = DummyVecEnv([make_env])

In [None]:
from stable_baselines3 import DQN
from stable_baselines3 import PPO
# env = EversionRobot(True)
# model = DQN('MlpPolicy', env, verbose=1,device="cuda",tau=0.1,target_update_interval=100000,learning_rate=0.00005,exploration_final_eps=0.1,exploration_fraction=0.001).learn(total_timesteps=8000000,log_interval=10)
# model = DQN('MlpPolicy', env, verbose=1,device="cuda",tau=0.1,target_update_interval=100000,learning_rate=linear_schedule(0.0001)).learn(total_timesteps=100000,log_interval=10)
# model.save('DQN_test_avoidance')

# Build the DQN agent. device="auto" uses the GPU when one is available and
# falls back to the CPU otherwise (a hard-coded "cuda" fails on CPU-only hosts).
model = DQN(
    'MlpPolicy',
    env,
    verbose=1,
    device="auto",
    tau=0.1,
    target_update_interval=300000,
    learning_rate=0.00005,
    exploration_final_eps=0.1,
    exploration_fraction=0.01,
    tensorboard_log=f"runs/{run.id}",
)
# Train, streaming gradients and model checkpoints to Weights & Biases.
model.learn(
    total_timesteps=config["total_timesteps"],
    callback=WandbCallback(
        gradient_save_freq=100,
        model_save_path=f"models/{run.id}",
        verbose=0,
    ),
)


In [None]:
run.finish()

# Evaluate Policy

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
from stable_baselines3.common.monitor import Monitor
# Use a separate environment for evaluation
eval_env = Monitor(EversionRobot(True))

# Evaluate `model` over 100 episodes. When the notebook is run top to bottom
# this is the agent trained in the cell above, not a random/untrained agent.
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=100)

print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

# Prepare video recording

In [None]:
# Set up fake display; otherwise rendering will fail
import os
# Launch a virtual X server in the background as display :1 (1024x768, 24-bit) ...
os.system("Xvfb :1 -screen 0 1024x768x24 &")
# ... and point this process (and its children) at it.
os.environ['DISPLAY'] = ':1'

### Visualize trained agent



In [None]:
# Roll out the trained agent and collect one rendered frame per step.
eval_env = DummyVecEnv([lambda: EversionRobot(True,'rgb_array')])
frames = []
video_length = 400
obs = eval_env.reset()
for le in range(video_length):
    action, _ = model.predict(obs)
    # A vectorized env resets a finished sub-environment by itself and already
    # returns the first observation of the new episode, so no manual reset is
    # needed here (an extra reset would throw that episode start away).
    obs, rewards, done, _ = eval_env.step(action)
    print(rewards,action)
    frames.append(eval_env.render(mode='rgb_array'))
# Release the evaluation environment
eval_env.close()

In [None]:
from matplotlib import pyplot as plt, animation
%matplotlib inline
from IPython import display

def create_anim(frames, dpi, fps):
    """Build a matplotlib animation that plays ``frames`` as images.

    Args:
        frames: Non-empty sequence of image arrays of identical shape.
        dpi: Figure resolution; the figure is sized so that one image pixel
            maps to one figure pixel.
        fps: Playback speed in frames per second.

    Returns:
        The ``animation.FuncAnimation`` object.
    """
    fig = plt.figure(figsize=(frames[0].shape[1] / dpi, frames[0].shape[0] / dpi), dpi=dpi)
    ax = fig.gca()
    patch = ax.imshow(frames[0])

    def setup():
        ax.set_axis_off()

    def animate(i):
        patch.set_data(frames[i])

    # FuncAnimation expects the delay between frames in milliseconds, so
    # convert from frames per second (passing fps directly played 50 "fps"
    # at one frame every 50 ms).
    anim = animation.FuncAnimation(fig, animate, init_func=setup, frames=len(frames), interval=1000 / fps)
    return anim

def display_anim(frames, dpi=72, fps=50):
    """Return the frames as an interactive JavaScript/HTML animation string."""
    return create_anim(frames, dpi, fps).to_jshtml()

def save_anim(frames, filename, dpi=72, fps=50):
    """Render the frames as an animation and write it to ``filename``."""
    create_anim(frames, dpi, fps).save(filename)

In [None]:
display.HTML(display_anim(frames))

Basecode

In [None]:
# # import gym
# from gymnasium import logger, spaces
# import math
# import numpy as np
# import random
# import matplotlib.pyplot as plt
# from gymnasium.spaces.discrete import Discrete
# import io
# import matplotlib
# matplotlib.use('agg')  # turn off interactive backend
# # matplotlib.use( 'plt' )
# import numpy as np

# from typing import Optional

# class EversionRobot(gym.Env):
#     metadata = {
#         "render_modes": ["human", "rgb_array"],
#         "render_fps": 15,
#     }
#     def __init__(self, obs_use : Optional[bool] = False,render_mode : Optional[str]='human'):
#         super(EversionRobot, self).__init__()
#         self.MAX_EPISODE = 100
#         self.x_threshold = 5
#         self.use_obstacle = obs_use

#         if self.use_obstacle:
#             high = np.array([self.x_threshold, self.x_threshold, self.x_threshold, self.x_threshold, 2.0],
#                         dtype=np.float32)
#         else:
#             high = np.array([self.x_threshold, self.x_threshold, self.x_threshold, self.x_threshold],
#                         dtype=np.float32)

# #         self.action_space = spaces.Discrete(5)
#         self.action_space = Discrete(5)
#         self.observation_space = spaces.Box(-high, high, dtype=np.float32)

#         self.steps_left = self.MAX_EPISODE
#         self.low = [-1.5, 0.5]
#         self.high = [1.5, 3.0]
#         self.x_target = [random.uniform(self.low[0],self.high[0]), random.uniform(self.low[1],self.high[1])]
#         # self.x_target = [-0.5, 0.5]
#         self.state = [0, 0, self.x_target[0], self.x_target[1], 0]
#         self.init_length = 0.1
#         self.length = self.init_length
#         self.bending = 0
#         self.delta_length = 0.1
#         self.delta_bending = 0.1
#         self.segment_num = 1
#         self.segment_num_max = 5
#         self.length_max = 1.5
#         self.length_array = []
#         self.length_array.append(self.length)
#         self.bending_array = []
#         self.bending_array.append(self.bending)
#         self.T_static = np.eye(3)
#         self.safety_param = 2
#         self.safety_penalty = 10
#         self.render_mode = render_mode

#         # Obstacle's centre position and radius
#         if self.use_obstacle:
#             self.obs_center = []
#             self.obs_center.append(np.array([0.0, 2.0]))
#             self.obs_center.append(np.array([-1.1, 1.2]))
#             self.obs_center.append(np.array([0.7, 1.0]))
#             self.radius = []
#             for i in range(0, len(self.obs_center)):
#                 self.radius.append(0.1)

#     def constant_curvature(self, bending, length):
#         if bending != 0:
#             x_tip = (1-math.cos(bending))*length/bending
#             y_tip = math.sin(bending)*length/bending
#         else:
#             x_tip = 0
#             y_tip = length
#         return x_tip, y_tip

#     def static_segment(self, bending_array, length_array, segment_index):
#         T_multi = np.eye(3)
#         for i in range(0, segment_index):
#             bending = self.bending_array[i]
#             length = self.length_array[i]
#             if bending != 0:
#                 x_tip = (1-math.cos(bending))*length/bending
#                 y_tip = math.sin(bending)*length/bending
#             else:
#                 x_tip = 0
#                 y_tip = length
#             T_single = np.array([[math.cos(bending), math.sin(bending), x_tip],
#                                 [-math.sin(bending), math.cos(bending), y_tip],
#                                 [0, 0, 1]])
#             T_multi = T_multi@T_single

#         return T_multi

#     def pose_segment(self, segment_index):
#         T_prior_segment = self.static_segment(self.bending_array, self.length_array, segment_index)
#         indeks_maks = math.floor(self.length_array[segment_index]/self.delta_length)
#         if indeks_maks!=0:
#             bending_increment = self.bending_array[segment_index]/indeks_maks
#         else:
#             bending_increment = 0
#         x_array = []
#         y_array = []
#         for i in range(0,indeks_maks):
#             x_tip, y_tip = self.constant_curvature(bending_increment*i, self.delta_length*i)
#             T_single = np.array([[math.cos(bending_increment*i), math.sin(bending_increment*i), x_tip],
#                                 [-math.sin(bending_increment*i), math.cos(bending_increment*i), y_tip],
#                                 [0, 0, 1]])
#             transform_xy = T_prior_segment@T_single
#             x_tip, y_tip = transform_xy[0,2], transform_xy[1,2]
#             x_array.append(x_tip)
#             y_array.append(y_tip)
#         return x_array, y_array

#     def check_collision(self):
#         collision = False
#         x_array, y_array = self.pose_segment(len(self.length_array)-1)
#         for i in range(0, len(self.obs_center)):
#             for j in range(0, len(x_array)):
#                 distance_vect = np.array([[x_array[j]-self.obs_center[i][0]],
#                                             [y_array[j]-self.obs_center[i][1]]])
#                 distance_scalar = np.linalg.norm(distance_vect)
#                 if(distance_scalar<=self.radius[i]):
#                     collision = True
#                     break

#             if(collision == True):
#                 break

#         return collision

#     def check_safety(self):
#         danger = False
#         x_array, y_array = self.pose_segment(len(self.length_array)-1)
#         for i in range(0, len(self.obs_center)):
#             for j in range(0, len(x_array)):
#                 distance_vect = np.array([[x_array[j]-self.obs_center[i][0]],
#                                             [y_array[j]-self.obs_center[i][1]]])
#                 distance_scalar = np.linalg.norm(distance_vect)
#                 if(distance_scalar<=self.safety_param*self.radius[i]):
#                     danger = True
#                     break

#             if(danger == True):
#                 break

#         return danger

#     def obstacle_vector(self, dist_to_goal):
#         limit_obs = 0.5
#         gain_obs = 3.0
#         x_array, y_array = self.pose_segment(len(self.length_array)-1)
#         min_distance = 100
#         min_index_i = 0
#         min_index_j = 0
#         distance_vect_min = np.array([[min_distance],[min_distance]])
#         for i in range(0, len(self.obs_center)):
#             for j in range(0, len(x_array)):
#                 distance_vect = np.array([[x_array[j]-self.obs_center[i][0]],
#                                             [y_array[j]-self.obs_center[i][1]]])
#                 distance_scalar = np.linalg.norm(distance_vect)
#                 if(distance_scalar<min_distance):
#                     min_distance = distance_scalar
#                     distance_vect_min = distance_vect
#                     min_index_i = i
#                     min_index_j = j

#         direction_obs = distance_vect_min/min_distance
#         if(np.linalg.norm(dist_to_goal)!=0):
#             direction_goal = dist_to_goal/np.linalg.norm(dist_to_goal)
#         else:
#             direction_goal = dist_to_goal
#         cos_angle = np.dot(direction_goal, direction_obs)
#         if((min_distance-self.radius[min_index_i])<=limit_obs and cos_angle>0):
#             distance_to_surface = min_distance - self.radius[min_index_i]
#             avoidance_term = (1.0/distance_to_surface - 1.0/limit_obs)*gain_obs
#         else:
#             avoidance_term = 0

#         return avoidance_term

#     def step(self, action):
#         # Advance the robot by one control step and return
#         # (observation, reward, done, False, {}).
#         #
#         # action: 0 grows self.length by delta_length, 1 shrinks it,
#         #         3 increases self.bending by delta_bending, 4 decreases it;
#         #         any other value (including 2) changes neither.
#         # observation: [x_tip, y_tip, x_target[0], x_target[1]] as float32,
#         #         plus a trailing 0/1 safety flag when self.use_obstacle is set.
#         # reward: while the episode continues, minus the squared tip-to-target
#         #         distance (minus obstacle_vector(error) with obstacles);
#         #         on termination -100000 for a collision or negative length, else 0.
#         #
#         # NOTE(review): `save_state` below is assigned but never read in this method.
#         # NOTE(review): the negative-length branch resets self.length before `done`
#         # is computed, so the later `self.length<0` checks look unreachable unless
#         # init_length is negative -- confirm.
#         self.act = action
#         save_state = self.state
#         if action == 0:
#             self.length = self.length+self.delta_length
#         elif action == 1:
#             self.length = self.length-self.delta_length
#         elif action == 3:
#             self.bending = self.bending+self.delta_bending
#         elif action == 4:
#             self.bending = self.bending-self.delta_bending
#         # Wrap the bending angle around at +/- pi.
#         if(self.bending>math.pi):
#             self.bending = -math.pi
#         elif(self.bending<-math.pi):
#             self.bending = math.pi

#         # Update length_array and bending_array
#         if(self.length<=self.length_max and self.length>=0):
#             self.length_array[len(self.length_array)-1] = self.length
#             self.bending_array[len(self.bending_array)-1] = self.bending
#         elif self.length>self.length_max:
#             # Current segment is full: start a new zero-length segment if allowed,
#             # otherwise clamp the length at length_max.
#             if(len(self.length_array)<=self.segment_num_max):
#                 self.length = 0
#                 self.bending = 0
#                 self.length_array.append(self.length)
#                 self.bending_array.append(self.bending)
#             else:
#                 self.length = self.length_max
#         else:
#             # Length went below zero: drop the last segment and resume from the
#             # previous one, or clamp at 0 when only one segment remains.
#             if(len(self.length_array)>1):
#                 self.length_array.pop()
#                 self.bending_array.pop()
#                 self.length = self.length_array[len(self.length_array)-1]
#                 self.bending = self.bending_array[len(self.bending_array)-1]
#             else:
#                 self.length = 0

#         self.T_static = self.static_segment(self.bending_array, self.length_array, len(self.bending_array))
#         x_tip, y_tip = self.T_static[0,2], self.T_static[1,2]

#         if self.use_obstacle:
#             safety_flag = self.check_safety()
#             if(safety_flag):
#                 safety_obs = 1
#             else:
#                 safety_obs = 0

#             self.state = [x_tip, y_tip, self.x_target[0], self.x_target[1], safety_obs]
#         else:
#             self.state = [x_tip, y_tip, self.x_target[0], self.x_target[1]]

#         # The same threshold (x_threshold) bounds both the x and y coordinates.
#         boundary= x_tip < -self.x_threshold or x_tip > self.x_threshold or y_tip < -self.x_threshold or y_tip > self.x_threshold

#         error = np.array(self.state[0:2]) - np.array(self.x_target)

#         if self.use_obstacle:
#             self.flag_collision = self.check_collision()
#             reward_safety = -1*self.obstacle_vector(error)
#         done = bool(
#             boundary
#             or self.steps_left<0
#             or self.length<0
#         )
#         if self.use_obstacle:
#             done = done or self.flag_collision

#         if not done:
#             reward = - np.linalg.norm(error)**2
#             if self.use_obstacle:
#                 reward = reward + reward_safety
#         else:
#             if self.length<0:
#                 reward = -100000
#             elif self.use_obstacle:
#                 if self.flag_collision:
#                     reward = -100000
#                 else:
#                     reward = 0
#             else:
#                 reward = 0
#         if not done:
#             self.steps_left = self.steps_left-1
#         # Cached for the (disabled) debug print in render().
#         self.cur_reward = reward
#         self.cur_done = done
#         return np.array(self.state, dtype=np.float32), reward, done, False, {}

#     def reset(self,seed: Optional[int] = None):
#         # Start a new episode: draw a target uniformly between self.low and
#         # self.high on each axis, restore a single segment of length
#         # self.init_length with zero bending, refill steps_left from MAX_EPISODE,
#         # and return (observation, {}) with the tip placed at the origin.
#         # NOTE(review): the target is drawn from the global `random` module, so the
#         # `seed` forwarded to super().reset() does not make it reproducible
#         # (assuming the parent is gymnasium.Env, whose seeded generator is
#         # self.np_random).
#         # NOTE(review): Gymnasium's Env.reset also accepts a keyword `options`
#         # argument, which this signature would reject -- confirm no caller passes it.
#         super().reset(seed=seed)
#         self.x_target = [random.uniform(self.low[0],self.high[0]), random.uniform(self.low[1],self.high[1])]
#         # self.x_target = [-0.5, 0.5]
#         if self.use_obstacle:
#             self.state = [0, 0, self.x_target[0], self.x_target[1], 0]
#         else:
#             self.state = [0, 0, self.x_target[0], self.x_target[1]]
#         self.steps_left = self.MAX_EPISODE
#         self.length = self.init_length
#         self.bending = 0
#         self.length_array = []
#         self.length_array.append(self.length)
#         self.bending_array = []
#         self.bending_array.append(self.bending)
#         self.T_static = np.eye(3)
#         return np.array(self.state, dtype=np.float32) , {}

#     def draw_segment(self, segment_index):
#         # Scatter-plot the points returned by self.pose_segment(segment_index)
#         # through pyplot, one scatter call per point.
#         # Even-indexed segments are drawn in red, odd-indexed ones in blue.
#         if(segment_index%2==0):
#             color_plot = 'red'
#         else:
#             color_plot = 'blue'
#         x_array, y_array = self.pose_segment(segment_index)
#         for i in range(0, len(x_array)):
#             plt.scatter(x_array[i], y_array[i], color=color_plot)

#     def draw_obs(self):
#         # Add one black circle patch per obstacle to the current axes, centred at
#         # self.obs_center[i] with radius self.radius[i].
#         for i in range(0, len(self.obs_center)):
#             circle1 = plt.Circle((self.obs_center[i][0], self.obs_center[i][1]), self.radius[i], color='k')
#             plt.gca().add_patch(circle1)

#     def render(self, mode='human'):
#         # Draw every segment, the target (black dot) and, when use_obstacle is
#         # set, the obstacles on the current pyplot figure, then rasterise it.
#         # Returns the frame as a uint8 array reshaped to (h, w, -1) when
#         # self.render_mode == 'rgb_array', otherwise 0. The figure is cleared
#         # with plt.clf() before returning, and the frame is built in every mode.
#         # NOTE(review): the `mode` argument is never read; behaviour depends on
#         # self.render_mode only.
#         # NOTE(review): the reshape assumes the raw buffer matches the canvas
#         # width/height, i.e. savefig renders at the figure's own dpi -- confirm.
#         plt.axis('equal')
#         # plt.xlim([-1.5, 1.5])
#         # plt.ylim([-1.5, 1.5])
#         for i in range(0, len(self.length_array)):
#             self.draw_segment(i)
#         plt.scatter(self.x_target[0], self.x_target[1], color='black')
#         if self.use_obstacle:
#             self.draw_obs()
#         if self.render_mode == 'human':
#             plt.draw()
#             plt.pause(0.01)

#         # Convert the plot to a NumPy array
#         fig = plt.gcf()
#         with io.BytesIO() as buff:
#             fig.savefig(buff, format='raw')
#             buff.seek(0)
#             data = np.frombuffer(buff.getvalue(), dtype=np.uint8)
#         w, h = fig.canvas.get_width_height()
#         im = data.reshape((int(h), int(w), -1))

#         plt.clf()
# #         if self.use_obstacle:
# #             print(f'State {self.state}, action: {self.act}, done: {self.cur_done}, bending: {self.bending_array}, length: {self.length_array}, reward: {self.cur_reward}, collision: {self.flag_collision}')
# #         else:
# #             print(f'State {self.state}, action: {self.act}, done: {self.cur_done}, bending: {self.bending_array}, length: {self.length_array}, reward: {self.cur_reward}')
#         if self.render_mode == 'rgb_array':
#             return im
#         return 0