In [None]:
import importlib
import torch
import os
import subprocess
import gymnasium as gym

In [None]:
# Fetch the helper script (model_downloader.py) that the following cells import.
env_file = 'model_downloader.py'
env_url = "https://raw.githubusercontent.com/alibaniasad1999/master-thesis/main/Code/Python/utils/model_downloader.py"

# A zero-byte file is treated as missing: a failed `wget -O` typically leaves an
# empty file behind, which would otherwise be mistaken for a valid download.
if os.path.isfile(env_file) and os.path.getsize(env_file) > 0:
    print(f"{env_file} already exists.")
else:
    print(f"Downloading {env_file} ...")
    result = subprocess.run(
        ['wget', '-q', '-O', env_file, env_url],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL
    )
    download_ok = (
        result.returncode == 0
        and os.path.isfile(env_file)
        and os.path.getsize(env_file) > 0
    )
    if not download_ok:
        # Remove the partial/empty file so the next run retries the download
        # instead of reporting that the file already exists.
        if os.path.isfile(env_file):
            os.remove(env_file)
        raise RuntimeError(
            f"Failed to download {env_file} from {env_url} "
            f"(wget exit code {result.returncode})."
        )
    print(f"{env_file} downloaded.")

from model_downloader import download_everything

In [None]:
# Run the helper imported from model_downloader.py.
# NOTE(review): presumably this fetches the DDPG source module and saved model
# files that the later `import DDPG` and `ddpg.load(...)` cells rely on — confirm
# against model_downloader.py.
download_everything('ddpg')

In [None]:
# Import the DDPG module, reloading it only when this cell is being re-run.
import sys
import importlib

# Record this BEFORE importing: once `import DDPG` has executed, the name is
# always present in sys.modules, so a check made afterwards cannot distinguish
# a fresh import from a re-run (and would execute the module twice).
_ddpg_previously_loaded = 'DDPG' in sys.modules

# Alias the module so the `DDPG` class imported below does not shadow it.
import DDPG as ddpg_module

if _ddpg_previously_loaded:
    # Re-running the cell: pick up edits made to the module since it was first loaded.
    ddpg_module = importlib.reload(ddpg_module)

# Import the specific components from the (possibly reloaded) module.
from DDPG import DDPG
from DDPG import setup_logger_kwargs
from DDPG import MLPActorCritic

In [None]:
# --- Experiment settings and agent construction ---
env = gym.make('Ant-v5')
hid = 1024  # size of each hidden layer (see ac_kwargs)
l = 3       # number of hidden layers (see ac_kwargs)
gamma = 0.99  # NOTE(review): defined here but not passed to DDPG(...) below — confirm intent
seed = 0
epochs = 50
exp_name = 'ddpg'
steps_per_epoch_ = 5000
# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Actor-critic network options: `l` hidden layers, each of size `hid`.
ac_kwargs = {"hidden_sizes": [hid] * l}
states_array = []
logger_kwargs = setup_logger_kwargs(exp_name, seed)
reward_array = []

ddpg = DDPG(
    lambda: env,
    MLPActorCritic,
    ac_kwargs,
    seed_=seed,
    steps_per_epoch=steps_per_epoch_,
    epochs_=epochs,
    logger_kwargs_=logger_kwargs,
    device_=device,
    batch_size=1024,
    start_steps=5000,
    update_after=1000,
    update_every=2000,
    max_ep_len=30000,
)

In [None]:
# ddpg.train()

In [None]:
# Call the agent's load() instead of training (the train() cell is commented out).
# NOTE(review): presumably this restores previously saved model parameters.
# 'cuda' is hard-coded here while `device` falls back to CPU when CUDA is
# unavailable — confirm what `from_device_to_load` means and whether this
# works on a CPU-only machine.
ddpg.load(from_device_to_load='cuda')

In [None]:
# if False:
#     try:
#         # noinspection PyUnresolvedReferences
#         import google.colab
#
#         # download created models
#         if os.path.isfile("DDPG_TBP_results.tar.gz"):
#             os.remove("DDPG_TBP_results.tar.gz")
#         !tar cvfz DDPG_TBP_results.tar.gz model/
#         # noinspection PyUnresolvedReferences
#         from google.colab import files
#
#         files.download('DDPG_TBP_results.tar.gz')
#         print(colorize("Done! 🥰😎", 'blue', bold=True))
#     except Exception as e:
#         print(e, end=' ----> ')
#         print(colorize("Something Wrong 🧐😩, or maybe you used your Mac 😱🥲", 'red', bold=True))
# else:
#     print(colorize("You are in test mode! 🥰😎", 'blue', bold=True))

In [None]:
import time

# Roll out the DDPG agent's actions in a live viewer window (render_mode="human").
env = gym.make("Ant-v5", render_mode="human")

# Start the first episode.
obs, info = env.reset()
step = 0

try:
    while step < 1000:
        # Refresh the on-screen animation.
        env.render()

        # Ask the agent (not a random sampler) for the action to take.
        # NOTE(review): the second argument 0 is presumably the exploration-noise scale — confirm.
        action = ddpg.get_action(obs, 0)

        # Advance the simulation by one step.
        obs, reward, terminated, truncated, info = env.step(action)

        # Progress counter.
        print(step)

        # Start a new episode when the current one ends, and keep going.
        episode_over = terminated or truncated
        if episode_over:
            print("Episode finished!")
            obs, info = env.reset()

        # Throttle the loop so the animation is watchable.
        time.sleep(0.05)
        step = step + 1
except KeyboardInterrupt:
    print("Simulation interrupted.")
finally:
    # Always release the environment and close the viewer window.
    env.close()


In [None]:
# If you don't have gymnasium yet, uncomment (Ant-v5 is a MuJoCo environment):
# %pip install "gymnasium[mujoco]"

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# Try Gymnasium first, then classic Gym
try:
    import gymnasium as gym
except ImportError:
    import gym

# Create env that returns RGB frames (no external window)
env = gym.make("Ant-v5", render_mode="rgb_array")

frames = []
try:
    # Reset (Gymnasium returns (obs, info); old Gym returns obs)
    reset_out = env.reset(seed=0)
    obs = reset_out[0] if isinstance(reset_out, tuple) else reset_out
    done = False

    for _ in range(1000):
        # Get a frame for the animation
        frame = env.render()  # returns an HxWx3 numpy array when render_mode="rgb_array"
        if frame is not None:
            frames.append(frame)

        # Act with the DDPG agent. (The previous random sample was dead code:
        # it was overwritten by this call before ever being used.)
        # NOTE(review): the second argument 0 is presumably the exploration-noise scale — confirm.
        action = ddpg.get_action(obs, 0)
        step_out = env.step(action)

        # Handle both Gymnasium and old Gym step signatures
        if len(step_out) == 5:
            obs, reward, terminated, truncated, info = step_out
            done = terminated or truncated
        else:
            obs, reward, done, info = step_out

        if done:
            break
finally:
    # Release the environment even if the rollout raises part-way through.
    env.close()

# Fail with a clear message instead of an IndexError on frames[0] below.
if not frames:
    raise RuntimeError("No frames were rendered; cannot build the animation.")

# --- Build the animation inline ---
fig, ax = plt.subplots()
ax.axis("off")
im = ax.imshow(frames[0])

def update(i):
    """Show frame `i` and return the changed artist, as blitting requires."""
    im.set_data(frames[i])
    return [im]

anim = FuncAnimation(fig, update, frames=len(frames), interval=30, blit=True)
plt.close(fig)  # avoid duplicate static image below

HTML(anim.to_jshtml())  # inline, with controls


In [None]:
# If needed: %pip install gymnasium ipywidgets pillow
import io, time
from PIL import Image
import ipywidgets as widgets
from IPython.display import display

try:
    import gymnasium as gym
except ImportError:
    import gym

env = gym.make("Ant-v5", render_mode="rgb_array")

# Image widget that is updated in place to give a live view of the rollout.
img = widgets.Image(format="png")
display(img)

def show(arr):
    """Encode an RGB frame array as PNG and push it into the image widget."""
    buf = io.BytesIO()
    Image.fromarray(arr).save(buf, format="PNG")
    img.value = buf.getvalue()

try:
    # Take the initial observation from THIS environment's reset rather than
    # relying on an `obs` left over from an earlier cell.
    # (Gymnasium returns (obs, info); old Gym returns obs.)
    reset_out = env.reset(seed=0)
    obs = reset_out[0] if isinstance(reset_out, tuple) else reset_out

    done = False
    for _ in range(100):
        show(env.render())

        # NOTE(review): the second argument 0 is presumably the exploration-noise scale — confirm.
        action = ddpg.get_action(obs, 0)

        # Keep the step result: the next action needs the new observation and
        # the termination check below needs the new flags.
        step_out = env.step(action)
        if len(step_out) == 5:
            obs, reward, terminated, truncated, info = step_out
            done = terminated or truncated
        else:
            obs, reward, done, info = step_out

        if done:
            # Start a new episode and continue from its initial observation.
            reset_out = env.reset()
            obs = reset_out[0] if isinstance(reset_out, tuple) else reset_out
            done = False

        time.sleep(1/60)
finally:
    # Release the environment even if the rollout raises part-way through.
    env.close()
