# Task 6

### Implementation of `CustomWrapper`

In [11]:
# Install SWIG, a tool used to connect C/C++ code with Python. It's often used in RL environments to enable efficient communication between Python and low-level implementations of algorithms
!pip install -q swig

# Install the gym library with the box2d environment, used for 2D physics-based simulation tasks
!pip install -q gym[box2d]

# Install stable-baselines3 with extra dependencies (needed for various environments and features in the library), a set of RL algorithms implemented in PyTorch
!pip install stable-baselines3[extra]

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra])
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
Collecting shimmy[atari]~=1.3.0 (from stable-baselines3[extra])
  Downloading Shimmy-1.3.0-py3-none-any.whl (37 kB)
Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra])
  Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)
Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra])
  Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (4

In [12]:
import gymnasium as gym

class CustomWrapper(gym.Wrapper):
    """Environment wrapper that counts the steps taken through it.

    Two counters are maintained:

    * ``total_steps`` grows with every ``step`` call and is never cleared.
    * ``episode_length`` grows with every ``step`` call and is set back to
      zero by ``reset``.
    """

    def __init__(self, env):
        """Wrap ``env`` and start both counters at zero."""
        super().__init__(env)
        # Steps taken through this wrapper over its whole lifetime.
        self.total_steps = 0
        # Steps taken since the most recent reset().
        self.episode_length = 0

    def reset(self, **kwargs):
        """Reset the wrapped environment and begin a new episode count.

        Keyword arguments are forwarded unchanged to the wrapped environment's
        ``reset``. The lifetime step total is printed on every call.

        Returns:
            The ``(observation, info)`` pair produced by the wrapped environment.
        """
        initial_observation, reset_info = self.env.reset(**kwargs)
        # Report the lifetime total before the per-episode counter is cleared.
        print(f"Total steps taken so far: {self.total_steps}")
        self.episode_length = 0
        return initial_observation, reset_info

    def step(self, action):
        """Advance the wrapped environment by one step and bump both counters.

        Returns:
            The ``(observation, reward, terminated, truncated, info)`` values
            produced by the wrapped environment, unchanged.
        """
        transition = self.env.step(action)
        observation, step_reward, terminated, truncated, step_info = transition
        self.total_steps += 1
        self.episode_length += 1
        return observation, step_reward, terminated, truncated, step_info

    def get_episode_length(self):
        """Return the number of steps taken since the last ``reset``."""
        return self.episode_length

### Demonstration of `CustomWrapper`

In [13]:
# Create and wrap the environment: build the base CartPole-v1 environment, then
# wrap it in CustomWrapper so that per-episode and lifetime step counts are
# tracked for every step taken through `wrapped_env`.
env = gym.make("CartPole-v1")
wrapped_env = CustomWrapper(env)

# Function to run one episode
def run_episode(environment):
    """Play a single episode using actions sampled from the action space.

    The environment is reset, then stepped with ``action_space.sample()``
    actions until a step reports ``terminated`` or ``truncated``. Every step's
    observation, reward and done flag is printed, followed by the episode
    length and a blank line.

    Args:
        environment: An object providing ``reset()``, ``step(action)``,
            ``action_space.sample()`` and ``get_episode_length()``.

    Returns:
        None. All results are reported via ``print``.
    """
    _start_obs, _start_info = environment.reset()
    while True:
        chosen_action = environment.action_space.sample()
        obs, reward, terminated, truncated, _step_info = environment.step(chosen_action)
        print(f"Observation: {obs}, Reward: {reward}, Done: {terminated or truncated}")
        # Either flag ends the episode.
        if terminated or truncated:
            break
    print(f"Total steps in this episode: {environment.get_episode_length()}")
    print()

# Run multiple episodes
# Three back-to-back episodes on the same wrapped environment: the
# "Total steps taken so far" line printed at each reset accumulates across
# episodes, while the per-episode count restarts from zero each time.
for episode in range(3):
    print(f"Episode {episode + 1}")  # 1-based label for the printed header
    run_episode(wrapped_env)

Episode 1
Total steps taken so far: 0
Observation: [ 0.01898332 -0.18985447  0.03322982  0.27138755], Reward: 1.0, Done: False
Observation: [ 0.01518623  0.00477793  0.03865757 -0.01063212], Reward: 1.0, Done: False
Observation: [ 0.01528179  0.19932477  0.03844493 -0.29087186], Reward: 1.0, Done: False
Observation: [0.01926829 0.00367632 0.03262749 0.01368398], Reward: 1.0, Done: False
Observation: [ 0.01934181  0.19831553  0.03290117 -0.26852867], Reward: 1.0, Done: False
Observation: [ 0.02330812  0.39295286  0.0275306  -0.5506555 ], Reward: 1.0, Done: False
Observation: [ 0.03116718  0.19745527  0.01651749 -0.24942723], Reward: 1.0, Done: False
Observation: [ 0.03511629  0.3923375   0.01152894 -0.53685474], Reward: 1.0, Done: False
Observation: [ 0.04296304  0.19705535  0.00079185 -0.24056159], Reward: 1.0, Done: False
Observation: [ 0.04690414  0.00192211 -0.00401938  0.05237101], Reward: 1.0, Done: False
Observation: [ 0.04694258 -0.19314198 -0.00297196  0.34378308], Reward: 1.0,