In [64]:
import gym

In [65]:
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
from typing import Optional, Union

import numpy as np
import pygame
from pygame import gfxdraw

import gym
from gym import spaces, logger
from gym.utils import seeding


class PendulumEnv(gym.Env):
    """
       ### Description
    The inverted pendulum swingup problem is based on the classic problem in control theory. The system consists of a pendulum attached at one end to a fixed point, and the other end being free. The pendulum starts in a random position and the goal is to apply torque on the free end to swing it into an upright position, with its center of gravity right above the fixed point.
    The diagram below specifies the coordinate system used for the implementation of the pendulum's
    dynamic equations.
    ![Pendulum Coordinate System](./diagrams/pendulum.png)
    -  `x-y`: cartesian coordinates of the pendulum's end in meters.
    - `theta` : angle in radians.
    - `tau`: torque in `N m`. Defined as positive _counter-clockwise_.
    ### Action Space
    The action is a `ndarray` with shape `(1,)` representing the torque applied to free end of the pendulum.
    | Num | Action | Min  | Max |
    |-----|--------|------|-----|
    | 0   | Torque | -2.0 | 2.0 |
    ### Observation Space
    The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free end and its angular velocity.
    | Num | Observation      | Min  | Max |
    |-----|------------------|------|-----|
    | 0   | x = cos(theta)   | -1.0 | 1.0 |
    | 1   | y = sin(angle)   | -1.0 | 1.0 |
    | 2   | Angular Velocity | -8.0 | 8.0 |
    ### Rewards
    The reward function is defined as:
    *r = -(theta<sup>2</sup> + 0.1 * theta_dt<sup>2</sup> + 0.001 * torque<sup>2</sup>)*
    where `$\theta$` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position).
    Based on the above equation, the minimum reward that can be obtained is *-(pi<sup>2</sup> + 0.1 * 8<sup>2</sup> + 0.001 * 2<sup>2</sup>) = -16.2736044*, while the maximum reward is zero (pendulum is
    upright with zero velocity and no torque applied).
    ### Starting State
    The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*.
    ### Episode Termination
    The episode terminates at 200 time steps.
    ### Arguments
    - `g`: acceleration of gravity measured in *(m s<sup>-2</sup>)* used to calculate the pendulum dynamics. The default value is g = 10.0 .
    ```
    gym.make('Pendulum-v1', g=9.81)
    ```
    ### Version History
    * v1: Simplify the math equations, no difference in behavior.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, g=10.0):
        self.max_speed = 8
        self.max_torque = 2.0
        self.dt = 0.05
        self.g = g
        self.m = 1.0
        self.l = 1.0
        self.screen = None
        self.clock = None
        self.isopen = True

        self.screen_dim = 500

        high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
        #   or normalised as max_torque == 2 by default. Ignoring the issue here as the default settings are too old
        #   to update to follow the openai gym api
        self.action_space = spaces.Box(
            low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)

    def step(self, u):
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        l = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)

        newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
        newth = th + newthdot * dt

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None
    ):
        super().reset(seed=seed)
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        if not return_info:
            return self._get_obs()
        else:
            return self._get_obs(), {}

    def _get_obs(self):
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)

    def render(self, mode="human"):
        pass

    def close(self):
        if self.screen is not None:
            pygame.display.quit()
            pygame.quit()
            self.isopen = False


def angle_normalize(x):
    return ((x + np.pi) % (2 * np.pi)) - np.pi

In [66]:
env = PendulumEnv()
env.reset()
action = env.action_space.sample() # your agent here (this takes random actions)
observation, reward, done, info = env.step(action)
reward

-1.3007798420678889

In [69]:
env = PendulumEnv()
env.reset()
for _ in range(1000):
    act = env.action_space.sample()
    env.step(act) # take a random action
    print(act)
env.close()

[1.3079237]
[1.2606143]
[0.28585932]
[-0.08966658]
[-0.05901046]
[0.90067035]
[1.3564887]
[-1.9411765]
[-0.05468049]
[-0.40015474]
[1.22183]
[0.01706773]
[0.2855663]
[1.8998054]
[0.16139437]
[-1.205081]
[-1.2428848]
[-1.5475823]
[-0.5943941]
[1.8084421]
[0.34196648]
[0.24480274]
[1.254461]
[0.58575976]
[0.37505147]
[-1.0070541]
[-1.0648904]
[1.8861365]
[-0.02291754]
[-0.889626]
[-0.8420146]
[0.24246176]
[0.57700425]
[-1.3018918]
[-0.20343655]
[1.7727069]
[0.76061445]
[0.9581833]
[-1.1308597]
[1.5259227]
[-0.21141969]
[-1.0412227]
[-1.6367053]
[-0.5172385]
[-1.3712296]
[1.9312779]
[0.49962747]
[-1.6698045]
[1.7399966]
[1.7788824]
[0.3238223]
[-1.2426625]
[-0.789607]
[0.09437577]
[-1.2785761]
[-0.48660684]
[-1.1513468]
[1.5118428]
[-1.3536435]
[-1.6261251]
[-1.7860322]
[1.2320181]
[1.7096941]
[-1.5262045]
[1.9709785]
[-1.1265211]
[-1.0696518]
[1.2415434]
[0.15241663]
[0.5431283]
[1.1568227]
[0.14049146]
[-0.9886738]
[0.85574883]
[0.990727]
[1.6005644]
[-1.466145]
[-0.93597585]
[-0.154134

In [45]:
def build_model(states, actions):
   model = Sequential()
   model.add(Dense(24, activation='relu',input_shape=(states)))
   model.add(Dense(24, activation='relu'))
   model.add(Dense(actions, activation='linear'))
   return model

In [2]:
%pip install gym pygame

Collecting gym
  Downloading gym-0.23.1.tar.gz (626 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m626.2/626.2 KB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Building wheels for collected packages: gym
  Building wheel for gym (pyproject.toml) ... [?25ldone
[?25h  Created wheel for gym: filename=gym-0.23.1-py3-none-any.whl size=701378 sha256=84a964b3d300d054902ff75aff8291ba9ad06a951fc7bfb6c031770a0af35cf5
  Stored in directory: /home/jovyan/.cache/pip/wheels/4e/be/7e/92a54668db96883e38ce60a9249dc55de7cd6eee49e7311940
Successfully built gym
Installing collected packages: gym-notices, gym
Successfully installed gym-0.23.1 gym-notices-0.0.6
Note: you may need to restart the kernel to use updated 

In [5]:
%pip install pygame

Collecting pygame
  Downloading pygame-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.9/21.9 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pygame
Successfully installed pygame-2.1.2
Note: you may need to restart the kernel to use updated packages.
