In [1]:
import json
import websockets, asyncio
import threading

class WaitableQueue(asyncio.Queue):
    def __init__(self):
        super().__init__()
        self.event = threading.Event()

    def put(self, item):
        super().put_nowait(item)
        self.event.set()

    def get(self,timeout=1000000000):
        if self.event.wait(timeout):
            res = super().get_nowait()
            if super().empty():
                self.event.clear()
            return res
        else:
            raise TimeoutError("Environement is not responding.")

    def waitNotEmpty(self,timeout=1000000000):
        if self.event.wait(timeout):    
            return
        else:
            raise TimeoutError("Environement is not responding.")

# the server
class Server:
    def __init__(self):
        self.inQueue = WaitableQueue()
        self.outQueue = WaitableQueue()
        self.debug = True
        self.ws = None

    def start(self):
        threading.Thread(target=self.message_sender_loop).start()
        asyncio.run(self.main())

    async def main(self):
        try:
            async with websockets.serve(self.echo, "localhost", 8766):
                await asyncio.Future()  # run forever
        except websockets.exceptions.ConnectionClosedError as e: print(e)

    async def echo(self,websocket):
        self.ws = websocket
        print('connect')
        #asyncio.create_task(self.message_sender_loop())
        async for message in websocket:
            try:
                self.recv(json.loads(message))
            except json.decoder.JSONDecodeError:
                self.recv(message)

    def recv(self,message):
        self.inQueue.put(message)
        
        if self.debug:
            print("recv: ",message)
    
    def send(self,command:str, content):
        self.outQueue.put({'command':command,'content':content})

    def message_sender_loop(self):
        while True:
            try:
                message = self.outQueue.get(None)
                asyncio.run(self.ws.send(json.dumps(message, indent=4)))
            except websockets.exceptions.ConnectionClosedError:
                print("Connection closed")
            except Exception as e:
                print(e)

    def update(self,handler):
        while not self.inQueue.empty:
            message = self.inQueue.get()
            getattr(handler, message["command"])(message["content"])

                
# start the server in a separate thread to avoid blocking
import threading
server = Server()
t=threading.Thread(target=server.start)
t.start()

# the interface to the server
class WSManager:
    def __init__(self,server:Server):
        self.debug = False
        self.server = server

#server.send("action",{"voltage":[1,0,0,0,100,200,100,100]})

In [2]:
import numpy as np
def flatten(list_of_lists):
    if len(list_of_lists) == 0:
        return list(list_of_lists)
    if hasattr(list_of_lists[0], '__iter__'):
        return flatten(list_of_lists[0]) + flatten(list_of_lists[1:])
    return list(list_of_lists[:1]) + flatten(list_of_lists[1:])
def decomposeCosSin(angle):
    return [np.cos(angle), np.sin(angle)]


In [3]:

from torch import nn
import gym
import numpy as np
class Environment(gym.Env):
    def __init__(self,ws_server : Server,device = 'cpu'):
        self.ws = ws_server
        self.t = 0
        self.t_episode = 0
        self.device = device
        self.prevState = None
        self.prevAction = None
        self.targetPos = None
        self.noiseIntensity = 0.5
        self.hasRecievedState = False
        self.pos = np.array([0.,0.])
        self.ws.send("pos",{'x':0,'y':0, 'z':0})
        self.training = True
        self.timeSetp = 0.1
        self.timePenalty = 0

        # Implement gym.Env
        self.observation_space = gym.spaces.Box(-np.inf,np.inf,shape=(19,),dtype=float)
        self.action_space = gym.spaces.Box(-1,1,shape=(8,),dtype=float)

    def processFeature(self,state:dict):
        feature = []
        feature.append(state['baseLinkPos']['x']-self.targetPos[0].item())
        feature.append(state['baseLinkPos']['y']-self.targetPos[1].item())
        feature.append(decomposeCosSin(state['baseLinkOrientation']))
        feature.append(state['baseLinkVelocity']['x'])
        feature.append(state['baseLinkVelocity']['y'])
        feature.append(state['baseLinkAngularVelocity'])
        feature.append(decomposeCosSin(state['wheelBaseOrientation']))
        feature.append(state['wheelSpeed'])
        feature = flatten(feature)
        return feature

    def getObservation(self):
        return self.processFeature(self.state)

    def calculateReward(self,pos,targetPos):
        return -np.linalg.norm(pos-targetPos,2)

    def terminateCondition(self,pos,targetPos):
        if not self.training:
            return False, False
        d = np.linalg.norm(pos-targetPos,2)
        return d<0.5 or d>15 or self.t_episode>100, d<0.5

    def getPos(self,state):
        return np.array([state['baseLinkPos']['x'],state['baseLinkPos']['y']],dtype=float)

    def readMessages(self):
        while not self.ws.inQueue.empty():
            message = self.ws.inQueue.get()
            command = message["command"]
            content = message["content"]
            if command == "state":
                self.hasRecievedState = True
                self.state = message["content"]
            if command == "target":
                self.targetPos = np.array([content["x"],content["z"]])

    def wheelOrientationMotorMapping(self,x):
        gamma = 10
        return x**10


    # Implement gym.Env
    
    def reset(self):
        self.targetPos = self.pos + np.random.standard_normal((2,))*4
        self.t_episode = 0
        self.prevState = None
        self.ws.send("target",{"pos":{'x':self.targetPos[0],'y':0, 'z':self.targetPos[1]}})
        #self.ws.send("pos",{'x':0,'y':0, 'z':0})
        self.ws.send("require state",None)

        # return the initial observation
        self.hasRecievedState = False
        while not self.hasRecievedState:
            self.ws.inQueue.waitNotEmpty()
            self.readMessages()

        return self.getObservation()
        
    def step(self, action):
        self.t_episode +=1
        # Send an action then wait for the env to run one step
        action[0:4] = self.wheelOrientationMotorMapping(action[0:4])
        action[0:4]*=1000
        action[4:8]*=2000
        self.ws.send("action", {"voltage":action.tolist()})
        #self.ws.send("action", {"voltage":[1000,1000,1000,1000,0,0,0,0]})

        prevPos = self.getPos(self.state)

        # Get state and calculate stuffs in the step
        
        self.hasRecievedState = False
        while not self.hasRecievedState:
            self.ws.inQueue.waitNotEmpty()
            self.readMessages()

        observation = self.getObservation()
        self.pos = self.getPos(self.state)

        distReward = self.calculateReward(self.pos,self.targetPos) - self.calculateReward(prevPos,self.targetPos)
        timePanelty = self.timePenalty*self.timeSetp
        reward = distReward + timePanelty

        done, goal = self.terminateCondition(self.pos,self.targetPos)

        if goal:
            reward += 10

        info = {}

        return observation, reward, done, info
env = Environment(server,'cuda')
server.debug = False

'NoneType' object has no attribute 'send'


In [6]:
from stable_baselines3 import PPO
import datetime
size = 256
model = PPO("MlpPolicy", env, verbose=1,device = "cuda",gamma=0.99,policy_kwargs={'net_arch':[dict(pi=[size, size], vf=[size, size])]},tensorboard_log="runs/wheel=fixed,gamma=0.99,timePanelty=0,goalReward=10,size=256;"+datetime.datetime.now().strftime("%m_%d_%Y/%H_%M_%S"))
#model = PPO.load("1002k_06_27_2022_00_38_11.zip",env)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [10]:
env.training = True
for i in range(20):
    model.save(f"{67+i*50}k_06_29_2022_01_05_32")
    model.learn(total_timesteps=50_000)

Logging to runs/wheel=fixed,gamma=0.99,timePanelty=0,goalReward=10,size=256;06_29_2022/01_05_32/PPO_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 39.2     |
|    ep_rew_mean     | 13.3     |
| time/              |          |
|    fps             | 51       |
|    iterations      | 1        |
|    time_elapsed    | 39       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 38.9        |
|    ep_rew_mean          | 13.5        |
| time/                   |             |
|    fps                  | 50          |
|    iterations           | 2           |
|    time_elapsed         | 81          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.032597966 |
|    clip_fraction        | 0.336       |
|    clip_range           | 0.2         |
|    entropy

connection handler failed
Traceback (most recent call last):
  File "/home/eri24816/anaconda3/envs/expansion/lib/python3.8/site-packages/websockets/legacy/protocol.py", line 945, in transfer_data
    message = await self.read_message()
  File "/home/eri24816/anaconda3/envs/expansion/lib/python3.8/site-packages/websockets/legacy/protocol.py", line 1015, in read_message
    frame = await self.read_data_frame(max_size=self.max_size)
  File "/home/eri24816/anaconda3/envs/expansion/lib/python3.8/site-packages/websockets/legacy/protocol.py", line 1090, in read_data_frame
    frame = await self.read_frame(max_size)
  File "/home/eri24816/anaconda3/envs/expansion/lib/python3.8/site-packages/websockets/legacy/protocol.py", line 1145, in read_frame
    frame = await Frame.read(
  File "/home/eri24816/anaconda3/envs/expansion/lib/python3.8/site-packages/websockets/legacy/framing.py", line 70, in read
    data = await reader(2)
  File "/home/eri24816/anaconda3/envs/expansion/lib/python3.8/asyncio/

connect
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 22.1       |
|    ep_rew_mean          | 5.06       |
| time/                   |            |
|    fps                  | 43         |
|    iterations           | 16         |
|    time_elapsed         | 757        |
|    total_timesteps      | 32768      |
| train/                  |            |
|    approx_kl            | 0.05015209 |
|    clip_fraction        | 0.388      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.86      |
|    explained_variance   | 0.807      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0123     |
|    n_updates            | 730        |
|    policy_gradient_loss | -0.0209    |
|    std                  | 0.835      |
|    value_loss           | 0.508      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    e

KeyboardInterrupt: 

In [11]:
import stable_baselines3
env.training = False 
stable_baselines3.common.evaluation.evaluate_policy(model, env, n_eval_episodes=10)



KeyboardInterrupt: 

In [9]:
model.save("67k_06_29_2022_01_05_32")

In [19]:
env = gym.make("CartPole-v1")


In [27]:
env.step((1,2))

AssertionError: (1, 2) (<class 'tuple'>) invalid

In [25]:
env.action_space

Discrete(2)

In [28]:
model1 = PPO("MlpPolicy", env, verbose=1)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [34]:
env.step(0)

(array([-0.03819583, -0.18100157,  0.01109779,  0.3364094 ], dtype=float32),
 1.0,
 False,
 {})

In [32]:
action, _states = model1.predict(obs, deterministic=True)

    action

In [47]:
np.log(0.5)/np.log(0.98)

34.309618491520645

In [44]:
np.log(0.5)

-0.6931471805599453