In [111]:
# %load model.py
from math import sin, cos
import numpy as np
from model_parameters import m, g, I, l, Kt, Kd

def transfomationMatrix(roll, pitch, yaw):
    cR = cos(roll)
    cP = cos(pitch)
    cY = cos(yaw)
    sR = sin(roll)
    sP = sin(pitch)
    sY = sin(yaw)
    R = np.array([[cP*cY, sR*sP*cY - cR*sY, cR*sP*cY + sR*sY],
                  [cP*sY, sR*sP*sY - cR*cY, cR*sP*sY - sR*cY],
                  [-sP, sR*cP, cR*cP]], dtype=np.float32)
    return R

def translationalMotion(R, F):
    f = F[0] + F[1]  + F[2] + F[3]
    accelerations = np.zeros((3, 1), dtype=np.float32)
    bodyFrameThrust = np.array([[0], 
                               [0],
                               [f]], dtype=np.float32)
    referenceFrame = np.matmul(R, bodyFrameThrust) + np.array([[0],
                                                               [0],
                                                               [-m*g]], dtype=np.float32)
    accelerations = referenceFrame/m
    accelerations = np.reshape(accelerations, 3)
    return accelerations

def angularMotion(F, M, eulerAnglesPrim):
    T = np.zeros(3)
    T[0] = (F[0] - F[3])*l
    T[1] = (F[1] - F[2])*l
    T[2] = M[0] + M[1] + M[2] + M[3]
    accelerations =  T - np.cross(eulerAnglesPrim, np.multiply(I, eulerAnglesPrim))
    accelerations = np.divide(accelerations, I)
    return accelerations

def inputToForces(omega):
    return np.multiply(Kt, omega)

def inputToMomentum(omega):
    return np.multiply(Kd, omega)

def model(x):
    dstate = np.zeros(12)
    omega = np.array([2000, 2000, 2000, 2000])
    F = inputToForces(omega)
    M = inputToMomentum(omega)
    R = transfomationMatrix(x[3], x[4], x[5])
    dstate[0:3] = x[3:6]
    dstate[3:6] = translationalMotion(R, F)
    dstate[6:9] = x[9:12]
    dstate[9:12] = angularMotion(F, M, x[9:12])
    return dstate 

def modelRT(x, u, deltaT):
    state = np.zeros(12)
    omega = np.array(u)
    F = inputToForces(omega)
    M = inputToMomentum(omega)
    R = transfomationMatrix(x[3], x[4], x[5])
    state[0:3] = x[3:6] * deltaT + x[0:3]
    state[3:6] = translationalMotion(R, F) * deltaT + x[3:6]
    state[6:9] = x[9:12] * deltaT + x[6:9]
    state[9:12] = angularMotion(F, M, x[9:12]) * deltaT + x[9:12]
    return state 

In [126]:
class DroneEnv(Env):
    def __init__(self):
        self.action_space = Box(low=0, high=2000, shape=(4,), dtype=np.float32)
        self.observation_space = Box(low=-inf, high=inf, shape=(12,))
        self.state = np.zeros(12)
        self.state[2] = 5
        self.maximum_angle = radians(30)
        self.flight_length = 10
        
    def step(self, action, deltaT):
        self.state = modelRT(self.state, action, deltaT)
        self.flight_lenght = self.flight_length-deltaT

        reward = - abs(5 - self.state[2])
        print(reward)
        if self.state[2] < 0:
            done = True
        else:
            done = False
        info = {}
        return self.state, reward, done, info
        
    def render(self):
        #visualization
        pass
    def reset(self):
        self.state = np.zeros(12)
        self.state[2] = 5
        self.flight_lenght = 10

In [127]:
env = DroneEnv()

In [114]:
env.action_space.sample()

array([1631.0653,  720.1694,  653.44  , 1608.3169], dtype=float32)

In [58]:
env = gym.make('CartPole-v0')
states = env.observation_space.shape[0]
actions = env.action_space.n

In [59]:
actions

2

In [128]:
episodes = 20
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action, 0.1)
        score+=reward
    print('Episode:{} Score:{}'. format(episode, score))

-0.0
-0.09799790382385254
-0.2939600884914402
-0.5879106581211095
-0.9798849284648901
-1.4698523998260504
-2.057761108875275
-2.743661999702454
-3.5275483608245857
-4.409421390295029
-5.389251118898392
Episode:1 Score:-21.55724995732308
-0.0
-0.09796308875083959
-0.2939268290996555
-0.5878638625144958
-0.9798436820507046
-1.4697650134563442
-2.0576595902442927
-2.7435324430465693
-3.5273723125457757
-4.4091506421566
-5.388855654001236
Episode:2 Score:-21.555933117866513
-0.0
-0.09792694449424744
-0.2938214480876926
-0.5877285420894625
-0.9795751571655273
-1.4693969428539275
-2.057179409265518
-2.742942517995834
-3.526697820425033
-4.40844486951828
-5.38813636302948
Episode:3 Score:-21.551850014925
-0.0
-0.09803283810615504
-0.2940421998500824
-0.5880227565765379
-0.9800088405609131
-1.4699857890605927
-2.0579434573650364
-2.743893736600876
-3.527839785814286
-4.409762006998063
-5.389676487445832
Episode:4 Score:-21.559207898378375
-0.0
-0.09797388911247218
-0.2939375579357142
-0.587868

In [174]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [175]:
states = env.observation_space.shape
actions = 4

In [165]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    return model

In [166]:
CNNmodel = build_model(states, actions)

In [167]:
CNNmodel.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (None, 24)                312       
                                                                 
 dense_14 (Dense)            (None, 24)                600       
                                                                 
 dense_15 (Dense)            (None, 4)                 100       
                                                                 
 flatten (Flatten)           (None, 4)                 0         
                                                                 
Total params: 1,012
Trainable params: 1,012
Non-trainable params: 0
_________________________________________________________________


In [168]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy 
from rl.memory import SequentialMemory

In [176]:
def buildAgent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn

In [177]:
dqn = buildAgent(CNNmodel, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

TypeError: Keras symbolic inputs/outputs do not implement `__len__`. You may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model. This error will also get raised if you try asserting a symbolic input/output directly.