In [1]:
import numpy as np
import sys

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model

In [2]:
class TrainModel:
    """
    Base class holding the hyper-parameters shared by the network variants.

    The constructor stores every setting on the instance and then delegates
    the construction of the network to ``_build_model``, which concrete
    subclasses must override.
    """

    def __init__(self, num_layers, width, batch_size, learning_rate, input_dim, output_dim, input_shape):
        """
        Store the hyper-parameters and build the model.

        Args:
            num_layers: forwarded unchanged to ``_build_model``.
            width: forwarded unchanged to ``_build_model``.
            batch_size: stored as ``_batch_size``.
            learning_rate: stored as ``_learning_rate``.
            input_dim: stored as ``_input_dim`` (legacy, see note below).
            output_dim: stored as ``_output_dim``.
            input_shape: stored as ``_input_shape``.

        Raises:
            NotImplementedError: if the class does not override ``_build_model``.
        """
        self._input_dim = input_dim  # old, should be deleted everywhere + code adjusted + config changed
        self._input_shape = input_shape
        self._output_dim = output_dim
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        # Built last so that _build_model can read every attribute set above.
        self._model = self._build_model(num_layers, width)

    def _build_model(self, num_layers, width):
        """
        Hook that subclasses override to build and return the network.

        The base class has no architecture of its own, so it fails loudly
        instead of surfacing an AttributeError from inside ``__init__``.
        """
        raise NotImplementedError(
            type(self).__name__ + " must implement _build_model(num_layers, width)"
        )
        

class RNNTrainModel(TrainModel):
    """
    Network that applies a small CNN to every frame of a state sequence and
    summarises the sequence with an LSTM before the dense output head.
    """

    def __init__(self, num_layers, width, batch_size, learning_rate, input_dim, output_dim, input_shape, sequence_length):
        # Must be stored before the base constructor runs: the base constructor
        # calls _build_model, which reads _sequence_length.
        self._sequence_length = sequence_length
        super().__init__(num_layers, width, batch_size, learning_rate, input_dim, output_dim, input_shape)

    def _build_model(self, num_layers, width):
        """
        Build and compile a deep neural network with convolution and LSTM.

        Args:
            num_layers: accepted for interface compatibility; the architecture
                below is fixed and does not use it.
            width: accepted for interface compatibility; not used.

        Returns:
            A compiled ``keras.Model`` whose input has shape
            ``(sequence_length,) + input_shape`` (plus the batch axis) and whose
            output is the linear ``Dense(output_dim)`` layer.

        Side effects:
            ``self._input_shape`` is overwritten with the sequence-prefixed
            shape, so this method must only be called once per instance.
        """
        # Prepend the time axis expected by the TimeDistributed layers.
        self._input_shape = (self._sequence_length,) + tuple(self._input_shape)

        # input layer
        inputs = keras.Input(shape=self._input_shape)

        # convolutional layers, applied independently to every time step
        c1 = layers.TimeDistributed(layers.Conv2D(filters=128, kernel_size=4, strides=(2, 2), padding="same", activation='relu'))(inputs)
        c2 = layers.TimeDistributed(layers.Conv2D(filters=128, kernel_size=4, strides=(2, 2), padding="same", activation='relu'))(c1)
        c3 = layers.TimeDistributed(layers.Conv2D(filters=64, kernel_size=2, strides=(1, 1), padding="same", activation='relu'))(c2)
        flat = layers.TimeDistributed(layers.Flatten())(c3)

        # recurrent summary of the sequence followed by the dense head
        lstm = layers.LSTM(96, activation='tanh')(flat)
        dense = layers.Dense(16, activation='relu')(lstm)
        outputs = layers.Dense(self._output_dim, activation='linear')(dense)

        # Wire the real output head into the model. Previously `dense` was
        # passed here, which silently dropped the Dense(output_dim) layer and
        # made the network emit 16 values per sample.
        model = keras.Model(inputs=inputs, outputs=outputs, name='CNN_with_LSTM')
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras versions reject.
        model.compile(loss=losses.mean_squared_error, optimizer=Adam(learning_rate=self._learning_rate))

        # model.summary()
        return model


    
# Hyper-parameters used for a quick build of the network.
num_layers, width = 4, 4
batch_size = 100
learning_rate = 0.001
input_dim = 20
output_dim = 4
sequence_length = 8

# Shape of a single state frame fed to the convolutional layers.
number_of_cells_per_lane = 10
input_shape = (number_of_cells_per_lane, 8, 1)

# Instantiating the class builds and compiles the model as a side effect.
Model = RNNTrainModel(
    num_layers=num_layers,
    width=width,
    batch_size=batch_size,
    learning_rate=learning_rate,
    input_dim=input_dim,
    output_dim=output_dim,
    input_shape=input_shape,
    sequence_length=sequence_length,
)

In [3]:
from tensorflow import keras
from pandas import DataFrame
from pandas import concat
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

# create sequence: `length` evenly spaced values starting at 0
length = 10
sequence = [step / float(length) for step in range(length)]
print(sequence)

# create X/y pairs: left column is the previous value, right column the
# current one; the first row has no predecessor and is dropped
df = DataFrame(sequence)
df = concat([df.shift(1), df], axis=1).dropna()

print(df)

# convert to LSTM friendly format: X becomes (samples, time steps, features)
values = df.values
X = values[:, 0].reshape(-1, 1, 1)
y = values[:, 1]

print(X)
print(y)

# # 1. define network
# model = Sequential()
# model.add(LSTM(10, input_shape=(1,1)))
# model.add(Dense(1))

# # 2. compile network
# model.compile(optimizer='adam', loss='mean_squared_error')
# # 3. fit network
# history = model.fit(X, y, epochs=1000, batch_size=len(X), verbose=0)
# # 4. evaluate network
# loss = model.evaluate(X, y, verbose=0)
# print(loss)
# # 5. make predictions
# predictions = model.predict(X, verbose=0)
# print(predictions[:, 0])

[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
     0    0
1  0.0  0.1
2  0.1  0.2
3  0.2  0.3
4  0.3  0.4
5  0.4  0.5
6  0.5  0.6
7  0.6  0.7
8  0.7  0.8
9  0.8  0.9
[[[0. ]]

 [[0.1]]

 [[0.2]]

 [[0.3]]

 [[0.4]]

 [[0.5]]

 [[0.6]]

 [[0.7]]

 [[0.8]]]
[0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]


In [4]:
import random
import numpy as np


number_of_cells_per_lane = 10
# input_shape = (number_of_cells_per_lane, 8, 1)
input_shape = 1
num_actions = 4
# num_actions = 1
sequence_length = 3

# Fake replay memory: 10 episodes of 10 steps; every step is a list of four
# labelled placeholder strings so that slicing can be checked by eye.
fields = ("state", "action", "reward", "next_state")
samples = [
    [[f"{field}{s}_episode_{ep}" for field in fields] for s in range(10)]
    for ep in range(10)
]
print(samples)
print("")





[[['state0_episode_0', 'action0_episode_0', 'reward0_episode_0', 'next_state0_episode_0'], ['state1_episode_0', 'action1_episode_0', 'reward1_episode_0', 'next_state1_episode_0'], ['state2_episode_0', 'action2_episode_0', 'reward2_episode_0', 'next_state2_episode_0'], ['state3_episode_0', 'action3_episode_0', 'reward3_episode_0', 'next_state3_episode_0'], ['state4_episode_0', 'action4_episode_0', 'reward4_episode_0', 'next_state4_episode_0'], ['state5_episode_0', 'action5_episode_0', 'reward5_episode_0', 'next_state5_episode_0'], ['state6_episode_0', 'action6_episode_0', 'reward6_episode_0', 'next_state6_episode_0'], ['state7_episode_0', 'action7_episode_0', 'reward7_episode_0', 'next_state7_episode_0'], ['state8_episode_0', 'action8_episode_0', 'reward8_episode_0', 'next_state8_episode_0'], ['state9_episode_0', 'action9_episode_0', 'reward9_episode_0', 'next_state9_episode_0']], [['state0_episode_1', 'action0_episode_1', 'reward0_episode_1', 'next_state0_episode_1'], ['state1_episode_

In [5]:
# Draw 5 distinct episodes, then cut one contiguous window of
# `sequence_length` steps out of each of them.
sampled_episodes = random.sample(samples, 5)

batch = []
for episode in sampled_episodes:
    # randint is inclusive on both ends, so the last window still fits.
    last_valid_start = len(episode) - sequence_length
    start_point = random.randint(0, last_valid_start)
    window = slice(start_point, start_point + sequence_length)
    batch.append(episode[window])

print(batch)



[[['state7_episode_5', 'action7_episode_5', 'reward7_episode_5', 'next_state7_episode_5'], ['state8_episode_5', 'action8_episode_5', 'reward8_episode_5', 'next_state8_episode_5'], ['state9_episode_5', 'action9_episode_5', 'reward9_episode_5', 'next_state9_episode_5']], [['state4_episode_4', 'action4_episode_4', 'reward4_episode_4', 'next_state4_episode_4'], ['state5_episode_4', 'action5_episode_4', 'reward5_episode_4', 'next_state5_episode_4'], ['state6_episode_4', 'action6_episode_4', 'reward6_episode_4', 'next_state6_episode_4']], [['state0_episode_6', 'action0_episode_6', 'reward0_episode_6', 'next_state0_episode_6'], ['state1_episode_6', 'action1_episode_6', 'reward1_episode_6', 'next_state1_episode_6'], ['state2_episode_6', 'action2_episode_6', 'reward2_episode_6', 'next_state2_episode_6']], [['state6_episode_2', 'action6_episode_2', 'reward6_episode_2', 'next_state6_episode_2'], ['state7_episode_2', 'action7_episode_2', 'reward7_episode_2', 'next_state7_episode_2'], ['state8_epis

In [6]:
# Buffers with one slot per (sequence, step).
# 'S16' is a fixed-width 16-byte string dtype; values are stored as bytes.
x = np.zeros(shape=(len(batch), sequence_length, input_shape), dtype='S16')  # from online network
y = np.zeros(shape=(len(batch), sequence_length, num_actions))  # from target network

print(x.shape, y.shape, sep="\n")

# print(len(batch))

(5, 3, 1)
(5, 3, 4)


In [7]:
# Every step is [state, action, reward, next_state]; pull columns 0 and 3
# into two (sequences, steps) arrays.
states = np.asarray([[step[0] for step in sequence] for sequence in batch])
next_states = np.asarray([[step[3] for step in sequence] for sequence in batch])

print(states, states.shape )
print(next_states)

[['state7_episode_5' 'state8_episode_5' 'state9_episode_5']
 ['state4_episode_4' 'state5_episode_4' 'state6_episode_4']
 ['state0_episode_6' 'state1_episode_6' 'state2_episode_6']
 ['state6_episode_2' 'state7_episode_2' 'state8_episode_2']
 ['state1_episode_0' 'state2_episode_0' 'state3_episode_0']] (5, 3)
[['next_state7_episode_5' 'next_state8_episode_5' 'next_state9_episode_5']
 ['next_state4_episode_4' 'next_state5_episode_4' 'next_state6_episode_4']
 ['next_state0_episode_6' 'next_state1_episode_6' 'next_state2_episode_6']
 ['next_state6_episode_2' 'next_state7_episode_2' 'next_state8_episode_2']
 ['next_state1_episode_0' 'next_state2_episode_0' 'next_state3_episode_0']]


In [8]:
# Placeholder Q-tables with one row of action values per (sequence, step).
# They are sized from `sequence_length` / `num_actions`, the same settings
# that size the x / y buffers, instead of the literals 3 and 4, so that
# changing either setting cannot desynchronise the arrays.
q_s_a = np.zeros((len(batch), sequence_length, num_actions))
q_s_a_d = np.zeros((len(batch), sequence_length, num_actions))  # not read yet; see the commented-out target formula below
print(q_s_a.shape)

gamma = 0.5  # only appears in the commented-out target formula below

for index_sequence, sequence in enumerate(batch):
    for index_step, step in enumerate(sequence):
        # extract data from one sample
        state, action, reward = step[0], step[1], step[2]

        # NOTE: this is a view into q_s_a, so the write below also updates
        # q_s_a in place.
        current_q = q_s_a[index_sequence][index_step]  # get the Q(state) predicted before
        print("current q: ",current_q)

        # The stored action is a placeholder string here, so a random valid
        # action index is drawn instead (randint is inclusive on both ends).
        action = random.randint(0, num_actions - 1)

        # update with combination of online and target network
        # (random stand-in for: reward + gamma * np.amax(q_s_a_d[index_sequence][index_step]))
        current_q[action] = random.randint(0,10)  # update Q(state, action)
        x[index_sequence][index_step] = state
        y[index_sequence][index_step] = current_q  # Q(state) that includes the updated action value

print("x: ,", x, x.shape)
print("y: ,", y,y.shape)
        
        
# print("current-q ", current_q)

# x[i] = state
#         y[i] = current_q  # Q(state) that includes the updated action value

        


        

(5, 3, 4)
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
current q:  [0. 0. 0. 0.]
x: , [[[b'state7_episode_5']
  [b'state8_episode_5']
  [b'state9_episode_5']]

 [[b'state4_episode_4']
  [b'state5_episode_4']
  [b'state6_episode_4']]

 [[b'state0_episode_6']
  [b'state1_episode_6']
  [b'state2_episode_6']]

 [[b'state6_episode_2']
  [b'state7_episode_2']
  [b'state8_episode_2']]

 [[b'state1_episode_0']
  [b'state2_episode_0']
  [b'state3_episode_0']]] (5, 3, 1)
y: , [[[ 0.  0.  0.  0.]
  [ 0.  9.  0.  0.]
  [ 0.  0.  2.  0.]]

 [[ 0.  0.  0.  4.]
  [ 0.  0.  0.  8.]
  [ 0.  0.  3.  0.]]

 [[ 0.  0.  7.  0.]
  [ 9.  0.  0.  0.]
  [ 0.  0.  0.  9.]]

 [[ 0.  0. 10.  0.]
  [ 

In [9]:
import numpy as np

In [10]:
# Start from an all-zero (2, 3, 4) array and show it.
a = np.zeros(shape=(2, 3, 4))
print(a)
print("-----------")

# Write a single element using one multi-axis index, then show the result.
a[1, 0, 0] = 2
print(a)




[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]]
-----------
[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[2. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]]


In [11]:
# Start with an empty list and grow it by one element.
b = list()
b += [1]

print(b)

[1]


In [12]:
import pandas as pd

def rollavg_pandas(a,n):
    """
    Centered rolling mean of data set `a` with window size `n`, via pandas.

    Windows are allowed to shrink at the edges (min_periods=1), so no NaN is
    produced and the result has one value per input element. The result is
    returned as a flattened numpy array.
    """
    frame = pd.DataFrame(a)
    smoothed = frame.rolling(window=n, center=True, min_periods=1).mean()
    return np.ravel(smoothed.to_numpy())


# Smooth a two-element series with a window wider than the series itself.
roling_window = 8
data = rollavg_pandas([1, 8], roling_window)

print(data)

[4.5 4.5]


In [13]:
def hello():
    """Return two scalars and a six-element list bundled in a single list."""
    return [1, 2, list(range(1, 7))]

print(hello())

[1, 2, [1, 2, 3, 4, 5, 6]]


In [14]:
# Show what a 1-D zero vector of length 4 looks like when printed.
print(np.zeros(4))

[0. 0. 0. 0.]


In [15]:
# SET STATE DIMENSION PARAMETERS
number_of_cells_per_lane = 10
conv_state_shape = (number_of_cells_per_lane, 8, 2)
green_phase_state_shape = 4
elapsed_time_state_shape = 1

# The full state is a list of three per-component shapes.
state_shape = [
    conv_state_shape,
    green_phase_state_shape,
    elapsed_time_state_shape,
]

print(state_shape)

# Rebuilding the list element by element prints the same thing.
print([component for component in state_shape])


[(10, 8, 2), 4, 1]
[(10, 8, 2), 4, 1]


In [16]:
# One-hot vector sized by the second state component, with index 2 set.
active_phase = 2
green_phase_state = np.zeros(state_shape[1])
green_phase_state[active_phase] = 1

print(green_phase_state)

[0. 0. 1. 0.]


In [134]:
# Dummy batch: 15 sequences x 8 time steps, every step being
# [array of shape (2, 8, 2), array of shape (4,), scalar 23].
# Comprehension variables are used so that the loop indices no longer
# overwrite the module-level `x` / `y` arrays created in earlier cells.
L = [
    [[np.random.rand(2, 8, 2), np.random.rand(4), 23] for _step in range(8)]
    for _sequence in range(15)
]

# print(L)
# The innermost entries have different shapes, so the result must be an
# object array. NumPy >= 1.24 raises ValueError for such ragged input unless
# dtype=object is requested explicitly.
L = np.asarray(L, dtype=object)
print(L.shape)
print(L[0][0][0].shape)



(15, 8, 3)
(2, 8, 2)


In [115]:
# This cell was written for a 2-D `L` (samples, components); the recorded
# output shows K1 with shape (15,). When `L` carries an extra time axis,
# (samples, time steps, components), the first time step is selected so the
# component columns are 1-D again. NOTE(review): picking step 0 is an
# assumption - confirm which step is wanted.
first_step = L[:, 0] if L.ndim == 3 else L

print(first_step[:, 0].shape)
# print(first_step[:, 1])

print("------")

# One object array per state component, each holding one entry per sample.
K0 = first_step[:, 0]
K1 = first_step[:, 1]
K2 = first_step[:, 2]
print("K1: ", K1.shape, K1)

print("-----")
print(K1[0].shape, K1[0], type(K1[0]))


(15,)
------
K1:  (15,) [array([0.7331605 , 0.05076741, 0.36866795, 0.05889683])
 array([0.25119205, 0.60878257, 0.64068848, 0.84423789])
 array([0.63151348, 0.73128216, 0.68496817, 0.59168999])
 array([0.00318538, 0.59659337, 0.41725245, 0.24994936])
 array([0.82020541, 0.0676231 , 0.3655208 , 0.25473668])
 array([0.3511161 , 0.11978402, 0.90814116, 0.15455863])
 array([0.95008309, 0.54953048, 0.88436523, 0.89567363])
 array([0.88384462, 0.27380379, 0.98959524, 0.30724914])
 array([0.50279243, 0.99176509, 0.95598995, 0.85166968])
 array([0.65801678, 0.76440165, 0.21144751, 0.73275945])
 array([0.71596397, 0.21301077, 0.17761369, 0.33979022])
 array([0.35523564, 0.9995932 , 0.14361286, 0.13943099])
 array([0.76541894, 0.37406208, 0.92865142, 0.12134889])
 array([0.66906416, 0.2343215 , 0.53771032, 0.97396121])
 array([0.37065575, 0.39692956, 0.68338624, 0.48837111])]
-----
(4,) [0.7331605  0.05076741 0.36866795 0.05889683] <class 'numpy.ndarray'>


In [116]:
# K1 is an object array whose entries are equally shaped float arrays.
# Stacking them yields a regular (n_samples, ...) float array without
# hard-coding the number of samples or the vector length.
J1 = np.stack(list(K1))
print(J1.shape, J1)

(15, 4) [[0.7331605  0.05076741 0.36866795 0.05889683]
 [0.25119205 0.60878257 0.64068848 0.84423789]
 [0.63151348 0.73128216 0.68496817 0.59168999]
 [0.00318538 0.59659337 0.41725245 0.24994936]
 [0.82020541 0.0676231  0.3655208  0.25473668]
 [0.3511161  0.11978402 0.90814116 0.15455863]
 [0.95008309 0.54953048 0.88436523 0.89567363]
 [0.88384462 0.27380379 0.98959524 0.30724914]
 [0.50279243 0.99176509 0.95598995 0.85166968]
 [0.65801678 0.76440165 0.21144751 0.73275945]
 [0.71596397 0.21301077 0.17761369 0.33979022]
 [0.35523564 0.9995932  0.14361286 0.13943099]
 [0.76541894 0.37406208 0.92865142 0.12134889]
 [0.66906416 0.2343215  0.53771032 0.97396121]
 [0.37065575 0.39692956 0.68338624 0.48837111]]


In [118]:

# K0 is an object array whose entries are equally shaped float arrays.
# Stacking adds the sample axis in front without hard-coding
# (15, 2, 8, 2), so the cell keeps working when the sizes change.
J0 = np.stack(list(K0))
print(J0.shape, J0)

(15, 2, 8, 2) [[[[0.74489412 0.20674486]
   [0.76492674 0.45786593]
   [0.36420031 0.50921633]
   [0.97859753 0.94659086]
   [0.1335775  0.50789508]
   [0.52261268 0.53986829]
   [0.84994852 0.353315  ]
   [0.388698   0.34725806]]

  [[0.65507421 0.16277852]
   [0.69797755 0.80444447]
   [0.74608712 0.62327667]
   [0.3988514  0.60515703]
   [0.08589372 0.48312466]
   [0.2825303  0.06195482]
   [0.45824427 0.29642216]
   [0.72694159 0.66652394]]]


 [[[0.33012714 0.29825706]
   [0.60986891 0.32939859]
   [0.41041183 0.10992324]
   [0.11804085 0.05045018]
   [0.92024875 0.82666851]
   [0.20198278 0.75190294]
   [0.88640846 0.65313248]
   [0.48361383 0.73522826]]

  [[0.01962477 0.49001031]
   [0.34651012 0.2432548 ]
   [0.83015407 0.21795219]
   [0.35554662 0.18539662]
   [0.22142536 0.86336359]
   [0.94915167 0.20016811]
   [0.78508298 0.39876976]
   [0.12591147 0.63016687]]]


 [[[0.53839689 0.21928783]
   [0.2058835  0.30661047]
   [0.16797131 0.33295989]
   [0.35682761 0.09171508]
  

In [119]:
# Inspect K2 (created in an earlier cell): its shape and contents ...
print("K2: ", K2.shape, K2)


# ... and its dtype.
print(K2.dtype)

K2:  (15,) [23 23 23 23 23 23 23 23 23 23 23 23 23 23 23]
object


In [120]:
# Third state component, one entry per sample. When `L` has a time axis,
# (samples, time steps, components), the first time step is used so that G is
# 1-D as the conversion below needs. NOTE(review): step 0 is an assumption.
G = L[:, 0, 2] if L.ndim == 3 else L[:, 2]

# Convert the object column to a real float array. `np.float` was only an
# alias of the builtin `float` and was removed in NumPy 1.24.
F = np.array(G, dtype=float)
print(F, F.dtype)

[23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23.] float64


In [121]:
# SET STATE DIMENSION PARAMETERS
number_of_cells_per_lane = 10
conv_state_shape = (number_of_cells_per_lane, 8, 2)
green_phase_state_shape = 4
elapsed_time_state_shape = 1
state_shape = [conv_state_shape, green_phase_state_shape, elapsed_time_state_shape]

print(state_shape)

# Prefix every component shape with two leading axes of size 1. Components
# given as a plain int are first wrapped into a one-element tuple.
sequence_state_shape = [
    (1, 1) + (component if isinstance(component, tuple) else (component,))
    for component in state_shape
]

print(sequence_state_shape)
print(sequence_state_shape[1])

[(10, 8, 2), 4, 1]
[(1, 1, 10, 8, 2), (1, 1, 4), (1, 1, 1)]
(1, 1, 4)


In [126]:
# Third state component, one entry per sample. When `L` has a time axis,
# (samples, time steps, components), the first time step is used so that the
# column is 1-D, matching the recorded output. NOTE(review): step 0 is an
# assumption - confirm which step is wanted.
third_component = L[:, 0, 2] if L.ndim == 3 else L[:, 2]
print(third_component.shape)
print(third_component[0])

# Wrap the single value into a (1, 1, 1) array by adding two leading axes.
new = third_component[0]
new2 = np.expand_dims((new,), axis=(0,1))
# new = (1,1,new)

print(new2.shape)
print(new2)

(15,)
23
(1, 1, 1)
[[[23]]]
