In [None]:
import numpy as np
import random
import math
from env import CustomEnvironment

In [None]:
# Number of discrete actions; build_model uses this as the width of the
# network's output layer (one output per action).
actions_size = 4

In [None]:
def preprocess_data(environment) :
    """Encode one vehicle/environment object as a flat numeric feature vector.

    The returned 1-D numpy array is laid out, in order, as:
      * every component of ``environment.state`` divided by the matching
        component of ``environment.grid_size``;
      * every component of ``environment.destination`` scaled the same way;
      * one slot per entry of ``environment.actions``, all zero except for a
        single slot set to 1 whose position is derived from
        ``environment.direction``;
      * ``environment.velocity / environment.max_velocity``;
      * ``environment.communication_radius`` divided by the grid diagonal.

    Args:
        environment: object exposing ``state``, ``destination``, ``grid_size``,
            ``actions``, ``direction``, ``velocity``, ``max_velocity`` and
            ``communication_radius``.

    Returns:
        numpy.ndarray: the feature vector described above.
    """
    features = [
        environment.state[k] / environment.grid_size[k]
        for k in range(len(environment.state))
    ]
    features += [
        environment.destination[k] / environment.grid_size[k]
        for k in range(len(environment.destination))
    ]
    features += [0] * len(environment.actions)

    # NOTE(review): the slot is offset by len(state) + 1, not by
    # len(state) + len(destination). That lands inside the direction slots
    # only if `direction` is 1-based and `destination` has two components;
    # with a 0-based direction, direction 0 would overwrite the last
    # destination feature. Confirm against CustomEnvironment.
    # The flag is written before the trailing scalars are appended, so an
    # out-of-range slot fails here rather than landing on a later feature.
    hot_slot = len(environment.state) + environment.direction + 1
    features[hot_slot] = 1

    features.append(environment.velocity / environment.max_velocity)

    grid_diagonal = math.sqrt(environment.grid_size[0] ** 2 + environment.grid_size[1] ** 2)
    features.append(environment.communication_radius / grid_diagonal)

    return np.array(features)  # x, y, xd, yd, direction, velocity, com_rad



In [None]:
def get_nearby_vehicles(environment):
    """Return the stationary vehicles that are within communication range.

    A vehicle from ``environment.stationary_vehicles`` is kept when the
    Euclidean distance between its ``state[0:2]`` and ``environment.state[0:2]``
    is no greater than either party's ``communication_radius``.

    Args:
        environment: object exposing ``state``, ``communication_radius`` and
            ``stationary_vehicles``.

    Returns:
        list: the matching vehicles, in their original iteration order.
    """
    def _in_range(other):
        # Straight-line distance between the two positions.
        gap = math.sqrt(
            (environment.state[0] - other.state[0]) ** 2
            + (environment.state[1] - other.state[1]) ** 2
        )
        # Reachable if either side's radius covers the gap.
        return gap <= environment.communication_radius or gap <= other.communication_radius

    return [other for other in environment.stationary_vehicles if _in_range(other)]


In [None]:
def get_nearby_vehicle_input(environment) :
    """Build the feature matrix for every vehicle within communication range.

    Each vehicle returned by ``get_nearby_vehicles(environment)`` is encoded
    with ``preprocess_data`` and the encodings are stacked into one array.

    Args:
        environment: object accepted by ``get_nearby_vehicles``.

    Returns:
        numpy.ndarray: one row per nearby vehicle. When no vehicle is in range
        the result is an empty 1-D array (shape ``(0,)``).
    """
    encoded_neighbours = [
        preprocess_data(vehicle) for vehicle in get_nearby_vehicles(environment)
    ]
    return np.array(encoded_neighbours)

In [None]:
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, Input, Concatenate
import numpy as np

In [None]:
def build_model(input_size=10, lstm_hidden_size=100, dense_units=64, num_actions=None):
    """Build and compile the two-branch Q-value network.

    One branch passes the current vehicle's feature vector through a dense
    layer; the other summarises a variable-length sequence of nearby-vehicle
    feature vectors with an LSTM. The two are concatenated and mapped to one
    output per action.

    Args:
        input_size: length of a single vehicle feature vector.
        lstm_hidden_size: number of units in the LSTM that summarises the
            nearby vehicles.
        dense_units: number of units in the dense layer applied to the current
            vehicle's features.
        num_actions: number of outputs; defaults to the module-level
            ``actions_size`` when omitted.

    Returns:
        A compiled Keras ``Model`` taking
        ``[current_vehicle_input, nearby_vehicles_input]`` with shapes
        ``(batch, input_size)`` and ``(batch, timesteps, input_size)``.
    """
    if num_actions is None:
        num_actions = actions_size

    current_vehicle_input = Input(shape=(input_size, ), name='current_vehicle_input')
    # `None` in the time dimension lets the number of nearby vehicles vary per call.
    nearby_vehicles_input = Input(shape=(None, input_size), name='nearby_vehicles_input')

    # Only the final LSTM state is kept: a fixed-size summary of the neighbours.
    lstm_layer = LSTM(lstm_hidden_size, return_sequences=False)(nearby_vehicles_input)

    current_vehicle_layer = Dense(dense_units, activation='relu')(current_vehicle_input)

    combined_output = Concatenate()([lstm_layer, current_vehicle_layer])

    # Linear head: the training cell fits these outputs with MSE to bootstrapped
    # Q-value targets (reward + discounted max Q). A softmax head would confine
    # them to [0, 1] summing to 1, so it could not represent those targets.
    output_layer = Dense(num_actions, activation='linear')(combined_output)

    model = Model(inputs=[current_vehicle_input, nearby_vehicles_input], outputs=output_layer)

    model.compile(optimizer='adam', loss='mse')

    return model


In [None]:
# Instantiate the network with build_model's defaults and print its
# layer-by-layer summary.
model = build_model()
model.summary()

In [None]:
# --- Training schedule ---
num_episodes = 1_000            # episodes run by the training loop
max_steps_per_episode = 100     # step cap per episode

# --- Q-learning parameters ---
discount_rate = 0.99            # weight on the bootstrapped next-state value
# NOTE(review): not referenced by any cell visible in this notebook; the model
# is compiled with the optimizer given by name only.
learning_rate = 0.1

# --- Epsilon-greedy exploration ---
# After each episode the training loop sets
#   exploration_rate = min + (max - min) * exp(-exploration_decay_rate * episode)
max_exploration_rate = 1
min_exploration_rate = 0.01
exploration_decay_rate = 0.01
exploration_rate = max_exploration_rate   # start fully exploratory


In [None]:
# Total reward of each episode; the training loop appends one entry per episode.
rewards_all_episodes = []
# Width of one vehicle feature vector, used when reshaping the nearby-vehicle
# array for the model. It must match the input width the model was built
# with (10).
input_size = 10
# Environment instance that the training loop resets at the start of every episode.
env = CustomEnvironment()

In [None]:

# Online Q-learning loop: one gradient step per environment step, with an
# epsilon-greedy policy whose exploration rate decays after every episode.
for episode in range(num_episodes):

    state = env.reset()
    rewards_current_episode = 0

    # NOTE(review): `state` is never reassigned from `new_state`; this relies on
    # `state.step()` advancing the object in place. Confirm against
    # CustomEnvironment.
    for step in range(max_steps_per_episode):

        current_vehicle_input = preprocess_data(state)
        nearby_vehicles_input = get_nearby_vehicle_input(state)

        # The recurrent branch of the model consumes the nearby-vehicle
        # sequence, so the model is only queried or trained when at least one
        # vehicle is in range.
        has_nearby = nearby_vehicles_input.shape[0] > 0
        model_inputs = None
        if has_nearby:
            model_inputs = [
                current_vehicle_input.reshape(1, -1),
                nearby_vehicles_input.reshape(1, -1, input_size),
            ]

        q_values = None
        exploration_rate_threshold = random.uniform(0, 1)

        if exploration_rate_threshold < exploration_rate or not has_nearby:
            # Explore. This is also the fallback when there is no neighbour
            # sequence to feed to the model.
            action = random.choice(state.actions)
        else:
            # Exploit: pick the action with the highest predicted value.
            q_values = model.predict(model_inputs, verbose=0)
            action = int(np.argmax(q_values))

        new_state, reward, done = state.step(action)

        rewards_current_episode += reward

        if has_nearby:
            target_q_value = None
            if done:
                # Terminal transition: nothing follows, so do not bootstrap.
                target_q_value = reward
            else:
                new_current_vehicle_input = preprocess_data(new_state)
                new_nearby_vehicles_input = get_nearby_vehicle_input(new_state)
                if new_nearby_vehicles_input.shape[0] > 0:
                    new_q_values = model.predict(
                        [
                            new_current_vehicle_input.reshape(1, -1),
                            new_nearby_vehicles_input.reshape(1, -1, input_size),
                        ],
                        verbose=0,
                    )
                    target_q_value = reward + discount_rate * np.max(new_q_values)

            if target_q_value is not None:
                # Start from the model's own predictions so that only the
                # taken action's value is pushed towards the target; the other
                # outputs contribute zero error.
                if q_values is None:
                    q_values = model.predict(model_inputs, verbose=0)
                q_targets = np.array(q_values)
                q_targets[0][action] = target_q_value

                model.fit(model_inputs, q_targets, verbose=0)

        # End the episode as soon as the environment reports completion,
        # whether or not a training update happened on this step.
        if done:
            break

    print(episode, rewards_current_episode)
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)

    rewards_all_episodes.append(rewards_current_episode)


In [None]:
# Display the per-episode total rewards collected by the training loop.
rewards_all_episodes