In [1]:
import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import math
from collections import deque
import pandas as pd

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.xception import Xception
import tensorflow.compat.v1.keras.backend as backend
from tensorflow.keras.models import load_model
import tensorflow as tf
from threading import Thread

from collections import deque 
from tqdm import tqdm
from tqdm import tqdm_notebook

try:
    sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % (
        sys.version_info.major,
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    pass

import carla

print(tf.__version__)

2.5.0


In [2]:
SHOW_PREVIEW = True
IM_WIDTH = 640
IM_HEIGHT = 480
SECONDS_PER_EPISODE = 30
REPLAY_MEMORY_SIZE = 5_000
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 16
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = MINIBATCH_SIZE//4
UPDATE_TARGET_EVERY = 20
MODEL_NAME = "Xception"

MEMORY_FRACTION = 0.8
MIN_REWARD = -200

EPISODES = 5

DISCOUNT = 0.99
epsilon = 1
EPSILON_DECAY = 0.95 ## 0.9975 99975
MIN_EPSILON = 0.001

AGGREGATE_STATS_EVERY = 100

In [3]:
class CarEnv:
    
    
    SHOW_CAM = SHOW_PREVIEW
    STEER_AMT = 0.8
    
    im_width = IM_WIDTH
    im_height = IM_HEIGHT
    
    rgb_image = None
    
    def __init__(self):
        self.client = carla.Client("localhost", 2000)
        self.client.set_timeout(5.0)
        
        self.world = self.client.get_world()
        self.world_map = self.world.get_map()
        
        self.blueprint_library = self.world.get_blueprint_library()
        
        self.model_3_bp = self.blueprint_library.filter("model3")[0]
        
    def reset(self):
        self.collision_hist = []
        self.actor_list = []
        
        self.transform = random.choice(self.world_map.get_spawn_points())
        self.vehicle = self.world.spawn_actor(self.model_3_bp, self.transform)
        self.actor_list.append(self.vehicle)
        
        self.spectator = self.world.get_spectator() 
        self.spectator.set_transform(self.vehicle.get_transform())
        
        self.rgb_cam_bp = self.blueprint_library.find("sensor.camera.rgb")
        
        self.rgb_cam_bp.set_attribute('image_size_x', str(self.im_width))
        self.rgb_cam_bp.set_attribute('image_size_y', str(self.im_height))
        self.rgb_cam_bp.set_attribute('fov', str(110))
        
        transform = carla.Transform(carla.Location(x=2.5, z=0.7))
        self.rgb_cam_sensor = self.world.spawn_actor(self.rgb_cam_bp, transform, attach_to=self.vehicle)
        self.actor_list.append(self.rgb_cam_sensor)
        
        self.rgb_cam_sensor.listen(lambda data: self.process_img(data))
        
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))
        
        time.sleep(4)
        
        self.col_sensor_bp = self.blueprint_library.find("sensor.other.collision")
        
        self.col_sensor = self.world.spawn_actor(self.col_sensor_bp, transform, attach_to=self.vehicle)
        self.col_sensor.listen(lambda event: self.collision_data(event))
        
        while self.rgb_image is None:
            time.sleep(0.01)
            
        self.episode_start_time = time.time()
        
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))
        
        return self.rgb_image
    
    def collision_data(self, event):
        self.collision_hist.append(event)
        
    def process_img(self, image):
        i = np.array(image.raw_data)
        i2 = i.reshape((self.im_height, self.im_width, 4))
        i3 = i2[:, :, :3]
        self.rgb_image = i3.copy()
        if self.SHOW_CAM:
            self.disp_rgb_cam()
            
    def disp_rgb_cam(self):
        cv2.imshow("Car front camera" , self.rgb_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            self.SHOW_CAM = False
            
    def step(self, action):
        
        if action == 0:
            self.vehicle.apply_control(carla.VehicleControl(throttle=0.8, steer=0.0))
        if action == 1:
            self.vehicle.apply_control(carla.VehicleControl(throttle=0.8, steer=-1*self.STEER_AMT))
        if action == 2:
            self.vehicle.apply_control(carla.VehicleControl(throttle=0.8, steer=1*self.STEER_AMT))
        
        v = self.vehicle.get_velocity()
        v_kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))
    
        if len(self.collision_hist) != 0:
            done = True
            reward = -200
        elif v_kmh < 50:
            done = False
            reward = -1
        else:
            done = False
            reward = 1
            
        if time.time() > self.episode_start_time + SECONDS_PER_EPISODE:
            done = True
            
        return self.rgb_image, reward, done, None
    
    def destroy_all_actors(self):
        for actor in self.actor_list:
            actor.destroy()
        
        

In [4]:
class DQNAgent:
    
    
    def __init__(self):
        self.model = self.create_model()
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())
        
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        
        self.target_update_counter = 0
        self.graph = tf.compat.v1.get_default_graph()
        
        ####check what these do
        self.terminate = False
        self.terminal_episode = False
        self.training_initialized = False
    
    def create_model(self):
        base_model = Xception(weights=None, include_top=False, input_shape=(IM_HEIGHT, IM_WIDTH, 3))
        x= base_model.output
        x = GlobalAveragePooling2D()(x)
        predictions = Dense(3, activation='linear')(x)
        
        model = Model(inputs=base_model.input, outputs=predictions)
        model.run_eagerly = False
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
        
        return model
    
    def update_replay_memory(self, transition):
        #transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)
        
    def train(self):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return
        
        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)
        
        current_states = np.array([transition[0] for transition in minibatch])/255
        #with self.graph.as_default():
        current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)

        new_states = np.array([transition[3] for transition in minibatch])/255
        #with self.graph.as_default():
        future_qs_list = self.target_model.predict(new_states, PREDICTION_BATCH_SIZE)
        
        X = []
        Y = []
        
        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward
            
            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            Y.append(current_qs)
        
        #with self.graph.as_default():
        self.model.fit(np.array(X)/255, np.array(Y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False)
            
        if self.terminal_episode:
            self.target_update_counter += 1 
        
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0
        
    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape)/255)[0]
        
    def train_in_loop(self):
        X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
        Y = np.random.uniform(size=(1, 3)).astype(np.float32)
        #with self.graph.as_default():
        self.model.fit(X,Y, verbose=1, batch_size=1)

        self.training_initialized = True

        while True:
            if self.terminate:
                return
            self.train()
            time.sleep(0.01)        


In [5]:
#Main section

FPS = 60
ep_rewards = [-200]
average_rewards = []
min_rewards = []
max_rewards = []

random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)

#Memory segmentation
# gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
# backend.set_session(tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)))

# Create agent and environment
env = CarEnv()
agent = DQNAgent()
#env.reset()

In [6]:
# Start training thread and wait for training to be initialized
trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
trainer_thread.start()
print("sleeping....")
while not agent.training_initialized:
    time.sleep(0.01)

# X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
# y = np.random.uniform(size=(1, 3)).astype(np.float32)
# #with self.graph.as_default():
# agent.model.fit(X,y, verbose=1, batch_size=1)
    
# Initialize predictions - forst prediction takes longer as of initialization that has to be done
# It's better to do a first prediction then before we start iterating over episode steps
print("First predict start")
agent.get_qs(np.ones((env.im_height, env.im_width, 3)))
print("First predict over")

#Iterate over episodes
start_time = time.time()
for episode in tqdm_notebook(range(1, EPISODES + 1), ascii=True, unit="episodes"):
    
    print("Episode: {}".format(episode))
    #Resetting episode reward and step number
    episode_reward = 0
    step = 1
    
    #reset environment and get initial state
    
    try:
        current_state = env.reset()
    except:
        print("Error while resetting environment. Moving on to next episode.")
        env.destroy_all_actors()
        continue
    
    #reset Flag and start iterating until episode ends
    done = False
    
    #playing the episode
    while True:
        # This part stays mostly the same, the change is to query a model for Q values
        if np.random.random() > epsilon:
            # Get action from Q table
            action = np.argmax(agent.get_qs(current_state))
        else:
            # Get random action
            action = np.random.randint(0, 3)
            # This takes no time, so we add a delay matching 60 FPS (prediction above takes longer)
            time.sleep(1/FPS)
            
        new_state, reward, done, _ = env.step(action)
        agent.terminal_episode = done
        
        # Transform new continous state to new discrete state and count reward
        episode_reward += reward

        # Every step we update replay memory
        agent.update_replay_memory((current_state, action, reward, new_state, done))
        #agent.train()

        current_state = new_state
        step += 1

        if done:
            break
    
    # End of episode - destroy agents
    env.destroy_all_actors()
    
    # Append episode reward to a list and log stats (every given number of episodes)
    ep_rewards.append(episode_reward)
    if episode > AGGREGATE_STATS_EVERY:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        average_rewards.append(average_reward)
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_rewards.append(min_reward)
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_rewards.append(max_reward)
        
        #Save model if min reward is greater or equal to set value
        if min_reward >= MIN_REWARD:
            agent.model.save("Models/One/network_data/episodes_{}_min_reward_{}.model".format(EPISODES, min_reward))
    
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)

agent.terminate = True
trainer_thread.join()
end_time = time.time()
print("Simulation time: {} minutes".format((end_time-start_time)/60))
      
agent.model.save("Models/One/network_data/final_episodes_{}.model".format(EPISODES))

average_rewards = [0]*(AGGREGATE_STATS_EVERY+1) + average_rewards
min_rewards = [0]*(AGGREGATE_STATS_EVERY+1) + min_rewards
max_rewards = [0]*(AGGREGATE_STATS_EVERY+1) + max_rewards        
        
rewards_df = pd.DataFrame({"Episode Rewards":ep_rewards, "Average Rewards":average_rewards, "Minimum Rewards":min_rewards, "Maximum Rewards": max_rewards})
print(rewards_df)

rewards_df.to_csv("Models/One/rewards/test.csv", index=False)

sleeping....
First predict start
First predict over


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/5 [00:00<?, ?episodes/s]

Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Simulation time: 1.3676429430643717 minutes




INFO:tensorflow:Assets written to: Models/One/network_data/final_episodes_5.model\assets


ValueError: arrays must all be same length

In [None]:
rewards_df = pd.read_csv("Models/One/rewards/test.csv")

In [None]:
%matplotlib notebook
rcParams["figure.figsize"] = 10, 6
plt.plot(rewards_df["Episode Rewards"])
plt.grid(True)
plt.title("Episode Rewards")
plt.xlabel("episodes")
plt.show()

In [None]:
%matplotlib notebook
rcParams["figure.figsize"] = 10, 6
plt.plot(rewards_df["Average Rewards"])
plt.grid(True)
plt.title("Average Rewards")
plt.xlabel("episodes")
plt.show()

In [None]:
%matplotlib notebook
rcParams["figure.figsize"] = 10, 6
plt.plot(rewards_df["Minimum Rewards"])
plt.grid(True)
plt.title("Minimum Rewards")
plt.xlabel("episodes")
plt.show()

In [None]:
%matplotlib notebook
rcParams["figure.figsize"] = 10, 6
plt.plot(rewards_df["Maximum Rewards"])
plt.grid(True)
plt.title("Maximum Rewards")
plt.xlabel("episodes")
#plt.ylim(-250, 50)
#plt.yticks(list(range(-250,55,10)))
plt.show()

In [None]:
%matplotlib notebook
rcParams["figure.figsize"] = 10, 6
plt.plot(rewards_df["Episode Rewards"], label='Episode Rewards')
plt.plot(rewards_df["Average Rewards"], label='Average Rewards')
plt.plot(rewards_df["Minimum Rewards"], label='Minimum Rewards')
plt.plot(rewards_df["Maximum Rewards"], label='Maximum Rewards')
plt.grid(True)
plt.title("Rewards")
plt.xlabel("episodes")
plt.legend()
plt.show()

In [None]:
#Inference model

#Memory Fraction
# gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
# backend.set_session(tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)))

#load the model
print("Start loading model...")
model = load_model("Models/One/network_data/final_episodes_1000.model")
print("Model loading complete.")

#create environment
env = CarEnv()
env.SHOW_CAM = False

#For agent speed measurements - keeps last 60 frametimes
fps_counter = deque(maxlen=60)

# Initialize predictions - first prediction takes longer as of initialization that has to be done
# It's better to do a first prediction then before we start iterating over episode steps
model.predict(np.ones((1, env.im_height, env.im_width, 3)))

SHOW_CAM = True
NUM_EPISODES = 50

#loop over episodes
for episode in tqdm_notebook(range(NUM_EPISODES), ascii=True, unit="episodes"):
    
    print("Restarting episode....")
    
    #Reset environment and get initial state
    current_state = env.reset()
    env.collision_hist = []
    
    done = False
    
    #Loop over steps
    while True:
        
        #For FPS counter
        step_start = time.time()
        
        #Show current frame
        if SHOW_CAM == True:
            cv2.imshow("front camera", current_state)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                SHOW_CAM = False
                cv2.destroyAllWindows()

            
            
        #Predict an action based on current observation space
        qs = model.predict(np.array(current_state).reshape(-1, *current_state.shape)/255)[0]
        action = np.argmax(qs)
        
        new_state, reward, done, _ = env.step(action)
        
        current_state = new_state
        
        if done:
            break
            
        # Measure step time, append to a deque, then print mean FPS for last 60 frames, q values and taken action
        frame_time = time.time() - step_start
        fps_counter.append(frame_time)
        print(f'Agent: {len(fps_counter)/sum(fps_counter):>4.1f} FPS | Action: [{qs[0]:>5.2f}, {qs[1]:>5.2f}, {qs[2]:>5.2f}] {action}')
        
    #Destroy actors at the end of episode
    env.destroy_all_actors()
    
cv2.destroyAllWindows()    



In [None]:
agent.model.save("Models/One/network_data/episodes_{}_min_reward_{}.model".format(EPISODES, min_reward))
    

In [None]:
average_rewards = [0]*(AGGREGATE_STATS_EVERY+1) + average_rewards
min_rewards = [0]*(AGGREGATE_STATS_EVERY+1) + min_rewards
max_rewards = [0]*(AGGREGATE_STATS_EVERY+1) + max_rewards
print(len(ep_rewards))
print(len(average_rewards))
print(len(min_rewards))
print(len(max_rewards))

In [None]:
rewards_df = pd.DataFrame({"Episode Rewards":ep_rewards, "Average Rewards":average_rewards, "Minimum Rewards":min_rewards, "Maximum Rewards": max_rewards})
rewards_df

In [None]:
rewards_df.to_csv("Models/One/rewards/test.csv", index=False)

In [None]:
df = pd.read_csv("Models/One/rewards/test.csv")

In [None]:
plt.plot(df["Episode Rewards"].values)

In [None]:
df.values

In [None]:
plt.plot(ep_rewards)
plt.plot(average_rewards)
plt.plot(min_rewards)
plt.plot(max_rewards)

plt.show()

In [None]:
%matplotlib notebook
plt.plot(ep_rewards)
plt.plot(average_rewards)
plt.show()

In [None]:
%matplotlib notebook
plt.plot(df["Episode Rewards"])
plt.legend()
plt.show()

In [None]:
z = 2.314
type(z)

In [None]:
int(z)

In [None]:
for actor in env.actor_list:
    actor.destroy()

In [None]:
env.rgb_cam_sensor.stop()

In [None]:
SHOW_PREVIEW = False

In [None]:
cv2.destroyAllWindows()

In [None]:
        # Append episode reward to a list and log stats (every given number of episodes)
        ep_rewards.append(episode_reward)
        if not episode % AGGREGATE_STATS_EVERY or episode == 1:
            average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
            min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
            max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
            agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

            # Save model, but only when min reward is greater or equal a set value
            if min_reward >= MIN_REWARD:
                agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

        # Decay epsilon
        if epsilon > MIN_EPSILON:
            epsilon *= EPSILON_DECAY
            epsilon = max(MIN_EPSILON, epsilon)


# Set termination flag for training thread and wait for it to finish
agent.terminate = True
trainer_thread.join()
agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

In [None]:
import numpy as np
state = np.array([[[2]]])
np.reshape(-1, *state.shape).shape

In [None]:
#assigning GPU memory to model
#tf.config.gpu.set_per_process_memory_fraction(0.75)
#tf.config.gpu.set_per_process_memory_growth(True)

In [None]:
graph = tf.compat.v1.reset_default_graph()
graph = tf.compat.v1.get_default_graph()

In [None]:
    cv2.imshow("Car front camera", image)
    cv2.imshow("Perspective transform", image_wraped)
#     cv2.imshow("Binary image", image_binary)
#     cv2.imshow("Centroid", image_gray)
    cv2.imshow("Semantic Segmentation", image_semantic)
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = 'average angle: ' + str(avg_angle_degrees) + '\n' + 'steer: ' + str(steer)
    image_semantic_binary = cv2.putText(image_semantic_binary, text, (10, 10), font, 0.3, (255, 255, 255), 1, cv2.LINE_AA)
    cv2.imshow("Semantic Segmentation Binary", image_semantic_wraped)
    #cv2.imshow("Perspective transform semantic", image_semantic_wraped)
    if cv2.waitKey(1) & 0xFF == ord('q'):      #press 'q' to end the video feed
        break

print(image.shape)
camera_sensor.stop()
cv2.destroyAllWindows()


In [None]:
import tensorflow as tf

a = 20
b = 13

c = tf.add(a, b, name="Add")
print(a)
print(b)
print(type(a), type(b))
print(c)
type(c)

In [None]:
d = tf.multiply(c, a, name='Mul')
e = tf.truediv(d, a, name="div")
print(d)
print(e)

In [None]:
a = tf.constant(20)
b = tf.constant(13)
c= a + b

print(c)
type(c)

In [None]:
print(tf.compat.v1.get_default_graph())