# Reinforcement Learning in CARLA - DQN Agent

## Imports

In [1]:
import gym
import gym_carla
import random
import numpy as np
import pygame
import time
import os
from tqdm import tqdm
from collections import deque, namedtuple
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
#from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
import sys

# Location of the CARLA 0.9.11 Python API egg (built for Python 3.7, Linux x86_64).
directory = '/opt/carla-simulator/PythonAPI/carla/dist/carla-0.9.11-py3.7-linux-x86_64.egg'

# list.append cannot raise IndexError, so the former try/except around this
# call was dead code (the guard only makes sense with a glob(...)[0] lookup).
# The membership test keeps sys.path free of duplicates when the cell is re-run.
if directory not in sys.path:
    sys.path.append(directory)
import carla

Checking if GPU is available.

In [3]:
# Use the GPU whenever PyTorch reports one; otherwise fall back to the CPU.
if T.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


Clear GPU memory

In [4]:
# Ask PyTorch to release the GPU memory it is caching so a fresh run starts clean.
T.cuda.empty_cache() 

## Training

In [5]:
from DQNAgent import DQNAgent

In [4]:
from SegmentationAgent import SegmentationAgent
from oldencoder import autoencoder
from pathlib import Path

In [5]:
# Settings handed to SegmentationAgent below. EPOCHS is defined for
# completeness but is not passed to the agent in this cell.
VAL_PERCENTAGE = 0.2  # Amount of data to use for validation
TEST_NUM = 10  # Number of images to set aside for testing and visualization
NUM_CLASSES = 13  # Total number of classes in the dataset
BATCH_SIZE = 16  # Batch size for training
IMG_SIZE = 128  # The input size for model
DATA_PATH = Path('images')  # Location of the dataset
SHUFFLE = True  # Shuffle the dataset before making the split
LR = 0.001  # Learning rate for the model
EPOCHS = 30  # Number of epochs to train the model
DEVICE = 'cuda' if T.cuda.is_available() else 'cpu'  # Device used to train

unet = SegmentationAgent(VAL_PERCENTAGE, TEST_NUM, NUM_CLASSES, BATCH_SIZE, IMG_SIZE, DATA_PATH, SHUFFLE, LR, DEVICE)
# map_location remaps the checkpoint's tensors onto DEVICE, so weights that
# were saved from a GPU run still load when only a CPU is available.
unet.model.load_state_dict(T.load('model4.pt', map_location=DEVICE))
# The network is only used for inference here, so switch it to eval mode.
unet.model.eval()

SegmentationUNet(
  (conv_final): Conv2d(64, 13, kernel_size=(1, 1), stride=(1, 1))
  (down_convs): ModuleList(
    (0): DownConv(
      (conv_in): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_out): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): DownConv(
      (conv_in): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_out): Sequential(
        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Ba

In [6]:
# Build the autoencoder with placeholder ('dummy') model/run paths and a
# 13-element tensor of ones, then restore the trained weights from disk.
# NOTE(review): the ones tensor presumably holds per-class weights — confirm
# against oldencoder.autoencoder.
ae = autoencoder(1, 13, 0.001, 'ae-models/dummy', 'ae-runs/dummy', T.tensor([1] * 13))
# map_location remaps the checkpoint's tensors onto the selected device, so a
# checkpoint saved from a GPU run still loads on a CPU-only machine.
ae.load_state_dict(T.load('ae-models/Clear Noon Dry/ae-1649055310', map_location=device))
# Inference only: switch the module to eval mode.
ae.eval()

autoencoder(
  (conv1): Conv2d(1, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn5): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(512, 64, kernel_size=(4, 4), stride=(1, 1))
  (conv7): ConvTranspose2d(64, 512, kernel_size=(4, 4), stride=(1, 1))
  (bn7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv8): ConvT

In [7]:
def _to_agent_state(kinematics, extras, latent, size):
    """Flatten one observation into the ``(1, size)`` row vector fed to the agent.

    Args:
        kinematics: The environment's ``'state'`` entry; only its first four
            values are used.
        extras: Five extra scalars placed after the kinematics (steer,
            acceleration and the three angular-velocity values taken from
            ``info``), or zeros right after a reset when no ``info`` exists yet.
        latent: The environment's ``'latent'`` entry, appended after the scalars.
        size: Total number of features the agent expects.

    Returns:
        A NumPy array of shape ``(1, size)``.

    Raises:
        ValueError: Propagated from ``np.reshape`` when the flattened features
            do not add up to ``size``.
    """
    scalars = np.array([kinematics[0], kinematics[1], kinematics[2], kinematics[3], *extras])
    return np.reshape(np.concatenate((scalars, latent), axis=None), [1, size])


params = {
    'number_of_vehicles': 0,
    'number_of_walkers': 0,
    'display_size': 256,  # screen size of bird-eye render
    'max_past_step': 1,  # the number of past steps to draw
    'dt': 0.1,  # time interval between two frames
    'discrete': True,  # whether to use discrete control space
    'discrete_acc': [-3.0, 0.0, 3.0],  # discrete value of accelerations
    'discrete_steer': [-0.4, 0.0, 0.4],  # discrete value of steering angles
    'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
    'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
    'ego_vehicle_filter': 'vehicle.lincoln*',  # filter for defining ego vehicle
    'port': 2000,  # connection port
    'town': 'Town03',  # which town to simulate
    # mode of the task; the original note listed [random, roundabout (only for
    # Town03)] — 'curriculum' is presumably a local extension, TODO confirm
    'task_mode': 'curriculum',
    'max_time_episode': 500,  # maximum timesteps per episode
    'max_waypt': 12,  # maximum number of waypoints
    'obs_range': 32,  # observation range (meter)
    'lidar_bin': 0.25,  # bin size of lidar sensor (meter)
    'd_behind': 12,  # distance behind the ego vehicle (meter)
    'out_lane_thres': 5,  # threshold for out of lane
    'desired_speed': 12.5,  # desired speed (m/s)
    'max_ego_spawn_times': 200,  # maximum times to spawn ego vehicle
    'display_route': True,  # whether to render the desired route
    'pixor_size': 64,  # size of the pixor labels
    'pixor': False,  # whether to output PIXOR observation
    'image_collection': False,
    'image_input': True,
    'unet': unet,
    'ae': ae,
    # NOTE(review): the run/model directories below are named 'Linear2' while
    # the penalty is 'quadratic' — confirm which one this experiment intends.
    'penalty': 'quadratic'
}

env = gym.make('carla-v0', params=params)

# Seed every RNG in play. The torch seed was previously missing, so the
# network initialisation was not covered by the fixed seed.
random.seed(1)
np.random.seed(1)
T.manual_seed(1)

# Length of the flattened state vector; shared by the agent and the reshape
# in _to_agent_state so the two cannot drift apart.
num_inputs = 73

# NOTE(review): the positional arguments follow DQNAgent's signature, which is
# defined outside this notebook — confirm their meaning there.
agent = DQNAgent(10000, 1000, 0.95, 0.99997, 0.01, 0.001, num_inputs, 3, 'runs/Curriculum/Linear2', 'models/Curriculum/Linear2', True)
# To resume from a checkpoint, call agent.load_model(<checkpoint path>) here.

ep_rewards = []
total_timesteps = 0
batch_size = 512
update = 256  # sync the target network every `update` environment steps
min_timesteps = 100  # currently unused; kept for later experiments
episodes = 2500
agg_stats_every = 10  # window (in episodes) for the aggregated reward stats
save_every = 250  # checkpoint interval (in episodes)
epsilon_reset_episodes = (500, 1500)  # episodes after which exploration restarts

# `info` entries appended to the kinematic state on every step.
control_keys = ('steer', 'acceleration', 'angular_velocity_x', 'angular_velocity_y', 'angular_velocity_z')

train_start = time.time()

try:
    for episode in tqdm(range(1, episodes + 1), ascii=True, unit='episodes'):
        env.collision_hist = []

        episode_reward = 0
        step = 0

        current_s = env.reset()
        # No `info` dict exists before the first step, so the five control
        # features start at zero.
        current_state = _to_agent_state(current_s['state'], (0, 0, 0, 0, 0), current_s['latent'], num_inputs)

        done = False
        episode_start = time.time()

        while not done:
            action = agent.act(current_state)

            new_s, reward, done, info = env.step(action)
            new_state = _to_agent_state(
                new_s['state'], [info[key] for key in control_keys], new_s['latent'], num_inputs)

            episode_reward += reward

            agent.memory.memorise(current_state, action, reward, new_state, done)

            current_state = new_state
            step += 1
            total_timesteps += 1

            loss = agent.learn(batch_size)

            if loss is not None:
                # The loss is produced once per environment step, so it is
                # indexed by the global step count. Indexing it by episode
                # stacked every step of an episode on the same x value.
                # Flushing happens once per episode below, not on every step.
                agent.tensorboard.add_scalar('Loss', loss.item(), total_timesteps)

            if not total_timesteps % update:
                agent.update_target_model()

        ep_time = time.time() - episode_start
        ep_rewards.append(episode_reward)
        agent.tensorboard.add_scalar('Reward/Total Reward', episode_reward, episode)
        agent.tensorboard.add_scalar('Epsilon', agent.epsilon, episode)
        agent.tensorboard.add_scalar('Episode Length/Timesteps', step, episode)
        agent.tensorboard.add_scalar('Episode Length/Seconds', ep_time, episode)
        agent.tensorboard.flush()

        if not episode % agg_stats_every or episode == 1:
            recent = ep_rewards[-agg_stats_every:]
            agent.tensorboard.add_scalar('Reward/Average Reward', sum(recent) / len(recent), episode)
            agent.tensorboard.add_scalar('Reward/Minimum Reward', min(recent), episode)
            agent.tensorboard.add_scalar('Reward/Maximum Reward', max(recent), episode)
            agent.tensorboard.flush()

        if not episode % save_every:
            agent.save_model(round(episode_reward), step, episode)

        if episode in epsilon_reset_episodes:
            # Restart exploration from scratch at these milestones.
            agent.epsilon = 1

    # The periodic checkpoint already covers the last episode when `episodes`
    # is a multiple of `save_every`; only save again otherwise.
    if episodes % save_every:
        agent.save_model(round(episode_reward), step, episode)

    train_time = time.time() - train_start
    print('Training time: ', train_time, ' seconds', '\nTotal Steps: ', total_timesteps)
finally:
    # Release the TensorBoard writer and the pygame window even when training
    # is interrupted or raises.
    agent.tensorboard.close()
    pygame.quit()



connecting to Carla server...
Carla server connected!


100%|##########| 2500/2500 [13:27:44<00:00, 19.39s/episodes]  


Training time:  48464.15215778351  seconds 
Total Steps:  387651


In [10]:
# Shut the environment down once training has finished.
env.close()

## Agent using AED

In [6]:
from EncoderAgent import EncoderAgent
from pathlib import Path

In [7]:
# Settings handed to EncoderAgent below. EPOCHS is defined for completeness
# but is not passed to the agent in this cell.
VAL_PERCENTAGE = 0.2  # Amount of data to use for validation
TEST_NUM = 10  # Number of images to set aside for testing and visualization
CHANNELS = 3
NUM_CLASSES = 13  # Total number of classes in the dataset
BATCH_SIZE = 128  # Batch size for training
IMG_SIZE = 128  # The input size for model
DATA_PATH = Path('images')  # Location of the dataset
SHUFFLE = True  # Shuffle the dataset before making the split
LR = 0.001  # Learning rate for the model
EPOCHS = 30  # Number of epochs to train the model
DEVICE = 'cuda' if T.cuda.is_available() else 'cpu'  # Device used to train

aed = EncoderAgent(VAL_PERCENTAGE, TEST_NUM, CHANNELS, NUM_CLASSES,
                   BATCH_SIZE, IMG_SIZE, DATA_PATH, SHUFFLE, LR, DEVICE, False)
# map_location remaps the checkpoint's tensors onto DEVICE, so weights that
# were saved from a GPU run still load when only a CPU is available.
aed.model.load_state_dict(T.load('rgb-sem-models/model.pt', map_location=DEVICE))
# The network is only used for inference here, so switch it to eval mode.
aed.model.eval()

autoencoder(
  (conv1): Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (bn5): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(512, 64, kernel_size=(4, 4), stride=(1, 1))
  (conv7): ConvTranspose2d(64, 512, kernel_size=(4, 4), stride=(1, 1))
  (bn7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv8): ConvT

In [None]:
def _to_agent_state(kinematics, extras, latent, size):
    """Flatten one observation into the ``(1, size)`` row vector fed to the agent.

    Args:
        kinematics: The environment's ``'state'`` entry; only its first four
            values are used.
        extras: Five extra scalars placed after the kinematics (steer,
            acceleration and the three angular-velocity values taken from
            ``info``), or zeros right after a reset when no ``info`` exists yet.
        latent: The environment's ``'latent'`` entry, appended after the scalars.
        size: Total number of features the agent expects.

    Returns:
        A NumPy array of shape ``(1, size)``.

    Raises:
        ValueError: Propagated from ``np.reshape`` when the flattened features
            do not add up to ``size``.
    """
    scalars = np.array([kinematics[0], kinematics[1], kinematics[2], kinematics[3], *extras])
    return np.reshape(np.concatenate((scalars, latent), axis=None), [1, size])


params = {
    'number_of_vehicles': 0,
    'number_of_walkers': 0,
    'display_size': 256,  # screen size of bird-eye render
    'max_past_step': 1,  # the number of past steps to draw
    'dt': 0.1,  # time interval between two frames
    'discrete': True,  # whether to use discrete control space
    'discrete_acc': [-3.0, 0.0, 3.0],  # discrete value of accelerations
    'discrete_steer': [-0.4, 0.0, 0.4],  # discrete value of steering angles
    'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
    'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
    'ego_vehicle_filter': 'vehicle.lincoln*',  # filter for defining ego vehicle
    'port': 2000,  # connection port
    'town': 'Town03',  # which town to simulate
    # mode of the task; the original note listed [random, roundabout (only for
    # Town03)] — 'curriculum' is presumably a local extension, TODO confirm
    'task_mode': 'curriculum',
    'max_time_episode': 500,  # maximum timesteps per episode
    'max_waypt': 12,  # maximum number of waypoints
    'obs_range': 32,  # observation range (meter)
    'lidar_bin': 0.25,  # bin size of lidar sensor (meter)
    'd_behind': 12,  # distance behind the ego vehicle (meter)
    'out_lane_thres': 5,  # threshold for out of lane
    'desired_speed': 5.0,  # desired speed (m/s)
    'max_ego_spawn_times': 200,  # maximum times to spawn ego vehicle
    'display_route': True,  # whether to render the desired route
    'pixor_size': 64,  # size of the pixor labels
    'pixor': False,  # whether to output PIXOR observation
    'image_collection': False,
    'image_input': False,
    'ae_only': True,
    'aed': aed,
    'penalty': 'linear2'
}

env = gym.make('carla-v0', params=params)

# Seed every RNG in play. The torch seed was previously missing, so the
# network initialisation was not covered by the fixed seed.
random.seed(1)
np.random.seed(1)
T.manual_seed(1)

# Length of the flattened state vector; shared by the agent and the reshape
# in _to_agent_state so the two cannot drift apart.
num_inputs = 73

# NOTE(review): the positional arguments follow DQNAgent's signature, which is
# defined outside this notebook — confirm their meaning there.
agent = DQNAgent(10000, 1000, 0.95, 0.99975, 0.01, 0.001, num_inputs, 3, 'runs/Curriculum/Linear2andAED', 'models/Curriculum/Linear2andAED', True)
# To resume from a checkpoint, call agent.load_model(<checkpoint path>) here.

ep_rewards = []
total_timesteps = 0
batch_size = 512
update = 256  # sync the target network every `update` environment steps
min_timesteps = 100  # currently unused; kept for later experiments
episodes = 2500
agg_stats_every = 10  # window (in episodes) for the aggregated reward stats
save_every = 250  # checkpoint interval (in episodes)
epsilon_reset_episodes = (500, 1500)  # episodes after which exploration restarts

# `info` entries appended to the kinematic state on every step.
control_keys = ('steer', 'acceleration', 'angular_velocity_x', 'angular_velocity_y', 'angular_velocity_z')

train_start = time.time()

try:
    for episode in tqdm(range(1, episodes + 1), ascii=True, unit='episodes'):
        env.collision_hist = []

        episode_reward = 0
        step = 0

        current_s = env.reset()
        # No `info` dict exists before the first step, so the five control
        # features start at zero.
        current_state = _to_agent_state(current_s['state'], (0, 0, 0, 0, 0), current_s['latent'], num_inputs)

        done = False
        episode_start = time.time()

        while not done:
            action = agent.act(current_state)

            new_s, reward, done, info = env.step(action)
            new_state = _to_agent_state(
                new_s['state'], [info[key] for key in control_keys], new_s['latent'], num_inputs)

            episode_reward += reward

            agent.memory.memorise(current_state, action, reward, new_state, done)

            current_state = new_state
            step += 1
            total_timesteps += 1

            loss = agent.learn(batch_size)

            if loss is not None:
                # The loss is produced once per environment step, so it is
                # indexed by the global step count. Indexing it by episode
                # stacked every step of an episode on the same x value.
                # Flushing happens once per episode below, not on every step.
                agent.tensorboard.add_scalar('Loss', loss.item(), total_timesteps)

            if not total_timesteps % update:
                agent.update_target_model()

        ep_time = time.time() - episode_start
        ep_rewards.append(episode_reward)
        agent.tensorboard.add_scalar('Reward/Total Reward', episode_reward, episode)
        agent.tensorboard.add_scalar('Epsilon', agent.epsilon, episode)
        agent.tensorboard.add_scalar('Episode Length/Timesteps', step, episode)
        agent.tensorboard.add_scalar('Episode Length/Seconds', ep_time, episode)
        agent.tensorboard.flush()

        if not episode % agg_stats_every or episode == 1:
            recent = ep_rewards[-agg_stats_every:]
            agent.tensorboard.add_scalar('Reward/Average Reward', sum(recent) / len(recent), episode)
            agent.tensorboard.add_scalar('Reward/Minimum Reward', min(recent), episode)
            agent.tensorboard.add_scalar('Reward/Maximum Reward', max(recent), episode)
            agent.tensorboard.flush()

        if not episode % save_every:
            agent.save_model(round(episode_reward), step, episode)

        if episode in epsilon_reset_episodes:
            # Restart exploration from scratch at these milestones.
            agent.epsilon = 1

    # The periodic checkpoint already covers the last episode when `episodes`
    # is a multiple of `save_every`; only save again otherwise.
    if episodes % save_every:
        agent.save_model(round(episode_reward), step, episode)

    train_time = time.time() - train_start
    print('Training time: ', train_time, ' seconds', '\nTotal Steps: ', total_timesteps)
finally:
    # Release the TensorBoard writer, the pygame window and the environment
    # even when training is interrupted or raises.
    agent.tensorboard.close()
    pygame.quit()
    env.close()
