In [18]:
import traci
import matplotlib.pyplot as plt
import json
import numpy as np
import pickle
from time import time
from os.path import exists
from os import listdir, mkdir


In [19]:
sumoBinary = "sumo"
sumoCmd = [sumoBinary, "-c", "simulacao.sumocfg"]

# Aprendizado por reforço

In [20]:
def simulation_init():
    try:
        traci.close()
    except:
        pass

    traci.start(sumoCmd)

    simulation_data = {
        'traffic_lights': [],
        'data': [],
        'stats': {
            'step': []
        },
        'vehicles_list': []
    }

    traffic_lights = traci.trafficlight.getIDList()

    for traffic_light in traffic_lights:

        traffic_light_data = {
            'id': traffic_light,
            'controlled_edges': [],
        }

        traffic_light_controlled_lanes = traci.trafficlight.getControlledLanes(
            traffic_light)

        traffic_light_data['controlled_lanes'] = list(
            traffic_light_controlled_lanes)

        controlled_lanes = []

        for traffic_light_controlled_lane in traffic_light_controlled_lanes:
            controlled_lanes.append(
                traffic_light_controlled_lane.split('_')[0])

        controlled_edges = list(set(controlled_lanes))

        for controlled_edge in controlled_edges:
            edge_data = {
                'id': controlled_edge,
                'lanes': traci.edge.getLaneNumber(controlled_edge),
            }

            traffic_light_data['controlled_edges'].append(edge_data)

        simulation_data['traffic_lights'].append(traffic_light_data)
    
    return simulation_data


In [21]:
def simulation(simulation_data, traffic_light_duration, toggleSemDirection):

    traffic_light_duration += 15

    step = len(simulation_data['stats']['step'])

    phases = [
        traci.trafficlight.Phase(traffic_light_duration, "GGGGGrrrrr"),
        traci.trafficlight.Phase(3, "yyyyyrrrrr"),
        traci.trafficlight.Phase(traffic_light_duration, "rrrrrGGGGG"),
        traci.trafficlight.Phase(3, "rrrrryyyyy"),
    ]

    if toggleSemDirection:
        phases = [
            traci.trafficlight.Phase(traffic_light_duration, "rrrrrGGGGG"),
            traci.trafficlight.Phase(3, "yyyyyrrrrr"),
            traci.trafficlight.Phase(traffic_light_duration, "GGGGGrrrrr"),
            traci.trafficlight.Phase(3, "rrrrryyyyy"),
        ]

    for traffic_light in simulation_data['traffic_lights']:

        traci.trafficlight.setProgramLogic(traffic_light['id'], traci.trafficlight.Logic(
            programID='0',
            type=0,
            currentPhaseIndex=0,
            phases=phases
        ))

    for _ in range(traffic_light_duration + 3):

        if step == 60 * 60 * 24:
            break

        total_waiting_time = 0
        total_waiting_vehicle_count = 0
        total_avg_queue_length = 0

        # if step % 3600 == 0:
        #     print('Hour:', step / 3600, 'Step: {}'.format(step))

        if step == 0 * 3600:
            traci.simulation.setScale(0.6)
        elif step == 1 * 3600 or step == 2 * 3600:
            traci.simulation.setScale(0.5)
        elif step == 3 * 3600 or step == 4 * 3600:
            traci.simulation.setScale(0.4)
        elif step == 5 * 3600:
            traci.simulation.setScale(0.5)
        elif step == 6 * 3600:
            traci.simulation.setScale(0.8)
        elif step == 7 * 3600:
            traci.simulation.setScale(1.0)
        elif step == 8 * 3600:
            traci.simulation.setScale(3)
        elif step == 9 * 3600:
            traci.simulation.setScale(3.2)
        elif step == 10 * 3600:
            traci.simulation.setScale(2.8)
        elif step == 11 * 3600:
            traci.simulation.setScale(2.1)
        elif step == 12 * 3600:
            traci.simulation.setScale(2)
        elif step == 13 * 3600:
            traci.simulation.setScale(1.8)
        elif step == 14 * 3600:
            traci.simulation.setScale(1.3)
        elif step == 15 * 3600:
            traci.simulation.setScale(1.4)
        elif step == 16 * 3600:
            traci.simulation.setScale(1.8)
        elif step == 17 * 3600:
            traci.simulation.setScale(2.6)
        elif step == 18 * 3600:
            traci.simulation.setScale(2.9)
        elif step == 19 * 3600:
            traci.simulation.setScale(3.5)
        elif step == 20 * 3600:
            traci.simulation.setScale(2.7)
        elif step == 21 * 3600:
            traci.simulation.setScale(2)
        elif step == 22 * 3600:
            traci.simulation.setScale(1.3)
        elif step == 23 * 3600:
            traci.simulation.setScale(0.9)

        vehicles = traci.vehicle.getIDList()
        simulation_data['vehicles_list'].extend(vehicles)

        simulation_data['stats']['step'].append({
            'step': step,
            'vehicles': len(vehicles),
            'traffic_light_duration': int(traffic_light_duration),
        })

        traci.simulationStep()

        step_info = {
            'step': step,
            'traffic_lights': [],
        }

        for traffic_light in simulation_data['traffic_lights']:

            traffic_light_info = {
                'id': traffic_light['id'],
                'controlled_edges': [],
            }

            for controlled_edge in traffic_light['controlled_edges']:
                waiting_time = traci.edge.getWaitingTime(controlled_edge['id'])
                waiting_vehicle_count = traci.edge.getLastStepHaltingNumber(
                    controlled_edge['id'])
                avg_queue_length = round(
                    waiting_vehicle_count / controlled_edge['lanes'], 2)
                
                total_waiting_time += waiting_time
                total_waiting_vehicle_count += waiting_vehicle_count
                total_avg_queue_length += avg_queue_length

                edge_info = {
                    'id': controlled_edge['id'],
                    'waiting_time': waiting_time,
                    'waiting_vehicle_count': waiting_vehicle_count,
                    'avg_queue_length': avg_queue_length,
                }

                traffic_light_info['controlled_edges'].append(edge_info)

            step_info['traffic_lights'].append(traffic_light_info)

        simulation_data['data'].append(step_info)
        step += 1

    reward = ((total_waiting_time / total_waiting_vehicle_count * -1) +
              10) if total_waiting_vehicle_count > 0 else 0
    return simulation_data, round(reward), round(total_waiting_vehicle_count / 5), step == 60 * 60 * 24

In [24]:
def close_simulation(simulation_data, epsilon, total_reward):
    try:
        traci.close()
    except:
        pass

    simulation_data['total_reward'] = total_reward
    simulation_data['epsilon'] = epsilon
    simulation_data['stats']['vehicles_count'] = len(
        list(set(simulation_data['vehicles_list'])))
    
    return simulation_data

In [23]:
def choose_duration(q_values, state, epsilon):
    if np.random.random() < epsilon or state not in q_values.keys():
        duration = np.random.choice(actions)
    else:
        duration = np.argmax(q_values[state])
    return duration


In [25]:
def update_q_values(q_values, state, action, reward, next_state):
    if state not in q_values.keys():
        q_values[state] = [-1] * len(actions)

    if next_state not in q_values.keys():
        q_values[next_state] = [-1] * len(actions)

    q_values[state][action] = q_values[state][action] + learning_rate * \
        (reward + gamma *
         np.max(q_values[next_state]) - q_values[state][action])

In [27]:
def get_initial_values():
    q_values = {}
    if exists('arquivos/q_values.pickle'):
        with open('arquivos/q_values.pickle', 'rb') as pickle_out:
            q_values = pickle.load(pickle_out)
    
    epsilon = 0.7

    if exists('arquivos/simulation_data'):
        start_episode = len(listdir('arquivos/simulation_data')) + 1

        if start_episode > 1:
            with open('arquivos/simulation_data/simulation_data_'+str(start_episode)+'.json') as json_file:
                simulation_data = json.load(json_file)
                epsilon = simulation_data['epsilon']

    else:
        start_episode = 1
        mkdir('arquivos/simulation_data')

    return q_values, start_episode, epsilon

In [28]:
# 15 to 90 seconds
actions = range(90 - 15 + 1)

gamma = 0.9
epsilon_min = 0.01
epsilon_decay = 0.995

learning_rate = 0.03

In [29]:
rewards = []
q_values, start_episode, epsilon = get_initial_values()

state = 0

num_episodes = 500

print('Training...')

for episode in range(start_episode, num_episodes + 1):

    print('Starting episode', episode)
    data = simulation_init()
    total_reward = 0
    start_time = time()

    done = False
    toggleSemDirection = False

    while not done:
        duration = choose_duration(q_values, state, epsilon)

        # state = waiting_vehicle_count / 20 (lanes)
        data, reward, next_state, done = simulation(
            data, duration, toggleSemDirection)
        
        toggleSemDirection = not toggleSemDirection

        update_q_values(q_values, state, duration, reward, next_state)

        state = next_state
        total_reward += reward

        # print('duration', duration + 15)
        # print('reward', reward)
        # print('state', next_state)
        # print('done', done)
        # print('')

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    rewards.append(total_reward)

    data = close_simulation(data, epsilon, total_reward)

    with open('arquivos/simulation_data/episode_'+str(episode)+'.json', 'w') as outfile:
        outfile.write(json.dumps(data))

    elapsed_time = time() - start_time

    print(f'Episode {episode},  '
            f'Reward: {total_reward},  '
            f'Epsilon: {epsilon:.3f}, '
            f'Time: {elapsed_time:.2f}s')

    # save q_values
    with open('arquivos/q_values.pickle', 'wb') as outfile:
        pickle.dump(q_values, outfile)

print('Training finished.')


Training...


TypeError: Object of type int32 is not JSON serializable

In [None]:
# Rewards by Episode
plt.figure(figsize=(15, 5))

episodes = range(1, num_episodes + 1)

plt.title('Rewards by Episode')
plt.xlabel('Episode')
plt.ylabel('Reward')

plt.plot(episodes, rewards)
plt.legend()

plt.show()
