In [1]:
from __future__ import absolute_import
from __future__ import print_function

import os
import datetime
from shutil import copyfile

from tensorflow.keras.utils import plot_model




In [2]:
from simulation import Simulation, TrainSimulation, VanillaTrainSimulation, RNNTrainSimulation
from generator import TrafficGenerator
from memory import Memory, NormalMemory, SequenceMemory
# from model import TrainModel
from model import *
from visualization import Visualization
from utils import import_train_configuration, set_sumo, set_train_path

In [3]:
import traci   #DELETE LATER


if __name__ == "__main__":
    # Training entry point: read the settings file, build the agent (networks,
    # replay memory, simulation), train for the configured number of episodes,
    # then save the model, a copy of the settings and the result plots.

    config = import_train_configuration(config_file='training_settings.ini')
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    path = set_train_path(config['models_path_name'])

    # TODO: move these hard-coded values into training_settings.ini.
    sequence_length = 8
    number_of_cells_per_lane = 10
    state_shape = (number_of_cells_per_lane, 8, 1)

    TrafficGen = TrafficGenerator(
        config['max_steps'],
        config['n_cars_generated'],
        config['penetration_rate']
    )

    # Instances get lowercase names so the imported classes `Visualization`,
    # `Memory` and `Simulation` are not overwritten. Rebinding `Visualization`
    # to its own instance made a second run of this cell try to call the
    # previous instance instead of the class.
    visualization = Visualization(
        path,
        dpi=96
    )

    # Constructor arguments that are identical for every network built below.
    network_args = (config['batch_size'], config['learning_rate'])
    network_kwargs = {
        'output_dim': config['num_actions'],
        'state_shape': state_shape,
    }

    # Positional arguments that follow (model, target model, memory, traffic
    # generator) in both simulation constructors.
    simulation_args = (
        sumo_cmd,
        config['gamma'],
        config['max_steps'],
        config['green_duration'],
        config['yellow_duration'],
        config['num_actions'],
        config['training_epochs'],
        config['copy_step'],
    )

    # NOTE: the key really is spelled 'uses_reccurent_network' here, so the
    # spelling must stay. The explicit comparison is kept because the type
    # returned by the config loader is not visible in this notebook.
    if config['uses_reccurent_network'] == False:
        # --- VANILLA MODEL ---
        # Online model used for training.
        Model = VanillaTrainModel(*network_args, **network_kwargs)
        plot_model(Model._model, 'my_first_model_with_shape_info.png', show_shapes=True)

        # Target model, only used for predictions; the values of Model are
        # regularly copied into it.
        TargetModel = VanillaTrainModel(*network_args, **network_kwargs)

        memory = NormalMemory(
            config['memory_size_max'],
            config['memory_size_min']
        )

        simulation = VanillaTrainSimulation(
            Model,
            TargetModel,
            memory,
            TrafficGen,
            *simulation_args
        )
    else:
        # --- RECURRENT MODEL ---
        # Online model used for training.
        Model = RNNTrainModel(
            *network_args,
            sequence_length=sequence_length,
            statefulness=False,
            **network_kwargs
        )
        Model._model.summary()
        plot_model(Model._model, 'my_first_model_with_shape_info.png', show_shapes=True)

        # Target model, only used for predictions; the values of Model are
        # regularly copied into it.
        TargetModel = RNNTrainModel(
            *network_args,
            sequence_length=sequence_length,
            statefulness=False,
            **network_kwargs
        )

        # Same architecture built with statefulness=True and handed to the
        # simulation as its last argument.
        # NOTE(review): presumably used for step-by-step action selection --
        # confirm in RNNTrainSimulation.
        PredictModel = RNNTrainModel(
            *network_args,
            sequence_length=sequence_length,
            statefulness=True,
            **network_kwargs
        )

        memory = SequenceMemory(
            config['memory_size_max'],
            config['memory_size_min'],
            sequence_length
        )

        # PredictModel is the extra trailing positional argument of the
        # recurrent simulation.
        simulation = RNNTrainSimulation(
            Model,
            TargetModel,
            memory,
            TrafficGen,
            *(simulation_args + (PredictModel,))
        )

    print(' ')
    print(' ')
    print('Starting...' )
    print(' ')

    timestamp_start = datetime.datetime.now()
    total_episodes = config['total_episodes']

    for episode in range(total_episodes):
        print('\n----- Episode', str(episode+1), 'of', str(total_episodes))

        # Epsilon for the epsilon-greedy policy decays linearly with the
        # episode index: 1.0 in the first episode, 1/total_episodes in the last.
        epsilon = 1.0 - (episode / total_episodes)

        # Run the simulation and train for one episode at a time.
        simulation_time, training_time = simulation.run(episode, epsilon)
        print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')

    print("\n----- Start time:", timestamp_start)
    print("----- End time:", datetime.datetime.now())
    print("----- Session info saved at:", path)

    # Persist the trained online model and the exact settings used for this run.
    Model.save_model(path)
    copyfile(src='training_settings.ini', dst=os.path.join(path, 'training_settings.ini'))

    # One data file + plot per per-episode metric collected by the simulation.
    visualization.save_data_and_plot(data=simulation.reward_store, filename='reward', xlabel='Episode', ylabel='Cumulative negative reward')
    visualization.save_data_and_plot(data=simulation.cumulative_wait_store, filename='delay', xlabel='Episode', ylabel='Cumulative delay (s)')
    visualization.save_data_and_plot(data=simulation.avg_queue_length_store, filename='queue', xlabel='Episode', ylabel='Average queue length (vehicles)')
    
    
    
    
    
    
    
    ### --- RANDOM TESTS - DELETE LATER
    
#     TrafficGen.generate_routefile(seed=1)
#     traci.start(sumo_cmd)
#     for _ in range(200):
#         traci.simulationStep()
# #         print("step: ", _)
#     car_list = traci.vehicle.getIDList()
#     print(car_list)
    
    
#     for car_id in car_list:
#         vehicle_type = traci.vehicle.getTypeID(car_id)
        
#         if vehicle_type == "connected_vehicle":
#             print("it is connected: ......" + vehicle_type)
#         else:
#             print("it is regular: ......" + vehicle_type)
#     traci.close() 
    
    
    
    
    

 
 
Starting...
 

----- Episode 1 of 2
Simulating...
Total reward: -36388.0 - Epsilon: 1.0
Training...
Simulation time: 14.8 s - Training time: 0.0 s - Total: 14.8 s

----- Episode 2 of 2
Simulating...
Total reward: -68881.0 - Epsilon: 0.5
Training...
Simulation time: 25.1 s - Training time: 10.8 s - Total: 35.9 s

----- Start time: 2020-06-09 15:25:10.872176
----- End time: 2020-06-09 15:26:01.526002
----- Session info saved at: C:\Users\Chantal\Documents\Deep-QLearning-Agent-for-Traffic-Signal-Control\TLCS\models\model_169\
