In [1]:
from __future__ import absolute_import
from __future__ import print_function

import os
import datetime
from shutil import copyfile

from tensorflow.keras.utils import plot_model




In [2]:
#delete later
import traci

In [3]:
from simulation import Simulation, TrainSimulation, VanillaTrainSimulation, RNNTrainSimulation
from generator import TrafficGenerator
from memory import Memory, NormalMemory, SequenceMemory
# from model import TrainModel
from model import *
from visualization import Visualization
from utils import import_train_configuration, set_sumo, set_train_path

In [4]:



if __name__ == "__main__":
    # Training entry point: load the settings, build the traffic generator, the
    # Q-network(s), the replay memory and the simulation, run every training
    # episode, then persist the model, the settings file and the reward plot.
    #
    # Instances that used to be bound to the names of their own imported classes
    # (`Visualization`, `Memory`, `Simulation`) now get lowercase names. Rebinding
    # the class names made this cell fail when executed a second time in the same
    # kernel (the "class" was by then an instance and no longer callable).

    settings_file = 'training_settings.ini'

    config = import_train_configuration(config_file=settings_file)
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    path = set_train_path(config['models_path_name'])

    # TODO: move this parameter into the config file.
    sequence_length = 15

    # State dimension parameters.
    number_of_cells_per_lane = 10
    conv_state_shape = (number_of_cells_per_lane, 8, 2)
    green_phase_state_shape = 4
    elapsed_time_state_shape = 1
    state_shape = [conv_state_shape, green_phase_state_shape, elapsed_time_state_shape]

    TrafficGen = TrafficGenerator(
        config['max_steps'],
        config['penetration_rate']
    )

    visualization = Visualization(
        path,
        dpi=96
    )

    # Keyword arguments shared by every model constructed below.
    model_kwargs = dict(
        output_dim=config['num_actions'],
        state_shape=state_shape
    )

    # Positional arguments that both simulation classes take right after
    # (Model, TargetModel, memory); the order mirrors the original calls.
    shared_simulation_args = (
        TrafficGen,
        sumo_cmd,
        config['gamma'],
        config['max_steps'],
        config['green_duration'],
        config['yellow_duration'],
        config['num_actions'],
        config['training_epochs'],
        config['copy_step'],
    )

    # VANILLA MODEL
    if config['uses_reccurent_network'] == False:

        # Online model used for training.
        Model = VanillaTrainModel(
            config['batch_size'],
            config['learning_rate'],
            **model_kwargs
        )
        Model._model.summary()
        plot_model(Model._model, 'my_first_model_with_shape_info.png', show_shapes=True)

        # Target model, only used for predictions. Regularly the values of
        # Model are copied into TargetModel.
        TargetModel = VanillaTrainModel(
            config['batch_size'],
            config['learning_rate'],
            **model_kwargs
        )

        replay_memory = NormalMemory(
            config['memory_size_max'],
            config['memory_size_min']
        )

        simulation = VanillaTrainSimulation(
            Model,
            TargetModel,
            replay_memory,
            *shared_simulation_args
        )

    # RECURRENT MODEL
    else:
        rnn_kwargs = dict(model_kwargs, sequence_length=sequence_length)

        # Online model used for training.
        Model = RNNTrainModel(
            config['batch_size'],
            config['learning_rate'],
            statefulness=False,
            **rnn_kwargs
        )
        Model._model.summary()
        plot_model(Model._model, 'my_first_model_with_shape_info.png', show_shapes=True)

        # Target model, only used for predictions. Regularly the values of
        # Model are copied into TargetModel.
        TargetModel = RNNTrainModel(
            config['batch_size'],
            config['learning_rate'],
            statefulness=False,
            **rnn_kwargs
        )

        # Same constructor arguments as above, except statefulness=True; it is
        # handed to the simulation as its last argument.
        PredictModel = RNNTrainModel(
            config['batch_size'],
            config['learning_rate'],
            statefulness=True,
            **rnn_kwargs
        )

        replay_memory = SequenceMemory(
            config['memory_size_max'],
            config['memory_size_min'],
            sequence_length
        )

        simulation = RNNTrainSimulation(
            Model,
            TargetModel,
            replay_memory,
            *(shared_simulation_args + (PredictModel,))
        )

    print(' ')
    print(' ')
    print('Starting...' )
    print(' ')

    timestamp_start = datetime.datetime.now()

    for episode in range(config['total_episodes']):
        print('\n----- Episode', str(episode+1), 'of', str(config['total_episodes']))

        # Epsilon for the epsilon-greedy policy: 1.0 on the first episode,
        # decreasing linearly with the episode index.
        epsilon = 1.0 - (episode / config['total_episodes'])

        # Run the simulation and train, one episode at a time.
        simulation_time, training_time = simulation.run(episode, epsilon)
        print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')

    print("\n----- Start time:", timestamp_start)
    print("----- End time:", datetime.datetime.now())
    print("----- Session info saved at:", path)

    Model.save_model(path)

    # Keep a copy of the settings file next to the saved model.
    copyfile(src=settings_file, dst=os.path.join(path, settings_file))

    visualization.training_save_data_and_plot(data=simulation.reward_store, filename='reward', xlabel='Episode', ylabel='Cumulative negative reward')


Model: "simple_CNN"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 10, 8, 2)]   0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 5, 4, 32)     288         input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 4)]          0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 1)]          0                                            
_________________________________________________________________________________________

Simulation time: 145.7 s - Training time: 122.5 s - Total: 268.2 s

----- Episode 31 of 304
Simulating...
Total reward: -1184271.083624256 - Epsilon: 0.9
Training...
Simulation time: 246.9 s - Training time: 123.3 s - Total: 370.2 s

----- Episode 32 of 304
Simulating...
Total reward: -1038721.105413455 - Epsilon: 0.9
Training...
Simulation time: 220.8 s - Training time: 123.8 s - Total: 344.6 s

----- Episode 33 of 304
Simulating...
Total reward: -4869.754483849219 - Epsilon: 0.89
Training...
Simulation time: 7.2 s - Training time: 123.0 s - Total: 130.2 s

----- Episode 34 of 304
Simulating...
Total reward: -108857.1885835193 - Epsilon: 0.89
Training...
Simulation time: 41.8 s - Training time: 123.0 s - Total: 164.8 s

----- Episode 35 of 304
Simulating...
Total reward: -2674524.415951255 - Epsilon: 0.89
Training...
Simulation time: 488.1 s - Training time: 122.7 s - Total: 610.8 s

----- Episode 36 of 304
Simulating...
Total reward: -3793733.0762040555 - Epsilon: 0.88
Training...
Si

Simulating...
Total reward: -1665662.7227378841 - Epsilon: 0.74
Training...
Simulation time: 311.6 s - Training time: 124.1 s - Total: 435.7 s

----- Episode 81 of 304
Simulating...
Total reward: -3263.3766095279807 - Epsilon: 0.74
Training...
Simulation time: 10.7 s - Training time: 124.6 s - Total: 135.3 s

----- Episode 82 of 304
Simulating...
Total reward: -71049.85830278123 - Epsilon: 0.73
Training...
Simulation time: 41.1 s - Training time: 125.1 s - Total: 166.2 s

----- Episode 83 of 304
Simulating...
Total reward: -2114611.2785968087 - Epsilon: 0.73
Training...
Simulation time: 387.1 s - Training time: 123.9 s - Total: 511.0 s

----- Episode 84 of 304
Simulating...
Total reward: -3158185.9056856446 - Epsilon: 0.73
Training...
Simulation time: 547.5 s - Training time: 123.9 s - Total: 671.4 s

----- Episode 85 of 304
Simulating...
Total reward: -723193.2879269203 - Epsilon: 0.72
Training...
Simulation time: 155.1 s - Training time: 123.3 s - Total: 278.4 s

----- Episode 86 of 

Simulating...
Total reward: -5680.643436226789 - Epsilon: 0.58
Training...
Simulation time: 15.9 s - Training time: 124.6 s - Total: 140.5 s

----- Episode 130 of 304
Simulating...
Total reward: -450508.33520523657 - Epsilon: 0.58
Training...
Simulation time: 102.0 s - Training time: 125.3 s - Total: 227.3 s

----- Episode 131 of 304
Simulating...
Total reward: -2054664.5068016013 - Epsilon: 0.57
Training...
Simulation time: 354.3 s - Training time: 124.5 s - Total: 478.8 s

----- Episode 132 of 304
Simulating...
Total reward: -2518518.6850303225 - Epsilon: 0.57
Training...
Simulation time: 431.0 s - Training time: 139.7 s - Total: 570.7 s

----- Episode 133 of 304
Simulating...
Total reward: -873098.0161157995 - Epsilon: 0.57
Training...
Simulation time: 180.7 s - Training time: 137.6 s - Total: 318.3 s

----- Episode 134 of 304
Simulating...
Total reward: -1435556.3150423658 - Epsilon: 0.56
Training...
Simulation time: 242.9 s - Training time: 146.2 s - Total: 389.1 s

----- Episode 

Simulating...
Total reward: -443820.92857256735 - Epsilon: 0.42
Training...
Simulation time: 112.5 s - Training time: 135.3 s - Total: 247.8 s

----- Episode 179 of 304
Simulating...
Total reward: -1539910.6507148154 - Epsilon: 0.41
Training...
Simulation time: 380.2 s - Training time: 151.1 s - Total: 531.3 s

----- Episode 180 of 304
Simulating...
Total reward: -1519984.0484358338 - Epsilon: 0.41
Training...
Simulation time: 491.0 s - Training time: 141.7 s - Total: 632.7 s

----- Episode 181 of 304
Simulating...
Total reward: -612208.743999963 - Epsilon: 0.41
Training...
Simulation time: 151.6 s - Training time: 134.6 s - Total: 286.2 s

----- Episode 182 of 304
Simulating...
Total reward: -814391.5820722047 - Epsilon: 0.4
Training...
Simulation time: 184.7 s - Training time: 133.5 s - Total: 318.2 s

----- Episode 183 of 304
Simulating...
Total reward: -536659.2752133795 - Epsilon: 0.4
Training...
Simulation time: 167.4 s - Training time: 136.1 s - Total: 303.5 s

----- Episode 184

Simulating...
Total reward: -634006.8906120278 - Epsilon: 0.26
Training...
Simulation time: 226.2 s - Training time: 124.4 s - Total: 350.6 s

----- Episode 228 of 304
Simulating...
Total reward: -1097183.5544831268 - Epsilon: 0.25
Training...
Simulation time: 375.9 s - Training time: 125.7 s - Total: 501.6 s

----- Episode 229 of 304
Simulating...
Total reward: -336593.9230147041 - Epsilon: 0.25
Training...
Simulation time: 123.1 s - Training time: 125.1 s - Total: 248.2 s

----- Episode 230 of 304
Simulating...
Total reward: -1310765.797141062 - Epsilon: 0.25
Training...
Simulation time: 242.5 s - Training time: 131.6 s - Total: 374.1 s

----- Episode 231 of 304
Simulating...
Total reward: -783822.8457241808 - Epsilon: 0.24
Training...
Simulation time: 204.1 s - Training time: 131.1 s - Total: 335.2 s

----- Episode 232 of 304
Simulating...
Total reward: -565149.097699162 - Epsilon: 0.24
Training...
Simulation time: 165.1 s - Training time: 128.7 s - Total: 293.8 s

----- Episode 233

Simulating...
Total reward: -3419366.186224422 - Epsilon: 0.1
Training...
Simulation time: 492.7 s - Training time: 125.2 s - Total: 617.9 s

----- Episode 277 of 304
Simulating...
Total reward: -815643.449196723 - Epsilon: 0.09
Training...
Simulation time: 188.9 s - Training time: 124.8 s - Total: 313.7 s

----- Episode 278 of 304
Simulating...
Total reward: -542316.1759213001 - Epsilon: 0.09
Training...
Simulation time: 145.8 s - Training time: 124.9 s - Total: 270.7 s

----- Episode 279 of 304
Simulating...
Total reward: -728079.206620345 - Epsilon: 0.09
Training...
Simulation time: 184.9 s - Training time: 125.1 s - Total: 310.0 s

----- Episode 280 of 304
Simulating...
Total reward: -773775.4417920783 - Epsilon: 0.08
Training...
Simulation time: 197.8 s - Training time: 125.0 s - Total: 322.8 s

----- Episode 281 of 304
Simulating...
Total reward: -30914.36646113585 - Epsilon: 0.08
Training...
Simulation time: 40.3 s - Training time: 125.2 s - Total: 165.5 s

----- Episode 282 of 

In [5]:

# plot_model(Simulation._Model._model, show_shapes=True, show_layer_names=True)