In [1]:
from __future__ import absolute_import
from __future__ import print_function

import os
import datetime
from shutil import copyfile

from tensorflow.keras.utils import plot_model




In [2]:
from training_simulation import TrainSimulation, VanillaTrainSimulation, RNNTrainSimulation
from generator import TrafficGenerator
from memory import Memory, NormalMemory, SequenceMemory
# from model import TrainModel
from model import *
from visualization import Visualization
from utils import import_train_configuration, set_sumo, set_train_path

In [3]:



if __name__ == "__main__":
    # Training set-up: load the configuration, then build the traffic generator,
    # the plotting helper, the online/target networks, the replay memory and the
    # simulation object. The training loop itself is currently disabled (see the
    # commented-out section at the end of this block).

    config = import_train_configuration(config_file='training_settings.ini')
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    path = set_train_path(config['models_path_name'])

    # SET PARAMETERS (TO BE ADDED TO CONFIG LATER)
    # When True the RNN* model/simulation and SequenceMemory are used,
    # otherwise the Vanilla* model/simulation and NormalMemory.
    uses_reccurent_network = True

    sequence_length = 8
    number_of_cells_per_lane = 10
    # NOTE(review): the middle dimension (8) is hard-coded; presumably the
    # number of lanes -- confirm before moving this into the config file.
    input_shape = (number_of_cells_per_lane, 8, 1)

    # TODO: add input_shape, remove input_dims, remove width, remove num_layers

    TrafficGen = TrafficGenerator(
        config['max_steps'],
        config['n_cars_generated']
    )

    # NOTE(review): this rebinds the imported class name `Visualization` to an
    # instance. Re-running this cell without re-running the import cell will
    # therefore call the instance instead of the class. The name is kept because
    # later code refers to `Visualization.save_data_and_plot`.
    Visualization = Visualization(
        path,
        dpi=96
    )

    # Constructor arguments shared by the online and the target network.
    model_args = (
        config['num_layers'],
        config['width_layers'],
        config['batch_size'],
        config['learning_rate'],
    )
    model_kwargs = dict(
        input_dim=config['num_states'],
        output_dim=config['num_actions'],
        input_shape=input_shape,
    )

    # Pick the matching model / simulation classes once instead of duplicating
    # the whole construction in two branches.
    if uses_reccurent_network:
        model_class = RNNTrainModel
        simulation_class = RNNTrainSimulation
        model_kwargs['sequence_length'] = sequence_length
    else:
        model_class = VanillaTrainModel
        simulation_class = VanillaTrainSimulation

    # online model used for training
    Model = model_class(*model_args, **model_kwargs)
    Model._model.summary()
    plot_model(Model._model, 'my_first_model_with_shape_info.png', show_shapes=True)

    # target model, only used for predictions; the values of Model are
    # regularly copied into TargetModel
    TargetModel = model_class(*model_args, **model_kwargs)

    # NOTE(review): `Memory` is also the name of the imported base class; the
    # instance shadows it from here on.
    if uses_reccurent_network:
        Memory = SequenceMemory(
            config['memory_size_max'],
            config['memory_size_min'],
            sequence_length
        )
    else:
        Memory = NormalMemory(
            config['memory_size_max'],
            config['memory_size_min']
        )

    Simulation = simulation_class(
        Model,
        TargetModel,
        Memory,
        TrafficGen,
        sumo_cmd,
        config['gamma'],
        config['max_steps'],
        config['green_duration'],
        config['yellow_duration'],
        config['num_states'],
        config['num_actions'],
        config['training_epochs'],
        config['copy_step']
    )

    print(' ')
    print(' ')
    print('Starting...')
    print(' ')
    # print(' config of copy step: ', str(config['copy_step']))

    episode = 0
    timestamp_start = datetime.datetime.now()

    # --- Disabled: single test run with a fixed epsilon ---
    # epsilon = 1
    # simulation_time, training_time = Simulation.run(episode, epsilon)  # run the simulation
    # print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')

    # --- Disabled: full training loop ---
    # while episode < config['total_episodes']:
    #     print('\n----- Episode', str(episode+1), 'of', str(config['total_episodes']))
    #
    #     # set epsilon
    #     epsilon = 1.0 - (episode / config['total_episodes'])  # set the epsilon for this episode according to epsilon-greedy policy
    #
    #     # run simulation + train
    #     simulation_time, training_time = Simulation.run(episode, epsilon)  # run the simulation
    #     print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')
    #     episode += 1
    #
    # print("\n----- Start time:", timestamp_start)
    # print("----- End time:", datetime.datetime.now())
    # print("----- Session info saved at:", path)

    # --- Disabled: persist the model, the settings and the plots ---
    # Model.save_model(path)
    #
    # copyfile(src='training_settings.ini', dst=os.path.join(path, 'training_settings.ini'))
    #
    # Visualization.save_data_and_plot(data=Simulation.reward_store, filename='reward', xlabel='Episode', ylabel='Cumulative negative reward')
    # Visualization.save_data_and_plot(data=Simulation.cumulative_wait_store, filename='delay', xlabel='Episode', ylabel='Cumulative delay (s)')
    # Visualization.save_data_and_plot(data=Simulation.avg_queue_length_store, filename='queue', xlabel='Episode', ylabel='Average queue length (vehicles)')
    
    
    
    

Model: "CNN_with_LSTM"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 8, 10, 8, 1)]     0         
_________________________________________________________________
time_distributed (TimeDistri (None, 8, 5, 4, 128)      2176      
_________________________________________________________________
time_distributed_1 (TimeDist (None, 8, 3, 2, 128)      262272    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 8, 3, 2, 64)       32832     
_________________________________________________________________
time_distributed_3 (TimeDist (None, 8, 384)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 96)                184704    
_________________________________________________________________
dense (Dense)                (None, 16)              

In [6]:
import random

class Memory:
    """Base class that stores the capacity limits used by the replay memories.

    Args:
        size_max: upper size limit; subclasses compare their current size to it.
        size_min: lower size limit; subclasses compare their current size to it.
    """

    def __init__(self, size_max, size_min):
        self._size_max = size_max
        self._size_min = size_min


class SequenceMemory(Memory):
    """Replay memory that stores whole episodes and serves fixed-length traces.

    Samples are first collected in a buffer via ``add_to_buffer``. Calling
    ``add_sequence`` moves the buffered samples into the memory as one episode.
    ``get_samples`` then draws contiguous windows of ``sequence_length``
    samples from randomly chosen episodes.

    Args:
        size_max: maximum total number of stored samples (summed over episodes).
        size_min: minimum total number of stored samples before sampling starts.
        sequence_length: number of consecutive samples in each returned trace.
    """

    def __init__(self, size_max, size_min, sequence_length):
        super().__init__(size_max, size_min)
        self._sequence_length = sequence_length
        self._buffer = []   # samples of the episode that is still running
        self._samples = []  # finished episodes, one list of samples per episode

    def add_to_buffer(self, sample):
        """
        Samples will first be added to a buffer, as long as the episode is not finished yet.
        After the episode is done, the buffer will be added to the memory.
        """
        self._buffer.append(sample)

    def _collect_and_empty_buffer(self):
        """Return the buffered samples and start a fresh, empty buffer."""
        collected, self._buffer = self._buffer, []
        return collected

    def add_sequence(self):
        """
        Add the finished episode sequence from the buffer into the memory.

        An empty buffer is ignored, so no empty episode is ever stored. After
        the episode is stored, the oldest episodes are removed until the total
        number of samples no longer exceeds ``size_max``.
        """
        sequence = self._collect_and_empty_buffer()
        if not sequence:
            return

        self._samples.append(sequence)

        # Evict oldest-first until the memory fits again; a single eviction is
        # not always enough when the new episode is longer than the oldest one.
        size = self._size_now()
        while self._samples and size > self._size_max:
            size -= len(self._samples.pop(0))

    def get_samples(self, batch_size):
        """
        Get ``batch_size`` traces of ``sequence_length`` samples randomly from the memory.

        Episodes are drawn with replacement, each with equal probability, and
        a random contiguous window is cut out of every drawn episode.

        Returns:
            A list of ``batch_size`` traces (each a list of samples), or an
            empty list when the memory holds fewer than ``size_min`` samples or
            no stored episode is long enough to contain a full trace.
        """
        if self._size_now() < self._size_min:
            return []

        # Episodes shorter than the trace length cannot yield a window;
        # random.randint would raise ValueError on the resulting empty range.
        eligible_episodes = [
            episode for episode in self._samples
            if len(episode) >= self._sequence_length
        ]
        if not eligible_episodes:
            return []

        sampled_traces = []
        for episode in random.choices(eligible_episodes, k=batch_size):
            start_point = random.randint(0, len(episode) - self._sequence_length)
            sampled_traces.append(episode[start_point:start_point + self._sequence_length])

        return sampled_traces

    def _size_now(self):
        """
        Check how full the memory is (total number of samples over all episodes)
        """
        return sum(len(episode) for episode in self._samples)
 

# Smoke test for SequenceMemory: fill it with 10 dummy episodes of 600 labelled
# transitions each. Relies on `config` and `sequence_length` from the set-up cell.
# NOTE: this rebinds the name `Memory` from the class to an instance.
Memory = SequenceMemory(50000, config['memory_size_min'], sequence_length)

for ep in range(10):
    for seq in range(600):
        # (old_state, old_action, reward, current_state), each tagged with its origin
        dummy_sample = (
            f"old_state_episode{ep}_sequence{seq}",
            f"old_action_episode{ep}_sequence{seq}",
            f"reward_episode{ep}_sequence{seq}",
            f"current_state_episode{ep}_sequence{seq}",
        )
        Memory.add_to_buffer(dummy_sample)
    # episode finished: move the buffered transitions into the memory
    Memory.add_sequence()
# print(*Memory._samples, sep = "\n")

# print(Memory._size_now())
# print(Memory._size_max)



6000
50000
