In [1]:
import config
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib
import random
import time
from network import *
from utility import *
from train import *
import simpy
import logging
import math
import copy

In [2]:
%matplotlib notebook
plt.ion()

In [3]:
env = simpy.Environment()

In [4]:
N = Network(config.STATIONS_FILENAME, config.RAILWAY_FILENAME , env)

In [5]:
def create_statistic_proc(env , logger , stat_time , N , trains):
    '''
    Create statistic in the log file every stat_time
    @parameters:
    env : simpy environment
    logger : log file where need to create the log
    stat_time : time after which log is to be created
    N         : network
    trains    : trains in the network
    '''
    while True :
        yield env.timeout(20)

        #create statistics
        not_yet_started = 0
        running = 0
        Completed_resource_not_freed = 0
        Completed = 0

        for t in trains:
            status = t.status()
            if (status == 'not_yet_started'):
                not_yet_started += 1
            elif (status == 'running'):
                running += 1
            elif(status == 'Completed_resource_not_freed'):
                Completed_resource_not_freed += 1
            else:
                Completed += 1
        
        if (len(trains) == Completed):
            config.TRAINS_COMPLETED_JOURNEY = True

        logger.info("Time : {} Number of trains not yet started - {}".format(env.now , not_yet_started))
        logger.info("Time : {} Number of running trains - {}".format(env.now , running))
        logger.info("Time : {} Number of trains Completed but resource not freed - {}".format(env.now , Completed_resource_not_freed))
        logger.info("Time : {} Number of trains which have Completed journey - {}".format(env.now , Completed))

In [6]:
#utility function
def update_graph (env , interval , train_to_color = None):
    '''
    This function will simulate the whole process on a GUI
    This function is going to be used as the process
    @parameters
    env : simpy environment
    total_time : total time for the simulation
    '''
    fig = plt.figure(figsize = (6 , 6))
    ax = fig.add_subplot(111)
    fig.show()
    fig.canvas.draw()
    
    while True:
        #Draw graph
        
        N.draw_railway_network(ax  = ax , suppress_station_info= False , 
                               suppress_track_info= False , suppress_edge_label=False , 
                               train_to_color=train_to_color)


        #yield interval time
        yield env.timeout(interval)
        time.sleep(0.5)
        
        #log info
        plt.title("Time {}".format(env.now))
        
        #Paint the canvas
        fig.canvas.draw()

In [7]:
def deadlock_detection_process (env , deadlock_time):
    '''
    For creating deadlock detection process
    '''
    global DEADLOCK
    while True:
        
        #Invoke this deadlock process after every 20 iterations
        
        yield env.timeout(deadlock_time)
        logger.info("Time : {} Checking Deadlock".format(env.now))
        config.DEADLOCK , _ = deadlock_detection(N , trains)
        


In [8]:
def compute_loss_one_train (t):
    '''
    This function should be called after the successful completion of the train
    @Parameter : Name of the train 
    @Return : Loss for the train  
    '''
    p = t.priority
    loss = 0
    
    #for each station in the path
    for route , log in zip(t.route , t.log):
        d , t1 , t2 = route
        _ , t3 , t4 = log
        
        #Compute the loss, for the arrival (may arrive before the assigned time)
        #and departure (always after the assigned time)
        if t3 >= t1:
            loss += (t3 - t1)
        if (t4 >= t2):
            loss += (t4 - t2)

    return loss / p
    

In [9]:

def compute_loss (trains):
    '''
    Takes all the trains in the network after completing the journey and then computes the value of J
    '''
    total_loss = 0
    total_departures = 0

    #for each train compute the loss, and return the total loss
    for t in trains:
        total_departures += 2*len(t.route)
        total_loss += compute_loss_one_train(t)
        
    total_loss = total_loss / total_departures
    return total_loss

In [10]:
def get_state_vector (train_name , b = 2 , l= 6 , R = 3 , w_c = 1 , w_d = 1):
    #get the train
    t = name_train_map[train_name]
    
    #find the current position 
    current = t.current_index * 2;
    if (not t.station_or_not):
        current += 1
            
    #get the local neighbors
    res =t.all_resources[ max(0 , current - b) : min(current + l+1 , len(t.all_resources )) ]
    state_vector = [0 for _ in range(len(res))]
    res_to_index = {}
    for i in range(len(res)):
        res_to_index[res[i]] = i

    #position of the train in the local space
    if (t.current == '_'):
        t1_pos = 0 
    else:
        t1_pos = res_to_index[t.current]
    

    #go through each local resource
    for t2_pos , r in enumerate(res):
        #check if the resource is station or track and get the corresponding details
        if type(r) is not str:
            s = N.get_track_details(r[0],r[1])

        else:
            s = N.get_station_details(r)
        
        towards = 0                                          #train converging towards the given train  
        away = 0                                             #trains diverging from the given train
        total = len(s.train_running)                         #total number of trains
        
        #for each train in one of the local resource 
        for running_train in s.train_running:

            if not running_train == '_':
                
                #find the next position of the train and depict wether it is moving towards or away from the resource
                t2 = name_train_map[running_train]
                index = t2.current_index * 2 
                if not (t2.station_or_not):
                    index += 1
                
                #if train is not at the last destination
                if ( index + 1 < len(t2.all_resources) ):
                    #get the next resource and find it's position
                    next_res = t2.all_resources[index + 1]
                    
                    #depending on the position, check if the train is toward or away from the current trian.
                    if next_res in res_to_index:
                        t2_next_pos = res_to_index[t2.all_resources[index + 1]]
                        if abs(t2_next_pos - t1_pos) < abs(t2_pos - t1_pos):
                            towards+=1
                        else:
                            away+=1

                    elif type(next_res) is not str and (next_res[1] , next_res[0]) in res_to_index:
                        t2_next_pos = res_to_index[(next_res[1] , next_res[0]) ]
                        if abs(t2_next_pos - t1_pos) < abs(t2_pos - t1_pos):
                            towards+=1
                        else:
                            away+=1

                    else :
                        away+=1
                else :
                    away+=1
                    
        #construct the state vector
        state_vector[t2_pos] = int(R - 1 - min (R-1 , math.floor(total - w_c * towards - w_d * away)))
    
    #make it equal to the size of the state vector
    if (current - b < 0):
        temp_list = [0] * abs(current-b)
        temp_list.extend(state_vector)
        state_vector = temp_list

    if (current + l + 1 > len(t.all_resources)):
        state_vector.extend([0] * (current + l + 1 - len(t.all_resources)) )
        
    #include the priority of the train in the state vector
    state_vector.insert(0 , t.priority)
    
    return state_vector
    

In [11]:
def state_vector_to_index (state_vector , b = 2 , l = 6 , total_priority = 3 , R = 3):
    '''
    This function takes the state vector and convert it into index used for storing purpose
    state vector is a list
    '''
    temp = 1
    ans = 0
    for i in range(1 , len(state_vector)):
        ans += temp * state_vector[i]
        temp *= R
        
    x = R ** (l + b + 1)
    return ans + (state_vector[0]-1) * x

In [12]:
def index_to_state_vector (index , b = 2 , l = 6 , R = 3 , total_priority = 3):
    '''
    Given th index , it returns the state vector
    '''
    state_vector = [0]
    x = R ** (l + b + 1)
    state_vector[0] = index//x + 1
    index = index % x
    
    while index:
        index, r = divmod(index, 3)
        state_vector.append(r)
        
    state_vector.extend( [0] * (l + b + 2 - len(state_vector)) )
    return state_vector

In [13]:
trains = read_trains(config.TRAINS_FILENAME , N , env)

In [14]:
name_train_map = {}                #Map from train name to train object; note key can be changed to train_id 
for t in trains:
    name_train_map[t.name] = t

In [15]:
#Define a map from train to color to which it belongs 
#Note this feature considers only 10 trains into account
colors = ['red' , 'saddlebrown' , 'forestgreen','dimgray' , 'darkcyan' , 'royalblue' , 'magenta' , 'darkorange' ] 
permanent_color = 'lightcoral'

train_to_color = {}
if (len(trains) <= 8):
    for i,t in enumerate(trains):
        train_to_color[t.name] = colors[i]
else:
    for i,t in enumerate(trains):
        train_to_color[t.name] = permanent_color
        
print(train_to_color)


{'Train001': 'red', 'Train002': 'saddlebrown', 'Train003': 'forestgreen', 'Train004': 'dimgray', 'Train011': 'darkcyan', 'Train012': 'royalblue', 'Train013': 'magenta', 'Train014': 'darkorange'}


In [16]:
logger = setup_logger("main_log_file" ,"Logs/log.log")

# Simulation and Algorithm

In [17]:
#hyperparameters
pho = 0.25
minimum_J = 1000000
total_priority = 3
R = 3
l = 1
b = 0
w_c = 1
w_d = 1
total_episodes = 1000
w = 0.25

tau = 0.9
alpha = 0.1

EPS_START = 1.0
EPS_END = 0.1
EPS_STEPS = 300

gamma = 0.9

CURRENT_EPISODE = 0
TERMINATE = False

TOTAL_STEPS = 0

In [18]:
#Variables
total_states = total_priority * (R ** (l + b + 1))

#map for the train most recent state space
train_state_space_map = {} 
train_action_terminated = {}
reward_for_train = {}

for key , value in  name_train_map.items():
    train_state_space_map[key] = '-'
    train_action_terminated[key] = False
    reward_for_train[key] = []
#for the Q-values
Q_values = np.zeros((total_states , 2))
Q_values_100 = None

In [19]:
def epsilon_greedy (state_space , epsilon = None):
    '''
    Given the state space, pick the action according to epsilon-greedy policy.    
    '''
   #given the state space, it will select the action according to epsilon greedy policy
    actions = ['move' , 'wait']
    
    #compute epsilon
    if (epsilon is None):
        if (CURRENT_EPISODE > EPS_STEPS):
            epsilon = EPS_END
        else:
            epsilon = ((EPS_END - EPS_START) / EPS_STEPS) * CURRENT_EPISODE + EPS_START
        
    #check if the action is random or greedy
    random = np.random.choice([True , False ] , 1 , p = [epsilon , 1 - epsilon])

    #pick the action at random
    if (random):
        action = np.random.choice(actions , 1 , p = [0.5 , 0.5])
        return action[0]
    else:
        
        #pick the action greedily
        if (Q_values_100 is None):
            q_1 = Q_values[state_vector_to_index(state_space) , 0]
            q_2 = Q_values[state_vector_to_index(state_space) , 1]
        else:
            q_1 = Q_values_100[state_vector_to_index(state_space) , 0]
            q_2 = Q_values_100[state_vector_to_index(state_space) , 1]
        
        if (q_1 >= q_2):
            return 'move'
        else:
            return 'wait'

In [20]:
def get_reward (train):
    #get the status of the train
    status = train.status()
    if (status == 'not_yet_started'):
        return 0

    else:
        #if the train is on the station, take the departure time
        current_index = train.current_index

        if (train.station_or_not):
            return min(0,-(train.log[current_index][1] - train.route[current_index][1]))
        else:
            return min(0,-(train.log[current_index][2] - train.route[current_index][2]))

In [21]:
total_reward = []
INVALID_MOVES = 0

In [22]:
def choose_action_algo (env ,N ,name_train_map):
    
    '''
    This function will choose the action for the trains that need action at the
    particular simulation time
    
    This will also check if the move is valid or not.
    '''
    
    actions = ['move' , 'wait']
    
    global TERMINATE , TOTAL_STEPS , total_reward , INVALID_MOVES
    
    while True :
        #This line is extremely important
        #As this line will wait for all the events upto this current simulation point to complete first
        #and then this will execute
        for _ in range(4*len(name_train_map)):
            yield env.timeout(0)
        
        #Check if the trains need action
        if (len(config.TRAINS_NEEDING_ACTION) == 0):
            yield env.timeout(1)
        
        else:
            
            #Pick the train that needs the action most
            time ,name = pick_most_suitable_action(name_train_map , N , env)
            t = name_train_map[name]

            #construct the state vector
            state_vector = get_state_vector(name , b = b , l = l , R = R , w_c = w_c , w_d = w_d)
            action = epsilon_greedy(state_vector , epsilon = 0.1 )
            
            action_index = 0
            if action == 'wait':
                action_index = 1
                
            if action == 'move' and not (t.is_move_valid(env)):
                INVALID_MOVES += 1
                TERMINATE = True
            else:
                TERMINATE = False
            
            reward = get_reward(t)

            if not (train_state_space_map[name] == '-'):
                #computing the Q_values
                if (train_action_terminated[name] == True):
                    reward = -10000
                    
                reward_for_train[name].append((action , reward , state_vector 
                                               , Q_values[state_vector_to_index(state_vector) , 0]
                                               , Q_values[state_vector_to_index(state_vector) , 1]))
                    
                total_reward.append(reward)
                #update the Q-values
                prev_state_vector , prev_action = train_state_space_map[name]
                Q_t1 = Q_values[state_vector_to_index(prev_state_vector) , prev_action]
                Q_t2 = Q_values[state_vector_to_index(state_vector) , action_index]
                Q_t1 = Q_t1 +  alpha * ((reward + gamma * Q_t2  - Q_t1))
                Q_values[state_vector_to_index(prev_state_vector) , prev_action] = Q_t1
                
            
            train_state_space_map[name] = (state_vector , action_index)
            TOTAL_STEPS += 1
            if (TERMINATE):
                train_action_terminated[name] = True
                continue
            else:
                train_action_terminated[name] = False
                
                

            env.process(t.act_simulate(env , action))
            
            config.TRAINS_NEEDING_ACTION.remove((time,  name) )
            

In [23]:
# sim_proc = env.process(update_graph(env , 1 , train_to_color =train_to_color))
def start_episode():
    '''
    Reset everything at the start of the episode
    '''
    env = simpy.Environment()
    N.reset(env)
    for t in trains:
        t.reset(env , N)
    
        
    dead_proc = env.process(deadlock_detection_process(env , 20))
    stat_proc = env.process(create_statistic_proc(env , logger , 20 , N , trains))

    for t in trains:
        proc = env.process(t.put_train_on_track(env))

    proc_act = env.process(choose_action_algo(env , N, name_train_map))
    return env,N

In [24]:
#For plotting and storing info
J_plot = []
minimum_J_plot = []

#for logging about the total success, deadlock and completed episodes
total_deadlock = 0
completed_episodes = 0
terminated = 0

#Run for the required number of episodes
for episode in range(total_episodes):

    #reset the episode
    INVALID_MOVES = 0
    config.TRAINS_NEEDING_ACTION = []
    total_reward = []
    TOTAL_STEPS = 0
    env , N = start_episode()    
    config.DEADLOCK = True
    config.TRAINS_COMPLETED_JOURNEY = False
    config.CURRENT_SIMULATION_TIME = 0
    
    for key , value in reward_for_train.items():
        reward_for_train[key] = []
    
    TERMINATE = False
    CURRENT_EPISODE += 1

    #mark the seen to all zero for the current episode
    seen = np.zeros((total_states , 2))
    
    #This loop is not needed as such
    for time in range(0 , config.TOTAL_SIMULATION_TIME , 1):
            
        if (config.DEADLOCK== False):
            logger.warning("Time : {} Terminating Simulation. Deadlock detected".format(env.now))
#             print("Terminating Simulation : Deadlock detected")
            total_deadlock += 1 
            
            break

        if (config.TRAINS_COMPLETED_JOURNEY == True):
            logger.info("Time : {} All trains completed journey. Terminating Simulation".format(env.now))
            #print("Terminating Simulation : All trains completed journey")
            break

        config.CURRENT_SIMULATION_TIME += 1

        #Run the simulation
        env.run(until = config.CURRENT_SIMULATION_TIME)

    
    if (config.TRAINS_COMPLETED_JOURNEY):
        completed_episodes += 1
        J = compute_loss(trains)
        J_plot.append(J)
        minimum_J = min(J , minimum_J)
        minimum_J_plot.append(minimum_J)
        
    if (config.DEADLOCK == False):
        print("Episode : {} Ended in deadlock".format(episode))
    else:
        print('*' * 50)
        print ("Episode : {} , J : {:.5f} , total_time {} , total_deadlock - {}".format( episode ,J , 
                                                        config.CURRENT_SIMULATION_TIME,  total_deadlock ))

        reward_sum = np.sum(np.array(total_reward))
        temp = (reward_sum + INVALID_MOVES * 100)/(TOTAL_STEPS - INVALID_MOVES)
        print("Episode : {} Total reward {} , total_steps {}, average reward {:.2f} , invalid_moves {} , reward_step {:.2f}".format(
        episode, reward_sum , TOTAL_STEPS , reward_sum/TOTAL_STEPS, INVALID_MOVES , temp )   )
    
    
    
    
    if (episode == 900):
        Q_values_100 = copy.deepcopy(Q_values)
    

**************************************************
Episode : 0 , J : 20.02500 , total_time 61 , total_deadlock - 0
Episode : 0 Total reward -123963 , total_steps 337, average reward -367.84 , invalid_moves 12 , reward_step -377.73
**************************************************
Episode : 1 , J : 54.85000 , total_time 161 , total_deadlock - 0
Episode : 1 Total reward -305177 , total_steps 929, average reward -328.50 , invalid_moves 27 , reward_step -335.34
**************************************************
Episode : 2 , J : 13.65000 , total_time 121 , total_deadlock - 0
Episode : 2 Total reward -6311 , total_steps 380, average reward -16.61 , invalid_moves 0 , reward_step -16.61
**************************************************
Episode : 3 , J : 48.03750 , total_time 121 , total_deadlock - 0
Episode : 3 Total reward -24720 , total_steps 667, average reward -37.06 , invalid_moves 1 , reward_step -36.97
**************************************************
Episode : 4 , J : 4.05000 , tot

**************************************************
Episode : 36 , J : 57.96250 , total_time 201 , total_deadlock - 0
Episode : 36 Total reward -34667 , total_steps 841, average reward -41.22 , invalid_moves 0 , reward_step -41.22
**************************************************
Episode : 37 , J : 16.15000 , total_time 101 , total_deadlock - 0
Episode : 37 Total reward -3636 , total_steps 321, average reward -11.33 , invalid_moves 0 , reward_step -11.33
**************************************************
Episode : 38 , J : 9.86250 , total_time 41 , total_deadlock - 0
Episode : 38 Total reward -1036 , total_steps 198, average reward -5.23 , invalid_moves 0 , reward_step -5.23
**************************************************
Episode : 39 , J : 21.91250 , total_time 121 , total_deadlock - 0
Episode : 39 Total reward -3685 , total_steps 335, average reward -11.00 , invalid_moves 0 , reward_step -11.00
**************************************************
Episode : 40 , J : 24.71250 , total_

**************************************************
Episode : 76 , J : 14.31250 , total_time 141 , total_deadlock - 2
Episode : 76 Total reward -4256 , total_steps 366, average reward -11.63 , invalid_moves 0 , reward_step -11.63
**************************************************
Episode : 77 , J : 16.22500 , total_time 81 , total_deadlock - 2
Episode : 77 Total reward -2687 , total_steps 287, average reward -9.36 , invalid_moves 0 , reward_step -9.36
**************************************************
Episode : 78 , J : 21.86250 , total_time 121 , total_deadlock - 2
Episode : 78 Total reward -39361 , total_steps 487, average reward -80.82 , invalid_moves 3 , reward_step -80.70
**************************************************
Episode : 79 , J : 72.21250 , total_time 261 , total_deadlock - 2
Episode : 79 Total reward -263579 , total_steps 1235, average reward -213.42 , invalid_moves 19 , reward_step -215.20
**************************************************
Episode : 80 , J : 62.21250 ,

**************************************************
Episode : 113 , J : 22.65000 , total_time 101 , total_deadlock - 3
Episode : 113 Total reward -4990 , total_steps 386, average reward -12.93 , invalid_moves 0 , reward_step -12.93
**************************************************
Episode : 114 , J : 10.86250 , total_time 61 , total_deadlock - 3
Episode : 114 Total reward -1434 , total_steps 228, average reward -6.29 , invalid_moves 0 , reward_step -6.29
**************************************************
Episode : 115 , J : 14.83750 , total_time 101 , total_deadlock - 3
Episode : 115 Total reward -3293 , total_steps 265, average reward -12.43 , invalid_moves 0 , reward_step -12.43
**************************************************
Episode : 116 , J : 31.48750 , total_time 141 , total_deadlock - 3
Episode : 116 Total reward -8577 , total_steps 474, average reward -18.09 , invalid_moves 0 , reward_step -18.09
**************************************************
Episode : 117 , J : 15.35000

**************************************************
Episode : 151 , J : 11.23750 , total_time 81 , total_deadlock - 5
Episode : 151 Total reward -12326 , total_steps 278, average reward -44.34 , invalid_moves 1 , reward_step -44.14
**************************************************
Episode : 152 , J : 25.68750 , total_time 161 , total_deadlock - 5
Episode : 152 Total reward -9460 , total_steps 486, average reward -19.47 , invalid_moves 0 , reward_step -19.47
**************************************************
Episode : 153 , J : 19.88750 , total_time 121 , total_deadlock - 5
Episode : 153 Total reward -6628 , total_steps 414, average reward -16.01 , invalid_moves 0 , reward_step -16.01
**************************************************
Episode : 154 , J : 14.26250 , total_time 61 , total_deadlock - 5
Episode : 154 Total reward -2006 , total_steps 249, average reward -8.06 , invalid_moves 0 , reward_step -8.06
**************************************************
Episode : 155 , J : 33.17500

**************************************************
Episode : 189 , J : 19.90000 , total_time 141 , total_deadlock - 7
Episode : 189 Total reward -7908 , total_steps 451, average reward -17.53 , invalid_moves 0 , reward_step -17.53
**************************************************
Episode : 190 , J : 17.80000 , total_time 121 , total_deadlock - 7
Episode : 190 Total reward -30028 , total_steps 415, average reward -72.36 , invalid_moves 2 , reward_step -72.22
**************************************************
Episode : 191 , J : 104.71250 , total_time 221 , total_deadlock - 7
Episode : 191 Total reward -462640 , total_steps 1631, average reward -283.65 , invalid_moves 35 , reward_step -287.68
**************************************************
Episode : 192 , J : 30.57500 , total_time 141 , total_deadlock - 7
Episode : 192 Total reward -114336 , total_steps 758, average reward -150.84 , invalid_moves 9 , reward_step -151.45
**************************************************
Episode : 193

**************************************************
Episode : 227 , J : 18.27500 , total_time 141 , total_deadlock - 7
Episode : 227 Total reward -5502 , total_steps 408, average reward -13.49 , invalid_moves 0 , reward_step -13.49
**************************************************
Episode : 228 , J : 28.27500 , total_time 81 , total_deadlock - 7
Episode : 228 Total reward -4069 , total_steps 388, average reward -10.49 , invalid_moves 0 , reward_step -10.49
**************************************************
Episode : 229 , J : 45.33750 , total_time 161 , total_deadlock - 7
Episode : 229 Total reward -10502 , total_steps 608, average reward -17.27 , invalid_moves 0 , reward_step -17.27
**************************************************
Episode : 230 , J : 9.07500 , total_time 61 , total_deadlock - 7
Episode : 230 Total reward -11458 , total_steps 241, average reward -47.54 , invalid_moves 1 , reward_step -47.33
**************************************************
Episode : 231 , J : 19.787

**************************************************
Episode : 263 , J : 23.36250 , total_time 101 , total_deadlock - 7
Episode : 263 Total reward -6383 , total_steps 405, average reward -15.76 , invalid_moves 0 , reward_step -15.76
**************************************************
Episode : 264 , J : 17.22500 , total_time 101 , total_deadlock - 7
Episode : 264 Total reward -13868 , total_steps 337, average reward -41.15 , invalid_moves 1 , reward_step -40.98
**************************************************
Episode : 265 , J : 22.75000 , total_time 81 , total_deadlock - 7
Episode : 265 Total reward -3613 , total_steps 342, average reward -10.56 , invalid_moves 0 , reward_step -10.56
**************************************************
Episode : 266 , J : 69.15000 , total_time 201 , total_deadlock - 7
Episode : 266 Total reward -209748 , total_steps 1098, average reward -191.03 , invalid_moves 15 , reward_step -192.29
**************************************************
Episode : 267 , J :

**************************************************
Episode : 299 , J : 80.91250 , total_time 261 , total_deadlock - 7
Episode : 299 Total reward -288555 , total_steps 1468, average reward -196.56 , invalid_moves 19 , reward_step -197.83
**************************************************
Episode : 300 , J : 82.10000 , total_time 261 , total_deadlock - 7
Episode : 300 Total reward -225829 , total_steps 1240, average reward -182.12 , invalid_moves 14 , reward_step -183.06
**************************************************
Episode : 301 , J : 111.51250 , total_time 361 , total_deadlock - 7
Episode : 301 Total reward -580160 , total_steps 1698, average reward -341.67 , invalid_moves 43 , reward_step -347.95
**************************************************
Episode : 302 , J : 132.43750 , total_time 281 , total_deadlock - 7
Episode : 302 Total reward -592975 , total_steps 1843, average reward -321.74 , invalid_moves 45 , reward_step -327.29
**************************************************

**************************************************
Episode : 335 , J : 12.03750 , total_time 61 , total_deadlock - 7
Episode : 335 Total reward -1740 , total_steps 236, average reward -7.37 , invalid_moves 0 , reward_step -7.37
**************************************************
Episode : 336 , J : 25.61250 , total_time 121 , total_deadlock - 7
Episode : 336 Total reward -6288 , total_steps 404, average reward -15.56 , invalid_moves 0 , reward_step -15.56
**************************************************
Episode : 337 , J : 22.18750 , total_time 81 , total_deadlock - 7
Episode : 337 Total reward -3115 , total_steps 325, average reward -9.58 , invalid_moves 0 , reward_step -9.58
**************************************************
Episode : 338 , J : 26.12500 , total_time 121 , total_deadlock - 7
Episode : 338 Total reward -5175 , total_steps 364, average reward -14.22 , invalid_moves 0 , reward_step -14.22
**************************************************
Episode : 339 , J : 18.08750 , 

**************************************************
Episode : 371 , J : 52.95000 , total_time 241 , total_deadlock - 8
Episode : 371 Total reward -171659 , total_steps 866, average reward -198.22 , invalid_moves 13 , reward_step -199.72
**************************************************
Episode : 372 , J : 140.18750 , total_time 341 , total_deadlock - 8
Episode : 372 Total reward -564539 , total_steps 2051, average reward -275.25 , invalid_moves 35 , reward_step -278.29
**************************************************
Episode : 373 , J : 73.11250 , total_time 301 , total_deadlock - 8
Episode : 373 Total reward -259600 , total_steps 1199, average reward -216.51 , invalid_moves 18 , reward_step -218.29
**************************************************
Episode : 374 , J : 91.51250 , total_time 321 , total_deadlock - 8
Episode : 374 Total reward -424300 , total_steps 1373, average reward -309.03 , invalid_moves 33 , reward_step -314.18
**************************************************
E

**************************************************
Episode : 406 , J : 9.28750 , total_time 81 , total_deadlock - 8
Episode : 406 Total reward -22063 , total_steps 277, average reward -79.65 , invalid_moves 2 , reward_step -79.50
**************************************************
Episode : 407 , J : 55.48750 , total_time 221 , total_deadlock - 8
Episode : 407 Total reward -226825 , total_steps 939, average reward -241.56 , invalid_moves 18 , reward_step -244.33
Episode : 408 Ended in deadlock
**************************************************
Episode : 409 , J : 154.12500 , total_time 361 , total_deadlock - 9
Episode : 409 Total reward -722549 , total_steps 2363, average reward -305.78 , invalid_moves 50 , reward_step -310.22
**************************************************
Episode : 410 , J : 24.90000 , total_time 121 , total_deadlock - 9
Episode : 410 Total reward -7998 , total_steps 421, average reward -19.00 , invalid_moves 0 , reward_step -19.00
*********************************

**************************************************
Episode : 442 , J : 81.71250 , total_time 301 , total_deadlock - 9
Episode : 442 Total reward -331939 , total_steps 1274, average reward -260.55 , invalid_moves 25 , reward_step -263.76
**************************************************
Episode : 443 , J : 109.22500 , total_time 401 , total_deadlock - 9
Episode : 443 Total reward -605879 , total_steps 1782, average reward -340.00 , invalid_moves 45 , reward_step -346.22
**************************************************
Episode : 444 , J : 27.66250 , total_time 161 , total_deadlock - 9
Episode : 444 Total reward -99287 , total_steps 652, average reward -152.28 , invalid_moves 8 , reward_step -152.93
**************************************************
Episode : 445 , J : 102.32500 , total_time 361 , total_deadlock - 9
Episode : 445 Total reward -448069 , total_steps 1655, average reward -270.74 , invalid_moves 30 , reward_step -273.89
**************************************************
Ep

**************************************************
Episode : 478 , J : 55.90000 , total_time 201 , total_deadlock - 10
Episode : 478 Total reward -201614 , total_steps 1087, average reward -185.48 , invalid_moves 15 , reward_step -186.67
**************************************************
Episode : 479 , J : 52.15000 , total_time 221 , total_deadlock - 10
Episode : 479 Total reward -277200 , total_steps 1109, average reward -249.95 , invalid_moves 22 , reward_step -252.99
**************************************************
Episode : 480 , J : 106.06250 , total_time 321 , total_deadlock - 10
Episode : 480 Total reward -512486 , total_steps 1589, average reward -322.52 , invalid_moves 39 , reward_step -328.12
**************************************************
Episode : 481 , J : 63.05000 , total_time 241 , total_deadlock - 10
Episode : 481 Total reward -281834 , total_steps 1225, average reward -230.07 , invalid_moves 21 , reward_step -232.34
***********************************************

**************************************************
Episode : 513 , J : 22.05000 , total_time 81 , total_deadlock - 10
Episode : 513 Total reward -25857 , total_steps 415, average reward -62.31 , invalid_moves 2 , reward_step -62.12
**************************************************
Episode : 514 , J : 63.30000 , total_time 221 , total_deadlock - 10
Episode : 514 Total reward -214407 , total_steps 1175, average reward -182.47 , invalid_moves 15 , reward_step -183.54
**************************************************
Episode : 515 , J : 78.73750 , total_time 261 , total_deadlock - 10
Episode : 515 Total reward -374113 , total_steps 1281, average reward -292.05 , invalid_moves 29 , reward_step -296.50
**************************************************
Episode : 516 , J : 99.85000 , total_time 281 , total_deadlock - 10
Episode : 516 Total reward -518170 , total_steps 1638, average reward -316.34 , invalid_moves 39 , reward_step -321.62
**************************************************
Epi

**************************************************
Episode : 550 , J : 30.16250 , total_time 101 , total_deadlock - 10
Episode : 550 Total reward -5385 , total_steps 427, average reward -12.61 , invalid_moves 0 , reward_step -12.61
**************************************************
Episode : 551 , J : 19.77500 , total_time 81 , total_deadlock - 10
Episode : 551 Total reward -3293 , total_steps 331, average reward -9.95 , invalid_moves 0 , reward_step -9.95
**************************************************
Episode : 552 , J : 16.57500 , total_time 81 , total_deadlock - 10
Episode : 552 Total reward -2471 , total_steps 278, average reward -8.89 , invalid_moves 0 , reward_step -8.89
**************************************************
Episode : 553 , J : 25.06250 , total_time 121 , total_deadlock - 10
Episode : 553 Total reward -5687 , total_steps 410, average reward -13.87 , invalid_moves 0 , reward_step -13.87
**************************************************
Episode : 554 , J : 25.3000

**************************************************
Episode : 586 , J : 105.95000 , total_time 321 , total_deadlock - 11
Episode : 586 Total reward -529569 , total_steps 1624, average reward -326.09 , invalid_moves 40 , reward_step -331.80
**************************************************
Episode : 587 , J : 106.53750 , total_time 301 , total_deadlock - 11
Episode : 587 Total reward -617139 , total_steps 1701, average reward -362.81 , invalid_moves 48 , reward_step -370.44
**************************************************
Episode : 588 , J : 99.21250 , total_time 321 , total_deadlock - 11
Episode : 588 Total reward -425947 , total_steps 1561, average reward -272.87 , invalid_moves 31 , reward_step -276.37
**************************************************
Episode : 589 , J : 120.01250 , total_time 321 , total_deadlock - 11
Episode : 589 Total reward -435536 , total_steps 1639, average reward -265.73 , invalid_moves 30 , reward_step -268.82
*********************************************

**************************************************
Episode : 622 , J : 9.18750 , total_time 61 , total_deadlock - 11
Episode : 622 Total reward -1524 , total_steps 219, average reward -6.96 , invalid_moves 0 , reward_step -6.96
**************************************************
Episode : 623 , J : 12.68750 , total_time 81 , total_deadlock - 11
Episode : 623 Total reward -2443 , total_steps 298, average reward -8.20 , invalid_moves 0 , reward_step -8.20
**************************************************
Episode : 624 , J : 37.61250 , total_time 141 , total_deadlock - 11
Episode : 624 Total reward -17837 , total_steps 551, average reward -32.37 , invalid_moves 0 , reward_step -32.37
**************************************************
Episode : 625 , J : 52.43750 , total_time 201 , total_deadlock - 11
Episode : 625 Total reward -28300 , total_steps 663, average reward -42.68 , invalid_moves 1 , reward_step -42.60
**************************************************
Episode : 626 , J : 14.900

**************************************************
Episode : 658 , J : 62.66250 , total_time 241 , total_deadlock - 12
Episode : 658 Total reward -169551 , total_steps 954, average reward -177.73 , invalid_moves 11 , reward_step -178.63
**************************************************
Episode : 659 , J : 42.52500 , total_time 201 , total_deadlock - 12
Episode : 659 Total reward -7362 , total_steps 502, average reward -14.67 , invalid_moves 0 , reward_step -14.67
**************************************************
Episode : 660 , J : 21.65000 , total_time 221 , total_deadlock - 12
Episode : 660 Total reward -13530 , total_steps 542, average reward -24.96 , invalid_moves 0 , reward_step -24.96
**************************************************
Episode : 661 , J : 24.16250 , total_time 101 , total_deadlock - 12
Episode : 661 Total reward -15412 , total_steps 376, average reward -40.99 , invalid_moves 1 , reward_step -40.83
**************************************************
Episode : 662 

**************************************************
Episode : 694 , J : 96.87500 , total_time 281 , total_deadlock - 13
Episode : 694 Total reward -388657 , total_steps 1546, average reward -251.40 , invalid_moves 28 , reward_step -254.19
**************************************************
Episode : 695 , J : 110.36250 , total_time 321 , total_deadlock - 13
Episode : 695 Total reward -514627 , total_steps 1660, average reward -310.02 , invalid_moves 37 , reward_step -314.80
**************************************************
Episode : 696 , J : 95.17500 , total_time 221 , total_deadlock - 13
Episode : 696 Total reward -480222 , total_steps 1481, average reward -324.26 , invalid_moves 38 , reward_step -330.16
**************************************************
Episode : 697 , J : 121.65000 , total_time 341 , total_deadlock - 13
Episode : 697 Total reward -497149 , total_steps 1661, average reward -299.31 , invalid_moves 35 , reward_step -303.60
**********************************************

**************************************************
Episode : 731 , J : 59.21250 , total_time 241 , total_deadlock - 13
Episode : 731 Total reward -211428 , total_steps 1125, average reward -187.94 , invalid_moves 14 , reward_step -189.04
**************************************************
Episode : 732 , J : 82.72500 , total_time 281 , total_deadlock - 13
Episode : 732 Total reward -492259 , total_steps 1459, average reward -337.39 , invalid_moves 40 , reward_step -344.09
**************************************************
Episode : 733 , J : 89.57500 , total_time 241 , total_deadlock - 13
Episode : 733 Total reward -333499 , total_steps 1349, average reward -247.22 , invalid_moves 25 , reward_step -250.00
**************************************************
Episode : 734 , J : 117.35000 , total_time 341 , total_deadlock - 13
Episode : 734 Total reward -457704 , total_steps 1700, average reward -269.24 , invalid_moves 31 , reward_step -272.38
***********************************************

**************************************************
Episode : 766 , J : 103.47500 , total_time 301 , total_deadlock - 13
Episode : 766 Total reward -587068 , total_steps 1657, average reward -354.30 , invalid_moves 46 , reward_step -361.56
**************************************************
Episode : 767 , J : 103.01250 , total_time 361 , total_deadlock - 13
Episode : 767 Total reward -524985 , total_steps 1727, average reward -303.99 , invalid_moves 36 , reward_step -308.33
**************************************************
Episode : 768 , J : 138.72500 , total_time 361 , total_deadlock - 13
Episode : 768 Total reward -688943 , total_steps 2077, average reward -331.70 , invalid_moves 48 , reward_step -337.18
**************************************************
Episode : 769 , J : 110.30000 , total_time 261 , total_deadlock - 13
Episode : 769 Total reward -497170 , total_steps 1637, average reward -303.71 , invalid_moves 37 , reward_step -308.42
********************************************

**************************************************
Episode : 801 , J : 108.85000 , total_time 281 , total_deadlock - 13
Episode : 801 Total reward -409491 , total_steps 1537, average reward -266.42 , invalid_moves 29 , reward_step -269.62
**************************************************
Episode : 802 , J : 39.07500 , total_time 221 , total_deadlock - 13
Episode : 802 Total reward -131589 , total_steps 679, average reward -193.80 , invalid_moves 10 , reward_step -195.20
**************************************************
Episode : 803 , J : 130.41250 , total_time 301 , total_deadlock - 13
Episode : 803 Total reward -590311 , total_steps 1880, average reward -314.00 , invalid_moves 43 , reward_step -319.00
**************************************************
Episode : 804 , J : 71.57500 , total_time 201 , total_deadlock - 13
Episode : 804 Total reward -296885 , total_steps 1158, average reward -256.38 , invalid_moves 23 , reward_step -259.55
***********************************************

**************************************************
Episode : 837 , J : 20.95000 , total_time 101 , total_deadlock - 13
Episode : 837 Total reward -4920 , total_steps 448, average reward -10.98 , invalid_moves 0 , reward_step -10.98
**************************************************
Episode : 838 , J : 14.26250 , total_time 61 , total_deadlock - 13
Episode : 838 Total reward -2370 , total_steps 275, average reward -8.62 , invalid_moves 0 , reward_step -8.62
**************************************************
Episode : 839 , J : 19.91250 , total_time 81 , total_deadlock - 13
Episode : 839 Total reward -3310 , total_steps 320, average reward -10.34 , invalid_moves 0 , reward_step -10.34
**************************************************
Episode : 840 , J : 12.60000 , total_time 81 , total_deadlock - 13
Episode : 840 Total reward -13042 , total_steps 348, average reward -37.48 , invalid_moves 1 , reward_step -37.30
**************************************************
Episode : 841 , J : 23.08

**************************************************
Episode : 873 , J : 116.03750 , total_time 321 , total_deadlock - 14
Episode : 873 Total reward -758233 , total_steps 1899, average reward -399.28 , invalid_moves 61 , reward_step -409.21
**************************************************
Episode : 874 , J : 71.41250 , total_time 281 , total_deadlock - 14
Episode : 874 Total reward -314381 , total_steps 1133, average reward -277.48 , invalid_moves 25 , reward_step -281.48
**************************************************
Episode : 875 , J : 98.48750 , total_time 301 , total_deadlock - 14
Episode : 875 Total reward -354836 , total_steps 1429, average reward -248.31 , invalid_moves 25 , reward_step -250.95
**************************************************
Episode : 876 , J : 81.51250 , total_time 261 , total_deadlock - 14
Episode : 876 Total reward -164712 , total_steps 1261, average reward -130.62 , invalid_moves 8 , reward_step -130.82
************************************************

**************************************************
Episode : 909 , J : 13.71250 , total_time 61 , total_deadlock - 14
Episode : 909 Total reward -2025 , total_steps 275, average reward -7.36 , invalid_moves 0 , reward_step -7.36
**************************************************
Episode : 910 , J : 15.43750 , total_time 81 , total_deadlock - 14
Episode : 910 Total reward -3150 , total_steps 337, average reward -9.35 , invalid_moves 0 , reward_step -9.35
**************************************************
Episode : 911 , J : 22.78750 , total_time 101 , total_deadlock - 14
Episode : 911 Total reward -4317 , total_steps 350, average reward -12.33 , invalid_moves 0 , reward_step -12.33
**************************************************
Episode : 912 , J : 11.91250 , total_time 61 , total_deadlock - 14
Episode : 912 Total reward -1924 , total_steps 263, average reward -7.32 , invalid_moves 0 , reward_step -7.32
**************************************************
Episode : 913 , J : 9.67500 , 

**************************************************
Episode : 947 , J : 17.95000 , total_time 121 , total_deadlock - 14
Episode : 947 Total reward -26326 , total_steps 462, average reward -56.98 , invalid_moves 2 , reward_step -56.80
**************************************************
Episode : 948 , J : 22.11250 , total_time 101 , total_deadlock - 14
Episode : 948 Total reward -14680 , total_steps 368, average reward -39.89 , invalid_moves 1 , reward_step -39.73
**************************************************
Episode : 949 , J : 12.48750 , total_time 101 , total_deadlock - 14
Episode : 949 Total reward -2040 , total_steps 295, average reward -6.92 , invalid_moves 0 , reward_step -6.92
**************************************************
Episode : 950 , J : 26.91250 , total_time 121 , total_deadlock - 14
Episode : 950 Total reward -6109 , total_steps 401, average reward -15.23 , invalid_moves 0 , reward_step -15.23
**************************************************
Episode : 951 , J : 2

**************************************************
Episode : 984 , J : 20.67500 , total_time 61 , total_deadlock - 15
Episode : 984 Total reward -3116 , total_steps 344, average reward -9.06 , invalid_moves 0 , reward_step -9.06
**************************************************
Episode : 985 , J : 31.85000 , total_time 101 , total_deadlock - 15
Episode : 985 Total reward -16655 , total_steps 464, average reward -35.89 , invalid_moves 1 , reward_step -35.76
**************************************************
Episode : 986 , J : 8.46250 , total_time 101 , total_deadlock - 15
Episode : 986 Total reward -1737 , total_steps 292, average reward -5.95 , invalid_moves 0 , reward_step -5.95
**************************************************
Episode : 987 , J : 18.03750 , total_time 81 , total_deadlock - 15
Episode : 987 Total reward -3871 , total_steps 342, average reward -11.32 , invalid_moves 0 , reward_step -11.32
**************************************************
Episode : 988 , J : 14.5125

In [25]:
reward_for_train

{'Train001': [('wait', 0, [1, 0, 0], -277.15197534294657, -207.49398077828332),
  ('wait', 0, [1, 0, 0], -240.12924398191984, -207.49398077828332),
  ('wait', 0, [1, 0, 0], -240.12924398191984, -191.4639698888898),
  ('move', 0, [1, 0, 0], -240.12924398191984, -176.6723623890801),
  ('move', -3, [1, 0, 1], -301.4430936695782, -452.7571024250498),
  ('move', -3, [1, 2, 0], -263.1316019384591, -299.9913983867555),
  ('move', -3, [1, 0, 1], -289.8344098038349, -452.7571024250498),
  ('move', -3, [1, 2, 0], -263.20353862695833, -299.9913983867555),
  ('wait', -3, [1, 0, 2], -12652.231447998101, -337.75849207708006),
  ('move', -3, [1, 2, 1], -292.8539960472058, -1338.2685740620523),
  ('move', -4, [1, 2, 0], -274.6072178957541, -299.9913983867555),
  ('move', -4, [1, 1, 1], -299.7787182492755, -699.6220610478425),
  ('move', -4, [1, 2, 0], -271.910777756308, -294.9642285461477),
  ('wait', -4, [1, 0, 0], -253.22364152290703, -162.90287566829798),
  ('wait', -4, [1, 0, 0], -252.719082346388

In [26]:
for i in range(9):
    print(index_to_state_vector(i, b=b , l=l, R = R , total_priority = total_priority) , Q_values[i , :])


[1, 0, 0] [-254.62959854 -184.36481016]
[1, 1, 0] [-251.50584749 -191.67455561]
[1, 2, 0] [-226.82854205 -287.70932206]
[1, 0, 1] [-245.48533575 -430.95288628]
[1, 1, 1] [-288.06149123 -699.62206105]
[1, 2, 1] [ -262.132358   -1338.26857406]
[1, 0, 2] [-12652.231448     -305.30410059]
[1, 1, 2] [-15164.2573021    -365.10496063]
[1, 2, 2] [-14892.1412829   -1635.08300925]


0

In [27]:
for t in trains:
    t.print_details()

**************************************************
Train ID : 8
Name :Train001
Priority : 1
Average speed : 1
Route of the train
            Alpha : 0 - 1 
            Bravo : 2 - 3 
            Charlie : 4 - 5 
            Delta : 6 - 7 
            Echo : 8 - 9 
Train has completed the journey
______Printing log________
            Alpha : 117 176 
            Bravo : 177 178 
            Charlie : 179 180 
            Delta : 181 182 
            Echo : 183 184 
**************************************************
**************************************************
Train ID : 8
Name :Train002
Priority : 1
Average speed : 1
Route of the train
            Alpha : 0 - 1 
            Bravo : 2 - 3 
            Charlie : 4 - 5 
            Delta : 6 - 7 
            Echo : 8 - 9 
Train not yet started
**************************************************
**************************************************
Train ID : 8
Name :Train003
Priority : 1
Average speed : 1
Route of the train
            

# Resource Usage

In [41]:
G,train_nodes , station_nodes , track_nodes = create_resource_usage_graph(trains , N)

In [42]:
fig = plt.figure(figsize = (6 , 6))
ax = fig.add_subplot(111)
draw_network_usage_graph(G,train_nodes , station_nodes , track_nodes, N,ax)

<IPython.core.display.Javascript object>

  pos = np.row_stack((pos[x] for x in node_list))


# TODO

1. Create a simple train class with all the parameters                                                Done
2. Implement the resource facility with the track and the station                                     Done
3. Run the single train on the track (without getting worried wether the resource is free or not)     Done  
4. Simulate whole thing on graph (single train only)                                                  Done
5. Run multiple instances of the train without worrying about the deadlock                            Done
   Create action for each train                                                                       Done
6. Try to simulate as you want to do in the project i.e. take action from the user.                   Done
   Also able to order the train for which to take the action at a particular time 
   
   
   
7. Try to create deadlock with the trains                                                             Done 
8. Create the graph for resource usage.                                                               Done  
9. Use the standard deadlock detection algorithm for the detection of the algorithm                   Done 
   Implement the Bank's algorithm for deadlock detection.


# List of reasons of creating events
1. If a train is standing at a station, the event processing time            Can be done by implementing timeout if train arrive early before depart time
    corresponds to the earliest time at which the train can depart,          occupancy of next track.
    as defined by its minimum halt time at the station and by any           Done
    departure time constraints enforced for passenger convenience.

2. `If it is running between two stations, the event processing time         Already done   
    corresponds to the earliest time at which it can arrive at the          occupancy of next station if move.
    next station, as defined by the length of the track and the train       Done
    running speed.

3. If the train is yet to start, the event processing                       Done
    time is the time at which it is expected at the starting station.       

# TODO

Create the actions properly and implement deadlock Avoidance heuristic

1. Code the proper list of events. All the three listed above.                                      Done
2. Create the proper time table with arrival and departure time of each train at each station.      Done
3. Create separate log file for each train.                                                         Done
3. Run simulation with the first toy environment (as in the paper).                                 Done
4. Label the current set of implementation under Simulation Phase.                                  Done
5. Implement the actions properly without actually waiting for the resource.                        Done
6. Understand Heuristic that can be used to avoid deadlock.                                         Done
7. Move that train that occupies the most congested resource first and then other.                  Done
   If tie break it using priority of each train.
   
   
8. Implement the state vector when action need to be taken.                                       Note Done



# TODO 
1. Implement the function to compute J      $\;\;\;\;\;\;\;\;\;\;\;$   Done
2. Implement the state space i.e. when taking action train should know what is the state space 
3. Implement Proxy reward.
4. Implement the Q-learning algo given in the paper
5. Check for the first test case

In [26]:
# Each wait is accumulating too much reward