In [1]:
import config
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib
import random
import time
from network import *
from utility import *
from train import *
import simpy
import logging
import math
import copy

In [2]:
%matplotlib notebook
plt.ion()

In [3]:
env = simpy.Environment()

In [4]:
N = Network(config.STATIONS_FILENAME, config.RAILWAY_FILENAME , env)

In [5]:
def create_statistic_proc(env , logger , stat_time , N , trains):
    '''
    Create statistic in the log file every stat_time
    @parameters:
    env : simpy environment
    logger : log file where need to create the log
    stat_time : time after which log is to be created
    N         : network
    trains    : trains in the network
    '''
    while True :
        yield env.timeout(20)

        #create statistics
        not_yet_started = 0
        running = 0
        Completed_resource_not_freed = 0
        Completed = 0

        for t in trains:
            status = t.status()
            if (status == 'not_yet_started'):
                not_yet_started += 1
            elif (status == 'running'):
                running += 1
            elif(status == 'Completed_resource_not_freed'):
                Completed_resource_not_freed += 1
            else:
                Completed += 1
        
        if (len(trains) == Completed):
            config.TRAINS_COMPLETED_JOURNEY = True

        logger.info("Time : {} Number of trains not yet started - {}".format(env.now , not_yet_started))
        logger.info("Time : {} Number of running trains - {}".format(env.now , running))
        logger.info("Time : {} Number of trains Completed but resource not freed - {}".format(env.now , Completed_resource_not_freed))
        logger.info("Time : {} Number of trains which have Completed journey - {}".format(env.now , Completed))

In [6]:
#utility function
def update_graph (env , interval , train_to_color = None):
    '''
    This function will simulate the whole process on a GUI
    This function is going to be used as the process
    @parameters
    env : simpy environment
    total_time : total time for the simulation
    '''
    fig = plt.figure(figsize = (6 , 6))
    ax = fig.add_subplot(111)
    fig.show()
    fig.canvas.draw()
    
    while True:
        #Draw graph
        
        N.draw_railway_network(ax  = ax , suppress_station_info= False , 
                               suppress_track_info= False , suppress_edge_label=False , 
                               train_to_color=train_to_color)


        #yield interval time
        yield env.timeout(interval)
        time.sleep(0.5)
        
        #log info
        plt.title("Time {}".format(env.now))
        
        #Paint the canvas
        fig.canvas.draw()

In [7]:
def deadlock_detection_process (env , deadlock_time):
    '''
    For creating deadlock detection process
    '''
    global DEADLOCK
    while True:
        
        #Invoke this deadlock process after every 20 iterations
        
        yield env.timeout(deadlock_time)
        logger.info("Time : {} Checking Deadlock".format(env.now))
        config.DEADLOCK , _ = deadlock_detection(N , trains)
        


In [8]:
def compute_loss_one_train (t):
    '''
    This function should be called after the successful completion of the train
    @Parameter : Name of the train 
    @Return : Loss for the train  
    '''
    p = t.priority
    loss = 0
    
    #for each station in the path
    for route , log in zip(t.route , t.log):
        d , t1 , t2 = route
        _ , t3 , t4 = log
        
        #Compute the loss, for the arrival (may arrive before the assigned time)
        #and departure (always after the assigned time)
        if t3 >= t1:
            loss += (t3 - t1)
        if (t4 >= t2):
            loss += (t4 - t2)

    return loss / p
    

In [9]:

def compute_loss (trains):
    '''
    Takes all the trains in the network after completing the journey and then computes the value of J
    '''
    total_loss = 0
    total_departures = 0

    #for each train compute the loss, and return the total loss
    for t in trains:
        total_departures += 2*len(t.route)
        total_loss += compute_loss_one_train(t)
        
    total_loss = total_loss / total_departures
    return total_loss

In [10]:
def get_state_vector (train_name , b = 2 , l= 6 , R = 3 , w_c = 1 , w_d = 1):
    #get the train
    t = name_train_map[train_name]
    
    #find the current position 
    current = t.current_index * 2;
    if (not t.station_or_not):
        current += 1
            
    #get the local neighbors
    res =t.all_resources[ max(0 , current - b) : min(current + l+1 , len(t.all_resources )) ]
    state_vector = [0 for _ in range(len(res))]
    res_to_index = {}
    for i in range(len(res)):
        res_to_index[res[i]] = i

    #position of the train in the local space
    if (t.current == '_'):
        t1_pos = 0 
    else:
        t1_pos = res_to_index[t.current]
    

    #go through each local resource
    for t2_pos , r in enumerate(res):
        #check if the resource is station or track and get the corresponding details
        if type(r) is not str:
            s = N.get_track_details(r[0],r[1])

        else:
            s = N.get_station_details(r)
        
        towards = 0                                          #train converging towards the given train  
        away = 0                                             #trains diverging from the given train
        total = len(s.train_running)                         #total number of trains
        
        #for each train in one of the local resource 
        for running_train in s.train_running:

            if not running_train == '_':
                
                #find the next position of the train and depict wether it is moving towards or away from the resource
                t2 = name_train_map[running_train]
                index = t2.current_index * 2 
                if not (t2.station_or_not):
                    index += 1
                
                #if train is not at the last destination
                if ( index + 1 < len(t2.all_resources) ):
                    #get the next resource and find it's position
                    next_res = t2.all_resources[index + 1]
                    
                    #depending on the position, check if the train is toward or away from the current trian.
                    if next_res in res_to_index:
                        t2_next_pos = res_to_index[t2.all_resources[index + 1]]
                        if abs(t2_next_pos - t1_pos) < abs(t2_pos - t1_pos):
                            towards+=1
                        else:
                            away+=1

                    elif type(next_res) is not str and (next_res[1] , next_res[0]) in res_to_index:
                        t2_next_pos = res_to_index[(next_res[1] , next_res[0]) ]
                        if abs(t2_next_pos - t1_pos) < abs(t2_pos - t1_pos):
                            towards+=1
                        else:
                            away+=1

                    else :
                        away+=1
                else :
                    away+=1
                    
        #construct the state vector
        state_vector[t2_pos] = int(R - 1 - min (R-1 , math.floor(total - w_c * towards - w_d * away)))
    
    #make it equal to the size of the state vector
    if (current - b < 0):
        temp_list = [0] * abs(current-b)
        temp_list.extend(state_vector)
        state_vector = temp_list

    if (current + l + 1 > len(t.all_resources)):
        state_vector.extend([0] * (current + l + 1 - len(t.all_resources)) )
        
    #include the priority of the train in the state vector
    state_vector.insert(0 , t.priority)
    
    return state_vector
    

In [11]:
def state_vector_to_index (state_vector , b = 2 , l = 6 , total_priority = 3 , R = 3):
    '''
    This function takes the state vector and convert it into index used for storing purpose
    state vector is a list
    '''
    temp = 1
    ans = 0
    for i in range(1 , len(state_vector)):
        ans += temp * state_vector[i]
        temp *= R
        
    x = R ** (l + b + 1)
    return ans + (state_vector[0]-1) * x

In [12]:
def index_to_state_vector (index , b = 2 , l = 6 , R = 3 , total_priority = 3):
    '''
    Given th index , it returns the state vector
    '''
    state_vector = [0]
    x = R ** (l + b + 1)
    state_vector[0] = index//x + 1
    index = index % x
    
    while index:
        index, r = divmod(index, 3)
        state_vector.append(r)
        
    state_vector.extend( [0] * (l + b + 2 - len(state_vector)) )
    return state_vector

In [13]:
trains = read_trains(config.TRAINS_FILENAME , N , env , delay = 0.0)

In [14]:
name_train_map = {}                #Map from train name to train object; note key can be changed to train_id 
for t in trains:
    name_train_map[t.name] = t

In [15]:
#Define a map from train to color to which it belongs 
#Note this feature considers only 10 trains into account
colors = ['red' , 'saddlebrown' , 'forestgreen','dimgray' , 'darkcyan' , 'royalblue' , 'magenta' , 'darkorange' ] 
permanent_color = 'lightcoral'

train_to_color = {}
if (len(trains) <= 8):
    for i,t in enumerate(trains):
        train_to_color[t.name] = colors[i]
else:
    for i,t in enumerate(trains):
        train_to_color[t.name] = permanent_color
        
print(train_to_color)


{'Train001': 'red', 'Train002': 'saddlebrown', 'Train003': 'forestgreen', 'Train004': 'dimgray', 'Train011': 'darkcyan', 'Train012': 'royalblue', 'Train013': 'magenta', 'Train014': 'darkorange'}


In [16]:
logger = setup_logger("main_log_file" ,"Logs/log.log")

# Simulation and Algorithm

In [17]:
#hyperparameters
pho = 0.25
minimum_J = 1000000
total_priority = 3
R = 3
l = 6
b = 2
w_c = 0.9
w_d = 1
total_episodes = 1000
w = 0.25

tau = 0.9
alpha = 0.9

EPS_START = 1.0
EPS_END = 0.1
EPS_STEPS = 500

CURRENT_EPISODE = 0

In [18]:
#Variables
total_states = total_priority * (R ** (l + b + 1))

success_freq = np.zeros (( total_states , 2 ))
pass_freq = np.zeros ((total_states , 2))
success_prob = 0.5 * np.ones ((total_states , 2))
seen = np.zeros((total_states , 2))

#map for the train most recent state space
train_state_space_map = {} 
for key , value in  name_train_map.items():
    train_state_space_map[key] = '-'
    
#for the Q-values
Q_values = 0.5 * np.ones((total_states , 2))

#total_neighbors and running mean
state_vector_neighbors = np.zeros((total_states , 2))
state_vector_running_mean = np.zeros((total_states , 2))

In [19]:
def epsilon_greedy (state_space):
    '''
    Given the state space, pick the action according to epsilon-greedy policy.    
    '''
   #given the state space, it will select the action according to epsilon greedy policy
    actions = ['move' , 'wait']
    
    #compute epsilon
    epsilon = 0
    if (CURRENT_EPISODE > EPS_STEPS):
        epsilon = EPS_END
    else:
        epsilon = ((EPS_END - EPS_START) / EPS_STEPS) * CURRENT_EPISODE + EPS_START

    #check if the action is random or greedy
    random = np.random.choice([True , False ] , 1 , p = [epsilon , 1 - epsilon])

    #pick the action at random
    if (random):

        return np.random.choice(actions , 1 , p = [0.5 , 0.5]) 
    else:
        
        #pick the action greedily
        q_1 = Q_values[state_vector_to_index(state_space) , 0]
        q_2 = Q_values[state_vector_to_index(state_space) , 1]
        if (q_1 >= q_2):
            return 'move'
        else:
            return 'wait'

In [20]:
random_actions = 0
biased_on_q = 0
biased_on_alpha= 0
greedy_actions = 0

In [21]:
def modified_epsilon_greedy(state_space):
    '''
    given the state space, it will select the action according to the epsilon greedy policy
    as mentioned in the paper
    '''
    global random_actions , biased_on_q , biased_on_alpha , greedy_actions
    actions = ['move' , 'wait']
    
    #compute the epsilon
    epsilon = 0
    if (CURRENT_EPISODE > EPS_STEPS):
        epsilon = EPS_END
    else:
        epsilon = ((EPS_END - EPS_START) / EPS_STEPS) * CURRENT_EPISODE + EPS_START

    #computing Q-values for each action
    q0 = Q_values[state_vector_to_index(state_space) , 0]
    q1 = Q_values[state_vector_to_index(state_space) , 1]
  
    random = np.random.choice([True , False ] , 1 , p = [epsilon , 1 - epsilon])
    if (random):
        
        #choose the action based on the toss of a biased coin
        if (q0 + q1 == 0):
            random_actions += 1
            return np.random.choice(actions , 1 , p = [0.5 , 0.5 ])
        else:
            biased_on_q += 1
            return np.random.choice(actions , 1 , p = [q0/(q0+q1) , q1/(q0+q1)])
    else:
        #choose action greedily if there is sufficient difference between Q-values
        if (min(q0,q1) / max(q0 , q1) < tau):
            greedy_actions += 1
            if (q0>q1):
                return actions[0]
            else:
                return actions[1]
        
        #else choose accorind to alpha
        else:
            biased_on_alpha += 1
            return np.random.choice(actions , 1 , p = [alpha , 1-alpha])
        

In [22]:
def choose_action_algo (env ,N ,name_train_map):
    
    '''
    This function will choose the action for the trains that need action at the
    particular simulation time
    
    This will also check if the move is valid or not.
    '''
    
    actions = ['move' , 'wait']
    
    
    
    while True :
        #This line is extremely important
        #As this line will wait for all the events upto this current simulation point to complete first
        #and then this will execute
        for _ in range(4*len(name_train_map)):
            yield env.timeout(0)
        
        #Check if the trains need action
        if (len(config.TRAINS_NEEDING_ACTION) == 0):
            yield env.timeout(1)
        
        else:
            
            #Pick the train that needs the action most
            time ,name = pick_most_suitable_action(name_train_map , N , env)
            t = name_train_map[name]
            
            #construct the state vector
            state_vector = get_state_vector(name , b = b , l = l , R = R , w_c = w_c , w_d = w_d)

            
            #Pick the action
            #action = epsilon_greedy(state_vector)
            action = modified_epsilon_greedy(state_vector)
#             action = epsilon_greedy(state_vector)
            #if the action is to move, then check if move is valid or not
            #if not : then implement the wait move
            if not (t.is_move_valid(env)):
                action = 'wait'
                
                #since the move is invalid, so mark the success probability of this state-action pair to 0
                success_prob[state_vector_to_index(state_vector) , 0 ] = 0
                #Q_values[state_vector_to_index(state_vector) , 0 ] = 0
            
            action_index = 0
            if action == 'wait':
                action_index = 1
            
            if not (train_state_space_map[name] == '-'):
                #computing the Q_values
                prev_state_vector , prev_action = train_state_space_map[name]
                state_vector_neighbors[state_vector_to_index(prev_state_vector) , prev_action] += 1

                #updating neighboring info and running mean
                k = state_vector_neighbors[state_vector_to_index(prev_state_vector) , prev_action]
                temp = state_vector_running_mean[state_vector_to_index(prev_state_vector) , prev_action]
                
                #Change to Q_values for experiment (scope of improvement)
                #(better)
#                 temp = temp + (Q_values[state_vector_to_index(state_vector) , action_index] - temp ) / k
                #(less better)
                temp = temp + (success_prob[state_vector_to_index(state_vector) , action_index] - temp ) / k
                
                state_vector_running_mean[state_vector_to_index(prev_state_vector) , prev_action] = temp

                #Computing the final Q-value
                first_component = success_prob[state_vector_to_index(prev_state_vector) , prev_action]
                second_component = state_vector_running_mean[state_vector_to_index(prev_state_vector) , prev_action]

                #updating Q-values
                Q_values[state_vector_to_index(prev_state_vector) , prev_action] = w*first_component + (1 - w)*second_component
                

            #updating the recent q_value
            train_state_space_map[name] = (state_vector , action_index)    
            seen[state_vector_to_index(state_vector) , action_index ] = 1
            #Create the process that completes the action
            env.process(t.act_simulate(env , action))
            
            #Remove the name of the train from the list
            config.TRAINS_NEEDING_ACTION.remove((time,  name) )

In [23]:
# sim_proc = env.process(update_graph(env , 1 , train_to_color =train_to_color))
def start_episode():
    '''
    Reset everything at the start of the episode
    '''
    env = simpy.Environment()
    N.reset(env)
    for t in trains:
        t.reset(env , N)
    
        
    dead_proc = env.process(deadlock_detection_process(env , 20))
    stat_proc = env.process(create_statistic_proc(env , logger , 20 , N , trains))

    for t in trains:
        proc = env.process(t.put_train_on_track(env))

    proc_act = env.process(choose_action_algo(env , N, name_train_map))
    return env,N

In [24]:
#this cell runs the main algo

#For plotting and storing info
J_plot = []
minimum_J_plot = []
cumsum , moving_aves = [0] ,[]
Q_values_plot = []
Q_values_plot.append(copy.deepcopy(Q_values))
deadlock_plot = []

#for logging about the total success, deadlock and completed episodes
total_success = 0
total_deadlock = 0
completed_episodes = 0

#For plotting
fig , axes= plt.subplots (2, 1 , figsize = (6 , 12))
fig.show()
fig.canvas.draw()

random_actions_list = []
biased_on_q_list = []
biased_on_alpha_list = []
greedy_actions_list = []
#Run for the required number of episodes
for episode in range(total_episodes):

    #reset the episode
    for key , value in  name_train_map.items():
        train_state_space_map[key] = '-'
    
    
    config.TRAINS_NEEDING_ACTION = []
    env , N = start_episode()    
    config.DEADLOCK = True
    config.TRAINS_COMPLETED_JOURNEY = False
    config.CURRENT_SIMULATION_TIME = 0
    
    CURRENT_EPISODE += 1

    #mark the seen to all zero for the current episode
    seen = np.zeros((total_states , 2))
    
    random_actions = 0
    biased_on_q = 0
    biased_on_alpha= 0
    greedy_actions = 0
    
    #This loop is not needed as such
    for time in range(0 , config.TOTAL_SIMULATION_TIME , 20):
        if (config.DEADLOCK== False):
            logger.warning("Time : {} Terminating Simulation. Deadlock detected".format(env.now))
            #print("Terminating Simulation : Deadlock detected")
            total_deadlock += 1 
            break

        if (config.TRAINS_COMPLETED_JOURNEY == True):
            logger.info("Time : {} All trains completed journey. Terminating Simulation".format(env.now))
            #print("Terminating Simulation : All trains completed journey")
            break

        config.CURRENT_SIMULATION_TIME += 20

        #Run the simulation
        env.run(until = config.CURRENT_SIMULATION_TIME)

        
    #check if the episode is successful or not
    success = False    
    if (config.TRAINS_COMPLETED_JOURNEY == True):
        deadlock_plot.append(total_deadlock/(episode + 1) )
        J = compute_loss(trains) 
        if (J < (1 + pho) * minimum_J):
            success = True 
            total_success += 1
        
        minimum_J = min (J , minimum_J)
        J_plot.append(J)
        minimum_J_plot.append(minimum_J)
        
        #updating the running mean
        cumsum.append(cumsum[completed_episodes] + J)
        if completed_episodes > 50:
            moving_ave = (cumsum[completed_episodes+1] - cumsum[completed_episodes+1-50])/50
            moving_aves.append(moving_ave)
        else:
            moving_aves.append(0)
        completed_episodes += 1
        
    elif (config.DEADLOCK == False):
        deadlock_plot.append(total_deadlock/(episode + 1) )

    #updating success and pass frequency
    if (success):
        success_freq[seen == 1] += 1 

    pass_freq = pass_freq + seen 
    for x in range(total_states):
        for y in range(2):
            if not (pass_freq[x,y] == 0):
                success_prob[x,y] = success_freq[x,y] / pass_freq[x,y] 
    
    #printing the log
    if ((episode + 1) % 50  == 0):
        x , _ = np.where(pass_freq > 0)
        print ("Episode : {} , minimum_J : {:.5f} , total_success : {} , total_deadlock : {} , Pass : {}".format( 
                                                episode , minimum_J , total_success , total_deadlock,len(x) ))
    #plotting the graph    
    if ( (episode + 1) % 10 == 0  ):
        axes[0].set_ylim((0, 20))
        axes[0].set_xlabel('Number of episodes')
        axes[0].set_ylabel('Priority weighted delay')
        axes[0].set_title("Episode : {}".format(episode))
        axes[0].scatter(range(1 , len(J_plot) + 1) , J_plot ,s = 80 , facecolors = 'none' , edgecolors = 'b')
        axes[0].plot(moving_aves , color = 'r')

        axes[1].set_xlabel("Total episodes")
        axes[1].set_ylabel("Total deadlocks")
        axes[1].plot(deadlock_plot ,  color = 'g')

        fig.canvas.draw()
    
    #Copy the Q-values for plotting
    if ((episode+1)%100 == 0):
        Q_values_plot.append(copy.deepcopy(Q_values))
        
    #debugging action
    total_actions = random_actions + biased_on_q + biased_on_alpha + greedy_actions
    random_actions_list.append(random_actions/total_actions)
    biased_on_q_list.append(biased_on_q/total_actions)
    biased_on_alpha_list.append(biased_on_alpha/total_actions)
    greedy_actions_list.append(greedy_actions/total_actions)
    
    

<IPython.core.display.Javascript object>

Episode : 49 , minimum_J : 6.00000 , total_success : 15 , total_deadlock : 2 , Pass : 575
Episode : 99 , minimum_J : 5.93750 , total_success : 20 , total_deadlock : 10 , Pass : 697
Episode : 149 , minimum_J : 4.96250 , total_success : 30 , total_deadlock : 14 , Pass : 739
Episode : 199 , minimum_J : 4.96250 , total_success : 34 , total_deadlock : 21 , Pass : 785
Episode : 249 , minimum_J : 4.12500 , total_success : 41 , total_deadlock : 24 , Pass : 819
Episode : 299 , minimum_J : 4.06250 , total_success : 55 , total_deadlock : 28 , Pass : 835
Episode : 349 , minimum_J : 3.21250 , total_success : 68 , total_deadlock : 28 , Pass : 844
Episode : 399 , minimum_J : 3.10000 , total_success : 76 , total_deadlock : 28 , Pass : 853
Episode : 449 , minimum_J : 2.82500 , total_success : 83 , total_deadlock : 28 , Pass : 858
Episode : 499 , minimum_J : 2.65000 , total_success : 93 , total_deadlock : 28 , Pass : 862
Episode : 549 , minimum_J : 2.17500 , total_success : 102 , total_deadlock : 29 , P

# plotting actions

In [25]:
# plt.ylim( (-10 , 300) )
plt.plot(biased_on_q_list , label = 'q')
plt.plot(biased_on_alpha_list , label = 'alpha')
plt.plot(greedy_actions_list , label = 'greedy')
plt.plot(random_actions_list, label = 'random')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7f2db013a250>

# plotting cummulative values

In [26]:
plot_bool= np.where(pass_freq > 0)

In [27]:
for i in range(len(Q_values_plot)):
    plot_Q_values = Q_values_plot[i][plot_bool]
    hist , bin_edges = np.histogram(plot_Q_values , bins = np.linspace(0 ,  1 , 101))
    plt.plot(bin_edges[:-1],  np.cumsum(hist) , label = 'Episode - {}'.format(i * 100) )
    
plt.legend()
plt.xlabel('Q-value')
plt.ylabel('Cumulative percentage')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Cumulative percentage')

# plotting Q-for different action

In [55]:
Q_action_move = []
Q_action_wait = []
my_set = set()

for i in plot_bool[0]:
    my_set.add(i)
    
    
for i in my_set:
    Q_action_move.append(Q_values[i, 0])
    Q_action_wait.append(Q_values[i,1])
    
total = 0
for i,x in enumerate(my_set):
    q0 = Q_values[x , 0]
    q1 = Q_values[x , 1]
    if  not (min(q0,q1) / max(q0 , q1) < tau):
        total+=1
        print("{} {:.5f} {:.5f} {} {}".format(i ,q0 , q1 , pass_freq[x , 0] , pass_freq[x , 1]) ,end = ' ')
        print(index_to_state_vector(i , b = b , l = l , R = R , total_priority = total_priority))
    
print(len(Q_action_move) - total)
    
# plt.scatter(range(1 , 101) , Q_action_move[:100] ,s = 10 , facecolors = 'none' , edgecolors = 'b' , label = 'move')
# plt.scatter(range(1 , 101) , Q_action_wait[:100] ,s = 10 , facecolors = 'none' , edgecolors = 'r' , label = 'wait')
# plt.legend()

0 0.20802 0.23010 986.0 735.0 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
1 0.17235 0.18643 50.0 13.0 [1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
3 0.20866 0.23166 876.0 486.0 [1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
6 0.20440 0.21958 688.0 253.0 [1, 0, 2, 0, 0, 0, 0, 0, 0, 0]
8 0.21129 0.22359 27.0 15.0 [1, 2, 2, 0, 0, 0, 0, 0, 0, 0]
9 0.17722 0.17754 7.0 4.0 [1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
10 0.50000 0.50000 0.0 1.0 [1, 1, 0, 1, 0, 0, 0, 0, 0, 0]
11 0.21031 0.22932 984.0 753.0 [1, 2, 0, 1, 0, 0, 0, 0, 0, 0]
12 0.21242 0.23229 841.0 367.0 [1, 0, 1, 1, 0, 0, 0, 0, 0, 0]
13 0.20293 0.22502 162.0 59.0 [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
14 0.18526 0.20378 92.0 27.0 [1, 2, 1, 1, 0, 0, 0, 0, 0, 0]
18 0.21096 0.23254 982.0 556.0 [1, 0, 0, 2, 0, 0, 0, 0, 0, 0]
19 0.22141 0.21279 141.0 53.0 [1, 1, 0, 2, 0, 0, 0, 0, 0, 0]
20 0.23147 0.24991 22.0 11.0 [1, 2, 0, 2, 0, 0, 0, 0, 0, 0]
21 0.20847 0.22330 941.0 500.0 [1, 0, 1, 2, 0, 0, 0, 0, 0, 0]
22 0.20616 0.22597 225.0 66.0 [1, 1, 1, 2, 0, 0, 0, 0, 0, 0]
25 0.20191 0.19934 241.0 79.0 [1,



# Resource Usage

In [28]:
G,train_nodes , station_nodes , track_nodes = create_resource_usage_graph(trains , N)

In [29]:
fig = plt.figure(figsize = (6 , 6))
ax = fig.add_subplot(111)
draw_network_usage_graph(G,train_nodes , station_nodes , track_nodes, N,ax)

<IPython.core.display.Javascript object>

# TODO

1. Create a simple train class with all the parameters                                                Done
2. Implement the resource facility with the track and the station                                     Done
3. Run the single train on the track (without getting worried wether the resource is free or not)     Done  
4. Simulate whole thing on graph (single train only)                                                  Done
5. Run multiple instances of the train without worrying about the deadlock                            Done
   Create action for each train                                                                       Done
6. Try to simulate as you want to do in the project i.e. take action from the user.                   Done
   Also able to order the train for which to take the action at a particular time 
   
   
   
7. Try to create deadlock with the trains                                                             Done 
8. Create the graph for resource usage.                                                               Done  
9. Use the standard deadlock detection algorithm for the detection of the algorithm                   Done 
   Implement the Bank's algorithm for deadlock detection.


# List of reasons of creating events
1. If a train is standing at a station, the event processing time            Can be done by implementing timeout if train arrive early before depart time
    corresponds to the earliest time at which the train can depart,          occupancy of next track.
    as defined by its minimum halt time at the station and by any           Done
    departure time constraints enforced for passenger convenience.

2. `If it is running between two stations, the event processing time         Already done   
    corresponds to the earliest time at which it can arrive at the          occupancy of next station if move.
    next station, as defined by the length of the track and the train       Done
    running speed.

3. If the train is yet to start, the event processing                       Done
    time is the time at which it is expected at the starting station.       

# TODO

Create the actions properly and implement deadlock Avoidance heuristic

1. Code the proper list of events. All the three listed above.                                      Done
2. Create the proper time table with arrival and departure time of each train at each station.      Done
3. Create separate log file for each train.                                                         Done
3. Run simulation with the first toy environment (as in the paper).                                 Done
4. Label the current set of implementation under Simulation Phase.                                  Done
5. Implement the actions properly without actually waiting for the resource.                        Done
6. Understand Heuristic that can be used to avoid deadlock.                                         Done
7. Move that train that occupies the most congested resource first and then other.                  Done
   If tie break it using priority of each train.
   
   
8. Implement the state vector when action need to be taken.                                       Note Done



# TODO 
1. Implement the function to compute J      $\;\;\;\;\;\;\;\;\;\;\;$   Done
2. Implement the state space i.e. when taking action train should know what is the state space 
3. Implement Proxy reward.
4. Implement the Q-learning algo given in the paper
5. Check for the first test case