In [1]:
import config
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib
import random
import time
from network import *
from utility import *
from train import *
import simpy
import logging
import math

In [2]:
%matplotlib notebook
plt.ion()

In [3]:
env = simpy.Environment()

In [4]:
N = Network(config.STATIONS_FILENAME, config.RAILWAY_FILENAME , env)

In [5]:
def create_statistic_proc(env , logger , stat_time , N , trains):
    '''
    Create statistic in the log file every stat_time
    @parameters:
    env : simpy environment
    logger : log file where need to create the log
    stat_time : time after which log is to be created
    N         : network
    trains    : trains in the network
    '''
    while True :
        yield env.timeout(20)

        #create statistics
        not_yet_started = 0
        running = 0
        Completed_resource_not_freed = 0
        Completed = 0

        for t in trains:
            status = t.status()
            if (status == 'not_yet_started'):
                not_yet_started += 1
            elif (status == 'running'):
                running += 1
            elif(status == 'Completed_resource_not_freed'):
                Completed_resource_not_freed += 1
            else:
                Completed += 1
        
        if (len(trains) == Completed):
            config.TRAINS_COMPLETED_JOURNEY = True

        logger.info("Time : {} Number of trains not yet started - {}".format(env.now , not_yet_started))
        logger.info("Time : {} Number of running trains - {}".format(env.now , running))
        logger.info("Time : {} Number of trains Completed but resource not freed - {}".format(env.now , Completed_resource_not_freed))
        logger.info("Time : {} Number of trains which have Completed journey - {}".format(env.now , Completed))

In [6]:
#utility function
def update_graph (env , interval , train_to_color = None):
    '''
    This function will simulate the whole process on a GUI
    This function is going to be used as the process
    @parameters
    env : simpy environment
    total_time : total time for the simulation
    '''
    fig = plt.figure(figsize = (6 , 6))
    ax = fig.add_subplot(111)
    fig.show()
    fig.canvas.draw()
    
    while True:
        #Draw graph
        
        N.draw_railway_network(ax  = ax , suppress_station_info= False , 
                               suppress_track_info= False , suppress_edge_label=False , 
                               train_to_color=train_to_color)


        #yield interval time
        yield env.timeout(interval)
        time.sleep(0.5)
        
        #log info
        plt.title("Time {}".format(env.now))
        
        #Paint the canvas
        fig.canvas.draw()

In [7]:
def deadlock_detection_process (env , deadlock_time):
    '''
    For creating deadlock detection process
    '''
    global DEADLOCK
    while True:
        
        #Invoke this deadlock process after every 20 iterations
        
        yield env.timeout(deadlock_time)
        logger.info("Time : {} Checking Deadlock".format(env.now))
        config.DEADLOCK , _ = deadlock_detection(N , trains)
        


In [8]:
def compute_loss_one_train (t):
    '''
    This function should be called after the successful completion of the train
    @Parameter : Name of the train 
    @Return : Loss for the train  
    '''
    p = t.priority
    loss = 0
    
    for route , log in zip(t.route , t.log):
        d , t1 , t2 = route
        _ , t3 , t4 = log
        
        assert t3 >= t1 and t4 >= t2
        loss += (t3 - t1) + (t4 - t2)

    return loss / p
    

In [9]:

def compute_loss (trains):
    '''
    Takes all the trains in the network after completing the journey and then computes the value of J
    '''
    total_loss = 0
    total_departures = 0
    for t in trains:
        total_departures += 2*len(t.route)
        total_loss += compute_loss_one_train(t)
        
    total_loss = total_loss / total_departures
    return total_loss

In [10]:
def get_state_vector (train_name , b = 2 , l= 6 , R = 3 , w_c = 1 , w_d = 1):
    #get the train
    t = name_train_map[train_name]
    
    #find the current position 
    current = t.current_index * 2;
    if (not t.station_or_not):
        current += 1
            
    #get the local neighbors
    res =t.all_resources[ max(0 , current - b) : min(current + l+1 , len(t.all_resources )) ]
    state_vector = [0 for _ in range(len(res))]
    res_to_index = {}
    for i in range(len(res)):
        res_to_index[res[i]] = i

    #position of the train in the local space
    if (t.current == '_'):
        t1_pos = 0 
    else:
        t1_pos = res_to_index[t.current]
    

    #go through each local resource
    for t2_pos , r in enumerate(res):
        #check if the resource is station or track and get the corresponding details
        if type(r) is not str:
            s = N.get_track_details(r[0],r[1])

        else:
            s = N.get_station_details(r)
        
        towards = 0                                          #train converging towards the given train  
        away = 0                                             #trains diverging from the given train
        total = len(s.train_running)                         #total number of trains
        
        #for each train in one of the local resource 
        for running_train in s.train_running:

            if not running_train == '_':
                
                #find the next position of the train and depict wether it is moving towards or away from the resource
                t2 = name_train_map[running_train]
                index = t2.current_index * 2 
                if not (t2.station_or_not):
                    index += 1
                
                #if train is not at the last destination
                if ( index + 1 < len(t2.all_resources) ):
                    #get the next resource and find it's position
                    next_res = t2.all_resources[index + 1]
                    if next_res in res_to_index:
                        t2_next_pos = res_to_index[t2.all_resources[index + 1]]
                        if abs(t2_next_pos - t1_pos) < abs(t2_pos - t1_pos):
                            towards+=1
                        else:
                            away+=1

                    elif type(next_res) is not str and (next_res[1] , next_res[0]) in res_to_index:
                        t2_next_pos = res_to_index[(next_res[1] , next_res[0]) ]
                        if abs(t2_next_pos - t1_pos) < abs(t2_pos - t1_pos):
                            towards+=1
                        else:
                            away+=1

                    else :
                        away+=1
                else :
                    away+=1
#         print("Resource {} Total = {} Towards = {} Away = {}".format(r , len(s.train_running) , towards , away))
        #construct the state vector
        state_vector[t2_pos] = int(R - 1 - min (R-1 , math.floor(total - w_c * towards - w_d * away)))
    
    #make it equal to the size of the state vector
    if (current - b < 0):
        temp_list = [0] * abs(current-b)
        temp_list.extend(state_vector)
        state_vector = temp_list

    if (current + l + 1 > len(t.all_resources)):
        state_vector.extend([0] * (current + l + 1 - len(t.all_resources)) )
        
    #include the priority of the train in the state vector
    state_vector.insert(0 , t.priority)
    
    return state_vector
    

In [11]:
def state_vector_to_index (state_vector , b = 2 , l = 6 , total_priority = 3 , R = 3):
    '''
    This function takes the state vector and convert it into index used for storing purpose
    state vector is a list
    '''
    temp = 1
    ans = 0
    for i in range(1 , len(state_vector)):
        ans += temp * state_vector[i]
        temp *= R
        
    x = R ** (l + b + 1)
    return ans + (state_vector[0]-1) * x

In [12]:
def index_to_state_vector (index , b = 2 , l = 6 , R = 3 , total_priority = 3):
    '''
    Given th index , it returns the state vector
    '''
    state_vector = [0]
    x = R ** (l + b + 1)
    state_vector[0] = index//x + 1
    index = index % x
    
    while index:
        index, r = divmod(index, 3)
        state_vector.append(r)
        
    state_vector.extend( [0] * (l + b + 2 - len(state_vector)) )
    return state_vector

In [13]:
trains = read_trains(config.TRAINS_FILENAME , N , env)

In [14]:
name_train_map = {}                #Map from train name to train object; note key can be changed to train_id 
for t in trains:
    name_train_map[t.name] = t

In [15]:
#Define a map from train to color to which it belongs 
#Note this feature considers only 10 trains into account
colors = ['red' , 'saddlebrown' , 'forestgreen','dimgray' , 'darkcyan' , 'royalblue' , 'magenta' , 'darkorange' ] 
permanent_color = 'lightcoral'

train_to_color = {}
if (len(trains) <= 8):
    for i,t in enumerate(trains):
        train_to_color[t.name] = colors[i]
else:
    for i,t in enumerate(trains):
        train_to_color[t.name] = permanent_color
        
print(train_to_color)


{'Train001': 'red', 'Train002': 'saddlebrown', 'Train003': 'forestgreen', 'Train004': 'dimgray', 'Train011': 'darkcyan', 'Train012': 'royalblue', 'Train013': 'magenta', 'Train014': 'darkorange'}


In [16]:
logger = setup_logger("main_log_file" ,"Logs/log.log")

# Simulation and Algorithm

In [17]:
#hyperparameters
pho = 0.25
minimum_J = 1000000
total_priority = 3
R = 3
l = 6
b = 2
w_c = 0.9
w_d = 1
total_episodes = 500
w = 0.25

tau = 0.9
alpha = 0.9

EPS_START = 1.0
EPS_END = 0.1
EPS_STEPS = 300

CURRENT_EPISODE = 0

In [18]:
#Variables
total_states = total_priority * (R ** (l + b + 1))

success_freq = np.zeros (( total_states , 2 ))
pass_freq = np.zeros ((total_states , 2))
success_prob = 0.5 * np.ones ((total_states , 2))
seen = np.zeros((total_states , 2))

#map for the train most recent state space
train_state_space_map = {} 
for key , value in  name_train_map.items():
    train_state_space_map[key] = '-'
    
#for the Q-values
Q_values = 0.5 * np.ones((total_states , 2))

#total_neighbors
state_vector_neighbors = np.zeros((total_states , 2))
state_vector_running_mean = np.zeros((total_states , 2))

In [19]:
epsilon_list = []
train_states = []
train_name = 'Train001'

In [20]:
def epsilon_greedy (state_space):
   #given the state space, it will select the action according to epsilon greedy policy
    actions = ['move' , 'wait']
    epsilon = 0
    if (CURRENT_EPISODE > EPS_STEPS):
        epsilon = EPS_END
    else:
        epsilon = ((EPS_END - EPS_START) / EPS_STEPS) * CURRENT_EPISODE + EPS_START
        
    epsilon_list.append(epsilon)
    random = np.random.choice([True , False ] , 1 , p = [epsilon , 1 - epsilon])
    
    if (random):
        return np.random.choice(actions , 1 , p = [0.5 , 0.5]) 
    else:
        q_1 = Q_values[state_vector_to_index(state_space) , 0]
        q_2 = Q_values[state_vector_to_index(state_space) , 1]
        if (q_1 >= q_2):
            return 'move'
        else:
            return 'wait'

In [21]:
def modified_epsilon_greedy(state_space):
    #given the state space, it will select the action according to the epsilon greedy policy as mentioned in the paper
    actions = ['move' , 'wait']
    
    epsilon = 0
    if (CURRENT_EPISODE > EPS_STEPS):
        epsilon = EPS_END
    else:
        epsilon = ((EPS_END - EPS_START) / EPS_STEPS) * CURRENT_EPISODE + EPS_START
        
    q0 = Q_values[state_vector_to_index(state_space) , 0]
    q1 = Q_values[state_vector_to_index(state_space) , 1]
  
    
    random = np.random.choice([True , False ] , 1 , p = [epsilon , 1 - epsilon])
    if (random):
        #choose the action based on the toss of a biased coin
        if (q0 + q1 == 0):
            return np.random.choice(actions , 1 , p = [0.5 , 0.5 ])
        else:
            return np.random.choice(actions , 1 , p = [q0/(q0+q1) , q1/(q0+q1)])
    else:
        if (min(q0,q1) / max(q0 , q1) < tau):
            if (q0>q1):
                return actions[0]
            else:
                return actions[1]
            
        else:
            return np.random.choice(actions , 1 , p = [alpha , 1-alpha])
        

In [22]:
def choose_action_algo (env ,N ,name_train_map):
    
    '''
    This function will choose the action for the trains that need action at the
    particular simulation time
    
    This will also check if the move is valid or not.
    '''
    
    actions = ['move' , 'wait']
    
    
    
    while True :
        #This line is extremely important
        #As this line will wait for all the events upto this current simulation point to complete first
        #and then this will execute
        for _ in range(4*len(name_train_map)):
            yield env.timeout(0)
        
        #Check if the trains need action
        if (len(config.TRAINS_NEEDING_ACTION) == 0):
            yield env.timeout(1)
        
        else:
            
            #Pick the train that needs the action most
#             time , name = config.TRAINS_NEEDING_ACTION[0]
            
            time ,name = pick_most_suitable_action(name_train_map , N , env)
            t = name_train_map[name]
        
            state_vector = get_state_vector(name , b = b , l = l , R = R , w_c = w_c , w_d = w_d)
            
            #for debugging 
            if (train_name == name):
                train_states.append([CURRENT_EPISODE , state_vector_to_index(state_vector)] )
            
            #Pick the action
#             action = epsilon_greedy(state_vector)
            action = modified_epsilon_greedy(state_vector)
            #if the action is to move, then check if move is valid or not
            #if not : then implement the wait move
            if not (t.is_move_valid(env)):
                action = 'wait'
                
                #since the move is invalid, so mark the success probability of this state-action pair to 0
                success_prob[state_vector_to_index(state_vector) , 0 ] = 0
                Q_values[state_vector_to_index(state_vector) , 0 ] = 0
            
            action_index = 0
            if action == 'wait':
                action_index = 1
            
            if not (train_state_space_map[name] == '-'):
                #computing the Q_values
                prev_state_vector , prev_action = train_state_space_map[name]
                state_vector_neighbors[state_vector_to_index(prev_state_vector) , prev_action] += 1

                #updating neighboring info and running mean
                k = state_vector_neighbors[state_vector_to_index(prev_state_vector) , prev_action]
                temp = state_vector_running_mean[state_vector_to_index(prev_state_vector) , prev_action]
                temp = temp + (success_prob[state_vector_to_index(state_vector) , action_index] - temp ) / k
                state_vector_running_mean[state_vector_to_index(prev_state_vector) , prev_action] = temp

                first_component = success_prob[state_vector_to_index(prev_state_vector) , prev_action]
                second_component = state_vector_running_mean[state_vector_to_index(prev_state_vector) , prev_action]

                #updating Q-values
                Q_values[state_vector_to_index(prev_state_vector) , prev_action] = w*first_component + (1 - w)*second_component
                

            #updating the recent q_value
            train_state_space_map[name] = (state_vector , action_index)    
            seen[state_vector_to_index(state_vector) , action_index ] = 1
            #Create the process that completes the action
            env.process(t.act_simulate(env , action))
            
            #Remove the name of the train from the list
            config.TRAINS_NEEDING_ACTION.remove((time,  name) )
            
            
            


In [23]:
# sim_proc = env.process(update_graph(env , 1 , train_to_color =train_to_color))

def start_episode():

    env = simpy.Environment()
    N.reset(env)
    for t in trains:
        t.reset(env , N)
    
        
    dead_proc = env.process(deadlock_detection_process(env , 20))
    stat_proc = env.process(create_statistic_proc(env , logger , 20 , N , trains))

    for t in trains:
        proc = env.process(t.put_train_on_track(env))

    proc_act = env.process(choose_action_algo(env , N, name_train_map))
    return env,N

In [24]:
steps = 20
J_plot = []
minimum_J_plot = []
total_success = 0
total_deadlock = 0

fig = plt.figure(figsize = (6 , 6))
fig.show()
fig.canvas.draw()

cumsum , moving_aves = [0] ,[]


for episode in range(total_episodes):
    
#     print("Episode : " , episode)
    config.TRAINS_NEEDING_ACTION = []
    env , N = start_episode()    
    config.DEADLOCK = True
    config.TRAINS_COMPLETED_JOURNEY = False
    config.CURRENT_SIMULATION_TIME = 0
    
    CURRENT_EPISODE += 1

    seen = np.zeros((total_states , 2))

    #This loop is not needed as such
    for time in range(0 , config.TOTAL_SIMULATION_TIME , steps):
        if (config.DEADLOCK== False):
            logger.warning("Time : {} Terminating Simulation. Deadlock detected".format(env.now))
#             print("Terminating Simulation : Deadlock detected")
            total_deadlock += 1 
            break

        if (config.TRAINS_COMPLETED_JOURNEY == True):
            logger.info("Time : {} All trains completed journey. Terminating Simulation".format(env.now))
#             print("Terminating Simulation : All trains completed journey")
            break

        config.CURRENT_SIMULATION_TIME += 20


        #Run the simulation
        env.run(until = config.CURRENT_SIMULATION_TIME)

    success = False
    
    if (config.TRAINS_COMPLETED_JOURNEY == True):
        J = compute_loss(trains) 
        if (J < (1 + pho) * minimum_J):
            success = True 
            total_success += 1
        minimum_J = min (J , minimum_J)
    
    if (success):
        success_freq[seen == 1] += 1 

    pass_freq = pass_freq + seen 
    for x in range(total_states):
        for y in range(2):
            if not (pass_freq[x,y] == 0):
                success_prob[x,y] = success_freq[x,y] / pass_freq[x,y] 
    
    if ((episode + 1) % 50  == 0):
        x , _ = np.where(pass_freq > 0)
        print ("Episode : {} J : {} , minimum_J : {} , total_success : {} , total_deadlock : {} , Pass : {}".format( 
                                                                                        episode, J , minimum_J , 
                                                                                 total_success , total_deadlock,
                                                                                    len(x) ))
    #for plotting
    J_plot.append(J)
    minimum_J_plot.append(minimum_J)
    
    plt.xlabel('Number of episodes')
    plt.ylabel('Priority weighted delay')
    
    plt.scatter(range(1 , len(J_plot) + 1) , J_plot ,s = 80 , facecolors = 'none' , edgecolors = 'b')
    
    
    cumsum.append(cumsum[episode] + J)
    if episode > 50:
        moving_ave = (cumsum[episode+1] - cumsum[episode+1-50])/50
        moving_aves.append(moving_ave)
    else:
        moving_aves.append(0)
    plt.plot(moving_aves , color = 'r')
    
        
    fig.canvas.draw()

<IPython.core.display.Javascript object>



Episode : 49 J : 9.1125 , minimum_J : 6.825 , total_success : 19 , total_deadlock : 4 , Pass : 562
Episode : 99 J : 8.5625 , minimum_J : 5.475 , total_success : 33 , total_deadlock : 4 , Pass : 645
Episode : 149 J : 5.7375 , minimum_J : 4.6875 , total_success : 54 , total_deadlock : 6 , Pass : 694
Episode : 199 J : 7.1 , minimum_J : 3.55 , total_success : 69 , total_deadlock : 6 , Pass : 731
Episode : 249 J : 3.5875 , minimum_J : 3.4125 , total_success : 85 , total_deadlock : 6 , Pass : 744
Episode : 299 J : 2.7375 , minimum_J : 2.7375 , total_success : 104 , total_deadlock : 6 , Pass : 746
Episode : 349 J : 3.85 , minimum_J : 2.4125 , total_success : 122 , total_deadlock : 6 , Pass : 750
Episode : 399 J : 3.1625 , minimum_J : 2.4125 , total_success : 131 , total_deadlock : 6 , Pass : 753
Episode : 449 J : 4.275 , minimum_J : 2.4125 , total_success : 136 , total_deadlock : 6 , Pass : 754
Episode : 499 J : 4.15 , minimum_J : 2.4125 , total_success : 142 , total_deadlock : 6 , Pass : 760

In [25]:
st,ac = np.where (pass_freq > 0)
for s,a in zip(st , ac):
    print(index_to_state_vector(s) , a , end = ' ')
    print ("{:.5f} {:.5f} {} {:.2f}".format(Q_values[s , 0] , Q_values[s,1], pass_freq[s,a] ,
                                           success_prob[s,a]))
    

[1, 0, 1, 0, 0, 0, 0, 0, 0, 0] 0 0.24684 0.27405 495.0 0.18
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0] 1 0.24684 0.27405 439.0 0.18
[1, 1, 1, 0, 0, 0, 0, 0, 0, 0] 0 0.18731 0.21037 13.0 0.00
[1, 1, 1, 0, 0, 0, 0, 0, 0, 0] 1 0.18731 0.21037 12.0 0.08
[1, 2, 1, 0, 0, 0, 0, 0, 0, 0] 0 0.14278 0.50000 1.0 0.00
[1, 0, 2, 0, 0, 0, 0, 0, 0, 0] 0 0.24204 0.26831 439.0 0.18
[1, 0, 2, 0, 0, 0, 0, 0, 0, 0] 1 0.24204 0.26831 318.0 0.20
[1, 1, 2, 0, 0, 0, 0, 0, 0, 0] 0 0.19982 0.15000 7.0 0.14
[1, 1, 2, 0, 0, 0, 0, 0, 0, 0] 1 0.19982 0.15000 2.0 0.00
[1, 2, 2, 0, 0, 0, 0, 0, 0, 0] 0 0.24695 0.12500 1.0 0.00
[1, 2, 2, 0, 0, 0, 0, 0, 0, 0] 1 0.24695 0.12500 1.0 0.00
[1, 0, 1, 1, 0, 0, 0, 0, 0, 0] 0 0.25890 0.27736 12.0 0.25
[1, 0, 1, 1, 0, 0, 0, 0, 0, 0] 1 0.25890 0.27736 4.0 0.00
[1, 1, 1, 1, 0, 0, 0, 0, 0, 0] 0 0.14693 0.50000 1.0 0.00
[1, 0, 2, 1, 0, 0, 0, 0, 0, 0] 0 0.20550 0.14354 6.0 0.00
[1, 0, 2, 1, 0, 0, 0, 0, 0, 0] 1 0.20550 0.14354 3.0 0.00
[1, 1, 2, 1, 0, 0, 0, 0, 0, 0] 0 0.14693 0.50000 1.0 0.00
[1,

In [25]:
train_states = np.array(train_states)
index_list , _ = np.where(train_states == 5)
for i in index_list:
    print(index_to_state_vector(train_states[i, 1]))

[1, 0, 0, 0, 0, 1, 0, 1, 0, 1]
[1, 0, 0, 2, 1, 0, 1, 0, 1, 0]
[1, 0, 1, 2, 0, 1, 0, 1, 0, 2]
[1, 0, 2, 0, 1, 0, 1, 0, 2, 0]
[1, 2, 0, 2, 0, 1, 1, 1, 0, 0]
[1, 1, 0, 2, 0, 2, 0, 1, 0, 0]
[1, 2, 0, 2, 0, 2, 0, 1, 0, 0]
[1, 1, 0, 2, 0, 2, 0, 1, 0, 0]
[1, 0, 1, 2, 1, 0, 1, 0, 0, 0]
[1, 0, 2, 2, 1, 0, 2, 0, 0, 0]
[1, 1, 1, 1, 2, 0, 2, 0, 0, 0]
[1, 2, 1, 0, 1, 0, 2, 0, 0, 0]
[1, 2, 0, 2, 0, 2, 0, 0, 0, 0]
[1, 1, 0, 2, 0, 2, 0, 0, 0, 0]
[1, 2, 0, 2, 0, 2, 0, 0, 0, 0]
[1, 0, 1, 1, 1, 0, 0, 0, 0, 0]
[1, 1, 0, 2, 0, 0, 0, 0, 0, 0]
[1, 1, 0, 2, 0, 0, 0, 0, 0, 0]
[1, 0, 2, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 2, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 2, 0, 0, 0, 0, 0, 0, 0]
[1, 2, 1, 0, 0, 0, 0, 0, 0, 0]
[1, 2, 1, 0, 0, 0, 0, 0, 0, 0]


In [25]:
plot_Q_values = Q_values[np.where(pass_freq > 0)]
hist , bin_edges = np.histogram(plot_Q_values , bins = np.linspace(0 ,  1 , 101))

In [26]:
hist

array([ 7,  0,  0,  0,  2,  2,  2,  0,  1,  0,  1,  1, 29,  2,  4,  5,  9,
        1,  7,  6, 10, 11,  9, 21, 14, 24, 34, 41, 56, 68, 56, 58, 58, 42,
       31, 13, 18, 21,  8,  9, 10, 10,  3,  4,  4,  4,  6,  4,  2,  1, 29,
        1,  3,  2,  0,  1,  2,  1,  0,  0,  2,  0,  5,  0,  0,  0,  0,  0,
        2,  0,  0,  1,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [28]:
plt.plot(bin_edges[:-1],  np.cumsum(hist) )
# plt.scatter(bin_edges[:-1] , np.cumsum(hist) ,s = 80 , facecolors = 'none' , edgecolors = 'b')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7ff461c21a10>]

# Resource Usage

In [29]:
G,train_nodes , station_nodes , track_nodes = create_resource_usage_graph(trains , N)

In [30]:
fig = plt.figure(figsize = (6 , 6))
ax = fig.add_subplot(111)
draw_network_usage_graph(G,train_nodes , station_nodes , track_nodes, N,ax)

<IPython.core.display.Javascript object>

# TODO

1. Create a simple train class with all the parameters                                                Done
2. Implement the resource facility with the track and the station                                     Done
3. Run the single train on the track (without getting worried wether the resource is free or not)     Done  
4. Simulate whole thing on graph (single train only)                                                  Done
5. Run multiple instances of the train without worrying about the deadlock                            Done
   Create action for each train                                                                       Done
6. Try to simulate as you want to do in the project i.e. take action from the user.                   Done
   Also able to order the train for which to take the action at a particular time 
   
   
   
7. Try to create deadlock with the trains                                                             Done 
8. Create the graph for resource usage.                                                               Done  
9. Use the standard deadlock detection algorithm for the detection of the algorithm                   Done 
   Implement the Bank's algorithm for deadlock detection.


# List of reasons of creating events
1. If a train is standing at a station, the event processing time            Can be done by implementing timeout if train arrive early before depart time
    corresponds to the earliest time at which the train can depart,          occupancy of next track.
    as defined by its minimum halt time at the station and by any           Done
    departure time constraints enforced for passenger convenience.

2. `If it is running between two stations, the event processing time         Already done   
    corresponds to the earliest time at which it can arrive at the          occupancy of next station if move.
    next station, as defined by the length of the track and the train       Done
    running speed.

3. If the train is yet to start, the event processing                       Done
    time is the time at which it is expected at the starting station.       

# TODO

Create the actions properly and implement deadlock Avoidance heuristic

1. Code the proper list of events. All the three listed above.                                      Done
2. Create the proper time table with arrival and departure time of each train at each station.      Done
3. Create separate log file for each train.                                                         Done
3. Run simulation with the first toy environment (as in the paper).                                 Done
4. Label the current set of implementation under Simulation Phase.                                  Done
5. Implement the actions properly without actually waiting for the resource.                        Done
6. Understand Heuristic that can be used to avoid deadlock.                                         Done
7. Move that train that occupies the most congested resource first and then other.                  Done
   If tie break it using priority of each train.
   
   
8. Implement the state vector when action need to be taken.                                       Note Done



# TODO 
1. Implement the function to compute J      $\;\;\;\;\;\;\;\;\;\;\;$   Done
2. Implement the state space i.e. when taking action train should know what is the state space 
3. Implement Proxy reward.
4. Implement the Q-learning algo given in the paper
5. Check for the first test case

In [31]:
'''
Doubts
1. If the train is not yet started what is the state vector

'''

'\nDoubts\n1. If the train is not yet started what is the state vector\n\n'