In [None]:
# The system model will provide feedback to the agent from the environment regarding the next state and the reward given an 
# action in the current state. The task is episodic and will terminate after all the vehicles in the fleet have completed the 
# delivery of the goods. Initially the location of the vehicle will be at the warehouse and the capacity will be set to the 
# maximum capacity of the vehicle. The outstanding demand at each customer location will be set to its initial demand.
# The next state has three components. The location component is determined directly by the action, which is defined as 
# the movement to a specific location. If the current capacity of the vehicle is more than the current demand, then the vehicle 
# will deliver the goods to the location. The capacity of the vehicle will be reduced accordingly and the outstanding demand at 
# the location will be set to zero.
# If the vehicle successfully delivers the goods to the location, then the agent will receive a positive reward proportional to 
# the amount of goods delivered. Apart from the delivery component, a cost component must be accounted for on every action.
# The cost will be computed as the Euclidean distance between the current location and the next location.


In [16]:

# Reward shaping: the travel cost is scaled by a negative factor, the
# delivered quantity by a positive one.
delivery_factor = 10
cost_factor = -5

# Capacity a vehicle is reset to when it moves to the warehouse.
max_capacity = 10

class state:
    """Snapshot of the routing problem as seen by the agent.

    Attributes are stored exactly as passed in:
      demand   -- outstanding demand, indexed by location
      location -- current location of the vehicle
      capacity -- remaining capacity of the vehicle
      vehicles -- vehicle counter
    """

    def __init__(self,demand,location,capacity,vehicles):
        self.demand, self.location = demand, location
        self.capacity, self.vehicles = capacity, vehicles

    def printState(self):
        """Print every field of the state on its own labelled line."""
        fields = (
            ("Demand: ", self.demand),
            ("Location: ", self.location),
            ("Capacity: ", self.capacity),
            ("Vehicles: ", self.vehicles),
        )
        for label, value in fields:
            print(label+str(value))

def system_model(state,action):
    """Apply ``action`` to ``state`` and return ``(state, reward)``.

    ``action`` is the index of the location the vehicle moves to; 0 is the
    warehouse. ``state`` is modified in place and the same object is returned.

    Reward = cost_factor * (Euclidean distance travelled)
             + delivery_factor * (quantity delivered, 0 if nothing is delivered).

    Moving to the warehouse resets the capacity to ``max_capacity`` and
    decrements ``state.vehicles``. Moving to a customer delivers the full
    outstanding demand only when the remaining capacity strictly exceeds it.
    """
    reward_delivery = 0
    # Compute the cost component of the reward based on the Euclidean distance between the current and next location
    reward_cost = cost_factor * euclidean_distance(state.location,action)

    # Check if the destination is the warehouse (location = 0)
    if action == 0:
        # The refill must be written to the state object; assigning to a bare
        # local name would be discarded when the function returns.
        state.capacity = max_capacity
        state.vehicles -= 1

    # For customer location check if vehicle capacity is more than the demand
    else:
        # NOTE(review): strict '>' means a vehicle whose capacity exactly equals
        # the demand does not deliver; confirm this is intended rather than '>='.
        if (state.capacity > state.demand[action]):
            state.capacity -= state.demand[action]
            reward_delivery = delivery_factor * state.demand[action]
            state.demand[action] = 0

    state.location = action
    reward = reward_cost + reward_delivery

    return state,reward
    
def euclidean_distance(loc1,loc2):
    """Return the straight-line distance between locations ``loc1`` and ``loc2``.

    Coordinates are looked up in the module-level ``X`` and ``Y`` mappings,
    both indexed by location.
    """
    delta_x = X[loc2] - X[loc1]
    delta_y = Y[loc2] - Y[loc1]
    return (delta_x**2 + delta_y**2) ** 0.5
    

In [17]:
# Reading the input file
#
# Layout as parsed below: the first line holds three integers N, V, c; every
# following line holds "demand x y" for one location, numbered from 0 in file
# order. (N is presumably the number of locations -- it is not used here.)

# Use a context manager so the file handle is closed even if parsing fails.
with open('vrp_5_4_1', 'r') as file1:
    Lines = file1.readlines()

demand = {}
X={}
Y={}
counter = -1  # -1 marks the header line; location rows start at index 0

for line in Lines:
    # Skip blank lines (e.g. a trailing newline at end of file) instead of
    # failing on the tuple unpacking below.
    if not line.strip():
        continue
    print(line.split())
    if counter == -1:
        N,V,c = map(int, line.split())
    else:
        demand[counter],X[counter],Y[counter] = map(int, line.split())
    counter+= 1

['5', '4', '10']
['0', '0', '0']
['3', '0', '10']
['3', '-10', '10']
['3', '0', '-10']
['3', '10', '-10']
{0: 0, 1: 0, 2: -10, 3: 0, 4: 10}
{0: 0, 1: 10, 2: 10, 3: -10, 4: -10}


In [18]:
# Build the initial state: vehicle at location 0 (the warehouse) with capacity c and V vehicles.
# Pass a copy of the demand dict: system_model modifies state.demand in place, so sharing the
# module-level `demand` would make a re-run of this cell start from already-served demand.
systemState = state(dict(demand),0,c,V)

systemState.printState()

Demand: {0: 0, 1: 3, 2: 3, 3: 3, 4: 3}
Location: 0
Capacity: 10
Vehicles: 4


In [19]:
# Take one step: move the vehicle to location 1 and keep the resulting reward.
systemState, reward = system_model(state=systemState, action=1)

10.0


In [20]:
# Show the state after the step, followed by the reward it produced.
systemState.printState()
reward_text = str(reward)
print("Reward: " + reward_text)

Demand: {0: 0, 1: 0, 2: 3, 3: 3, 4: 3}
Location: 1
Capacity: 7
Vehicles: 4
Reward: -20.0
