In [17]:
import tkinter as tk
import math
import heapq
import time
import numpy as np

In [18]:
# Initialise variables
#
# Everything assigned in this cell is a module-level variable. The functions
# and classes in the later cells read these names directly and rebind the
# mutable ones through their own `global` declarations.

# Environment variables
# Grid codes: 1 = wall, 0 = free cell, 3/4/5 = bin (area type, see AREA_ID).
# The landfill cell (code 2) is stamped onto the grid further down.
ENV_ARRAY = [
    [1, 1, 1, 1, 1, 1, 1],
    [1, 0, 0, 0, 0, 0, 1],
    [1, 0, 1, 1, 3, 0, 1],
    [1, 0, 3, 0, 0, 0, 1],
    [1, 0, 0, 0, 1, 4, 1],
    [1, 5, 0, 0, 1, 0, 1],
    [1, 1, 1, 1, 1, 1, 1],
]
CANVAS_WIDTH = CANVAS_HEIGHT = 525
# PROG_LABEL (progress label widget) is assigned later by initialise().

# Bin variables
AREA_ID = {3: "busy", 4: "moderate", 5: "quiet"}
BIN_COLOUR = dict(busy="red", moderate="#FF8D29", quiet="#FFCD38")
BIN_RATE = dict(busy=8, moderate=5, quiet=3)
BIN_MAX = 50
BIN_START = BIN_MAX // 2
# Filled in by Environment.__init__ / reset_bins()
BINS_LOC = []
BINS_LOC_RATE = []
NOT_COLLECTED_BINS = []

# Truck variables
TRUCK_START = [(1, 1), (3, 3), (5, 5), (1, 5)]
CUR_TRUCK_START = TRUCK_START[0]
TRUCK_MAX_CAP = 100
TRUCK_MAX_MOVE = 150
TRUCK_REMAIN_MOVE = TRUCK_MAX_MOVE
# AGENT_MOVE_LABEL (remaining-moves label widget) is assigned later by initialise().

# Landfill variables
LANDFILL_ROW = 1
LANDFILL_COL = 3
ENV_ARRAY[LANDFILL_ROW][LANDFILL_COL] = 2

# Capacity variables
# Truck fill levels used as part of the Q-learning state:
#   empty = 0-40%
#   half  = 40-70%
#   full  = 70-100%
CAP_LVL = ["empty", "half", "full"]

# Experiment parameters
EXP_NUM = 0
TOTAL_EXP = 4
# `window` (the Tk root) is assigned later by main().

# Algorithm variables
ALGO_LIST = ["A* search algorithm", "Reinforcement learning: Q-Learning"]
ALGO_NAME = ALGO_LIST[0]
# ALGO_LABEL (algorithm-name label widget) is assigned later by initialise().

In [19]:
# This section was referenced and adapted from 'A* Search Algorithm' by GeeksforGeeks
# Available at https://www.geeksforgeeks.org/a-search-algorithm/

# Heuristic function for A*
def heuristic(start,end):
    """Return the straight-line (Euclidean) distance between two grid cells.

    Both arguments are 2-item (row, col) pairs. The result is a float.
    """
    (row_a, col_a), (row_b, col_b) = start, end
    d_row = row_a - row_b
    d_col = col_a - col_b
    return math.sqrt(d_row**2 + d_col**2)


# Find possible moves from a point
def get_neighbours(point):
    """Return the cells reachable in one step from ``point``.

    ``point`` is a (row, col) pair. Candidates are examined in the fixed
    order up, down, left, right; a candidate is kept when it lies inside
    the guarded range and its ENV_ARRAY value is not 1 (wall).
    """
    row, col = point

    # (is the step inside the grid?, destination cell) in up/down/left/right order
    candidates = (
        (row > 0, (row - 1, col)),
        (row < len(ENV_ARRAY) - 1, (row + 1, col)),
        (col > 0, (row, col - 1)),
        (col < len(ENV_ARRAY[0]) - 1, (row, col + 1)),
    )

    # The grid is only indexed when the guard holds
    return [cell for inside, cell in candidates
            if inside and ENV_ARRAY[cell[0]][cell[1]] != 1]


# A* algorithm
def a_star(start,end):
    """Find a shortest path from ``start`` to ``end`` with A* search.

    Cells are hashable (row, col) tuples. Moves come from
    ``get_neighbours`` and every step costs 1; ``heuristic`` supplies the
    estimated remaining distance.

    Returns:
        A list of the cells to visit in order, excluding ``start`` and
        including ``end``. An empty list when ``start == end``. ``None``
        when ``end`` cannot be reached.
    """
    # Cheapest known cost from start to each discovered cell
    gscore = {start: 0}

    # Predecessor of each discovered cell, for path reconstruction
    previous = {}

    # Open list as a real heap of (fscore, cell); every insertion goes
    # through heappush so heappop always yields the smallest fscore.
    open_heap = [(heuristic(start,end), start)]

    # Cells whose neighbours have already been expanded
    closed = set()

    while open_heap:
        current = heapq.heappop(open_heap)[1]

        # A cell can be queued more than once when a cheaper route to it is
        # found later; ignore the outdated copies.
        if current in closed:
            continue

        # Reached end node: walk the predecessor chain back to start
        if current == end:
            path = []
            while current != start:
                path.append(current)
                current = previous[current]
            path.reverse()
            return path

        closed.add(current)

        # Find neighbours
        for neighbour in get_neighbours(current):
            # Skip nodes that are already fully expanded
            if neighbour in closed:
                continue

            temp_gscore = gscore[current] + 1

            # New cell, or a strictly cheaper route to a known cell:
            # record it and (re-)queue it with its updated priority.
            if temp_gscore < gscore.get(neighbour, float("inf")):
                gscore[neighbour] = temp_gscore
                previous[neighbour] = current
                temp_fscore = temp_gscore + heuristic(neighbour,end)
                heapq.heappush(open_heap, (temp_fscore, neighbour))

    # No path found
    return None

In [20]:
def reset_bins():
    """Mark every bin as not yet collected.

    Rebinds NOT_COLLECTED_BINS to a fresh copy of BINS_LOC so that removing
    entries from it later does not touch BINS_LOC itself.
    """
    global NOT_COLLECTED_BINS
    NOT_COLLECTED_BINS = list(BINS_LOC)

In [21]:
# Find nearest next bin
def next_bin(start):
    """Choose the closest uncollected bin and return the path to it.

    Runs ``a_star`` from ``start`` to every entry of NOT_COLLECTED_BINS and
    keeps the first one with the fewest steps. The chosen bin is removed
    from NOT_COLLECTED_BINS and announced on PROG_LABEL.

    Callers must make sure at least one reachable bin remains: when no
    candidate yields a path, unpacking the (missing) target fails.
    """
    # (bin, path length, path) of the best candidate so far
    best = (None, float("inf"), None)

    for candidate in NOT_COLLECTED_BINS:
        route = a_star(start,candidate)

        # No path found
        # Should never reach here
        if route is None:
            print("No path found to", candidate)
            continue

        # Strict comparison: on a tie the earlier candidate wins
        steps = len(route)
        if steps < best[1]:
            best = (candidate, steps, route)

    target, _, target_path = best

    # Update agent destination
    row, col = target
    PROG_LABEL.config(text="Move to bin ({}, {})".format(row,col))
    NOT_COLLECTED_BINS.remove(target)

    return target_path


# Find nearest path to landfill
def to_landfill(start):
    """Return the A* path from ``start`` to the landfill cell.

    Also makes every bin collectable again via ``reset_bins`` and shows
    the new destination on PROG_LABEL.
    """
    landfill_cell = (LANDFILL_ROW,LANDFILL_COL)
    route = a_star(start,landfill_cell)

    # A trip to the landfill starts a new collection round
    reset_bins()

    # Update agent destination
    PROG_LABEL.config(text="Move to landfill")

    return route

In [22]:
# Reinforcement learning variable initialisation
def rf_init():
    """(Re)create the module-level Q-learning tables and hyperparameters.

    Sets the globals ``states``, ``actions``, ``q_table``,
    ``learning_rate``, ``discount_factor`` and ``epsilon``.
    """
    global states, actions, q_table
    global learning_rate, discount_factor, epsilon

    # One state per (grid cell, truck fill level) combination,
    # ordered by row, then column, then fill level
    states = []
    for row in range(len(ENV_ARRAY)):
        for col in range(len(ENV_ARRAY[0])):
            for fill_level in range(len(CAP_LVL)):
                states.append(((row,col),fill_level))

    # Action index i corresponds to actions[i]
    actions = ["move_up","move_down","move_left","move_right"]

    # Q-table: one row per state, one column per action, all zeros
    q_table = np.zeros((len(states), len(actions)))

    # Hyperparameters
    learning_rate = 0.1
    discount_factor = 0.9
    epsilon = 0.9

In [23]:
# This was adapted from 'Q-Learning: A Complete Example in Python' by Daniel Soper
# Available at https://colab.research.google.com/drive/1E2RViy7xmor0mhqskZV14_NUj2jMpJz3

# Check whether the training episode has reached its terminal state (the step counter equals five times the truck's move budget)
def is_terminal():
    """Return True when TIME_LAPSE equals exactly TRUCK_MAX_MOVE * 5."""
    episode_length = TRUCK_MAX_MOVE * 5
    return TIME_LAPSE == episode_length


def get_reward(next_state,collect_amount,empty_amount):
    """Score one training step.

    Args:
        next_state: ``((row, col), fill_level)`` the truck ended up in.
        collect_amount: Amount of trash picked up from a bin this step.
        empty_amount: Amount of trash unloaded at the landfill this step.

    Returns:
        The sum of the collection reward, the landfill reward, the wall
        collision penalty, a fixed per-step penalty and a penalty for every
        bin in ``bins_exact_fill`` that still holds trash.
    """
    # Reward for collecting trash: bigger pickups earn a higher multiplier
    collect_reward = collect_amount * (2 if collect_amount < 20 else 3)

    # Reward for emptying at landfill: fuller loads earn a higher multiplier
    if empty_amount < 40:
        empty_factor = 1.5
    elif empty_amount < 70:
        empty_factor = 3
    else:
        empty_factor = 4
    empty_reward = empty_amount * empty_factor

    # Penalty for collisions (ending the step on a wall cell)
    cell = next_state[0]
    collision_penalty = -1500 if ENV_ARRAY[cell[0]][cell[1]] == 1 else 0

    # Penalty for each step
    movement_penalty = -10

    # Penalty for uncollected bins: a flat 40 plus the bin's load, per non-empty bin
    uncollected_penalty = -sum(40 + load for load in bins_exact_fill if load > 0)

    # Total reward
    return collect_reward + uncollected_penalty + \
           empty_reward + collision_penalty + movement_penalty


# Update q-value
# Use TD error and Bellman equation
def update_q_value(state,action,reward,next_state):
    """Apply one temporal-difference update to ``q_table`` in place.

    new_q = old_q + learning_rate * (reward + discount_factor * max_a' Q(next_state, a') - old_q)

    ``state`` and ``next_state`` must be entries of the global ``states``
    list; ``action`` is a column index of ``q_table``.
    """
    state_row = states.index(state)
    old_q = q_table[state_row][action]
    best_next_q = np.max(q_table[states.index(next_state)])

    # Bellman target for this transition
    target = reward + discount_factor * best_next_q

    q_table[state_row][action] = old_q + learning_rate * (target - old_q)

# Epsilon-greedy exploration
def select_action(state):
    """Pick an action index for ``state`` during training.

    A uniform random draw below ``epsilon`` selects the best-valued action
    (ties broken at random); otherwise a uniformly random action is chosen.
    """
    # Explore: uniformly random action
    if not np.random.uniform() < epsilon:
        return np.random.randint(len(actions))

    # Exploit: action(s) with the highest q-value for this state
    q_row = q_table[states.index(state)]
    best_actions = np.where(q_row == np.max(q_row))[0]

    # Only draw a random number when there really is a tie
    if len(best_actions) > 1:
        return np.random.choice(best_actions)
    return best_actions[0]

# Determine next state and reward
def take_action(state,action):
    """Simulate one training step and return ``(next_state, reward)``.

    Mirrors what the drawn environment does on a truck move: the truck
    steps (staying put at the grid edge), bins refill every 5th step, and
    the truck then unloads at the landfill or collects from a bin.

    Args:
        state: ``((row, col), fill_level)`` of the truck before the step.
        action: 0 = up, 1 = down, 2 = left, 3 = right.

    Side effects:
        Updates the globals ``TIME_LAPSE``, ``truck_exact_fill`` and
        ``bins_exact_fill``.
    """
    global truck_exact_fill, bins_exact_fill, TIME_LAPSE

    # Only the position is needed; the fill level is recomputed below
    (truck_x,truck_y), _ = state
    new_truck_x = truck_x
    new_truck_y = truck_y

    # Update truck location (an action that would leave the grid is a no-op)
    if action == 0 and truck_x > 0: # Move up
        new_truck_x -= 1
    elif action == 1 and truck_x < len(ENV_ARRAY)-1: # Move down
        new_truck_x += 1
    elif action == 2 and truck_y > 0: # Move left
        new_truck_y -= 1
    elif action == 3 and truck_y < len(ENV_ARRAY[0])-1: # Move right
        new_truck_y += 1

    # Update bin exact fill
    # Bins refill every 5 steps and never hold more than BIN_MAX, matching
    # Bin.add_load in the drawn environment.
    TIME_LAPSE += 1
    if TIME_LAPSE % 5 == 0:
        for i in range(len(bins_exact_fill)):
            bins_exact_fill[i] = min(BIN_MAX, bins_exact_fill[i] + BINS_LOC_RATE[i])

    # Collect and empty truck fill
    collect_amount = 0
    empty_amount = 0
    if (new_truck_x,new_truck_y) == (LANDFILL_ROW,LANDFILL_COL): # At landfill
        empty_amount = truck_exact_fill
        truck_exact_fill = 0
    elif (new_truck_x,new_truck_y) in BINS_LOC: # At bin
        truck_can_collect = TRUCK_MAX_CAP - truck_exact_fill
        bin_index = BINS_LOC.index((new_truck_x,new_truck_y))
        trash = bins_exact_fill[bin_index]
        if truck_can_collect >= trash: # Collect all
            collect_amount = trash
            bins_exact_fill[bin_index] = 0
            truck_exact_fill += trash
        else: # Collect partial: fill the truck, leave the rest in the bin
            collect_amount = truck_can_collect
            bins_exact_fill[bin_index] -= truck_can_collect
            truck_exact_fill = TRUCK_MAX_CAP

    # Update truck fill level: 0 below 40%, 1 below 70%, otherwise 2
    truck_fill_level = truck_exact_fill / TRUCK_MAX_CAP
    if truck_fill_level < 0.4:
        new_truck_fill = 0
    elif truck_fill_level < 0.7:
        new_truck_fill = 1
    else:
        new_truck_fill = 2

    next_state = ((new_truck_x,new_truck_y),new_truck_fill)

    # Calculate reward based on next state
    reward = get_reward(next_state,collect_amount,empty_amount)

    return next_state,reward
        

def q_learning(env, episodes=1000):
    """Train the global ``q_table`` by running simulated episodes.

    Args:
        env: Object providing ``reset()`` and ``get_state()``; it is reset
            at the start of every episode and supplies the initial state.
        episodes: Number of training episodes to run (default 1000).

    Each episode restarts the simulated counters (``TIME_LAPSE``,
    ``truck_exact_fill``, ``bins_exact_fill``) and steps until
    ``is_terminal()`` reports the end of the episode.
    """
    global TIME_LAPSE, truck_exact_fill, bins_exact_fill

    for _ in range(episodes):
        # Initialise the simulated world for this episode
        TIME_LAPSE = 0
        truck_exact_fill = 0
        bins_exact_fill = [BIN_START] * len(BINS_LOC)

        # Initialise environment and state
        env.reset()
        state = env.get_state()

        while not is_terminal():
            # Choose action
            action = select_action(state)

            # Observe next state and reward
            next_state, reward = take_action(state,action)

            # Update q-value for current state and action
            update_q_value(state,action,reward,next_state)

            # Update state for next step
            state = next_state

In [24]:
def move(canvas,env,start=None,to_bin=None,state=None):
    """Plan the truck's next leg and schedule it on the canvas after 300 ms.

    A* mode (ALGO_NAME is the first entry of ALGO_LIST): uses ``start`` and
    ``to_bin``; the truck heads for the nearest uncollected bin when
    ``to_bin`` is truthy and bins remain, otherwise for the landfill. The
    whole path is handed to ``env.update_A_star``.

    Q-learning mode: uses ``state``; the best-valued action for that state
    (ties broken at random) is turned into the neighbouring cell, which is
    handed to ``env.update_RL``.
    """
    # A* search
    if ALGO_NAME == ALGO_LIST[0]:
        # With no bins left to collect the truck always goes to the landfill
        if NOT_COLLECTED_BINS and to_bin:
            path = next_bin(start)
        else:
            path = to_landfill(start)
        canvas.after(300,env.update_A_star,path)
        return

    # Reinforcement learning: Q-learning
    q_row = q_table[states.index(state)]
    best_actions = np.where(q_row == np.max(q_row))[0]
    if len(best_actions) > 1:
        action = np.random.choice(best_actions)
    else:
        action = best_actions[0]

    # (row, col) offset per action index: up, down, left, right
    offsets = ((-1,0),(1,0),(0,-1),(0,1))
    d_row, d_col = offsets[action]
    path = (state[0][0]+d_row,state[0][1]+d_col)

    # Q-learning mode shows no destination text
    PROG_LABEL.config(text="")

    canvas.after(300,env.update_RL,path)

In [25]:
def end_run(landfill,truck):
    """Report a finished run and advance the experiment bookkeeping.

    Args:
        landfill: Amount of garbage delivered to the landfill.
        truck: Amount of garbage still on the truck.

    Returns:
        ``(next_run, next_exp)``:
        ``(True, False)``  - another run of the current algorithm follows;
        ``(False, True)``  - the first algorithm is done, switch to the second;
        ``(False, False)`` - the second algorithm has finished its last run.
    """
    global EXP_NUM, ALGO_NAME,CUR_TRUCK_START
    EXP_NUM += 1

    # Print the algorithm name once, before its first run
    if EXP_NUM == 1:
        print("\033[1m" + ALGO_NAME + "\033[0m")
    print("\033[3mExperiment",EXP_NUM,"\033[0m")
    print("Total amount of garbage collected:",landfill+truck)
    print("In landfill:", landfill)
    print("On agent:",truck)
    print()

    # First algorithm
    if ALGO_NAME == ALGO_LIST[0]:
        if EXP_NUM != TOTAL_EXP:
            # Change truck start position
            CUR_TRUCK_START = TRUCK_START[EXP_NUM]
        else:
            PROG_LABEL.config(text="End of experiment 1")

            # Reset for next algorithm
            EXP_NUM = 0
            ALGO_NAME = ALGO_LIST[1]
            CUR_TRUCK_START = TRUCK_START[0]
            ALGO_LABEL.config(text=ALGO_NAME)
            return False, True

    # Second algorithm
    if ALGO_NAME == ALGO_LIST[1]:
        if EXP_NUM != TOTAL_EXP:
            # Change truck start position
            CUR_TRUCK_START = TRUCK_START[EXP_NUM]
        else:
            # Last experiment
            PROG_LABEL.config(text="End of experiment 2")
            return False, False

    return True, False

In [26]:
# This was adapted from the lab 1 simple bot code
# Specifically the Bot class

class Truck():
    """The garbage-collecting agent, drawn as a blue triangle on the canvas.

    ``x`` is the grid row and ``y`` the grid column. Canvas calls take the
    horizontal coordinate first, which is why the drawing code passes
    ``y`` before ``x``.
    """

    def __init__(self,size,canvas):
        """Place the truck on CUR_TRUCK_START.

        Args:
            size: Side length of one grid cell in pixels.
            canvas: Canvas the truck draws itself on.
        """
        self.x, self.y = CUR_TRUCK_START
        self.cell_size = size
        self.centre = self.cell_size/2
        # Gap between the triangle and the cell border
        self.size = self.cell_size/2*0.2
        self.direction = "down"
        self.capacity = 0
        # Canvas tag shared by every item belonging to the truck
        self.name = "agent"
        self.canvas = canvas

    def draw(self):
        """Redraw the truck triangle (pointing along ``direction``) and its load."""
        # Pixel edges of the truck's cell (x = vertical, y = horizontal)
        top = self.x * self.cell_size
        bottom = (self.x+1) * self.cell_size
        left = self.y * self.cell_size
        right = (self.y+1) * self.cell_size

        # (head_x, head_y) is the tip; (x1, y1)-(x2, y2) is the opposite edge
        if self.direction == "up":
            head_x = top + self.size
            head_y = left + self.centre
            x1 = bottom - self.size
            y1 = left + self.size
            x2 = x1
            y2 = y1 + self.cell_size - 2*self.size
        elif self.direction == "down":
            head_x = bottom - self.size
            head_y = left + self.centre
            x1 = top + self.size
            y1 = left + self.size
            x2 = x1
            y2 = y1 + self.cell_size - 2*self.size
        elif self.direction == "right":
            head_x = top + self.centre
            head_y = right - self.size
            x1 = top + self.size
            y1 = left + self.size
            x2 = x1 + self.cell_size - 2*self.size
            y2 = y1
        else:
            head_x = top + self.centre
            head_y = left + self.size
            x1 = top + self.size
            y1 = right - self.size
            x2 = x1 + self.cell_size - 2*self.size
            y2 = y1

        vertices = [head_y,head_x,y1,x1,y2,x2]
        # Remove the previous triangle and load text before drawing anew
        self.canvas.delete(self.name)
        self.canvas.create_polygon(vertices,fill="blue", tags=self.name)

        # Update truck capacity
        self.update_capacity()

    def move(self,x,y):
        """Spend one move and step to cell ``(x, y)``.

        Returns:
            False, without moving, when no moves remain; True otherwise.
            A move is spent even when the target cell is rejected by the
            bounds check below.
        """
        global TRUCK_REMAIN_MOVE
        # Stop truck if reached max steps
        if TRUCK_REMAIN_MOVE == 0:
            PROG_LABEL.config(text="No more moves")
            return False

        TRUCK_REMAIN_MOVE -= 1
        label_text = f"Remaining {TRUCK_REMAIN_MOVE} moves"
        AGENT_MOVE_LABEL.config(text=label_text)

        # Face the direction of travel
        if self.x < x:
            self.direction = "down"
        elif self.x > x:
            self.direction = "up"
        elif self.y < y:
            self.direction = "right"
        elif self.y > y:
            self.direction = "left"

        # Rows are bounded by the number of rows, columns by the number of
        # columns. Index 0 stays excluded, as before.
        if 0 < x < len(ENV_ARRAY) and 0 < y < len(ENV_ARRAY[0]):
            self.x = x
            self.y = y
            self.draw()
        return True

    def update_capacity(self):
        """Draw the current load as white text in the centre of the truck's cell."""
        x = self.x * self.cell_size + self.centre
        y = self.y * self.cell_size + self.centre
        self.capacity_tag = self.canvas.create_text(y,x, \
                                                    text=str(self.capacity), \
                                                    fill="white", \
                                                    font=("Arial",14), \
                                                    tags=(self.name,"capacity"))

    def collect(self,bin):
        """Load up to ``bin`` units of trash; return the amount that did not fit."""
        if TRUCK_MAX_CAP < self.capacity + bin:
            taken = TRUCK_MAX_CAP - self.capacity
            uncollected = bin - taken
            self.capacity = TRUCK_MAX_CAP
        else:
            self.capacity += bin
            uncollected = 0
        return uncollected

    def empty(self):
        """Unload everything."""
        self.capacity = 0

    def getLocation(self):
        """Return the truck's ``(row, col)`` position."""
        return self.x, self.y

    def reset(self):
        """Return to CUR_TRUCK_START, empty, with a full move budget, and redraw."""
        global TRUCK_REMAIN_MOVE
        self.x, self.y = CUR_TRUCK_START
        self.direction = "down"
        self.capacity = 0
        TRUCK_REMAIN_MOVE = TRUCK_MAX_MOVE
        self.draw()

In [27]:
# This was adapted from the lab 1 simple bot code
# Specifically the Lamp class

class Bin():
    """A rubbish bin drawn as a coloured circle showing its current load."""

    def __init__(self,row,col,size,area,canvas):
        """Create the bin for grid cell ``(row, col)``.

        Args:
            row, col: Grid position of the bin.
            size: Side length of one grid cell in pixels.
            area: Key into BIN_COLOUR / BIN_RATE ("busy", "moderate" or "quiet").
            canvas: Canvas the bin draws itself on.
        """
        # Canvas tag shared by every item belonging to this bin
        self.name = str((row,col))
        # Canvas tag of the load text only
        self.capacity = "capacity" + self.name
        self.cell_centre = size//2
        # Pixel centre of the cell (x = vertical, y = horizontal)
        self.x = row*size + self.cell_centre
        self.y = col*size + self.cell_centre
        self.radius = size//3
        self.colour = BIN_COLOUR[area]
        self.canvas = canvas
        self.rate = BIN_RATE[area]
        self.filled = BIN_START
        if area == "busy":
            self.text_colour = "white"
        else:
            self.text_colour = "black"

    def draw(self):
        """Redraw the bin circle and its load text."""
        # Drop the previous circle and text first so that repeated redraws
        # do not pile up canvas items (Truck.draw does the same).
        self.canvas.delete(self.name)
        self.canvas.create_oval(self.y-self.radius,self.x-self.radius, \
                                self.y+self.radius,self.x+self.radius, \
                                fill=self.colour,outline='', \
                                tags=self.name)
        self.update_capacity()

    def update_capacity(self):
        """Draw the current load as text in the centre of the bin."""
        self.capacity_tag = self.canvas.create_text(self.y,self.x, \
                                                    text=str(self.filled), \
                                                    fill=self.text_colour, \
                                                    font=("Arial",14), \
                                                    tags=(self.name,self.capacity))

    def update_load(self,load):
        """Set the load to ``load`` and redraw the bin."""
        self.filled = load
        self.draw()

    def add_load(self):
        """Add one refill of ``rate`` units, never exceeding BIN_MAX."""
        self.update_load(min(self.filled + self.rate, BIN_MAX))

    def reset(self):
        """Restore the starting load (BIN_START, as in ``__init__``) and redraw."""
        self.filled = BIN_START
        self.draw()

In [28]:
# This was adapted from the lab 1 simple bot code

class Landfill():
    """The landfill cell, drawn as a brown square showing the total delivered load."""

    def __init__(self,canvas):
        # Canvas tag shared by every item belonging to the landfill
        self.name = "landfill"
        self.capacity = 0
        self.canvas = canvas

    def set_loc(self,x1,y1,x2,y2):
        """Store the pixel corners of the landfill cell (x = vertical, y = horizontal).

        Must be called before ``draw``.
        """
        self.x1 = x1
        self.x2 = x2
        self.y1 = y1
        self.y2 = y2
        self.centre_x = (x1+x2) / 2
        self.centre_y = (y1+y2) / 2

    def draw(self):
        """Redraw the landfill square and its load text."""
        # Drop the previous square and text first so that repeated redraws
        # do not pile up canvas items (Truck.draw does the same).
        self.canvas.delete(self.name)
        self.canvas.create_rectangle(self.y1,self.x1,self.y2,self.x2, \
                                    fill="brown",outline="black", \
                                    tags=self.name)
        self.canvas.create_text(self.centre_y,self.centre_x, \
                                text=str(self.capacity), \
                                fill="white", \
                                font=("Arial",18), \
                                tags=self.name)

    def update_capacity(self,load):
        """Add ``load`` to the running total (does not redraw)."""
        self.capacity += load

    def reset(self):
        """Clear the running total and redraw."""
        self.capacity = 0
        self.draw()

In [29]:
class Environment():
    """The grid world: owns the bins, the truck and the landfill and drives
    the step-by-step animation for both algorithms.

    Grid rows are called ``x``/``i`` and columns ``y``/``j``. Canvas calls
    take the horizontal coordinate first, which is why ``y`` is passed
    before ``x``.
    """

    def __init__(self,canvas):
        """Build the world from ENV_ARRAY.

        Every cell with a code above 2 becomes a Bin. The module-level
        registries BINS_LOC and BINS_LOC_RATE are rebuilt in place, so
        creating a second Environment does not register each bin twice.
        """
        self.rows = len(ENV_ARRAY)
        self.cols = len(ENV_ARRAY[0])
        self.cell_size = CANVAS_WIDTH // self.cols
        self.bins = []

        # Start from empty registries; self.bins, BINS_LOC and
        # BINS_LOC_RATE are kept index-aligned.
        BINS_LOC.clear()
        BINS_LOC_RATE.clear()
        for i in range(self.rows):
            for j in range(self.cols):
                area = ENV_ARRAY[i][j]
                if area > 2:
                    self.bins.append(Bin(i,j,self.cell_size,AREA_ID[area],canvas))
                    BINS_LOC.append((i,j))
                    BINS_LOC_RATE.append(BIN_RATE[AREA_ID[area]])
        self.truck = Truck(self.cell_size,canvas)
        self.landfill = Landfill(canvas)
        self.canvas = canvas

    def draw(self):
        """Draw the grid, the index labels, the bins and the truck."""
        for i in range(self.rows):
            for j in range(self.cols):
                x1 = i * self.cell_size
                y1 = j * self.cell_size
                x2 = x1 + self.cell_size
                y2 = y1 + self.cell_size

                if ENV_ARRAY[i][j] == 1:
                    # Wall
                    self.canvas.create_rectangle(y1,x1,y2,x2, \
                                                 fill="#808080",outline="black")
                elif ENV_ARRAY[i][j] == 2:
                    # Landfill draws its own square
                    self.landfill.set_loc(x1,y1,x2,y2)
                    self.landfill.draw()
                else:
                    # Free cell (bins are drawn on top afterwards)
                    self.canvas.create_rectangle(y1,x1,y2,x2, \
                                                fill="white",outline="black")

                # Top row: label each cell with its column index
                if i == 0:
                    x = x1 + self.cell_size / 2
                    y = y1 + self.cell_size / 2
                    self.canvas.create_text(y,x,text=str(j), \
                                            fill="white", \
                                            font=("Arial",16,"bold"))
                # First column: label each cell with its row index
                elif j == 0:
                    x = x1 + self.cell_size / 2
                    y = y1 + self.cell_size / 2
                    self.canvas.create_text(y,x,text=str(i), \
                                            fill="white", \
                                            font=("Arial",16,"bold"))

        for trash_bin in self.bins:
            trash_bin.draw()

        self.truck.draw()

    def truck_at_loc(self):
        """Collect from the bin, or unload at the landfill, under the truck (if any)."""
        truck_loc = self.truck.getLocation()

        # Truck at bin
        if truck_loc in BINS_LOC:
            index = BINS_LOC.index(truck_loc)
            trash_bin = self.bins[index]

            # Truck collects what fits; the remainder stays in the bin
            bin_load = trash_bin.filled
            remain_bin = self.truck.collect(bin_load)
            trash_bin.update_load(remain_bin)
            self.truck.draw()

        # Truck at landfill
        elif truck_loc == (LANDFILL_ROW,LANDFILL_COL):
            self.landfill.update_capacity(self.truck.capacity)
            self.landfill.draw()
            self.truck.empty()
            self.truck.draw()

    def _grow_bins(self):
        """Refill every bin once on each 5th move of the current run."""
        if (TRUCK_MAX_MOVE - TRUCK_REMAIN_MOVE) % 5 == 0:
            for trash_bin in self.bins:
                trash_bin.add_load()

    def _start_next_run(self,prog_text):
        """Show ``prog_text``, reset the world and launch the next run.

        The next run uses whichever algorithm ALGO_NAME selects at this
        point; for Q-learning the agent is retrained first.
        """
        # Let the final frame of the previous run stay visible for a moment
        window.update()
        time.sleep(1)
        PROG_LABEL.config(text=prog_text)
        window.update()
        time.sleep(0.5)
        self.reset()
        if ALGO_NAME == ALGO_LIST[0]:
            move(self.canvas,self,start=CUR_TRUCK_START,to_bin=True)
        else:
            rf_init()
            q_learning(self)
            state = self.get_state()
            move(self.canvas,self,state=state)

    def update_A_star(self,path):
        """Advance the truck one cell along an A* ``path`` (non-empty list of cells).

        Reschedules itself every 300 ms until the path is used up, then
        services the destination and asks ``move`` for the next leg. When
        the truck has no moves left, the run is closed via ``end_run``.
        """
        # Move agent
        x, y = path[0]
        truck_move = self.truck.move(x,y)

        # Max truck steps has not been reached
        if truck_move:
            # Update bins
            self._grow_bins()

            # Remove moved path
            path = path[1:]

            if path:
                self.canvas.after(300,self.update_A_star,path)
            else:
                # Reached destination
                self.truck_at_loc()
                move(self.canvas,self,start=(x,y), \
                     to_bin=self.truck.capacity != TRUCK_MAX_CAP)
            return

        # End experiment run
        next_run,next_exp = end_run(self.landfill.capacity,self.truck.capacity)

        if next_run:
            self._start_next_run("Reset environment for next run")
        elif next_exp:
            self._start_next_run("Reset environment for next experiment")
        else:
            # End of all experiments
            window.update()
            time.sleep(1)
            prog_text = "End of all experiments"
            PROG_LABEL.config(text=prog_text)

    def update_RL(self,path):
        """Move the truck to the single cell ``path`` chosen by the Q-table.

        After a successful move the cell is serviced and ``move`` is asked
        for the next step. When the truck has no moves left, the run is
        closed via ``end_run`` and, if more runs remain, the next one starts.
        """
        # Move agent
        x, y = path
        truck_move = self.truck.move(x,y)

        # Max truck steps has not been reached
        if truck_move:
            # Update bins
            self._grow_bins()

            # Collect / empty truck
            self.truck_at_loc()

            # Next step
            state = self.get_state()
            move(self.canvas,self,state=state)
        else:
            next_run,next_exp = end_run(self.landfill.capacity,self.truck.capacity)

            if next_run:
                self._start_next_run("Reset environment for next run")

    def reset(self):
        """Reset bins, truck and landfill and mark all bins as uncollected."""
        for trash_bin in self.bins:
            trash_bin.reset()
        self.truck.reset()
        self.landfill.reset()
        reset_bins()

    def get_state(self):
        """Return the Q-learning state ``((row, col), fill_level)`` of the truck.

        ``fill_level`` is 0 below 40% of TRUCK_MAX_CAP, 1 below 70%, else 2.
        """
        truck_fill_level = self.truck.capacity / TRUCK_MAX_CAP
        if truck_fill_level < 0.4:
            truck_fill = 0
        elif truck_fill_level < 0.7:
            truck_fill = 1
        else:
            truck_fill = 2

        state = (self.truck.getLocation(),truck_fill)

        return state

In [30]:
def initialise(window):
    """Build the static user interface inside ``window`` and return the canvas.

    Creates, from top to bottom: the algorithm-name label, the progress
    label, the environment canvas, the bin legend, the agent limits and the
    remaining-moves label. The three labels that change at runtime are
    stored in the globals ALGO_LABEL, PROG_LABEL and AGENT_MOVE_LABEL.
    """
    global ALGO_LABEL, PROG_LABEL, AGENT_MOVE_LABEL

    heading_font = ("Arial",14,"bold")

    window.resizable(False,False)
    window.title("Garbage Collecting Agent")

    # Display algorithm name
    ALGO_LABEL = tk.Label(window,text=ALGO_NAME,font=("Arial",20,"bold","underline"))
    ALGO_LABEL.pack()

    # Display progress
    PROG_LABEL = tk.Label(window,text="",font=("Arial",20))
    PROG_LABEL.pack()

    # Environment canvas
    canvas = tk.Canvas(window,width=CANVAS_WIDTH,height=CANVAS_HEIGHT)
    canvas.pack()

    # Display bin info label
    bin_heading = tk.Label(window,text="Bin",justify='left',font=heading_font)
    bin_heading.pack(padx=4,anchor="w")
    bin_info = "".join([
        f"Red bin: busy area (increase by {BIN_RATE['busy']} every 5 steps)\n",
        f"Orange bin: moderately busy area (increase by {BIN_RATE['moderate']} every 5 steps)\n",
        f"Yellow bin: quiet area (increase by {BIN_RATE['quiet']} every 5 steps)\n\n",
        f"Max bin load is {BIN_MAX}\n",
        "*Bin load will increase even when truck is collecting from it*",
    ])
    bin_body = tk.Label(window,text=bin_info,justify='left')
    bin_body.pack(padx=4,anchor="w")

    # Display agent info label
    agent_heading = tk.Label(window,text="Agent",justify='left',font=heading_font)
    agent_heading.pack(padx=4,side="top",anchor="w")
    agent_info = f"Max agent load is {TRUCK_MAX_CAP}\n" + \
                 f"Max agent steps is {TRUCK_MAX_MOVE}"
    agent_body = tk.Label(window,text=agent_info,justify='left')
    agent_body.pack(padx=4,anchor="w")

    # Remaining-moves counter, updated by the truck on every move
    AGENT_MOVE_LABEL = tk.Label(window,text=f"Remaining {TRUCK_MAX_MOVE} moves")
    AGENT_MOVE_LABEL.pack(padx=4,anchor="w")
    return canvas

In [31]:
def main():
    """Create the window, build and draw the environment, and start the first run.

    The Tk root is stored in the global ``window``. The call blocks inside
    ``window.mainloop()``.
    """
    global window
    window = tk.Tk()

    board = initialise(window)
    world = Environment(board)
    world.draw()

    # Every bin starts out uncollected
    reset_bins()

    # Kick off the first run from the current truck start cell
    move(board,world,start=CUR_TRUCK_START,to_bin=True)
    window.mainloop()

In [32]:
# Launch the simulation: main() builds the window and then blocks in window.mainloop()
main()