# Appendix - Assignment 4
### Naishal Chiragbhai Patel (np781)
### Himaniben Hareshkumar Patel (hhp46)
### Karan Ashokkumar Pardasani (kp955)

## Data Generation for Agent 1

In [None]:
from sortedcontainers import SortedSet
from math import sqrt
import multiprocessing
import multiprocess
import numpy as np
## https://stackoverflow.com/questions/8875706/heapq-with-custom-compare-predicate/8875823
# Priority Queue with custom comparator
class MyPriorityQueue(object):
    """Open list for A*, ordered by f(n) = g(n) + h(n) and then by h(n).

    Entries are ``(f, h, cell)`` tuples kept in a ``SortedSet``, so ``pop``
    returns the cell with the smallest f-value; ties are broken by the
    smaller heuristic value and then by the cell tuple itself.

    ``g[cell]`` must be assigned by the caller before ``push(cell)``.
    """

    def __init__(self, current_heuristic, target):
        """Create an empty queue.

        current_heuristic: 'm' (Manhattan), 'e' (Euclidean) or 'c' (Chebyshev).
        target: (x, y) cell that heuristic distances are measured to.
        """
        self.current_heuristic = current_heuristic
        self._data = SortedSet()
        self.target = target
        self.g = dict()  # cell -> cost from the source
        self.h = dict()  # cell -> last heuristic value computed for the cell

    def __len__(self):
        return len(self._data)

    def push(self, item):
        """Insert ``item`` keyed by (f, h); ``self.g[item]`` must already be set."""
        priority = self.calculate_priority(item)  # also refreshes self.h[item]
        self._data.add((priority, self.h[item], item))

    def pop(self):
        """Remove and return the cell with the smallest (f, h, cell) key."""
        return self._data.pop(0)[2]

    def manhattan_distance(self, x, y):
        """Return |x0 - y0| + |x1 - y1| for points x = (x0, x1), y = (y0, y1)."""
        return abs(x[0] - y[0]) + abs(x[1] - y[1])

    def euclidean_distance(self, x, y):
        """Return sqrt((x0 - y0)**2 + (x1 - y1)**2)."""
        # The differences are taken between the two points, coordinate by
        # coordinate (not between the coordinates of a single point).
        return sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2)

    def chebyshev_distance(self, x, y):
        """Return max(|x0 - y0|, |x1 - y1|)."""
        return max(abs(x[0] - y[0]), abs(x[1] - y[1]))

    def get_heuristic(self, x):
        """Compute, cache in ``self.h`` and return the heuristic of cell ``x``.

        For a heuristic code other than 'm', 'e' or 'c' nothing is computed
        and the previously cached value is returned (KeyError if none).
        """
        measure = self.current_heuristic
        if measure == 'm':
            self.h[x] = self.manhattan_distance(x, self.target)
        elif measure == 'e':
            self.h[x] = self.euclidean_distance(x, self.target)
        elif measure == 'c':
            self.h[x] = self.chebyshev_distance(x, self.target)
        return self.h[x]

    def calculate_priority(self, x):
        """Return f(x) = g(x) + h(x)."""
        return self.g[x] + self.get_heuristic(x)

class GridWorld:
    """Repeated forward A* agent (Agent 1) moving through a partially known grid.

    Cells are addressed as ``(x, y)`` with ``0 <= x < m`` and ``0 <= y < n``;
    a cell whose symbol is ``'X'`` is blocked. ``grid`` holds the true maze,
    ``agent_grid`` holds what the agent has discovered so far.
    """

    def __init__(self, grid, start, target, n, m, agent_grid = None ):
        self.n = n  # size of the second grid index
        self.m = m  # size of the first grid index
        self.start = start  # start coordinate
        self.target = target  # target coordinate
        self.trajectory = []  # trajectory of the agent (not filled in by this class)
        self.grid = grid  # full knowledge of the grid
        if agent_grid is None:
            # Unknown cells are assumed free; the shape follows the (m, n)
            # indexing used everywhere else in this class.
            self.agent_grid = np.full((m, n), '.')
        else:
            self.agent_grid = agent_grid  # current knowledge of the grid
        self.sum_num_cells_processed = 0  # cells expanded over all A* runs
        self.sum_bumps = 0  # times the agent tried to enter a blocked cell
        self.sum_a_star = 0  # number of A* invocations
        self.plan_time = 0  # seconds spent inside a_star
        self.agent_x = []  # x coordinate of every step actually taken
        self.agent_y = []  # y coordinate of every step actually taken
        self.agent_nx = []
        self.agent_ny = []
        self.agentGrid = []  # one knowledge snapshot per step taken

    def a_star(self, source, current_heuristic = 'm'):
        """Plan a path from ``source`` to ``self.target`` on ``self.agent_grid``.

        Returns the list of cells from ``source`` to the target (inclusive),
        or ``[]`` when the target cannot be reached with current knowledge.
        """
        dirx = [-1, 1, 0, 0]
        diry = [0, 0, 1, -1]
        visited = set()  # cells that have been pushed at least once
        closed_list = set()  # cells that have been expanded
        open_list = MyPriorityQueue(current_heuristic, self.target)
        open_list.g[source] = 0
        open_list.push(source)
        visited.add(source)
        parent = {source: None}
        while len(open_list) > 0:
            curr = open_list.pop()
            if curr in closed_list:
                # Stale duplicate: the cell was re-pushed with a better g and
                # has already been expanded through that cheaper entry.
                continue
            self.sum_num_cells_processed += 1
            closed_list.add(curr)
            if curr[0] == self.target[0] and curr[1] == self.target[1]:
                break
            for dx, dy in zip(dirx, diry):
                childx = curr[0] + dx
                childy = curr[1] + dy
                child = (childx, childy)
                if not (0 <= childx < self.m and 0 <= childy < self.n):
                    continue
                if child in closed_list or self.agent_grid[childx][childy] == 'X':
                    continue
                tentative_g = open_list.g[curr] + 1
                if child not in visited:
                    visited.add(child)
                elif tentative_g >= open_list.g[child]:
                    continue
                # First discovery or a cheaper route. Any older queue entry
                # for this cell is left in place and skipped when popped.
                parent[child] = curr
                open_list.g[child] = tentative_g
                open_list.push(child)
        if self.target not in visited:
            return []
        planned_path = []
        curr = self.target
        while curr != source:
            planned_path.append(curr)
            curr = parent[curr]
        planned_path.append(source)
        return planned_path[::-1]

    def check_planned_path(self, planned_path):
        """Walk ``planned_path`` in the true grid until a blocked cell is hit.

        At every cell entered, the four neighbours are observed and copied
        into ``self.agent_grid``. A snapshot of a numeric knowledge map is
        appended to ``self.agentGrid`` per step: -1 marks a known blocked
        cell, 3 a free neighbour observed on this step (decaying by one per
        later step down to 1) and 0 everything else.

        Returns the list of cells actually traversed.
        """
        dirx = [-1, 1, 0, 0]
        diry = [0, 0, -1, 1]
        traversed_path = []
        tmp = np.full((self.m, self.n), 0)
        for step in planned_path:
            currx = step[0]
            curry = step[1]
            if self.grid[currx][curry] == 'X':
                # Bump: remember the block and stop before entering the cell.
                self.agent_grid[currx][curry] = self.grid[currx][curry]
                self.sum_bumps += 1
                break
            self.agent_x.append(currx)
            self.agent_y.append(curry)
            traversed_path.append((currx, curry))

            # Age recent observations, then re-mark every known block.
            fresh = tmp > 1
            tmp[fresh] -= 1
            known_blocked = np.asarray(self.agent_grid) == 'X'
            tmp[~fresh & known_blocked] = -1

            for dx, dy in zip(dirx, diry):
                childx = currx + dx
                childy = curry + dy
                if 0 <= childx < self.m and 0 <= childy < self.n:
                    self.agent_grid[childx][childy] = self.grid[childx][childy]
                    if self.grid[childx][childy] == 'X':
                        tmp[childx][childy] = -1
                    else:
                        tmp[childx][childy] = 3

            # Store a copy so later steps do not overwrite this snapshot.
            self.agentGrid.append(tmp.copy())

        return traversed_path

    def compute_path(self):
        """Run repeated A* from ``self.start`` until ``self.target`` is reached.

        Returns a list of traversed segments (each a list of cells), or ``[]``
        when no path exists or the agent cannot move at all.
        """
        path = []
        curr = self.start
        while curr != self.target:
            self.sum_a_star += 1
            plan_start_time = time.time()
            planned_path = self.a_star(curr)
            plan_end_time = time.time()
            self.plan_time += plan_end_time - plan_start_time
            if len(planned_path) == 0:
                return []
            traversed_path = self.check_planned_path(planned_path)
            if not traversed_path:
                # The very first cell of the plan is blocked.
                return []
            path.append(traversed_path)
            curr = traversed_path[-1]
        return path

### Plots --> avg len of trajectory, avg number of cells processed, avg num of bumps
import time
def calculate_path_length(ans_compute_path):
    """Return the number of cells on a trajectory given as a list of segments.

    Every segment contributes ``len(segment) - 1`` moves and one is added for
    the starting cell, so an empty list yields 1.
    """
    return sum(len(segment) - 1 for segment in ans_compute_path) + 1
from tqdm import tqdm
import numpy as np

def update_final_discovered_grid(final_discovered_grid, rep_a_trajectory, grid):
    """Copy into ``final_discovered_grid`` every cell the agent stood on or saw.

    For each cell on each segment of ``rep_a_trajectory`` the cell itself and
    its in-bounds up/down/left/right neighbours are copied from ``grid``.
    The grid is modified in place and also returned.
    """
    row_count = len(final_discovered_grid)
    col_count = len(final_discovered_grid[0])
    neighbour_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for segment in rep_a_trajectory:
        for pos in segment:
            px, py = pos[0], pos[1]
            final_discovered_grid[px][py] = grid[px][py]
            for off_x, off_y in neighbour_offsets:
                qx = px + off_x
                qy = py + off_y
                if 0 <= qx < row_count and 0 <= qy < col_count:
                    final_discovered_grid[qx][qy] = grid[qx][qy]
    return final_discovered_grid

def multi_process(grid):
    """Run Agent 1 on one serialized 50x50 maze and return its statistics.

    ``grid`` is a sequence whose first 2500 items are the cell symbols, read
    50 at a time to form the rows. The agent goes from (0, 0) to (49, 49).

    Returns a tuple: (shortest path length in the finally discovered grid,
    cells processed, bumps, A* calls, overall time, planning time,
    trajectory length, agent x positions, agent y positions, grid snapshots).
    """
    m = 50
    n = 50
    serialized = grid
    rows = [list(serialized[offset:offset + n]) for offset in range(0, m * n, n)]
    true_grid = np.array(rows)  # matrix that contains full knowledge of gridworld

    gw = GridWorld(true_grid, (0, 0), (49, 49), 50, 50)
    overall_time_start = time.time()
    ans = gw.compute_path()
    overall_time_end = time.time()

    # Everything starts as blocked; only cells the agent saw are overwritten.
    final_discovered_grid = update_final_discovered_grid(
        np.full((50, 50), 'X'), ans, true_grid
    )
    gw_final_discovered = GridWorld(
        final_discovered_grid, (0, 0), (49, 49), 50, 50,
        agent_grid=final_discovered_grid,
    )
    shortest_path_final_discovered = gw_final_discovered.a_star((0, 0), 'm')

    return (
        len(shortest_path_final_discovered),
        gw.sum_num_cells_processed,
        gw.sum_bumps,
        gw.sum_a_star,
        overall_time_end - overall_time_start,
        gw.plan_time,
        calculate_path_length(ans),
        gw.agent_x,
        gw.agent_y,
        gw.agentGrid,
    )


In [None]:
import numpy as np
from itertools import chain  # flattens each 2-D knowledge snapshot into one list

cores = int(multiprocessing.cpu_count())
print(cores)
cnt = 0
result_y = []
# p_values = np.round(p_values,2)
# result_x = p_values[1:50]
avg_num_cells_processed = []
avg_bumps = []
avg_astar = []
avg_overall_time = []
avg_traj_len = []
avg_path_len = []
avg_plan_time = []

sum_num_cells_processed = 0
sum_bumps = 0
sum_astar = 0
sum_overall_time = 0
sum_plan_time = 0
sum_traj_len = 0
sum_path_len = 0

# Parallel per-step sample lists (pickled by a later cell): flattened
# knowledge snapshot, current position and next position.
data_g = []
data_x = []
data_y = []
data_nx = []
data_ny = []

with open('dataProject4.txt', 'r') as f:
    grid_data = f.readlines()
data = []
p = multiprocess.Pool(processes=cores)
for i in p.map(multi_process, grid_data):
    cnt += 1
    print("Completed for grid:", cnt)
    print("Path Length: ", i[0])
    sum_path_len += i[0]
    sum_num_cells_processed += i[1]
    sum_bumps += i[2]
    sum_astar += i[3]
    sum_overall_time += i[4]
    sum_plan_time += i[5]
    sum_traj_len += i[6]
    ## Data generated in check_planned_path: i[7] / i[8] are the agent's
    ## x / y positions and i[9] the matching knowledge snapshots.
    ## The "next position" lists are the positions shifted by one step,
    ## padded with the sentinel 99 for the final step.
    agent_nx = i[7][1:]
    agent_nx.append(99)
    agent_ny = i[8][1:]
    agent_ny.append(99)

    # Training samples: every step that has a real next position.
    # NOTE(review): the original appended both the flattened and the 2-D
    # snapshot to data_g, which misaligned it with data_x/data_y; only the
    # flattened form is kept here - confirm this is the form the model expects.
    for j in range(0, len(i[7]) - 1):
        agentGrid = list(chain.from_iterable(i[9][j]))
        data_g.append(agentGrid)
        data_x.append(i[7][j])
        data_y.append(i[8][j])
        data_nx.append(agent_nx[j])
        data_ny.append(agent_ny[j])

    # CSV rows: every step, including the last one (next position = 99).
    for j in range(0, len(i[7])):
        agentGrid = list(chain.from_iterable(i[9][j]))
        data.append({'GridNumber': cnt, 'Agent_x': i[7][j], 'Agent_y': i[8][j], 'AgentGrid': agentGrid, 'Agent_nx': agent_nx[j], 'Agent_ny': agent_ny[j]})
    if cnt == 50:
        break
p.close()
p.join()


In [None]:
import csv  # not imported anywhere else in the visible notebook

# Write one CSV row per recorded agent step (rows were collected in `data`).
fieldnames = ['GridNumber', 'Agent_x', 'Agent_y', 'AgentGrid', 'Agent_nx', 'Agent_ny']
with open('data.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)

In [None]:
#data for assignment 4
import pickle  # not imported anywhere else in the visible notebook

# Each sample list goes to its own pickle file.
for file_name, payload in (
    ('DataAgent1_g.pkl', data_g),
    ('DataAgent1_x.pkl', data_x),
    ('DataAgent1_y.pkl', data_y),
    ('DataAgent1_nx.pkl', data_nx),
    ('DataAgent1_ny.pkl', data_ny),
):
    with open(file_name, 'wb') as f:
        pickle.dump(payload, f)

## Data Generation for Agent 4

In [None]:
import numpy as np
from sortedcontainers import SortedSet
from math import sqrt
import multiprocessing
from itertools import chain
np.set_printoptions(threshold=np.inf)

In [None]:
class MyPriorityQueue(object):
    """Priority queue used as the A* open list.

    Nodes are stored as ``(f, h, cell)`` tuples in a ``SortedSet``; the node
    with the smallest tuple is popped first. The caller has to set
    ``g[cell]`` before pushing ``cell``.
    """

    def __init__(self, current_heuristic, target):
        self.current_heuristic = current_heuristic  # 'm', 'e' or 'c'
        self._data = SortedSet()
        self.target = target  # cell the heuristic measures distance to
        self.g = dict()  # cell -> cost from the source
        self.h = dict()  # cell -> cached heuristic value

    def __len__(self):
        return len(self._data)

    def push(self, item):
        """Add ``item`` with key (g + h, h); requires ``self.g[item]``."""
        f_value = self.calculate_priority(item)  # refreshes self.h[item] too
        self._data.add((f_value, self.h[item], item))

    def pop(self):
        """Remove the smallest node and return its cell."""
        return self._data.pop(0)[2]

    def manhattan_distance(self, x, y):
        ### Manhattan distance = |x0-y0| + |x1-y1|, x = [x0,x1] y = [y0,y1]
        return abs(x[0] - y[0]) + abs(x[1] - y[1])

    def euclidean_distance(self, x, y):
        ### Euclidean distance = sqrt((x0-y0)**2 + (x1-y1)**2), x = [x0,x1] y = [y0,y1]
        return sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2)

    def chebyshev_distance(self, x, y):
        ### Chebyshev distance = max(|x0-y0|, |x1-y1|), x = [x0,x1] y = [y0,y1]
        # Compare the two points coordinate-wise, as the formula states.
        return max(abs(x[0] - y[0]), abs(x[1] - y[1]))

    def get_heuristic(self, x):
        """Compute the selected heuristic for ``x``, cache it and return it.

        With an unrecognised heuristic code the cached value is returned
        unchanged (KeyError if there is none).
        """
        measure = self.current_heuristic
        if measure == 'm':
            self.h[x] = self.manhattan_distance(x, self.target)
        elif measure == 'e':
            self.h[x] = self.euclidean_distance(x, self.target)
        elif measure == 'c':
            self.h[x] = self.chebyshev_distance(x, self.target)
        return self.h[x]

    def calculate_priority(self, x):
        """Return f(x) = g(x) + h(x)."""
        return self.g[x] + self.get_heuristic(x)

In [None]:
class GridWorld:
    """Grid plus sensing bookkeeping; used for the true maze and the agent's view.

    Notation in ``grid``: '0' means the cell is empty and '1' means it is
    blocked. ``status_x`` stores per-cell knowledge as 1 (confirmed blocked),
    0 (confirmed unblocked) or -1 (unconfirmed).
    """

    def __init__(self, grid, start, target, n, m):
        self.n = n  ## Number of columns
        self.m = m  ## Number of rows
        self.start = start  ## Starting cell of the agent
        self.target = target  ## Target cell of the agent
        self.trajectory = []  ## Trajectory followed by the agent
        self.grid = grid  ## Grid contents ('0' / '1' per cell)
        self.dirx8 = [0, 0, 1, -1, 1, -1, 1, -1]  ## Directions used for sensing
        self.diry8 = [1, -1, 0, 0, 1, -1, -1, 1]  ## Directions used for sensing
        self.dirx4 = [0, 0, 1, -1]  ## Directions used for traversing
        self.diry4 = [1, -1, 0, 0]  ## Directions used for traversing
        self.sensed_x = set()  ## Cells sensed by the agent
        self.status_x = {}  ## cell -> 1 / 0 / -1 (see class docstring)
        self.cx = {}  ## cell -> number of blocked neighbours sensed there
        # Filled by pre_computation(); created here so that method can run.
        self.nx = {}  ## cell -> number of in-bounds neighbours
        self.hx = {}  ## cell -> initialised to the same neighbour count

    def pre_computation(self):
        """Store, for every cell, how many of its 8 neighbours lie inside the grid."""
        for i in range(self.m):
            for j in range(self.n):
                neighbour_count = 0
                for dx, dy in zip(self.dirx8, self.diry8):
                    if 0 <= i + dx < self.m and 0 <= j + dy < self.n:
                        neighbour_count += 1
                cell = (i, j)
                self.nx[cell] = neighbour_count
                self.hx[cell] = neighbour_count

    def partial_sensing(self, cell, agentGridObject):
        '''
            Counts the blocked cells among the (up to 8) in-bounds neighbours
            of ``cell`` in this grid, records the count in
            ``agentGridObject.cx`` and marks ``cell`` as sensed there.
            Returns the count.
        '''
        x = cell[0]
        y = cell[1]
        cx = 0
        for dx, dy in zip(self.dirx8, self.diry8):
            childx = x + dx
            childy = y + dy
            if 0 <= childx < self.m and 0 <= childy < self.n:
                if self.grid[childx][childy] == '1':
                    cx += 1
        agentGridObject.cx[cell] = cx  ## Updating partial sensing information for the agent
        agentGridObject.sensed_x.add(cell)  ## Adding cell to the set of sensed cells
        return cx

    def visit_cell(self, cell, agentGridObject):
        '''
            Reveals whether ``cell`` is blocked: copies its value from this
            grid into ``agentGridObject`` (both ``status_x`` and ``grid``)
            and returns it as an int (1 blocked, 0 free).
        '''
        x = cell[0]
        y = cell[1]
        status = int(self.grid[x][y])
        agentGridObject.status_x[cell] = status
        agentGridObject.grid[x][y] = self.grid[x][y]
        return status

In [None]:
from collections import deque
class Inference_Agent4:
    """Equation-based inference over the agent's grid knowledge.

    An equation is a two-item list ``[cells, value]``: ``cells`` is a set of
    (x, y) tuples whose status is still unknown and ``value`` is how many of
    them are blocked. ``kb_eqns`` is the knowledge base of unsolved
    equations and ``q`` is the work queue of equations to process.
    """

    def __init__(self, agentGridObject,m,n):
        self.agentGridObject = agentGridObject  # Object representing the gridworld of the Agent
        self.dirx8 = [0, 0, 1, -1, 1, -1, 1, -1]  # Direction used for sensing
        self.diry8 = [1, -1, 0, 0, 1, -1, -1, 1]  # Direction used for sensing
        self.kb_eqns = []  # list of [set_of_cells, value] equations
        self.q = deque()
        self.m = m  # bound on the first cell index
        self.n = n  # bound on the second cell index

    def _set_status(self, cell, value):
        """Record ``value`` for ``cell`` in both ``status_x`` and the agent grid."""
        self.agentGridObject.status_x[cell] = value
        self.agentGridObject.grid[cell[0]][cell[1]] = str(value)

    def mark_cell(self, type_, cell):
        """Mark ``cell`` as blocked when ``type_`` is 1, otherwise as unblocked."""
        self._set_status((cell[0], cell[1]), 1 if type_ == 1 else 0)

    def put_val_kb(self):
        '''
        Substitutes every already-known cell into the equations of kb_eqns.
        An equation that changes is removed from the knowledge base and its
        reduced form is put at the front of the queue.
        '''
        status = self.agentGridObject.status_x
        untouched = []
        for eqn in self.kb_eqns:
            val = eqn[1]
            changed = False
            unknown_cells = set()
            for cell in eqn[0]:
                if cell in status and status[cell] != -1:
                    val = val - status[cell]
                    changed = True
                else:
                    unknown_cells.add(cell)
            if changed:
                self.q.appendleft([unknown_cells, val])
            else:
                untouched.append(eqn)
        # Rebuild the list after the scan; popping while iterating would skip
        # the equation that follows every removed one.
        self.kb_eqns[:] = untouched

    def get_eqn_current_cell(self, cell):
        '''
            Generate an equation for a cell, according to the current knowledge of the agent.
            Returns (unknown in-bounds neighbours, cx[cell] minus the known
            status values of the other in-bounds neighbours).
        '''
        status = self.agentGridObject.status_x
        var = set()
        val = self.agentGridObject.cx[cell]
        for dx, dy in zip(self.dirx8, self.diry8):
            childx = cell[0] + dx
            childy = cell[1] + dy
            if not (0 <= childx < self.m and 0 <= childy < self.n):
                continue
            child = (childx, childy)
            if child not in status or status[child] == -1:
                var.add(child)
            else:
                val = val - status[child]
        return var, val

    def check_solvability(self, eqn):
        '''
            Check whether the current equation is solvable or not.
            Two cases:
            if number of variables == val --> every variable is 1.
            if val == 0 --> every variable is 0.
            Solved variables are written to the agent grid; returns True
            when the equation was solved.
        '''
        var = eqn[0]
        val = eqn[1]
        if len(var) == val:
            solved_value = 1
        elif val == 0:
            solved_value = 0
        else:
            return False
        for cell in var:
            self._set_status(cell, solved_value)
        return True

    def simplify_eqn(self, eqn):
        '''
            Simplifies the equation, according to the current knowledge of the grid.
            Returns a new [cells, value] list with known cells substituted.
        '''
        status = self.agentGridObject.status_x
        val = eqn[1]
        remaining = set()
        for cell in eqn[0]:
            if cell in status and status[cell] >= 0:
                val = val - status[cell]
            else:
                remaining.add(cell)
        return [remaining, val]

    def check_subsequence(self, eqn1, eqn2):
        '''
            Check if the variables of eqn2 are a subset of the variables of eqn1.
        '''
        return eqn2[0].issubset(eqn1[0])

    def simplify_kb_subsequence(self, q_eqn):
        """Reduce ``q_eqn`` and the knowledge base against each other.

        * If ``q_eqn`` is already in the knowledge base, stop.
        * If ``q_eqn``'s cells are a subset of a KB equation's cells, that KB
          equation is removed and its difference is either solved directly
          (one cell left) or appended to the queue.
        * If a KB equation's cells are a subset of ``q_eqn``'s cells, the
          difference is put at the front of the queue.

        Returns True when ``q_eqn`` itself should still be added to the
        knowledge base.
        """
        flag = True  ### Denotes whether the current equation will be added to KB or not
        var_q = q_eqn[0]
        val_q = q_eqn[1]
        snapshot = list(self.kb_eqns)
        survivors = []
        for position, kb_eqn in enumerate(snapshot):
            if kb_eqn == q_eqn:
                flag = False
                survivors.extend(snapshot[position:])  # keep everything not yet examined
                break
            var_kb = kb_eqn[0]
            val_kb = kb_eqn[1]
            if self.check_subsequence(kb_eqn, q_eqn):
                #######  No. 2 ######## Will reduce the eqn of KB
                new_eq_kb = var_kb.difference(var_q)
                new_val_kb = val_kb - val_q
                if len(new_eq_kb) == 1:
                    self._set_status(next(iter(new_eq_kb)), new_val_kb)
                else:
                    self.q.append([new_eq_kb, new_val_kb])
            else:
                survivors.append(kb_eqn)
            if self.check_subsequence(q_eqn, kb_eqn):
                flag = False
                ####### No. 1 ######### Will reduce the eqn of queue
                self.q.appendleft([var_q.difference(var_kb), val_q - val_kb])
        # Apply the removals once the scan is over (see put_val_kb).
        self.kb_eqns[:] = survivors
        self.put_val_kb()
        return flag

    def simplify_kb_setDiff(self, eqn):
        """Solve cells from the difference between ``eqn`` and each KB equation.

        For a pair ordered so that value(first) - value(second) >= 0: when
        that difference equals the number of cells only in ``first``, those
        cells are all blocked and the cells only in ``second`` are all free.

        Returns False when at least one such deduction was made.
        """
        flag = True
        for kb_eqn in self.kb_eqns:
            ##### We choose first and second s.t. first[1]-second[1] = rhs>=0
            first, second = eqn, kb_eqn
            if first[1] - second[1] < 0:
                first, second = second, first
            rhs = first[1] - second[1]
            lhs = first[0].difference(second[0])
            if len(lhs) == rhs:
                flag = False
                neg_lhs = second[0].difference(first[0])
                for cell in lhs:
                    self._set_status(cell, 1)
                for cell in neg_lhs:
                    self._set_status(cell, 0)
        self.put_val_kb()
        return flag

    def infer(self, type_, cell):
        '''
        Starts inference from a cell of the planned path. The cell is marked
        (type_ 1 = blocked, otherwise unblocked), its value is substituted
        into the knowledge base, its own neighbour equation is queued, and
        the queue is processed until it is empty.
        '''
        self.mark_cell(type_, cell)  ## This function marks the current cell as blocked or unblocked
        self.put_val_kb()
        var, val = self.get_eqn_current_cell(cell)
        self.q.append([var, val])
        while len(self.q) > 0:
            eqn = self.simplify_eqn(self.q.popleft())
            if self.check_solvability(eqn):
                self.put_val_kb()
                continue
            add_to_KB1 = self.simplify_kb_subsequence(eqn)
            add_to_KB2 = self.simplify_kb_setDiff(eqn)
            if add_to_KB1 and add_to_KB2:
                self.kb_eqns.append(eqn)

In [None]:
class Agent4:
    """Agent 4: repeated A* combined with the inference engine.

    ``grid`` is the object holding the true maze (it must provide
    ``visit_cell``, ``partial_sensing`` and a ``grid`` matrix) and
    ``agent_grid`` is the object holding the agent's knowledge (``grid``,
    ``sensed_x`` and ``status_x``). Blocked cells are marked '1'.
    """

    def __init__(self, n, m, start, target, inference_agent, grid,agent_grid):
        self.n = n  ## Number of columns in grid
        self.m = m  ## Number of rows in the grid
        self.start = start  ## The start cell of the grid
        self.target = target  ## The target cell of the grid
        self.agent_grid = agent_grid  ## The current knowledge of the agent
        self.dirx4 = [-1, 1, 0, 0]  ## 4 Directions for the agent to travel
        self.diry4 = [0, 0, -1, 1]  ## 4 Directions for the agent to travel
        self.inference_agent = inference_agent
        self.grid = grid  # The full knowledge of the maze
        self.num_bumps = 0  ## Number of bumps in planned_path
        self.sum_num_cells_processed = 0  ## Cells expanded over all A* runs
        self.sum_a_star = 0  ## Number of A* invocations
        self.lim_inference = 50
        self.agent_x = []  ## x coordinate of every cell examined
        self.agent_y = []  ## y coordinate of every cell examined
        self.agent_nx = []
        self.agent_ny = []
        self.agentGrid = []  ## One knowledge snapshot per cell examined

    def a_star(self, source, current_heuristic = 'm'):
        '''
            A-star algorithm that plans a path based on the current knowledge
            of the agent. Returns the list of cells from source to target
            (inclusive), or [] when the target cannot be reached.
        '''
        dirx = [-1, 1, 0, 0]
        diry = [0, 0, 1, -1]
        visited = set()  ## cells that have been pushed at least once
        closed_list = set()  ## cells that have been expanded
        open_list = MyPriorityQueue(current_heuristic, self.target)
        open_list.g[source] = 0
        open_list.push(source)
        visited.add(source)
        parent = {source: None}  ## parent of each visited cell
        while len(open_list) > 0:
            curr = open_list.pop()
            if curr in closed_list:
                # Stale duplicate left behind by a re-push with a better g;
                # skipping it keeps each cell counted and expanded once.
                continue
            self.sum_num_cells_processed += 1
            closed_list.add(curr)
            if curr[0] == self.target[0] and curr[1] == self.target[1]:
                break
            for dx, dy in zip(dirx, diry):
                childx = curr[0] + dx
                childy = curr[1] + dy
                child = (childx, childy)
                if not (0 <= childx < self.m and 0 <= childy < self.n):
                    continue
                if child in closed_list or self.agent_grid.grid[childx][childy] == '1':
                    continue
                tentative_g = open_list.g[curr] + 1
                if child not in visited:
                    visited.add(child)
                elif tentative_g >= open_list.g[child]:
                    continue
                parent[child] = curr
                open_list.g[child] = tentative_g
                open_list.push(child)
        if self.target not in visited:
            return []
        planned_path = []
        curr = self.target
        while curr != source:
            planned_path.append(curr)
            curr = parent[curr]
        planned_path.append(source)
        return planned_path[::-1]

    def check_planned_path(self, planned_path):
        '''
            Follows planned_path cell by cell. A cell that has not been
            sensed yet is visited, sensed and handed to the inference agent
            (type 0 = unblocked, type 1 = blocked). The walk stops when the
            current cell turns out to be blocked, or when one of the next 4
            cells of the plan is known to be blocked.

            A numeric knowledge map is snapshotted into self.agentGrid per
            examined cell: -1 = known blocked, 3 = free cell sensed on this
            step (decaying by one per later step down to 1), 0 = anything else.

            Returns (traversed_path, restart_cell); restart_cell is None when
            the whole plan was walked or when the first cell itself is blocked.
        '''
        n = len(planned_path)
        traversed_path = []
        restart_cell = None
        tmp = np.full((self.m, self.n), 0)
        for i, cell in enumerate(planned_path):
            currx = cell[0]
            curry = cell[1]
            self.agent_x.append(currx)
            self.agent_y.append(curry)

            # Age recent observations, then re-mark every known block.
            fresh = tmp > 1
            tmp[fresh] -= 1
            known_blocked = np.asarray(self.agent_grid.grid) == '1'
            tmp[~fresh & known_blocked] = -1

            ## Inference only runs for a cell that has not been sensed yet;
            ## a sensed cell brings no new knowledge.
            if cell not in self.agent_grid.sensed_x:
                curr_status = self.grid.visit_cell(cell, self.agent_grid)  ## blocked or not
                self.grid.partial_sensing(cell, self.agent_grid)
                self.agent_grid.grid[currx][curry] = str(curr_status)
                self.inference_agent.infer(curr_status, cell)
                if self.grid.grid[currx][curry] == '1':
                    tmp[currx][curry] = -1
                else:
                    tmp[currx][curry] = 3
            else:
                curr_status = self.agent_grid.status_x[cell]
                if self.grid.grid[currx][curry] == '1':
                    tmp[currx][curry] = -1
                elif tmp[currx][curry] > 1:
                    tmp[currx][curry] -= 1

            # Store a copy so later steps do not overwrite this snapshot.
            self.agentGrid.append(tmp.copy())

            if curr_status == 1:
                self.num_bumps += 1
                # Replan from the previous cell; there is none when i == 0.
                restart_cell = planned_path[i - 1] if i > 0 else None
                break
            ### The current cell is unblocked. Check the next 4 cells of the
            ### plan, since the inference agent will not update beyond 4 nodes.
            planned_path_blocked = False
            for j in range(4):
                if i + j + 1 >= n:
                    break
                next_cell = planned_path[i + j + 1]
                if self.agent_grid.grid[next_cell[0]][next_cell[1]] == '1':
                    planned_path_blocked = True
                    break
            traversed_path.append(cell)
            if planned_path_blocked:
                restart_cell = cell
                break
        return traversed_path, restart_cell

    def compute_path(self):
        '''
            Repeats plan (a_star) and execute (check_planned_path) from
            self.start until self.target is reached. Returns the list of
            traversed segments, or [] when no path exists or the agent
            cannot move at all.
        '''
        path = []
        curr = self.start
        while curr != self.target:
            self.sum_a_star += 1
            planned_path = self.a_star(curr)
            if len(planned_path) == 0:
                return []
            traversed_path, restart_node = self.check_planned_path(planned_path)
            if not traversed_path:
                # The first cell of the plan is blocked: nothing was traversed.
                return []
            path.append(traversed_path)
            if traversed_path[-1] == self.target:
                break
            curr = restart_node
        return path

In [None]:
### Plots --> avg len of trajectory, avg number of cells processed, avg num of bumps

def calculate_path_length(ans_compute_path):
    """Return the total number of cells covered by a list of path segments.

    Each segment contributes ``len(segment) - 1`` and one is added to the
    total at the end, so an empty list of segments yields 1.
    """
    return sum(len(segment) - 1 for segment in ans_compute_path) + 1
from tqdm import tqdm
def multi_process(grid):
    """Run Agent4 on one serialized grid and return its collected statistics.

    Args:
        grid: iterable of characters describing one grid, row after row.
            A '.' becomes '0' and every other character becomes '1'. Only
            the first 50*50 encoded characters are used, so anything after
            them (e.g. a trailing newline) is ignored.

    Returns:
        Tuple of the agent's (sum_num_cells_processed, num_bumps, sum_a_star,
        agent_x, agent_y, agentGrid) attributes after compute_path() has run.
    """
    rows = 50
    cols = 50
    start = (0, 0)
    target = (rows - 1, cols - 1)

    # Translate the textual grid into a string of '0'/'1' characters.
    encoded = ''.join('0' if ch == '.' else '1' for ch in grid)

    # Cut the flat string into rows of `cols` characters each.
    full_grid = np.array(
        [list(encoded[offset:offset + cols]) for offset in range(0, rows * cols, cols)]
    )

    gridObject = GridWorld(full_grid, start, target, cols, rows)  ### Original grid knowledge object
    agentGridObject = GridWorld(np.full((rows, cols), '0'), start, target, cols, rows)  ### Agent Grid Object
    inference_agent = Inference_Agent4(agentGridObject, rows, cols)  ### Inference Agent Grid
    grid_agent = Agent4(cols, rows, start, target, inference_agent, gridObject, agentGridObject)

    # The returned path is not needed; the call fills in the agent's counters.
    grid_agent.compute_path()

    return (
        grid_agent.sum_num_cells_processed,
        grid_agent.num_bumps,
        grid_agent.sum_a_star,
        grid_agent.agent_x,
        grid_agent.agent_y,
        grid_agent.agentGrid,
    )
        

In [None]:
# Run Agent 4 over the stored grids in parallel and collect, for every step
# of every run, the agent's position, its flattened grid knowledge and the
# position it moved to next.
from itertools import chain  # flattens each 2-D agent grid into one flat list

cores = int(multiprocessing.cpu_count())
# print(cores)
result_y = []
# p_values = np.round(p_values,2)
# result_x = p_values[1:50]
avg_num_cells_processed = []
avg_bumps = []
avg_astar = []
m = 50
n = 50
start = (0,0)
target = (m-1,n-1)
sum_num_cells_processed = 0
sum_bumps = 0
sum_astar = 0
cnt = 0

# Read every grid (one per line) and release the file handle right away.
with open('dataProject4-100.txt', 'r') as f:
    grid_data = f.readlines()
# grid_data = grid_data[:1]
data = []

# NOTE(review): data_g, data_x, data_y, data_nx and data_ny are appended to
# below but are not created in this cell -- presumably an earlier cell
# initialises them; confirm.

# The context manager terminates the worker processes on exit, which also
# cleans up the tasks still pending when the loop below breaks early.
with multiprocessing.Pool(processes=cores) as p:
    for result in p.imap_unordered(multi_process, grid_data):
        cnt += 1
        print("Completed for grid:", cnt)
        cells_processed, bumps, astar_calls, agent_xs, agent_ys, agent_grids = result
        sum_num_cells_processed += cells_processed
        sum_bumps += bumps
        sum_astar += astar_calls

        # "Next position" lists: the positions shifted by one step, padded
        # with 99 so they keep the same length as the position lists
        # (99 looks like a "no next step" sentinel -- TODO confirm).
        agent_nx = agent_xs[1:]
        agent_nx.append(99)
        agent_ny = agent_ys[1:]
        agent_ny.append(99)

        # Pickle data set: every step except the last one. The loop index is
        # `k`; the body previously indexed with an unrelated `j`.
        for k in range(len(agent_xs) - 1):
            data_g.append(list(chain.from_iterable(agent_grids[k])))
            data_x.append(agent_xs[k])
            data_y.append(agent_ys[k])
            data_nx.append(agent_nx[k])
            data_ny.append(agent_ny[k])

        # CSV data set: every step, including the padded final one.
        for j in range(len(agent_xs)):
            agentGrid = list(chain.from_iterable(agent_grids[j]))
            data.append({'GridNumber': cnt, 'Agent_x': agent_xs[j], 'Agent_y': agent_ys[j], 'AgentGrid': agentGrid, 'Agent_nx': agent_nx[j], 'Agent_ny': agent_ny[j]})

        # Only the first three completed grids are collected.
        if cnt == 3:
            break

In [None]:
import csv
# Column order of the CSV; these are the keys of the per-step dicts in `data`.
fieldnames = ['GridNumber', 'Agent_x', 'Agent_y', 'AgentGrid', 'Agent_nx', 'Agent_ny']
# newline='' lets the csv module control line endings itself.
with open('data_4.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    # One header row, then one row per collected step.
    writer.writeheader()
    writer.writerows(data)

In [None]:
#data for assignment 4
# Each collected list is pickled to its own file, in this order.
for filename, payload in (
    ('DataAgent4_g.pkl', data_g),
    ('DataAgent4_x.pkl', data_x),
    ('DataAgent4_y.pkl', data_y),
    ('DataAgent4_nx.pkl', data_nx),
    ('DataAgent4_ny.pkl', data_ny),
):
    with open(filename, 'wb') as f:
        pickle.dump(payload, f)

## Training 

In [None]:
import ast
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import numpy as np
import pickle

In [None]:
# Load the pickled records from data/data_50_agent1.pkl.
# The result is bound to `agent_1_50`, the name the following cells read;
# binding it to `agent1_50` left `agent_1_50` undefined.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('data/data_50_agent1.pkl', 'rb') as handle:
    agent_1_50 = pickle.load(handle)

In [None]:
# Wrap the loaded records in a DataFrame so columns can be selected by name.
agent_1_50 = pd.DataFrame(agent_1_50)

In [None]:
# Quick sanity check of the loaded data: first rows and (rows, columns) size.
agent_1_50.head()
agent_1_50.shape

In [None]:
# Model inputs: the agent's current position and its grid knowledge.
X = agent_1_50.loc[:,['curr_x', 'curr_y', 'Agent grid']]

In [None]:
# Targets: the position the agent moved to next.
Y = agent_1_50.loc[:,['next_x', 'next_y']]

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [None]:
# Renumber every split from 0 in place, discarding the shuffled original
# index, so the positional loops below line up across X_* and Y_*.
for split in (X_train, X_test, Y_train, Y_test):
    split.reset_index(inplace=True, drop=True)

In [None]:
# Preview the first training rows after the index reset.
X_train.head()

In [None]:
# Build one flat feature vector per training sample: the agent's grid with
# the current cell scaled by 100 and its in-bounds neighbours scaled by 25,
# except neighbours whose value is 1, which are left untouched.
# NOTE(review): the scaling is multiplicative, so it only changes cells
# whose value is non-zero -- confirm this matches the grid encoding.
vectors_train = []
for i in tqdm(range(X_train.shape[0])):
    curr_x = X_train.iloc[i,0]
    curr_y = X_train.iloc[i,1]
    # Work on a copy: editing the stored grid in place would modify the
    # DataFrame's data and compound the scaling if this cell is re-run.
    l = list(X_train.iloc[i,2])

    # Row-major index of the current cell in the flattened 50x50 grid.
    pos = 50*curr_x + curr_y
    l[pos] = l[pos]*100

    for dx, dy in [(0,1), (0,-1), (1,0), (-1,0)]:
        child_x = curr_x + dx
        child_y = curr_y + dy

        # Skip neighbours that fall outside the grid.
        if(child_x < 0 or child_x > 49):
            continue
        if(child_y < 0 or child_y > 49):
            continue

        child_pos = 50*child_x + child_y

        if(l[child_pos] == 1):
            continue
        l[child_pos] = l[child_pos]*25
    vectors_train.append(l)

In [None]:
# Build one flat feature vector per test sample: the agent's grid with the
# current cell scaled by 100 and its in-bounds neighbours scaled by 25,
# except neighbours whose value is 1, which are left untouched.
# NOTE(review): the scaling is multiplicative, so it only changes cells
# whose value is non-zero -- confirm this matches the grid encoding.
vectors_test = []
for i in tqdm(range(X_test.shape[0])):
    curr_x = X_test.iloc[i,0]
    curr_y = X_test.iloc[i,1]
    # Work on a copy: editing the stored grid in place would modify the
    # DataFrame's data and compound the scaling if this cell is re-run.
    l = list(X_test.iloc[i,2])

    # Row-major index of the current cell in the flattened 50x50 grid.
    pos = 50*curr_x + curr_y
    l[pos] = l[pos]*100

    for dx, dy in [(0,1), (0,-1), (1,0), (-1,0)]:
        child_x = curr_x + dx
        child_y = curr_y + dy

        # Skip neighbours that fall outside the grid.
        if(child_x < 0 or child_x > 49):
            continue
        if(child_y < 0 or child_y > 49):
            continue

        child_pos = 50*child_x + child_y

        if(l[child_pos] == 1):
            continue
        l[child_pos] = l[child_pos]*25
    vectors_test.append(l)

In [None]:
# One-hot direction label per training sample, ordered [up, right, down, left].
# A change in x decides up/down; otherwise a smaller next y means left and
# anything else (including no change at all) is labelled right.
train_labels = []
for row in range(X_train.shape[0]):
    here_x, here_y = X_train.iloc[row,0], X_train.iloc[row,1]
    there_x, there_y = Y_train.iloc[row,0], Y_train.iloc[row,1]
    if here_x != there_x:
        label = [1,0,0,0] if here_x > there_x else [0,0,1,0]  # up / down
    elif here_y > there_y:
        label = [0,0,0,1]  # left
    else:
        label = [0,1,0,0]  # right
    train_labels.append(label)

In [None]:
# One-hot direction label per test sample, ordered [up, right, down, left].
# A change in x decides up/down; otherwise a smaller next y means left and
# anything else (including no change at all) is labelled right.
test_labels = []
for row in range(X_test.shape[0]):
    here_x, here_y = X_test.iloc[row,0], X_test.iloc[row,1]
    there_x, there_y = Y_test.iloc[row,0], Y_test.iloc[row,1]
    if here_x != there_x:
        label = [1,0,0,0] if here_x > there_x else [0,0,1,0]  # up / down
    elif here_y > there_y:
        label = [0,0,0,1]  # left
    else:
        label = [0,1,0,0]  # right
    test_labels.append(label)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [None]:
# Fully connected classifier: 2500 inputs (presumably the flattened 50x50
# grid vector), two hidden ReLU layers of 1500 units, and a 4-way softmax
# matching the four one-hot direction labels.
model = Sequential([
    Dense(1500, input_dim=2500, activation='relu'),
    Dense(1500, activation='relu'),
    Dense(4, activation='softmax'),
])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Categorical cross-entropy pairs with the softmax output and one-hot labels;
# accuracy is tracked as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Stack the training feature vectors into one array for Keras.
# The list built earlier is named `vectors_train`; `train_vectors` is never
# defined. Note this rebinds X, which previously held the input columns.
X = np.array(vectors_train)

In [None]:
# Stack the one-hot training labels into an array; this rebinds Y, which
# previously held the next_x/next_y columns.
Y = np.array(train_labels)

In [None]:
# Train on the stacked vectors and one-hot labels for 10 passes over the data.
model.fit(X,Y,epochs=10)

In [None]:
# Stack the test feature vectors and labels into arrays for prediction.
# The list built earlier is named `vectors_test`; `test_vectors` is never
# defined. Note this rebinds X_test/Y_test from DataFrames to arrays.
X_test = np.array(vectors_test)
Y_test = np.array(test_labels)

In [None]:
# Predicted outputs of the 4-way softmax layer for every test vector.
pred = model.predict(X_test)

In [None]:
# Count the test samples whose highest-scoring predicted class matches the
# position of the 1 in the one-hot label.
acc = sum(
    1
    for predicted_row, expected_row in zip(pred, Y_test)
    if np.argmax(predicted_row) == np.argmax(expected_row)
)

In [None]:
# Test accuracy: fraction of test samples predicted correctly.
acc/len(X_test)