# Provided code

You shouldn't need to change anything in this section.

### Load data to Colab

In [24]:
if False:  # manual loading
    from google.colab import file
    uploaded = files.upload()  # then browse, select the files
    
else:  # automatic loading
    import requests
    import gzip
    
    filepath_d_gr = 'http://users.diag.uniroma1.it/challenge9/data/USA-road-d/' + 'USA-road-d.NY.gr.gz'
    filepath_t_gr = 'http://users.diag.uniroma1.it/challenge9/data/USA-road-t/' + 'USA-road-t.NY.gr.gz'
    filepath_d_co = 'http://users.diag.uniroma1.it/challenge9/data/USA-road-d/' + 'USA-road-d.NY.co.gz'
    
    def loader(url):
        name = url.rsplit('/', 1)[1].rsplit('.', 1)[0]
        savename = name + '.txt'
        
        with open(savename, 'wb') as f_out:
            with requests.get(url) as r:
                f_in = gzip.decompress(r.content)
                f_out.write(f_in)
                
        print(savename)
            
    loader(filepath_d_gr)
    loader(filepath_t_gr)
    loader(filepath_d_co)

USA-road-d.NY.gr.txt
USA-road-t.NY.gr.txt
USA-road-d.NY.co.txt


### Graph and Vertex classes

In [25]:
# Vertex implementation
class Vertex:
    # Initialization of a vertex, given a neighbor and the corresponding weight
    # Each vertex contains a list of neighbors and corresponding weights
    def __init__(self, i, neighbor_index, weight):
        self.index = i
        self.neighbors = [neighbor_index]
        self.weights = [weight]
        
    def getNeighbors(self):
        return self.neighbors
    
    def getWeights(self):
        return self.weights
    
    # Add a neighbor with corresponding weight to the vertex
    def _addNeighbor(self, neighbor_index, weight):
        self.neighbors.append(neighbor_index)
        self.weights.append(weight)


# Graph data structure
class Graph:
    # Initializes a graph with n_vertices nodes
    # The graph contains a list of vertices
    def __init__(self, n_vertices):
        self.vertices = [None] * (n_vertices+1)
        self.num_vertices = len(self.vertices)
    
    # Returns the i'th node
    def getVertex(self, i):
        if ((i > len(self.vertices)) | (i <= 0)):
            raise ValueError(f'index {i} is out of bounds')
        else:
            return self.vertices[i]
    
    # Adds a new vertex to the graph
    def _addVertex(self, vertex_index, neighor_index, distance):
        if (self.vertices[vertex_index] == None):
            # Construct new vertex
            self.vertices[vertex_index] = Vertex(vertex_index, neighor_index, distance)
        else:
            # Vertex already in graph but other neighbor, add extra edge
            self.vertices[vertex_index]._addNeighbor(neighor_index, distance)


In [26]:
import fileinput

# Read graph data
def readGraph(filePath):
    n_vertices = 0
    for line in fileinput.input([filePath]):
        words = line.split(" ")
        if (words[0] == "p"):
            n_vertices = int(words[2])
    graph = Graph(n_vertices)
    for line in fileinput.input([filePath]):
        words = line.split(" ")
        if (words[0] == "a"):
            graph._addVertex(int(words[1]), int(words[2]), float(words[3]))
    return graph


# Read coordinates data
def readCoordinates(filepath):
    # Start to count from 1
    coordinates = [None]
    for line in fileinput.input([filepath]):
        words = line.split(" ")
        if (words[0] == "v"):
            coordinates.append([float(words[2]), float(words[3])])
    return coordinates


### Usefull functions

In [27]:
import numpy as np
    
# Priority queue definition
class PriorityQueue(dict):
    def put(self, item, value):
        # Watch out that value is not overwritten with higher value, shouldn't be allowed to happen!
        if item in self :
            value = min( value, self[item])
        self[item] = value
    def pop(self):
        """
        Returns the item with the lowest weight
        """
        item_min = min(self, key=self.get)
        super().pop(item_min)
        return item_min

    
def angles2centimeters(lo, la):
    """
    Convert longitude and latitude to local orthogonal grid
    :param lo: longitude
    :param la: latitude
    :return: height and width coordinates in cm's
    """
    
    radius = 6300 * 1e4  # cm
    la_mean = 40794234.  # 1e-6 degree
    lo_mean = -74016939.  # 1e-6 degree
    
    w = radius * np.cos(np.radians(la / 1e6)) * np.radians((lo - lo_mean) / 1e6)
    h = radius * np.radians((la - la_mean) / 1e6)
    
    return w, h 

# Assignment

## Code skeletons

Feel free to move the following code to the relevant questions. 

Before submitting your code, make sure to execute all code fields sequentially. Notebooks that don't execute sequentially will be penalised.

## Answers

Answer the questions from the assignment and add appropriate code where relevant to the question.

In [6]:
# Question 1
graph = readGraph("USA-road-d.NY.gr.txt")
vertices_nr = graph.num_vertices

edges_nr = 0
for i in range(1, vertices_nr):
    edges_nr += len(graph.getVertex(i).getNeighbors())

print("The number of vertices is {}".format(vertices_nr))
print("The number of edges is {}".format(edges_nr))

The number of vertices is 264347
The number of edges is 733846


In [30]:
# Question 2
print("A* algorithm finds the cheapest path from a start node to the goal by keeping track of the current cheapest path from start node to node n in g(n) and searching for the best path from current node n to goal using a heuristic function h(n), e.g Euclidean distance. Adding up g(n) and h(n) results in the estimated cost of the cheapest path through node n, marked by f(n). The heuristic function should be consistent and admissible, consistent meaning that its estimate to the goal from the current node is always less than or equal to the estimate from any neighbouring node to the goal PLUS the cost of reaching that neighbor from the current node. Admissible means that the heuristic function never overestimates the actual cost of reaching the goal. \n")
print("The choice of a heuristic function is vital - if h(n) =  0, the algorithm behaves as a Dijkstra's algorithm, which always finds the shortest path. If h(n) is admissible (lower than the actual cost of reaching the goal from n), it always finds the shortest path, but might make the algorithm slower. If h(n) is perfect (estimates the cost of reaching the goal from n exactly), the algorithm becomes very fast. If h(n) is not admissible (overestimates the cost sometimes), it might not find the shortest path, but is quicker.")

A* algorithm finds the cheapest path from a start node to the goal by keeping track of the current cheapest path from start node to node n in g(n) and searching for the best path from current node n to goal using a heuristic function h(n), e.g Euclidean distance. Adding up g(n) and h(n) results in the estimated cost of the cheapest path through node n, marked by f(n). The heuristic function should be consistent and admissible, consistent meaning that its estimate to the goal from the current node is always less than or equal to the estimate from any neighbouring node to the goal PLUS the cost of reaching that neighbor from the current node. Admissible means that the heuristic function never overestimates the actual cost of reaching the goal. 

The choice of a heuristic function is vital - if h(n) =  0, the algorithm behaves as a Dijkstra's algorithm, which always finds the shortest path. If h(n) is admissible (lower than the actual cost of reaching the goal from n), it always finds the

In [51]:
# Question 3
import math 

# The graph and coordinates data
# TODO: implement
graph = readGraph('USA-road-d.NY.gr.txt')
co = readCoordinates('USA-road-d.NY.co.txt')


# Heuristic function
def h(node1, node2):
    """
    Heuristic function 1
    """
    co1 = co[node1]
    co2 = co[node2]
    
    w1, l1 = angles2centimeters(co1[0], co1[1])
    w2, l2 = angles2centimeters(co2[0], co2[1])
    
    EuclidDistSq = pow( w1 - w2, 2 ) + pow( l1 - l2, 2 )
    return math.sqrt( EuclidDistSq )


def printPath( cameFrom, start, goal ) :
    node = goal
    path = [goal]
    cost = 0
    while not (node == start) :
        previous = cameFrom[node]
        weights_prev = graph.getVertex(previous).getWeights()
        index =  graph.getVertex(previous).getNeighbors().index(node)
        cost += weights_prev[index]
        node = previous
        path.append(node)
    path.reverse()
    print(path)
    return (path, cost)


# Algorithm
def a_star_search(graph, co, start, goal):
    """
    A* algorithm
    :param graph: Graph object
    :param co: coordinates list
    :param start: index of start node
    :param goal: index of goal node
    :return: The path of nodes and total length
    """
    openSet = set([start]) # A set of nodes
    closed = set([]) # A set of nodes

    cameFrom = {} # A node-node key-value mapping

    gScore = {} # A node-int key-value mapping
    gScore[start] = 0

    fScore = {} # A node-int key-value mapping
    """ CHANGE HEURISTIC FUNCTION HERE """
    fScore[start] = gScore[start] + h(start, goal)

    nodeOrder = PriorityQueue()
    nodeOrder.put(start, fScore.get(start))

    while not len(openSet) == 0 :
        currentNode = nodeOrder.pop()
        if currentNode == goal :
            return printPath(cameFrom, start, goal)

        openSet.remove(currentNode)
        closed.add(currentNode)
        neighbours = graph.getVertex(currentNode).getNeighbors()

        weights = graph.getVertex(currentNode).getWeights()
        
        for neighbour in neighbours :
            
            if neighbour in closed:
                continue
            tentative_g_score = gScore[currentNode] + weights[ neighbours.index(neighbour) ]
            if neighbour not in openSet:
                openSet.add(neighbour)
            elif tentative_g_score >= gScore[neighbour]:
                continue
            cameFrom[neighbour] = currentNode
            gScore[neighbour] = tentative_g_score
            """ CHANGE HEURISTIC FUNCTION HERE """
            fScore[neighbour] = gScore[neighbour] + h(neighbour, goal)
            nodeOrder.put(neighbour, fScore[neighbour])
            
            # consistency check #
            """ CHANGE HEURISTIC FUNCTION HERE x2 """
            if not h(currentNode, goal) <= (weights[ neighbours.index(neighbour) ] + h(neighbour, goal)):
                print("Heuristics not consistent, estimate from " + str(currentNode) + " is bigger than from " + str(neighbour))
            
    return [], 0

In [53]:
# Question 3
print("A* function with Euclidean distance heuristics does return the shortest distance path.")
print("An interesting thing we found with implementing a consistency check is that there are some nodes (e.g try the algorithm with group nr 44) for which the heuristic function is not consistent, but the algorithm does not include these nodes in the final path.")

A* function with Euclidean distance heuristics does return the shortest distance path.
An interesting thing we found with implementing a consistency check is that there are some nodes (e.g try the algorithm with group nr 44) for which the heuristic function is not consistent, but the algorithm does not include these nodes in the final path.


In [48]:
# Question 4
def h4(node1, node2):
    return math.floor(h(node1, node2))
    '''
    #-- EUCLIDEAN DISTANCE with cm-s, rounded down --#
    
    co1 = co[node1]
    co2 = co[node2]
    
    w1, l1 = angles2centimeters(co1[0], co1[1])
    w2, l2 = angles2centimeters(co2[0], co2[1])
    
    EuclidDistSq = pow( w1 - w2, 2 ) + pow( l1 - l2, 2 )
    return math.floor(math.sqrt( EuclidDistSq ))
    '''

print("Consider the heuristic of the Euclidean distance heuristic above, but with rounding down the final result. Naturally, this will remain admissible, as the Euclidean distance heuristic is admissable and rounding down only further underestimates. It is however non-consistent. For examplle, when the heuristic returns the same value for two neighbouring nodes (possible due to the rounding down), yet the cost to travel between these two nodes is smaller than the difference between their Euclidean distance heuristic values, the consistency condition is violated.")

A* search with an heuristic function of h(n)=0 is optimal, as the heuristic function is consistent, because the estimate to the goal from any node is always 0, which in itself is less than the cost of reaching any following node. 
The running time of the algorithm is long, so the algorithm with no heuristics is not time- or space-efficient.


In [44]:
# Question 5
def h5(node1, node2):
    """
    Heuristic function 3
    """
    co1 = co[node1]
    co2 = co[node2]
    
    w1, l1 = angles2centimeters(co1[0], co1[1])
    w2, l2 = angles2centimeters(co2[0], co2[1])
    
    ManhattanDist = abs(w1-w2) +  abs(l1-l2)
    return ManhattanDist
print("Algorithm with Manhattan distance is not optimal, as the heuristic estimate is bigger from some nodes than the cost of reaching a certain following node plus the estimate to the goal from there (see consistency check when running the algorithm).\nAlgorithm with Euclidan heuristics returns the shortest distance path, but runs longer than the algorithm with Manhattan heuristics. Algorithm with Manhattan heuristics does not return the shortest distance path, but the running time in this case is very short.")

Algorithm with Manhattan distance is not optimal, as the heuristic estimate is bigger from some nodes than the cost of reaching a certain following node plus the estimate to the goal from there.
Algorithm with Euclidan heuristics returns the shortest distance path, but runs longer than the algorithm with Manhattan heuristics. Algorithm with Manhattan heuristics does not return the shortest distance path, but the running time in this case is very short.


In [35]:
# Queston 6
def h6(node1, node2):
    """
    Heuristic function 4
    """
    return 0
print("")




In [36]:
# Question 7
graph = readGraph('USA-road-t.NY.gr.txt')
co = readCoordinates('USA-road-d.NY.co.txt')
# TODO complete below
print("When used without taking the now-changed weigths of the edges into account, the Euclidean and Manhatten heuristic are no longer guaranteed to be admissible. As a result, the optimality of the solution is no longer guaranteed.")

In [37]:
# Question 8
# # TODO  check that this is admissible
##
def h_8_preparation(graph, start, goal) :
    return ... # list with pathlength i from start to node

def h_8(node1, node2):
    """
    Heuristic function 5
    """
    M = ...
    n = ...
    i1 = ...
    i2 = ...

    return (n-i)*M


In [54]:
# Calculate the path between your start and goal node. 
# Did you get the shortest-distance path? You can 
# verify your results in the distances.txt file.

import random
import time

group_number = 44 # TODO: change to your group number
num_vertices = graph.num_vertices  # TODO: number of vertices in the graph

random.seed(group_number)

start = random.randint(1, num_vertices + 1)
goal = random.randint(1, num_vertices + 1)

# Calculating the path between nodes
print( 'start : ' +  str(start) )
print( 'goal : ' + str(goal) )

time0 = time.time()
path, path_cost = a_star_search(graph, co, start, goal)
time1 = time.time()

print('cost of path: ' + str(path_cost))
path_length = len(path)
print('length of path: ' + str(path_length))
print("time elapsed: " + str(round(time1-time0, 2)))

start : 214191
goal : 61162
Heuristics not consistent, estimate from 224557 is bigger than from 220590
Heuristics not consistent, estimate from 64665 is bigger than from 64654
[214191, 214190, 214189, 214188, 214180, 214178, 214177, 214141, 214140, 214137, 214131, 214130, 214128, 214127, 213988, 213990, 213984, 213975, 213974, 213972, 213971, 213916, 213915, 213944, 213912, 213942, 213939, 213941, 213921, 213922, 213918, 213919, 209963, 209956, 209953, 209940, 209938, 209935, 209934, 209923, 209922, 209924, 209851, 209928, 209845, 209844, 209832, 209830, 209829, 209997, 209996, 209991, 209989, 209992, 209993, 209985, 209981, 209978, 209474, 209473, 209468, 209469, 209464, 209463, 209477, 209481, 209479, 209436, 209441, 209439, 209438, 209425, 209432, 209430, 209426, 209265, 209428, 209268, 209266, 209259, 209249, 209336, 209330, 209331, 209298, 209294, 209292, 209290, 209281, 209279, 209277, 209276, 209273, 209271, 206986, 206985, 206982, 206988, 206969, 206977, 206972, 206970, 206926,