Anna Mrukwa  
Makrokierunek sem. 6

In [1]:
import numpy as np
from collections import deque
from time import time
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
# Mount Google Drive at /content/drive (Google Colab only); the test cells
# below read their inputs from, and write their outputs to, a folder under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Supplementary functions

In [3]:
def load_maze(textfile):
  """Load a character maze from a text file into a 2-D numpy array.

  The file is split on whitespace: the first token is the width, the second
  the height, and each following token is one maze row whose characters fill
  consecutive cells of that row.

  Args:
    textfile: path of the maze file.

  Returns:
    Array of shape (height, width) holding one character per cell; cells that
    a short row does not cover stay empty strings.
  """
  with open(textfile) as handle:
    tokens = handle.read().split()
  height = int(tokens[1])
  width = int(tokens[0])
  maze = np.empty((height, width), dtype=str)
  for row in range(height):
    # the first two tokens are the dimensions, so rows start at offset 2
    for col, symbol in enumerate(tokens[row + 2]):
      maze[row, col] = symbol
  return maze

In [4]:
def read_unweighted_graph_from_file(textfile):
  """Read an unweighted edge list into an adjacency-list dictionary.

  The first row of the file starts with the node count; every following row
  is "<source> <target>". Nodes are named "1" .. "<node count>". Each edge is
  stored in the source -> target direction only.

  Args:
    textfile: path of the edge-list file.

  Returns:
    Dict mapping every node name (str) to the list of its successors (str),
    in file order. Nodes without outgoing edges map to an empty list.

  Raises:
    KeyError: if an edge starts at a node outside "1" .. "<node count>".
  """
  # ndmin=2 keeps the table two-dimensional even when the file holds nothing
  # but the header row (otherwise loadtxt returns a 1-D array).
  rows = np.loadtxt(textfile, dtype=str, ndmin=2)
  node_count = int(rows[0, 0])
  graph = {str(node): [] for node in range(1, node_count + 1)}
  for source, target in rows[1:, :2]:
    graph[str(source)].append(str(target))
  return graph

In [5]:
def read_weighted_graph_from_file(textfile):
  """Read a weighted directed graph and build Floyd-Warshall start matrices.

  The first line of the file is "<node count> <edge count>"; every following
  line is "<source> <target> <weight>". Nodes are named "1" .. "<node count>".
  When an edge is listed more than once, the last occurrence wins.

  Args:
    textfile: path of the graph file.

  Returns:
    Tuple (node_names, nodes, edges, distances, parents):
      node_names: list of node names as strings.
      nodes, edges: the two integers from the header line.
      distances: float matrix; 0 on the diagonal, the edge weight where an
        edge exists, infinity elsewhere.
      parents: string matrix; parents[i, j] is the name of the next node on
        the way from i to j ('0' when there is no edge, the node's own name
        on the diagonal).

  Raises:
    ValueError: if the header is malformed, a weight is not numeric, or an
      edge references a node outside "1" .. "<node count>".
  """
  with open(textfile) as f:
    first_line = f.readline()
  nodes, edges = first_line.split()
  nodes = int(nodes)
  edges = int(edges)

  node_names = [str(node) for node in range(1, nodes + 1)]
  index_of = {name: position for position, name in enumerate(node_names)}

  # The cell width must fit the longest node name: a plain `dtype=str` array
  # holds a single character per cell and would silently store "10" as "1".
  name_width = max(1, len(str(nodes)))
  parents = np.full((nodes, nodes), '0', dtype="<U%d" % name_width)
  distances = np.full((nodes, nodes), np.inf)
  for i, name in enumerate(node_names):
    distances[i, i] = 0
    parents[i, i] = name

  # ndmin=2 keeps a file with a single edge from collapsing into a 1-D array.
  g = np.loadtxt(textfile, dtype=str, skiprows=1, ndmin=2)
  edge_rows = g[:, :3] if g.size else ()
  for source, target, weight in edge_rows:
    try:
      row = index_of[str(source)]
      col = index_of[str(target)]
    except KeyError as err:
      raise ValueError("edge references unknown node %s" % err) from None
    distances[row, col] = float(weight)
    parents[row, col] = str(target)
  return node_names, nodes, edges, distances, parents

In [6]:
def generate_weighted_graph(nodes_no, outfile, range_from=1, range_to=10):
  """Write a random weighted directed graph to a text file.

  Every ordered pair of distinct nodes independently gets an edge or not
  (drawn from numpy's global random generator, so seed it for reproducible
  files). The file starts with "<node count> <edge count>" followed by one
  "<source> <target> <weight>" line per edge, nodes numbered from 1.

  Args:
    nodes_no: number of nodes.
    outfile: path of the file to create or overwrite.
    range_from: smallest possible edge weight (inclusive).
    range_to: upper bound of the edge weight (exclusive, as in
      `np.random.randint`).
  """
  adjacency = np.random.randint(0, 2, (nodes_no, nodes_no))
  np.fill_diagonal(adjacency, 0)  # no self-loops
  edges_no = int(np.sum(adjacency))
  with open(outfile, "w") as f:
    f.write(str(nodes_no)+" "+str(edges_no)+"\n")
    # argwhere walks the matrix row by row, so weights are drawn in the same
    # order as a nested source/target loop would draw them.
    for source, target in np.argwhere(adjacency == 1):
      weight = np.random.randint(range_from, range_to)
      f.write(str(source + 1)+" "+str(target + 1)+" "+str(weight)+"\n")


# Floyd-Warshall algorithm

In [7]:
def show_fw_path(distances, parents, node_names, outfile):
  """Write the distance and route for every ordered pair of distinct nodes.

  One line is written per pair, in row-major order:
  "d[<from>,<to>] = <distance> PATH: <from>-...-<to>". Pairs with an infinite
  distance get "- " in place of the route.

  Args:
    distances: square matrix of shortest distances (infinity = no route).
    parents: square matrix where parents[i, j] names the node that follows i
      on the route to j.
    node_names: node names, in matrix index order.
    outfile: path of the text file to create or overwrite.

  Raises:
    ValueError: if following `parents` visits more nodes than exist, which
      means the matrix does not describe a finite route.
  """
  index_of = {name: position for position, name in enumerate(node_names)}
  with open(outfile, "w") as f:
    for i, source in enumerate(node_names):
      for j, target in enumerate(node_names):
        if i == j:
          continue
        res = "d["+source+","+target+"] = " +str(distances[i, j]) + " PATH: "
        if distances[i, j] == np.inf:
          # established output format: a dash followed by a single space
          pt = "-" + " "
        else:
          hops = [source]
          idx = i
          while idx != j:
            following = str(parents[idx, j])
            hops.append(following)
            # a simple route never holds more nodes than the graph has;
            # anything longer means `parents` loops back on itself
            if len(hops) > len(node_names):
              raise ValueError("parents matrix contains a cycle on the way from "
                               + source + " to " + target)
            idx = index_of[following]
          pt = "-".join(hops)
        f.write(res + pt + '\n')

def floyd_warshall(infile, outfile):
  """Run the Floyd-Warshall all-pairs shortest path algorithm on a graph file.

  The graph is loaded with `read_weighted_graph_from_file`, relaxed through
  every intermediate node, and the resulting distances and routes are written
  with `show_fw_path`.

  Args:
    infile: path of the weighted graph file.
    outfile: path of the report file to write.

  Returns:
    Tuple (nodes, edges, routes, t): node and edge counts from the file
    header, the number of ordered pairs of distinct nodes joined by a finite
    route, and the elapsed wall-clock seconds (loading and writing included).
  """
  t0 = time()
  node_names, nodes, edges, distances, parents = read_weighted_graph_from_file(infile)

  for k in range(nodes):
    for i in range(nodes):
      # without a route i -> k, going through k cannot improve anything
      if distances[i, k] == np.inf:
        continue
      for j in range(nodes):
        if distances[i, k] + distances[k, j] < distances[i, j]:
          distances[i, j] = distances[i, k] + distances[k, j]
          # the route to j now starts the same way as the route to k
          parents[i, j] = parents[i, k]

  show_fw_path(distances, parents, node_names, outfile)
  t = time()-t0

  # the diagonal is never infinite, so every infinite entry is a missing route
  unreachable_pairs = int(np.sum(np.isinf(distances)))
  routes = nodes**2 - nodes - unreachable_pairs
  return nodes, edges, routes, t

# BFS

In [8]:
def get_graph_path(node_states, end):
  """Rebuild the route that leads to `end` by following predecessors.

  Args:
    node_states: dict mapping a node name to a list whose element at index 2
      is the predecessor's name (a str) or a non-string placeholder when the
      node has no predecessor.
    end: name of the node the route finishes at.

  Returns:
    The node names joined by "->", earliest node first; just `end` when it
    has no predecessor.
  """
  route = end
  node = end
  while True:
    previous = node_states[node][2]
    if not isinstance(previous, str):
      # reached a node without a recorded predecessor
      return route
    route = previous + "->"+ route
    node = previous

# graph in the form of the adjacency list
def BFS(textfile, start, end):
  """Find the shortest path between two nodes with breadth-first search.

  The graph is read with `read_unweighted_graph_from_file`; the search stops
  as soon as `end` is discovered.

  Args:
    textfile: path of the unweighted graph file.
    start: name of the start node (str).
    end: name of the target node (str).

  Returns:
    Tuple (length, seconds, path): the number of edges on the shortest path
    (infinity when `end` is unreachable), the elapsed search time in seconds
    (file loading excluded), and the path as a "a->b->c" string.
  """
  graph = read_unweighted_graph_from_file(textfile)
  t0 = time()
  # per node: [state, distance from start, predecessor]
  node_states = {k: ["unvisited", np.inf, np.nan] for k in graph}
  node_states[start] = ["visited", 0, np.nan]

  q = deque([start])
  while q:
    u = q.popleft()
    for adjacent in graph[u]:
      if node_states[adjacent][0] != "unvisited":
        continue
      node_states[adjacent][0] = "visited"
      node_states[adjacent][1] = node_states[u][1] + 1
      node_states[adjacent][2] = u
      if adjacent == end:
        # BFS discovers nodes in order of distance, so this path is shortest
        print("End reached. Stopping execution.")
        path = get_graph_path(node_states, end)
        return node_states[end][1], time()-t0, path
      q.append(adjacent)
    # all neighbours of u have been examined
    node_states[u][0] = "analysed"

  path = get_graph_path(node_states, end)
  return node_states[end][1], time()-t0, path

# BFS maze

In [9]:
def process_node(node_coords, prev_node, distance, predecessor, node_state):
  """Record that a maze cell was discovered from a neighbouring cell.

  All three containers are updated in place.

  Args:
    node_coords: (row, column) of the newly discovered cell.
    prev_node: (row, column) of the cell it was reached from.
    distance: array of step counts from the start.
    predecessor: nested list, indexed [row][column], of predecessor cells.
    node_state: array of per-cell state strings.

  Returns:
    The same (distance, predecessor, node_state) objects that were passed in.
  """
  row, col = node_coords
  node_state[node_coords] = "visited"
  predecessor[row][col] = prev_node
  # one step further from the start than the cell we came from
  distance[node_coords] = 1 + distance[prev_node]
  return distance, predecessor, node_state


def show_path(maze, predecessor, end_node, start_node, outfile):
  """Mark the found route in the maze with '*', print it and save it.

  The route is traced backwards from the exit through `predecessor`; the
  start and exit cells themselves keep their own symbols. `maze` is modified
  in place. When the exit has no predecessor (it was never reached), nothing
  is marked and the unchanged maze is printed and saved.

  Args:
    maze: 2-D array of maze characters.
    predecessor: nested list, indexed [row][column], holding the (row,
      column) tuple each cell was reached from, or a non-tuple placeholder.
    end_node: exit position in `np.where` form (array of rows, array of
      columns); the first entry of each is used.
    start_node: (row, column) tuple of the start cell.
    outfile: path the maze is written to, one row per line.
  """
  prev = predecessor[end_node[0][0]][end_node[1][0]]
  # Cells that were never reached hold a placeholder instead of a coordinate
  # tuple; stop there rather than indexing the maze with it.
  while isinstance(prev, tuple) and prev != start_node:
    maze[prev] = '*'
    prev = predecessor[prev[0]][prev[1]]
  print(maze)
  np.savetxt(outfile, maze, fmt="%s")


def BFS_maze(infile, outfile):
  """Find the shortest route through a maze with breadth-first search.

  In the maze, "I" marks the start, "O" the exit and "x" a wall; every other
  cell is walkable. Moves go right, left, up or down. The maze with the route
  marked is printed and saved by `show_path`.

  Args:
    infile: path of the maze file (read with `load_maze`).
    outfile: path the solved maze is written to.

  Returns:
    Tuple (length, seconds, dims): number of steps from start to exit
    (infinity when the exit was not reached), elapsed seconds (maze loading
    excluded, output writing included), and the maze shape as
    (height, width).

  Raises:
    ValueError: if the maze has no "I" or no "O" cell.
  """
  maze = load_maze(infile)
  t0 = time()
  dims = maze.shape  # (height, width)

  start_node = np.where(maze=="I")
  end_node = np.where(maze=="O")
  if start_node[0].size == 0 or end_node[0].size == 0:
    raise ValueError("maze must contain a start cell 'I' and an exit cell 'O'")
  end = (end_node[0][0], end_node[1][0])

  # per-cell bookkeeping: state, predecessor on the route, steps from start
  node_state = np.full(dims, "unvisited")
  node_state[start_node] = "visited"
  predecessor = [[np.nan for _ in range(dims[1])] for _ in range(dims[0])]
  distance = np.full(dims, np.inf)
  distance[start_node] = 0

  start_node = (start_node[0][0], start_node[1][0])
  q = deque([start_node])

  # right, left, up, down: the order decides which of several equally short
  # routes is reported
  steps = ((0, 1), (0, -1), (-1, 0), (1, 0))
  while q and node_state[end] == "unvisited":
    u = q.popleft()
    for row_step, col_step in steps:
      neighbour = (u[0] + row_step, u[1] + col_step)
      inside = 0 <= neighbour[0] < dims[0] and 0 <= neighbour[1] < dims[1]
      if inside and maze[neighbour] != "x" and node_state[neighbour] == "unvisited":
        process_node(neighbour, u, distance, predecessor, node_state)
        q.append(neighbour)
    node_state[u] = "analysed"

  show_path(maze, predecessor, end_node, start_node, outfile)
  return distance[end], time()-t0, dims # height, then width

# Testing

## BFS

In [10]:
# BFS test cases, one per row: (input file, start node, end node).
bfs_cases = [
    ("b10.txt", "3", "9"),
    ("b10.txt", "8", "9"),
    ("b20.txt", "10", "11"),
    ("b20.txt", "5", "7"),
    ("b50.txt", "5", "7"),
    ("b50.txt", "7", "9"),
    ("b100.txt", "6", "9"),
    ("b100.txt", "99", "100"),
]
infiles, start_points, end_points = (list(column) for column in zip(*bfs_cases))

# Result columns, filled in by the benchmark loop.
paths, lengths, times = [], [], []

# Column headers of the result table.
measures = ["Input filename", "Start point", "End point",
            "Shortest path", "Length", "Time"]

In [11]:
# Run BFS on every configured case and collect length, runtime and path.
data_dir = "/content/drive/My Drive/AaDS/"
bfs_inputs = zip(infiles, start_points, end_points)
for infile, start_point, end_point in tqdm(bfs_inputs, total=len(infiles)):
  length, t, path = BFS(data_dir+infile, start_point, end_point)
  lengths.append(length)
  times.append(t)
  paths.append(path)

  0%|          | 0/8 [00:00<?, ?it/s]

End reached. Stopping execution.
End reached. Stopping execution.
End reached. Stopping execution.
End reached. Stopping execution.
End reached. Stopping execution.
End reached. Stopping execution.
End reached. Stopping execution.
End reached. Stopping execution.


In [12]:
# One row per BFS case; the columns follow the order of `measures`.
bfs_rows = list(zip(infiles, start_points, end_points, paths, lengths, times))
df = pd.DataFrame(bfs_rows, columns=measures)
df

Unnamed: 0,Input filename,Start point,End point,Shortest path,Length,Time
0,b10.txt,3,9,3->5->7->9,3,0.000113
1,b10.txt,8,9,8->4->5->7->9,4,0.000458
2,b20.txt,10,11,10->18->13->14->17->11,5,0.000494
3,b20.txt,5,7,5->18->13->14->17->11->2->3->7,8,0.000139
4,b50.txt,5,7,5->10->11->7,3,0.000241
5,b50.txt,7,9,7->25->9,2,0.00021
6,b100.txt,6,9,6->14->27->9,3,0.000672
7,b100.txt,99,100,99->100,1,0.00061


## Mazes

In [13]:
# Maze test cases, one per row: (input file, output file).
maze_files = [
    ("m5x5.txt", "m5x5_out.txt"),
    ("m13x13.txt", "m13x13_out.txt"),
    ("m15x15.txt", "m15x15_out.txt"),
]
infiles, outfiles = (list(column) for column in zip(*maze_files))

# Result columns, filled in by the benchmark loop.
widths, heights, lengths, times = [], [], [], []

# Column headers of the result table.
measures = ["Input filename", "Output filename", "Width", 
            "Height", "Length", "Time"]

In [14]:
# Solve every configured maze and collect route length, runtime and size.
data_dir = "/content/drive/My Drive/AaDS/"
for infile, outfile in tqdm(zip(infiles, outfiles), total=len(infiles)):
  print(" ")
  dist, t, dims = BFS_maze(data_dir+infile, data_dir+outfile)
  lengths.append(dist)
  times.append(t)
  # dims is (height, width)
  heights.append(dims[0])
  widths.append(dims[1])

  0%|          | 0/3 [00:00<?, ?it/s]

 
[['x' 'x' 'x' 'O' 'x']
 ['x' '.' '*' '*' 'x']
 ['x' 'x' '*' 'x' 'x']
 ['I' '*' '*' '.' 'x']
 ['x' 'x' 'x' 'x' 'x']]
 
[['x' 'x' 'x' 'x' 'x' 'x' 'x' 'O' 'x' 'x' 'x' 'x' 'x']
 ['x' '.' '.' '.' 'x' '*' '*' '*' '.' '.' 'x' '.' 'x']
 ['x' '.' 'x' 'x' 'x' '*' 'x' '.' 'x' '.' 'x' '.' 'x']
 ['x' '.' 'x' '*' '*' '*' 'x' '.' 'x' '.' '.' '.' 'x']
 ['x' '.' 'x' '*' 'x' '.' 'x' 'x' 'x' 'x' 'x' 'x' 'x']
 ['x' '.' 'x' '*' 'x' '.' '.' '.' '.' '.' '.' '.' 'x']
 ['x' '.' 'x' '*' 'x' 'x' 'x' 'x' 'x' 'x' 'x' '.' 'x']
 ['x' '.' 'x' '*' '*' '*' '*' '*' 'x' '.' 'x' '.' 'x']
 ['x' '.' 'x' 'x' 'x' 'x' 'x' '*' 'x' '.' 'x' '.' 'x']
 ['x' '.' '.' '.' 'x' '*' '*' '*' 'x' '.' 'x' '.' 'x']
 ['x' 'x' 'x' '.' 'x' '*' 'x' 'x' 'x' '.' 'x' '.' 'x']
 ['x' '*' '*' '*' '*' '*' 'x' '.' '.' '.' '.' '.' 'x']
 ['x' 'I' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x']]
 
[['x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'x' 'O' 'x']
 ['x' '.' 'x' 'x' 'x' 'x' 'x' 'x' '*' '*' '*' 'x' 'x' '*' 'x']
 ['x' '.' 'x' 'x' 'x' 'x' 'x' 'x' '*

In [15]:
# One row per maze; the columns follow the order of `measures`.
maze_rows = list(zip(infiles, outfiles, widths, heights, lengths, times))
df = pd.DataFrame(maze_rows, columns=measures)
df

Unnamed: 0,Input filename,Output filename,Width,Height,Length,Time
0,m5x5.txt,m5x5_out.txt,5,5,6.0,0.034808
1,m13x13.txt,m13x13_out.txt,13,13,26.0,0.013301
2,m15x15.txt,m15x15_out.txt,15,15,58.0,0.015047


## FW

In [16]:
# Fixed seed so the generated graphs are the same on every run.
np.random.seed(12)
generated_graphs = (
    (7, "/content/drive/My Drive/AaDS/g7.txt"),
    (9, "/content/drive/My Drive/AaDS/g9.txt"),
    (11, "/content/drive/My Drive/AaDS/g11.txt"),
)
for node_count, target_file in generated_graphs:
  generate_weighted_graph(node_count, target_file)

In [17]:
# Floyd-Warshall test cases, one per row: (input file, output file).
# Larger graphs (g200.txt, g500.txt, g1000.txt and their *_out.txt files)
# are left out of this run.
fw_files = [
    ("g5.txt", "g5_out.txt"),
    ("g7.txt", "g7_out.txt"),
    ("g9.txt", "g9_out.txt"),
]
infiles, outfiles = (list(column) for column in zip(*fw_files))

# Result columns, filled in by the benchmark loop.
nodes, edges, routes, times = [], [], [], []

# Column headers of the result table.
measures = ["Input filename", "Output filename", 
            "Nodes", "Edges", "Routes", "Times"]

In [18]:
# Run Floyd-Warshall on every configured graph and collect its statistics.
data_dir = "/content/drive/My Drive/AaDS/"
for infile, outfile in tqdm(zip(infiles, outfiles), total=len(infiles)):
  n, e, r, t = floyd_warshall(data_dir+infile, data_dir+outfile)
  nodes.append(n)
  times.append(t)
  edges.append(e)
  routes.append(r)

  0%|          | 0/3 [00:00<?, ?it/s]

In [19]:
# One row per graph; the columns follow the order of `measures`.
fw_rows = list(zip(infiles, outfiles, nodes, edges, routes, times))
df = pd.DataFrame(fw_rows, columns=measures)
df

Unnamed: 0,Input filename,Output filename,Nodes,Edges,Routes,Times
0,g5.txt,g5_out.txt,5,9,20,0.039973
1,g7.txt,g7_out.txt,7,21,36,0.03911
2,g9.txt,g9_out.txt,9,33,72,0.016866
