Skip to content

Latest commit

 

History

History
934 lines (773 loc) · 21.4 KB

TopoSort.mdx

File metadata and controls

934 lines (773 loc) · 21.4 KB
id title author contributors prerequisites description frequency
toposort
Topological Sort
Benjamin Qi, Nathan Chen
Michael Cao, Andi Qu, Andrew Wang, Qi Wang, Maggie Liu, Martin Lin
graph-traversal
intro-dp
Ordering the vertices of a directed acyclic graph such that each vertex is visited before its children.
1

To review, a directed graph consists of edges that can only be traversed in one direction. Additionally, an acyclic graph defines a graph which does not contain cycles, meaning you are unable to traverse across one or more edges and return to the node you started on. Putting these definitions together, a directed acyclic graph, sometimes abbreviated as DAG, is a graph which has edges which can only be traversed in one direction and does not contain cycles.

Topological Sort

A topological sort of a directed acyclic graph is a linear ordering of its vertices such that for every directed edge $u\to v$ from vertex $u$ to vertex $v$, $u$ comes before $v$ in the ordering.

There are two common ways to topologically sort, one involving DFS and the other involving BFS.

interactive, both versions

DFS

example walkthrough code code
#include <algorithm>
#include <iostream>
#include <vector>

using std::cout;
using std::endl;
using std::vector;

vector<int> top_sort;
vector<vector<int>> graph;
vector<bool> visited;

void dfs(int node) {
	for (int next : graph[node]) {
		if (!visited[next]) {
			visited[next] = true;
			dfs(next);
		}
	}
	top_sort.push_back(node);
}

int main() {
	int n, m;  // The number of nodes and edges respectively
	std::cin >> n >> m;

	graph = vector<vector<int>>(n);
	for (int i = 0; i < m; i++) {
		int a, b;
		std::cin >> a >> b;
		graph[a - 1].push_back(b - 1);
	}

	visited = vector<bool>(n);
	for (int i = 0; i < n; i++) {
		if (!visited[i]) {
			visited[i] = true;
			dfs(i);
		}
	}
	std::reverse(top_sort.begin(), top_sort.end());

	vector<int> ind(n);
	for (int i = 0; i < n; i++) { ind[top_sort[i]] = i; }

	// Check if the topological sort is valid
	bool valid = true;
	for (int i = 0; i < n; i++) {
		for (int j : graph[i]) {
			if (ind[j] <= ind[i]) {
				valid = false;
				goto answer;
			}
		}
	}
answer:;

	if (valid) {
		for (int i = 0; i < n - 1; i++) { cout << top_sort[i] + 1 << ' '; }
		cout << top_sort.back() + 1 << endl;
	} else {
		cout << "IMPOSSIBLE" << endl;
	}
}
import java.io.*;
import java.util.*;

public class CourseSchedule {
	private static List<Integer>[] graph;
	private static List<Integer> topSort = new ArrayList<>();
	private static boolean[] visited;

	public static void main(String[] args) throws IOException {
		BufferedReader read = new BufferedReader(new InputStreamReader(System.in));
		StringTokenizer st = new StringTokenizer(read.readLine());
		int n = Integer.parseInt(st.nextToken());  // the number of nodes
		int m = Integer.parseInt(st.nextToken());  // and the number of edges

		graph = new ArrayList[n];
		for (int i = 0; i < n; i++) { graph[i] = new ArrayList<>(); }
		for (int i = 0; i < m; i++) {
			st = new StringTokenizer(read.readLine());
			int a = Integer.parseInt(st.nextToken()) - 1;
			int b = Integer.parseInt(st.nextToken()) - 1;
			graph[a].add(b);
		}

		visited = new boolean[n];
		for (int i = 0; i < n; i++) {
			if (!visited[i]) {
				visited[i] = true;
				dfs(i);
			}
		}
		Collections.reverse(topSort);

		int[] ind = new int[n];
		for (int i = 0; i < n; i++) { ind[topSort.get(i)] = i; }

		// Check if the topological sort is valid
		boolean valid = true;
	checkSort:
		for (int i = 0; i < n; i++) {
			for (int j : graph[i]) {
				if (ind[j] <= ind[i]) {
					valid = false;
					break checkSort;
				}
			}
		}

		if (!valid) {
			System.out.println("IMPOSSIBLE");
		} else {
			StringBuilder ans = new StringBuilder();
			topSort.forEach(node -> ans.append(node + 1).append(' '));
			ans.setLength(ans.length() - 1);
			System.out.println(ans);
		}
	}

	private static void dfs(int node) {
		for (int next : graph[node]) {
			if (!visited[next]) {
				visited[next] = true;
				dfs(next);
			}
		}
		topSort.add(node);
	}
}
import sys

MAX_N = 10**5
sys.setrecursionlimit(MAX_N)


def dfs(node: int) -> None:
	for next_ in graph[node]:
		if not visited[next_]:
			visited[next_] = True
			dfs(next_)
	top_sort.append(node)


# The number of nodes and edges respectively
n, m = map(int, input().strip().split())
graph = [[] for _ in range(n)]

for _ in range(m):
	a, b = map(int, input().strip().split())
	graph[a - 1].append(b - 1)

visited = [False] * n
top_sort = []
for i in range(n):
	if not visited[i]:
		visited[i] = True
		dfs(i)
top_sort = top_sort[::-1]

ind = [0] * n
for i in range(n):
	ind[top_sort[i]] = i

# Check if the topological sort is valid
valid = True
for i in range(n):
	for j in graph[i]:
		if ind[j] <= ind[i]:
			valid = False
			break

	if not valid:
		break

if valid:
	print(*[i + 1 for i in top_sort])
else:
	print("IMPOSSIBLE")

BFS

The BFS version is known as Kahn's Algorithm.

#include <algorithm>
#include <iostream>
#include <queue>
#include <vector>

using std::cout;
using std::endl;
using std::vector;

int main() {
	int n, m;
	std::cin >> n >> m;

	vector<vector<int>> graph(n);
	for (int i = 0; i < m; i++) {
		int a, b;
		std::cin >> a >> b;
		graph[a - 1].push_back(b - 1);
	}

	vector<int> in_degree(n);
	for (const vector<int> &nodes : graph) {
		for (int node : nodes) { in_degree[node]++; }
	}

	std::queue<int> queue;
	for (int i = 0; i < n; i++) {
		if (in_degree[i] == 0) { queue.push(i); }
	}
	vector<int> top_sort;
	while (!queue.empty()) {
		int curr = queue.front();
		queue.pop();
		top_sort.push_back(curr);
		for (int next : graph[curr]) {
			if (--in_degree[next] == 0) { queue.push(next); }
		}
	}

	if (top_sort.size() == n) {
		for (int i = 0; i < n - 1; i++) { cout << top_sort[i] + 1 << ' '; }
		cout << top_sort.back() + 1 << endl;
	} else {
		cout << "IMPOSSIBLE" << endl;
	}
}
import java.io.*;
import java.util.*;

public class CourseSchedule {
	public static void main(String[] args) throws IOException {
		BufferedReader read = new BufferedReader(new InputStreamReader(System.in));
		StringTokenizer st = new StringTokenizer(read.readLine());
		int n = Integer.parseInt(st.nextToken());
		int m = Integer.parseInt(st.nextToken());

		List<Integer>[] graph = new ArrayList[n];
		for (int i = 0; i < n; i++) { graph[i] = new ArrayList<>(); }
		for (int i = 0; i < m; i++) {
			st = new StringTokenizer(read.readLine());
			int a = Integer.parseInt(st.nextToken()) - 1;
			int b = Integer.parseInt(st.nextToken()) - 1;
			graph[a].add(b);
		}

		int[] inDegree = new int[n];
		for (List<Integer> nodes : graph) {
			for (int node : nodes) { inDegree[node]++; }
		}

		ArrayDeque<Integer> queue = new ArrayDeque<>();
		List<Integer> topSort = new ArrayList<>();
		for (int i = 0; i < n; i++) {
			if (inDegree[i] == 0) { queue.add(i); }
		}
		while (!queue.isEmpty()) {
			int curr = queue.poll();
			topSort.add(curr);
			for (int next : graph[curr]) {
				if (--inDegree[next] == 0) { queue.add(next); }
			}
		}

		if (topSort.size() == n) {
			StringBuilder ans = new StringBuilder();
			topSort.forEach(node -> ans.append(node + 1).append(' '));
			ans.setLength(ans.length() - 1);
			System.out.println(ans);
		} else {
			System.out.println("IMPOSSIBLE");
		}
	}
}
from collections import deque

n, m = map(int, input().split())
graph = [[] for _ in range(n)]
for _ in range(m):
	a, b = map(int, input().split())
	graph[a - 1].append(b - 1)

in_degree = [0 for _ in range(n)]
for nodes in graph:
	for node in nodes:
		in_degree[node] += 1

queue = deque([i for i, in_deg in enumerate(in_degree) if in_deg == 0])
top_sort = []
while queue:
	curr = queue.popleft()
	top_sort.append(curr)
	for next_ in graph[curr]:
		in_degree[next_] -= 1
		if in_degree[next_] == 0:
			queue.append(next_)

if len(top_sort) == n:
	print(*[x + 1 for x in top_sort])
else:
	print("IMPOSSIBLE")

If the length of the array containing the end order does not equal the number of elements that need to be sorted, then there is a cycle in the graph.

We can also use Kahn's algorithm to extract the lexicographically minimum topological sort by breaking ties lexographically.

Although the above code does not do this, one can simply replace the queue with a priority_queue to implement this extension.

Finding a Cycle

We can modify the DFS algorithm above to return a directed cycle in the case where a topological sort does not exist. To find the cycle, we add each node we visit onto the stack until we detect a node already on the stack.

For example, suppose that our stack currently consists of $s_1,s_2,\ldots,s_i$ and we then visit $u=s_j$ for some $j\le i$. If that's the case, then $s_j\to s_{j+1}\to \cdots\to s_i\to s_j$ is a cycle. We can reconstruct the cycle without explicitly storing the stack b marking $u$ as not part of the stack and recursively backtracking until $u$ is reached again.

This code assumes that there are no self-loops, i.e. no edges from a node to itself.

#include <algorithm>
#include <iostream>
#include <vector>

using namespace std;

vector<vector<int>> graph;
vector<bool> visited, on_stack;
vector<int> cycle;

bool dfs(int node) {
	visited[node] = on_stack[node] = true;
	for (int next : graph[node]) {
		if (on_stack[next]) {
			cycle.push_back(node);  // start cycle
			on_stack[node] = on_stack[next] = false;
			return true;
		} else if (!visited[next]) {
			if (dfs(next)) {  // continue cycle
				if (on_stack[node]) {
					cycle.push_back(node);
					on_stack[node] = false;
					return true;
				} else {  // found u again
					cycle.push_back(node);
					return false;
				}
			}

			if (!cycle.empty()) {
				return false;  // finished with cycle
			}
		}
	}

	on_stack[node] = false;
	return false;
}

int main() {
	int n, m;
	cin >> n >> m;
	graph = vector<vector<int>>(n);
	for (int i = 0; i < m; i++) {
		int a, b;
		cin >> a >> b;
		graph[a - 1].push_back(b - 1);
	}

	visited = vector<bool>(n);
	on_stack = vector<bool>(n);
	for (int i = 0; cycle.empty() && i < n; i++) { dfs(i); }

	if (cycle.empty()) {
		cout << "IMPOSSIBLE" << endl;
	} else {
		reverse(cycle.begin(), cycle.end());
		cout << cycle.size() + 1 << "\n";
		for (int node : cycle) { cout << node + 1 << " "; }
		cout << cycle[0] + 1 << endl;
	}
}
import java.io.*;
import java.util.*;

public class RoundTripII {
	private static List<Integer>[] graph;
	private static boolean[] visited, onStack;
	private static List<Integer> cycle = new ArrayList<>();

	public static void main(String[] args) throws IOException {
		BufferedReader read = new BufferedReader(new InputStreamReader(System.in));
		StringTokenizer st = new StringTokenizer(read.readLine());
		int n = Integer.parseInt(st.nextToken());
		int m = Integer.parseInt(st.nextToken());

		graph = new ArrayList[n];
		for (int i = 0; i < n; i++) { graph[i] = new ArrayList<>(); }
		for (int i = 0; i < m; i++) {
			st = new StringTokenizer(read.readLine());
			int a = Integer.parseInt(st.nextToken()) - 1;
			int b = Integer.parseInt(st.nextToken()) - 1;
			graph[a].add(b);
		}

		visited = new boolean[n];
		onStack = new boolean[n];
		for (int i = 0; cycle.isEmpty() && i < n; i++) { dfs(i); }

		if (cycle.isEmpty()) {
			System.out.println("IMPOSSIBLE");
		} else {
			Collections.reverse(cycle);
			System.out.println(cycle.size() + 1);
			for (int node : cycle) { System.out.print((node + 1) + " "); }
			System.out.println(cycle.get(0) + 1);
		}
	}

	private static boolean dfs(int node) {
		visited[node] = onStack[node] = true;
		for (int next : graph[node]) {
			if (onStack[next]) {
				cycle.add(node);  // start cycle
				onStack[node] = onStack[next] = false;
				return true;
			} else if (!visited[next]) {
				if (dfs(next)) {  // continue cycle
					if (onStack[node]) {
						cycle.add(node);
						onStack[node] = false;
						return true;
					} else {  // found u again
						cycle.add(node);
						return false;
					}
				}

				if (!cycle.isEmpty()) {
					return false;  // finished with cycle
				}
			}
		}

		onStack[node] = false;
		return false;
	}
}
import sys

MAX_N = 10**5
sys.setrecursionlimit(MAX_N)


def dfs(node: int) -> bool:
	visited[node] = on_stack[node] = True
	for next_ in graph[node]:
		if on_stack[next_]:
			cycle.append(node)  # start cycle
			on_stack[node] = on_stack[next_] = False
			return True

		elif not visited[next_]:
			if dfs(next_):  # continue cycle
				if on_stack[node]:
					cycle.append(node)
					on_stack[node] = False
					return True
				else:  # found u again
					cycle.append(node)
					return False

			if cycle:
				return False  # finished with cycle

	on_stack[node] = False
	return False


n, m = map(int, input().strip().split())
graph = [[] for _ in range(n)]
for _ in range(m):
	a, b = map(int, input().strip().split())
	graph[a - 1].append(b - 1)

visited = [False] * n
on_stack = [False] * n
cycle = []
for i in range(n):
	dfs(i)
	if cycle:
		break

if cycle:
	print(len(cycle) + 1)
	print(cycle[0] + 1, *[i + 1 for i in cycle[::-1]])
else:
	print("IMPOSSIBLE")

Dynamic Programming

One useful property of directed acyclic graphs is, as the name suggests, that no cycles exist. If we consider each node in the graph as a state, we can perform dynamic programming on the graph if we process the states in an order that guarantees for every edge $u\to v$ that $u$ is processed before $v$. Fortunately, this is the exact definition of a topological sort!

In this task, we must find the longest path in a DAG.

Solution - Longest Flight Route

Let $dp[v]$ denote the length of the longest path ending at the node $v$. Clearly

$$ dp[v]=\max_{\text{edge } u\to v \text{ exists}}dp[u]+1, $$

or $1$ if $v$ is node $1$. If we process the states in topological order, it is guaranteed that $dp[u]$ will already have been computed before computing $dp[v]$.

Note that the implementation of this idea below uses Kahn's algorithm for topological sorting:

#include <algorithm>
#include <iostream>
#include <queue>
#include <vector>

using std::cout;
using std::endl;
using std::vector;

int main() {
	int city_num, flight_num;
	std::cin >> city_num >> flight_num;

	vector<vector<int>> flights(city_num);
	vector<vector<int>> back_edge(city_num);
	for (int i = 0; i < flight_num; i++) {
		int a, b;
		std::cin >> a >> b;
		flights[--a].push_back(--b);
		back_edge[b].push_back(a);
	}

	// Use Kahn's algorithm to do a topological sort
	vector<int> in_degree(city_num);
	for (const vector<int> &nodes : flights) {
		for (int node : nodes) { in_degree[node]++; }
	}

	std::queue<int> queue;
	for (int i = 0; i < city_num; i++) {
		if (in_degree[i] == 0) { queue.push(i); }
	}
	vector<int> top_sort;
	while (!queue.empty()) {
		int curr = queue.front();
		queue.pop();
		top_sort.push_back(curr);
		for (int next : flights[curr]) {
			if (--in_degree[next] == 0) { queue.push(next); }
		}
	}

	// Compute the dist array in topological order
	vector<int> parent(city_num, -1);
	vector<int> dist(city_num, INT32_MIN);
	dist[0] = 1;
	for (int i = 0; i < top_sort.size(); i++) {
		int b = top_sort[i];
		for (int prev : back_edge[b]) {
			if (dist[prev] + 1 > dist[b]) {
				dist[b] = dist[prev] + 1;
				parent[b] = prev;
			}
		}
	}

	if (dist[city_num - 1] < 0) {
		cout << "IMPOSSIBLE" << endl;
	} else {
		// dist[city_num - 1] denotes the length of the longest path
		// ending at the final city. i.e. Lehmälä
		cout << dist[city_num - 1] << endl;

		// Begin from the final city, trace the parent pointer
		// to construct the entire path backward
		int at = city_num - 1;
		vector<int> route;
		while (parent[at] != -1) {
			route.push_back(at);
			at = parent[at];
		}
		route.push_back(0);

		// Print the route in the correct order
		std::reverse(route.begin(), route.end());
		for (int i = 0; i < route.size() - 1; i++) { cout << route[i] + 1 << ' '; }
		cout << route.back() + 1 << endl;
	}
}
import java.io.*;
import java.util.*;

public class LongestFlight {
	public static void main(String[] args) throws IOException {
		BufferedReader read = new BufferedReader(new InputStreamReader(System.in));
		StringTokenizer st = new StringTokenizer(read.readLine());
		int cityNum = Integer.parseInt(st.nextToken());
		int flightNum = Integer.parseInt(st.nextToken());

		List<Integer>[] flights = new ArrayList[cityNum];
		List<Integer>[] backEdge = new ArrayList[cityNum];
		for (int i = 0; i < cityNum; i++) {
			flights[i] = new ArrayList<>();
			backEdge[i] = new ArrayList<>();
		}
		for (int i = 0; i < flightNum; i++) {
			st = new StringTokenizer(read.readLine());
			int a = Integer.parseInt(st.nextToken()) - 1;
			int b = Integer.parseInt(st.nextToken()) - 1;
			flights[a].add(b);
			backEdge[b].add(a);
		}

		// Use Kahn's algorithm to do a topological sort
		int[] inDegree = new int[cityNum];
		for (List<Integer> nodes : flights) {
			for (int node : nodes) { inDegree[node]++; }
		}

		ArrayDeque<Integer> queue = new ArrayDeque<>();
		List<Integer> topSort = new ArrayList<>();
		for (int i = 0; i < cityNum; i++) {
			if (inDegree[i] == 0) { queue.add(i); }
		}
		while (!queue.isEmpty()) {
			int curr = queue.poll();
			topSort.add(curr);
			for (int next : flights[curr]) {
				if (--inDegree[next] == 0) { queue.add(next); }
			}
		}

		// Compute the dist array in topological order
		int[] parent = new int[cityNum];
		Arrays.fill(parent, -1);
		int[] dist = new int[cityNum];
		Arrays.fill(dist, Integer.MIN_VALUE);
		dist[0] = 1;
		for (int i = 0; i < topSort.size(); i++) {
			int b = topSort.get(i);
			for (int prev : backEdge[b]) {
				if (dist[prev] + 1 > dist[b]) {
					dist[b] = dist[prev] + 1;
					parent[b] = prev;
				}
			}
		}

		if (dist[cityNum - 1] < 0) {
			System.out.println("IMPOSSIBLE");
		} else {
			// dist[city_num - 1] denotes the length of the longest path
			// ending at the final city. i.e. Lehmälä
			System.out.println(dist[cityNum - 1]);

			// Begin from the final city, trace the parent pointer
			// to construct the entire path backward
			int at = cityNum - 1;
			List<Integer> route = new ArrayList<>();
			while (parent[at] != -1) {
				route.add(at);
				at = parent[at];
			}
			route.add(0);

			// Print the route in the correct order
			Collections.reverse(route);
			StringBuilder ans = new StringBuilder();
			for (int i = 0; i < route.size() - 1; i++) {
				ans.append(route.get(i) + 1).append(' ');
			}
			ans.append(route.get(route.size() - 1) + 1);
			System.out.println(ans);
		}
	}
}
from collections import deque

city_num, flight_num = list(map(int, input().split(" ")))
flights = [[] for _ in range(city_num)]
back_edge = [[] for _ in range(city_num)]
for _ in range(flight_num):
	a, b = list(map(int, input().split(" ")))
	a, b = a - 1, b - 1
	flights[a].append(b)
	back_edge[b].append(a)

# Use Kahn's algorithm to do a topological sort
in_degree = [0] * city_num
for nodes in flights:
	for node in nodes:
		in_degree[node] += 1

queue = deque([i for i, in_deg in enumerate(in_degree) if in_deg == 0])
top_sort = []
while queue:
	curr = queue.popleft()
	top_sort.append(curr)
	for next_ in flights[curr]:
		in_degree[next_] -= 1
		if in_degree[next_] == 0:
			queue.append(next_)

# Compute the dist array in topological order
parent = [-1] * city_num
dist = [-float("inf")] * city_num
dist[0] = 1
for i in range(len(top_sort)):
	b = top_sort[i]
	for prev in back_edge[b]:
		if dist[prev] + 1 > dist[b]:
			dist[b] = dist[prev] + 1
			parent[b] = prev

if dist[city_num - 1] == -float("inf"):
	print("IMPOSSIBLE")
else:
	# dist[city_num - 1] denotes the length of the longest path
	# ending at the final city. i.e. Lehmälä
	print(dist[city_num - 1])

	# Begin from the final city, trace the parent pointer
	# to construct the entire path backward
	at = city_num - 1
	route = []
	while parent[at] != -1:
		route.append(at)
		at = parent[at]
	route.append(0)

	# Print the route in the correct order
	print(*[c + 1 for c in route[::-1]])

Problems