Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new topological sort implementation #10592

Merged
merged 1 commit into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions lib/Doctrine/ORM/Internal/TopologicalSort.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<?php

declare(strict_types=1);

namespace Doctrine\ORM\Internal;

use Doctrine\ORM\Internal\TopologicalSort\CycleDetectedException;

use function array_keys;
use function array_reverse;
use function array_unshift;
use function spl_object_id;

/**
 * TopologicalSort implements topological sorting, which is an ordering
 * algorithm for directed graphs (DG) using a depth-first search (DFS)
 * to traverse the graph built in memory.
 *
 * For a graph without cycles, every node and every edge is processed once,
 * giving a running time of O(V + E) for V nodes and E edges. When cycles have
 * to be broken through optional edges, nodes that were backed out of are reset
 * and may be visited again.
 *
 * @internal
 */
final class TopologicalSort
{
    private const NOT_VISITED = 1;
    private const IN_PROGRESS = 2;
    private const VISITED     = 3;

    /**
     * Array of all nodes, indexed by object ids.
     *
     * @var array<int, object>
     */
    private $nodes = [];

    /**
     * DFS state for the different nodes, indexed by node object id and using one of
     * this class' constants as value.
     *
     * @var array<int, self::*>
     */
    private $states = [];

    /**
     * Edges between the nodes. The first-level key is the object id of the outgoing
     * node; the second array maps the destination node by object id as key. The final
     * boolean value indicates whether the edge is optional or not.
     *
     * @var array<int, array<int, bool>>
     */
    private $edges = [];

    /**
     * Nodes in the order in which the DFS finished them. This is the exact reverse
     * of the topological order; sort() flips it once before returning it.
     *
     * @psalm-var list<object>
     */
    private $postOrder = [];

    /**
     * Registers a node. Registering the same object again resets its DFS state
     * and drops the edges that start at it.
     *
     * @param object $node
     */
    public function addNode($node): void
    {
        $id = spl_object_id($node);

        $this->nodes[$id]  = $node;
        $this->states[$id] = self::NOT_VISITED;
        $this->edges[$id]  = [];
    }

    /**
     * Tells whether the given object has been registered through addNode().
     *
     * @param object $node
     */
    public function hasNode($node): bool
    {
        return isset($this->nodes[spl_object_id($node)]);
    }

    /**
     * Adds a new edge between two nodes to the graph.
     *
     * Both nodes are expected to have been registered with addNode(): the traversal
     * looks up the state and the node object for every edge endpoint it follows.
     *
     * A non-optional edge is never downgraded: once A->B has been added as required,
     * adding it again as optional has no effect.
     *
     * @param object $from
     * @param object $to
     * @param bool   $optional This indicates whether the edge may be ignored during the topological sort
     *                         if it is necessary to break cycles.
     */
    public function addEdge($from, $to, bool $optional): void
    {
        $fromId = spl_object_id($from);
        $toId   = spl_object_id($to);

        if (isset($this->edges[$fromId][$toId]) && $this->edges[$fromId][$toId] === false) {
            return; // we already know about this dependency, and it is not optional
        }

        $this->edges[$fromId][$toId] = $optional;
    }

    /**
     * Returns a topological sort of all nodes. When we have an edge A->B between two nodes
     * A and B, then A will be listed before B in the result.
     *
     * @psalm-return list<object>
     *
     * @throws CycleDetectedException When a cycle is found that cannot be broken at an optional edge.
     */
    public function sort(): array
    {
        /*
         * When possible, keep objects in the result in the same order in which they were added as nodes.
         * Since the final result is the reverse of the order in which visit() finishes the nodes,
         * we need to start the DFS from the nodes in array_reverse order here.
         */
        foreach (array_reverse(array_keys($this->nodes)) as $oid) {
            if ($this->states[$oid] === self::NOT_VISITED) {
                $this->visit($oid);
            }
        }

        // Nodes were appended as the DFS finished them; a single reversal yields the
        // topological order without re-indexing the list on every insertion.
        return array_reverse($this->postOrder);
    }

    /**
     * Runs the DFS from the node with the given object id and records the node
     * once everything reachable from it has been processed.
     *
     * @throws CycleDetectedException
     */
    private function visit(int $oid): void
    {
        if ($this->states[$oid] === self::IN_PROGRESS) {
            // This node is already on the current DFS stack. We've found a cycle!
            throw new CycleDetectedException($this->nodes[$oid]);
        }

        if ($this->states[$oid] === self::VISITED) {
            // We've reached a node that we've already seen, including all
            // other nodes that are reachable from here. We're done here, return.
            return;
        }

        $this->states[$oid] = self::IN_PROGRESS;

        // Continue the DFS downwards the edge list
        foreach ($this->edges[$oid] as $adjacentId => $optional) {
            try {
                $this->visit($adjacentId);
            } catch (CycleDetectedException $exception) {
                if ($exception->isCycleCollected()) {
                    // There is a complete cycle downstream of the current node. We cannot
                    // do anything about that anymore.
                    throw $exception;
                }

                if ($optional) {
                    // The current edge is part of a cycle, but it is optional and the closest
                    // such edge while backtracking. Break the cycle here by skipping the edge
                    // and continuing with the next one.
                    continue;
                }

                // We have found a cycle and cannot break it at this edge. Best we can do
                // is to retreat from the current vertex, hoping that somewhere up the
                // stack this can be salvaged.
                $this->states[$oid] = self::NOT_VISITED;
                $exception->addToCycle($this->nodes[$oid]);

                throw $exception;
            }
        }

        // We have traversed all edges and visited all other nodes reachable from here.
        // So we're done with this vertex as well.
        $this->states[$oid] = self::VISITED;
        $this->postOrder[]  = $this->nodes[$oid];
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

declare(strict_types=1);

namespace Doctrine\ORM\Internal\TopologicalSort;

use RuntimeException;

use function array_unshift;

/**
 * Signals that a cycle was found while sorting, and carries the nodes of that
 * cycle as they are reported one by one through addToCycle().
 */
class CycleDetectedException extends RuntimeException
{
    /**
     * Nodes reported so far; the most recently reported node comes first.
     *
     * @var list<object>
     */
    private $cycle;

    /**
     * The node this exception was created for. Seeing it again in addToCycle()
     * marks the cycle as complete.
     *
     * @var object
     */
    private $startNode;

    /**
     * Whether the start node has been reported again, i.e. the cycle is closed.
     *
     * @var bool
     */
    private $cycleCollected = false;

    /** @param object $startNode */
    public function __construct($startNode)
    {
        $this->cycle     = [$startNode];
        $this->startNode = $startNode;

        parent::__construct('A cycle has been detected, so a topological sort is not possible. The getCycle() method provides the list of nodes that form the cycle.');
    }

    /**
     * Returns the nodes collected so far.
     *
     * @return list<object>
     */
    public function getCycle(): array
    {
        return $this->cycle;
    }

    /**
     * Prepends a node to the collected cycle. Once the start node itself is
     * prepended, the cycle is flagged as completely collected.
     *
     * @param object $node
     */
    public function addToCycle($node): void
    {
        array_unshift($this->cycle, $node);

        if ($node !== $this->startNode) {
            return;
        }

        $this->cycleCollected = true;
    }

    /**
     * Tells whether the start node has been passed to addToCycle() yet.
     */
    public function isCycleCollected(): bool
    {
        return $this->cycleCollected;
    }
}
Loading