# Dependency Graph Exercises

Build a complete `DependencyGraph` class from scratch. Use `lesson.ipynb` as reference.

In [None]:
from collections import defaultdict, deque
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set

@dataclass
class NodeConfig:
    """Declarative description of one node in a dependency graph.

    Attributes:
        name: Name by which other nodes refer to this node in their
            ``depends_on`` lists.
        depends_on: Names of the nodes this node depends on. Optional;
            a node with no prerequisites can be written ``NodeConfig('A')``.
    """

    # Name used as the node's key in the graph.
    name: str
    # default_factory (not a shared ``[]`` literal) so that every instance
    # created without explicit dependencies gets its own independent list.
    depends_on: List[str] = field(default_factory=list)

class DependencyError(Exception):
    """Error raised for an invalid dependency graph.

    Attributes:
        cycle: Names of the nodes on the detected cycle when the error
            reports a cycle; ``None`` for every other kind of failure.
    """

    def __init__(self, message: str, cycle: Optional[List[str]] = None):
        # Keep the (optional) cycle path on the instance so handlers can
        # inspect it; the base class only receives the readable message.
        self.cycle = cycle
        super().__init__(message)

## Exercise 1: Graph Construction

Implement `__init__` and `_build_graph`.

**Requirements:**
- Store nodes in a dict by name
- Build adjacency_list (forward edges)
- Build reverse_adjacency_list (backward edges)

In [None]:
class DependencyGraph:
    """Directed graph of named nodes and the dependencies between them.

    Exercise scaffold: both method bodies are placeholders for the learner
    to fill in.

    Attributes ``__init__`` is expected to create:
        nodes: Dict mapping node name -> ``NodeConfig``.
        adjacency_list: Forward edges, mapping a node name to the names of
            the nodes that depend on it (e.g. ``'A' -> ['B', 'C']``).
        reverse_adjacency_list: Backward edges, mapping a node name to the
            names of the nodes it depends on (e.g. ``'C' -> ['A', 'B']``).
    """

    def __init__(self, nodes: List[NodeConfig]):
        """Store ``nodes`` by name and build both adjacency lists.

        Args:
            nodes: Node configurations that make up the graph.
        """
        # TODO: Store nodes by name
        # TODO: Initialize adjacency lists
        # TODO: Build graph
        # TODO: Validate graph (skip for now)
        pass
    
    def _build_graph(self) -> None:
        """Populate ``adjacency_list`` and ``reverse_adjacency_list``.

        For every node, each name in its ``depends_on`` should get a forward
        edge to the node, and the node a backward edge to that name. The
        accompanying test compares the edge lists with ``==``, so the order
        in which edges are appended matters.
        """
        # TODO: For each node, create edges from dependencies to node
        pass

In [None]:
# Test: Exercise 1 - graph construction
nodes = [
    NodeConfig('A', []),
    NodeConfig('B', ['A']),
    NodeConfig('C', ['A', 'B'])
]

graph = DependencyGraph(nodes)

# Requirement: nodes are stored in a dict keyed by name.
assert set(graph.nodes) == {'A', 'B', 'C'}
# Forward edges: 'A' is a dependency of both 'B' and 'C'.
assert graph.adjacency_list['A'] == ['B', 'C']
# Backward edges: 'C' depends on 'A' and 'B'.
assert graph.reverse_adjacency_list['C'] == ['A', 'B']
print("✓ Graph construction works!")

## Exercise 2: Missing Dependency Check

Implement `_check_missing_dependencies`.

In [None]:
def _check_missing_dependencies(self) -> None:
    """Verify that every dependency named by a node is itself a known node.

    Exercise stub; the intended contract is:

    Raises:
        DependencyError: If any node lists a dependency that is not a key
            of ``self.nodes``. All missing names should be collected before
            raising, and the message should contain them (the accompanying
            test checks that the missing name appears in ``str(e)``).
    """
    # TODO: Check each node's dependencies exist in self.nodes
    # TODO: Collect all missing dependencies
    # TODO: Raise DependencyError with helpful message
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph._check_missing_dependencies = _check_missing_dependencies

In [None]:
# Test: Exercise 2 - missing dependency detection
bad_nodes = [
    NodeConfig('A', []),
    NodeConfig('B', ['MISSING'])
]

try:
    # Construction stays inside the try: once __init__ performs validation
    # (Exercise 7) the constructor itself raises DependencyError.
    # A dedicated name is used so this invalid graph never replaces the
    # `graph` variable that other test cells read.
    bad_graph = DependencyGraph(bad_nodes)
    bad_graph._check_missing_dependencies()
except DependencyError as e:
    assert 'MISSING' in str(e)
    print("✓ Missing dependency detection works!")
    print(f"  Error: {e}")
else:
    # Reached only when no DependencyError was raised at all.
    raise AssertionError("Should have raised error")

## Exercise 3: Cycle Detection

Implement `_check_cycles` using DFS.

**Hint:** Track recursion stack to detect back edges.

In [None]:
def _check_cycles(self) -> None:
    """Detect circular dependencies with a depth-first search.

    Exercise stub; the intended contract is:

    Raises:
        DependencyError: If the graph contains a cycle. The accompanying
            test requires the error's ``cycle`` attribute to be set to the
            node names on the cycle (it joins them for display).
    """
    # TODO: Track visited and recursion stack
    # TODO: DFS from each unvisited node
    # TODO: If node in recursion stack, cycle found
    # TODO: Extract the cycle path and raise DependencyError(..., cycle=path)
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph._check_cycles = _check_cycles

In [None]:
# Test: Exercise 3 - cycle detection (A -> B -> C -> A)
cycle_nodes = [
    NodeConfig('A', ['C']),
    NodeConfig('B', ['A']),
    NodeConfig('C', ['B'])
]

try:
    # Construction stays inside the try: once __init__ performs validation
    # (Exercise 7) the constructor itself raises DependencyError.
    # A dedicated name is used so this cyclic graph never replaces the
    # `graph` variable that other test cells read.
    cycle_graph = DependencyGraph(cycle_nodes)
    cycle_graph._check_cycles()
except DependencyError as e:
    assert e.cycle is not None
    print("✓ Cycle detection works!")
    print(f"  Cycle: {' → '.join(e.cycle)}")
else:
    # Reached only when no DependencyError was raised at all.
    raise AssertionError("Should have detected cycle")

## Exercise 4: Topological Sort

Implement `topological_sort` using Kahn's algorithm.

In [None]:
def topological_sort(self) -> List[str]:
    """Return the node names in a dependency-respecting order.

    Exercise stub; intended to be implemented with Kahn's algorithm.

    Returns:
        A list of node names in which every node appears after all of the
        nodes it depends on.
    """
    # TODO: Calculate in-degree for each node
    # TODO: Queue nodes with in-degree 0
    # TODO: Process queue, reducing in-degree of dependents
    # TODO: Verify all nodes processed
    # NOTE(review): what to do when not every node was processed is not
    # specified here - presumably raise DependencyError; confirm against
    # solutions.ipynb.
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph.topological_sort = topological_sort

In [None]:
# Test: Exercise 4 - topological sort on a diamond (A -> B, C -> D)
nodes = [
    NodeConfig('A', []),
    NodeConfig('B', ['A']),
    NodeConfig('C', ['A']),
    NodeConfig('D', ['B', 'C'])
]

# NOTE: `graph` is reused by the Exercise 5 and Exercise 6 test cells.
graph = DependencyGraph(nodes)
order = graph.topological_sort()

# Every node must appear exactly once (index() alone would not notice
# duplicated or extra entries).
assert sorted(order) == ['A', 'B', 'C', 'D']

# Verify order is valid
assert order.index('A') < order.index('B')
assert order.index('A') < order.index('C')
assert order.index('B') < order.index('D')
assert order.index('C') < order.index('D')
print("✓ Topological sort works!")
print(f"  Order: {order}")

## Exercise 5: Execution Layers

Implement `get_execution_layers` for parallel execution.

In [None]:
def get_execution_layers(self) -> List[List[str]]:
    """Group the nodes into layers that can be executed in parallel.

    Exercise stub; the intended contract is:

    Returns:
        A list of layers, each a list of node names. A layer holds all the
        nodes that are ready at the same time, i.e. whose dependencies all
        sit in earlier layers (for the diamond A -> B, C -> D:
        ``[['A'], ['B', 'C'], ['D']]``, order within a layer not fixed).
    """
    # TODO: Similar to topological sort, but collect all ready nodes at once
    # TODO: Each iteration creates one layer
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph.get_execution_layers = get_execution_layers

In [None]:
# Test: Exercise 5 - execution layers
# Uses the diamond-shaped `graph` built in the Exercise 4 test cell.
layers = graph.get_execution_layers()

# Exactly three layers: a spurious extra layer must not go unnoticed.
assert len(layers) == 3
assert layers[0] == ['A']
# 'B' and 'C' are independent of each other, so their order is not fixed.
assert set(layers[1]) == {'B', 'C'}
assert layers[2] == ['D']
print("✓ Execution layers work!")
for i, layer in enumerate(layers, 1):
    print(f"  Layer {i}: {layer}")

## Exercise 6: Dependency Queries

Implement `get_dependencies` and `get_dependents`.

In [None]:
def get_dependencies(self, node_name: str) -> Set[str]:
    """Return every node that ``node_name`` depends on, directly or not.

    Exercise stub; intended to walk ``reverse_adjacency_list`` breadth-first.

    Args:
        node_name: Name of the node whose upstream nodes are wanted.

    Returns:
        The set of all transitive dependencies, excluding ``node_name``
        itself.
    """
    # TODO: BFS backward through reverse_adjacency_list
    pass

def get_dependents(self, node_name: str) -> Set[str]:
    """Return every node that depends on ``node_name``, directly or not.

    Exercise stub; intended to walk ``adjacency_list`` breadth-first.

    Args:
        node_name: Name of the node whose downstream nodes are wanted.

    Returns:
        The set of all transitive dependents, excluding ``node_name``
        itself.
    """
    # TODO: BFS forward through adjacency_list
    pass

# Attach both functions to the class so they can be called as methods.
DependencyGraph.get_dependencies = get_dependencies
DependencyGraph.get_dependents = get_dependents

In [None]:
# Test: Exercise 6 - dependency queries
# Uses the diamond-shaped `graph` built in the Exercise 4 test cell.
assert graph.get_dependencies('D') == {'A', 'B', 'C'}
assert graph.get_dependents('A') == {'B', 'C', 'D'}
# Boundary cases: a root has no dependencies, a sink has no dependents.
assert graph.get_dependencies('A') == set()
assert graph.get_dependents('D') == set()
print("✓ Dependency queries work!")

## Exercise 7: Complete Integration Test

Test with a complex real-world pipeline.

In [None]:
# Add validation to __init__
def __init_with_validation__(self, nodes: List[NodeConfig]):
    """Replacement constructor that builds the graph and validates it.

    Args:
        nodes: Node configurations that make up the graph.

    The missing-dependency check and the cycle check both run as part of
    construction, after the edges have been built; whatever they raise
    propagates to the caller.
    """
    # Index the configurations by name (a repeated name keeps the last one).
    by_name = {}
    for config in nodes:
        by_name[config.name] = config
    self.nodes = by_name

    # Both edge maps start out empty and are filled in by _build_graph().
    self.adjacency_list: Dict[str, List[str]] = defaultdict(list)
    self.reverse_adjacency_list: Dict[str, List[str]] = defaultdict(list)

    self._build_graph()

    # Validation: unknown dependency names first, then cycles.
    self._check_missing_dependencies()
    self._check_cycles()


# From here on every new DependencyGraph is validated on construction.
DependencyGraph.__init__ = __init_with_validation__

In [None]:
# Complex ML pipeline, written as: stage name -> stages it depends on.
# Dict insertion order is the order in which the NodeConfig objects are made.
pipeline_spec = {
    'load_data': [],
    'clean_data': ['load_data'],
    'feature_engineering': ['clean_data'],
    'split_data': ['feature_engineering'],
    'train_model': ['split_data'],
    'validate_model': ['train_model', 'split_data'],
    'hyperparameter_tuning': ['validate_model'],
    'final_model': ['hyperparameter_tuning', 'train_model'],
    'generate_report': ['final_model', 'validate_model'],
}
ml_pipeline = [NodeConfig(stage, deps) for stage, deps in pipeline_spec.items()]

graph = DependencyGraph(ml_pipeline)

# One valid sequential order, numbered from 1.
print("Execution order:")
for i, node in enumerate(graph.topological_sort(), start=1):
    print(f"{i}. {node}")

# Groups of stages that could run at the same time, numbered from 1.
print("\nExecution layers:")
for i, layer in enumerate(graph.get_execution_layers(), start=1):
    print(f"Layer {i}: {layer}")

# Upstream / downstream queries; sorted so the printed output is stable.
print("\nDependency analysis:")
print(f"'generate_report' depends on: {sorted(graph.get_dependencies('generate_report'))}")
print(f"'clean_data' is needed by: {sorted(graph.get_dependents('clean_data'))}")

## 🎯 Challenge Exercises

Try these advanced features:

### Challenge 1: Critical Path
Find the longest path through the graph (determines minimum execution time).

In [None]:
def get_critical_path(self) -> List[str]:
    """Find the longest path from any source to any sink.

    Challenge stub (not implemented yet).

    Returns:
        The node names along the longest path through the graph.
        NOTE(review): the order of the returned names (source first?) and
        the tie-breaking rule between equally long paths are not specified
        here - confirm against solutions.ipynb.
    """
    # TODO: Calculate longest path for each node
    # TODO: Return path with maximum length
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph.get_critical_path = get_critical_path

### Challenge 2: Graph Diff
Compare two graphs and identify changed nodes.

In [None]:
def diff(self, other: 'DependencyGraph') -> Dict[str, Set[str]]:
    """Compare with another graph.

    Challenge stub (not implemented yet).

    Args:
        other: The graph to compare this one against.

    Returns:
        Dict with keys: 'added', 'removed', 'modified', each mapping to a
        set of node names.
        NOTE(review): the direction of the comparison (is 'added' a node
        present only in ``other``?) and what counts as 'modified'
        (presumably a changed ``depends_on``) are not specified here -
        confirm against solutions.ipynb.
    """
    # TODO: Find added/removed/modified nodes
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph.diff = diff

### Challenge 3: Minimal Rebuild Set
Given a list of changed nodes, find the minimal set of nodes to re-execute.

In [None]:
def get_rebuild_set(self, changed_nodes: List[str]) -> Set[str]:
    """Get all nodes that need re-execution when given nodes change.

    Challenge stub (not implemented yet).

    Args:
        changed_nodes: Names of the nodes that have changed.

    Returns:
        The set of node names to re-execute.
        NOTE(review): presumably the changed nodes themselves are included
        together with all of their dependents - confirm against
        solutions.ipynb.
    """
    # TODO: Find union of all dependents for changed nodes
    pass

# Attach the function to the class so it can be called as a method.
DependencyGraph.get_rebuild_set = get_rebuild_set

## 🎓 Reflection

After completing exercises, consider:

1. **Time Complexity**
   - Graph construction: O(N + E)
   - Topological sort: O(N + E)
   - Cycle detection: O(N + E)
   - Dependency queries: O(N + E) worst case

2. **Space Complexity**
   - Two adjacency lists: O(N + E)
   - Node storage: O(N)

3. **Design Decisions**
   - Why two adjacency lists?
   - Why validate in constructor?
   - When to use BFS vs DFS?

See `solutions.ipynb` for complete implementations!