# Stage 3: Pipeline Mutation & Evolutionary Search

This notebook demonstrates how the C60.ai framework mutates pipeline graphs and performs evolutionary search.

In [1]:
import random
import matplotlib.pyplot as plt
import networkx as nx
from c60.engine.graph_schema import DAG, Node, Edge, NodeType


ModuleNotFoundError: No module named 'matplotlib'

## Define a Simple Pipeline Graph

We start with a simple pipeline: Imputer → Scaler → RandomForest.

In [2]:
def build_simple_pipeline():
    imputer = Node(node_id='imputer', node_type=NodeType.PREPROCESSOR, parameters={'strategy': 'mean'})
    scaler = Node(node_id='scaler', node_type=NodeType.PREPROCESSOR, parameters={'type': 'standard'})
    rf = Node(node_id='rf', node_type=NodeType.ESTIMATOR, parameters={'n_estimators': 100})
    dag = DAG()
    dag.add_node(imputer)
    dag.add_node(scaler)
    dag.add_node(rf)
    dag.add_edge(Edge(source='imputer', target='scaler'))
    dag.add_edge(Edge(source='scaler', target='rf'))
    return dag

base_dag = build_simple_pipeline()

NameError: name 'Node' is not defined

## Visualize the Base Pipeline


In [4]:
def plot_dag(dag, title='Pipeline DAG'):
    G = nx.DiGraph()
    for node_id, node in dag.nodes.items():
        G.add_node(node_id, label=f'{node.node_type.value}
{node_id}')
    for edge in dag.edges:
        G.add_edge(edge.source, edge.target)
    pos = nx.spring_layout(G, seed=42)
    labels = nx.get_node_attributes(G, 'label')
    plt.figure(figsize=(6, 3))
    nx.draw(G, pos, with_labels=True, labels=labels, node_size=1800, node_color='lightgreen', font_size=10, font_weight='bold', arrowsize=20)
    plt.title(title)
    plt.show()

plot_dag(base_dag, title='Base Pipeline DAG')

SyntaxError: unterminated f-string literal (detected at line 4) (2488724449.py, line 4)

## Simulate Mutation

Let's define a simple mutation: change the scaler type or add a new feature selector node.

In [None]:
def mutate_pipeline(dag):
    # Copy the DAG
    import copy
    dag2 = copy.deepcopy(dag)
    # Randomly mutate scaler type
    scaler = dag2.get_node('scaler')
    if scaler:
        scaler.parameters['type'] = random.choice(['standard', 'minmax', 'robust'])
    # With some probability, add a PCA node
    if 'pca' not in dag2.nodes and random.random() < 0.5:
        pca = Node(node_id='pca', node_type=NodeType.FEATURE_SELECTOR, parameters={'n_components': 2})
        dag2.add_node(pca)
        dag2.add_edge(Edge(source='scaler', target='pca'))
        # Connect PCA to RF
        dag2.edges = [e for e in dag2.edges if not (e.source == 'scaler' and e.target == 'rf')]
        dag2.add_edge(Edge(source='pca', target='rf'))
    return dag2

mutated_dag = mutate_pipeline(base_dag)
plot_dag(mutated_dag, title='Mutated Pipeline DAG')

## Simulate Evolutionary Search

We simulate a few generations of mutation and selection, using random scores for illustration.

In [None]:
population = [build_simple_pipeline() for _ in range(5)]
history = []
for gen in range(3):
    # Mutate population
    population = [mutate_pipeline(dag) for dag in population]
    # Assign random scores
    scores = [random.uniform(0.7, 0.99) for _ in population]
    best_idx = max(range(len(scores)), key=lambda i: scores[i])
    best_dag = population[best_idx]
    history.append((gen, best_dag, scores[best_idx]))
    print(f'Generation {gen+1}: Best Score = {scores[best_idx]:.3f}')
    plot_dag(best_dag, title=f'Best Pipeline Gen {gen+1}')

## Summary

- Demonstrated pipeline mutation and evolutionary search.
- Visualized the evolution of pipeline graphs.
- Next: We will score and optimize pipelines using cross-validation and hyperparameter search.