# Notebook to parse a workflow description and construct the task grouping

## 1. Import required libraries

In [8]:
import json
import itertools
import numpy as np

## 2. Parses workflow description

In [9]:
def load_workflow(file_path):
    """Load workflow from a JSON file."""
    with open(file_path, 'r') as f:
        return json.load(f)

## 3. Constructs a DAG of tasks based on dependencies.

In [18]:
def construct_dag(workflow):
    """Construct a directed graph (DAG) representation of the workflow."""
    dag = {}
    num_tasks = workflow["NumTasks"]
    
    # Create nodes for all tasks
    for i in range(1, num_tasks + 1):
        task_name = f"Task{i}"
        dag[task_name] = set()
        # Add edge if this task has an input task
        if "InputTask" in workflow[task_name]:
            dag[workflow[task_name]["InputTask"]].add(task_name)
    
    print(f"Constructed DAG: {dag}")
    return dag

## 4. Applies hard constraints to form initial maximal groups.

### 4.1. Util to decompose ScramArch into OS and CPU architecture

In [21]:
def extract_os_and_arch(scram_arch):
    # Split the ScramArch string into its components
    os_part, arch, _ = scram_arch.split('_')
    
    # Extract just the numeric version from the OS part
    # Strip all letters and keep only the number
    os_version = ''.join(char for char in os_part if char.isdigit())
    
    return os_version, arch

def parse_scram_arch_list(scram_arch_list):
    results = []
    for scram_arch in scram_arch_list:
        os_version, arch = extract_os_and_arch(scram_arch)
        results.append({
            'os_version': os_version,
            'cpu_arch': arch
        })
        print(f"ScramArch: {scram_arch}, OS Version: {os_version}, CPU Architecture: {arch}")
    return results

### 4.2. Evaluate hard constraints

In [20]:
def hard_constraint(ti, tj):
    """
    Check if two tasks satisfy hard constraints.
    Args:
        ti: First task (potential predecessor)
        tj: Second task (potential successor)
    Returns:
        bool: True if tasks have same OS version and CPU architecture,
              and tj either has no input task or depends on ti
    """
    # Extract OS version and architecture for both tasks
    ti_os, ti_arch = extract_os_and_arch(ti["ScramArch"][0])  # Using [0] as ScramArch is a list
    tj_os, tj_arch = extract_os_and_arch(tj["ScramArch"][0])
    
    return (
        ti_os == tj_os and
        ti_arch == tj_arch and
        ("InputTask" not in tj or tj["InputTask"] == ti)
    )

## 5. Evaluates soft constraints to refine the grouping.

In [22]:
def compute_soft_constraint(ti, tj, weights):
    """Compute soft constraint compatibility score for two tasks."""
    score = (
        weights["EvtThrou"] * (1 / (ti["TimePerEvent"] + tj["TimePerEvent"])) +
        weights["CpuEff"] * (min(ti["Multicore"], tj["Multicore"]) / max(ti["Multicore"], tj["Multicore"])) +
        weights["MemOcc"] * (min(ti["Memory"], tj["Memory"]) / max(ti["Memory"], tj["Memory"])) +
        weights["Acc"] * (1 if ti["RequiresGPU"] == tj["RequiresGPU"] else 0)
    )
    return score / sum(weights.values())  # Normalization

## 6. Iteratively updates the DAG and re-evaluates grouping decisions.

In [26]:
def initial_hard_groups(workflow):
    """Generate initial maximal groups using hard constraints."""
    tasks = [task for task in workflow if task.startswith("Task")]
    groups = []
    
    for task in tasks:
        grouped = False
        for group in groups:
            if all(hard_constraint(workflow[task], workflow[t]) for t in group):
                group.add(task)
                grouped = True
                break
        if not grouped:
            groups.append({task})
    
    return groups

def refine_groups(groups, workflow, threshold, weights):
    """Refine groups using soft constraints."""
    refined_groups = []
    
    for group in groups:
        group_tasks = list(group)
        scores = np.zeros((len(group_tasks), len(group_tasks)))

        # Compute pairwise soft constraints
        for i, j in itertools.combinations(range(len(group_tasks)), 2):
            scores[i, j] = scores[j, i] = compute_soft_constraint(
                workflow[group_tasks[i]], workflow[group_tasks[j]], weights
            )

        # Compute average compatibility
        print(f"Scores: {scores}")
        print(f"Group tasks: {group_tasks}")
        avg_score = np.sum(scores) / (len(group_tasks) * (len(group_tasks) - 1))

        # Apply threshold
        if avg_score < threshold:
            best_split = max(
                [(group_tasks[:i], group_tasks[i:]) for i in range(1, len(group_tasks))],
                key=lambda g: min(compute_soft_constraint(workflow[g[0][0]], workflow[g[1][0]], weights), avg_score)
            )
            refined_groups.append(set(best_split[0]))
            refined_groups.append(set(best_split[1]))
        else:
            refined_groups.append(group)

    return refined_groups

def regroup_and_iterate(workflow, threshold=0.5):
    """Iteratively evaluate DAG and refine groups."""
    weights = {"EvtThrou": 0.3, "CpuEff": 0.3, "MemOcc": 0.2, "Acc": 0.2}

    while True:
        groups = initial_hard_groups(workflow)
        refined = refine_groups(groups, workflow, threshold, weights)

        if refined == groups:
            break  # Stop if no changes occur

    return refined

## 7. Execute workflow composition

In [29]:
# Example usage:
IN_FILE = "../tests/ex0_perfect.json"

workflow = load_workflow(IN_FILE)
groups = regroup_and_iterate(workflow)
print(groups)

Scores: [[0.    0.715]
 [0.715 0.   ]]
Group tasks: ['Task2', 'Task1']
Scores: [[0.]]
Group tasks: ['Task3']
Scores: [[0.]]
Group tasks: ['Task4']
[{'Task2', 'Task1'}, {'Task3'}, {'Task4'}]


  avg_score = np.sum(scores) / (len(group_tasks) * (len(group_tasks) - 1))
