# Degridder Experiment - Compilation and Execution

This notebook handles the compilation and execution phases of the degridder experiment.
It compares the IARA implementation against the Preesm implementation available at https://gitlab.insa-rennes.fr/Anaelle.Cloarec/degridder

This notebook:
1. Generates parametric scenarios
2. Compiles Preesm versions
3. Compiles IARA versions
4. Executes both versions
5. Collects performance data and saves it for analysis

Analysis and graph generation are done in a separate notebook: `analysis.ipynb`

In [None]:
import os

# Resolve the experiment directory to an absolute path. The `!` shell capture
# yields the command's output lines, so the first line is kept as the path.
experiment_dir = !realpath ~/repos/iara/experiment/degridder
experiment_dir = experiment_dir[0]

# Problem-size values passed unchanged to every Scenario created in this cell.
grid_size = 5120
num_visibilities = 7848960

class Scenario:
  """One point of the parameter sweep, together with the data collected for it.

  The constructor only records the configuration; every measurement field
  starts as None and is filled in by the later collection cells.

  Args:
    name: Scenario file name; also used as the instance directory name.
    num_cores: Value of the NUM_CORES sweep parameter.
    num_chunks: Value of the NUM_CHUNK sweep parameter.
    num_supports: Value of the NUM_KERNEL_SUPPORT sweep parameter.
    grid_size: Grid size recorded for this scenario.
    num_visibilities: Number of visibilities recorded for this scenario.
  """

  def __init__(self, name: str, num_cores: int, num_chunks: int, num_supports: int, grid_size: int, num_visibilities: int):
    self.name = name
    # Relies on the notebook-level `experiment_dir` being defined beforehand.
    self.instance_path = f"{experiment_dir}/instances/{name}"

    # Compilation results (None until the collection cells have run).
    self.iara_bin_path = None
    self.preesm_bin_path = None
    self.iara_scheduling_time = None
    self.preesm_scheduling_time = None
    self.srdag_edges = None
    self.srdag_nodes = None
    self.preesm_bin_size = None
    self.iara_bin_size = None

    # Sweep configuration.
    self.num_cores = num_cores
    self.num_supports = num_supports
    self.num_chunks = num_chunks
    self.num_visibilities = num_visibilities
    self.grid_size = grid_size
    self.dataset_size = "large"

    # Attributes that other cells assign later. They are declared here so that
    # reading them before those cells run yields None instead of raising
    # AttributeError.
    self.topology_file = None
    self.main_file = None
    self.iara_build_dir = None
    self.preesm_wall_time = None
    self.preesm_max_rss = None
    self.iara_wall_time = None
    self.iara_max_rss = None

# Generate parametric scenarios

# Values swept for each parameter; one scenario is generated for every
# combination. NUM_CHUNK and NUM_KERNEL_SUPPORT are also the placeholder names
# substituted into the scenario templates.
parameters = {
  "NUM_CORES" : [1,2,4,8],
  "NUM_CHUNK" : [1,2,4,8,16,32,64,128,256,512],
  "NUM_KERNEL_SUPPORT" : [8],
}

# Single-configuration alternative, kept commented out; uncomment it to
# override the sweep above with one scenario.
# parameters = {
#   "NUM_CORES" : [2],
#   "NUM_CHUNK" : [512],
#   "NUM_KERNEL_SUPPORT" : [8],
# }


def shuffle(x: list):
  """Reorder `x` so that its extremes and midpoint come first.

  The first, last and middle elements are emitted up front, then the same
  rule is applied to the elements between first and middle, and to those
  between middle and last. Lists of three or fewer elements are returned
  as-is (the very same object, not a copy).
  """
  # let's make sure the corner cases are checked first.
  size = len(x)
  if size <= 3:
    return x
  middle = size // 2
  corner_cases = [x[0], x[-1], x[middle]]
  lower_half = shuffle(x[1:middle])
  upper_half = shuffle(x[middle + 1:-1])
  return corner_cases + lower_half + upper_half

def generate_scenario_file(num_cores: int, num_chunk: int, num_supports: int):
    """Instantiate the per-core-count scenario template for one configuration.

    The template for `num_cores` is read, its «NUM_CHUNK» and
    «NUM_KERNEL_SUPPORT» placeholders are replaced, and the result is written
    into the degridder project's Scenarios directory.

    Args:
        num_cores: Selects which template file is used.
        num_chunk: Value substituted for «NUM_CHUNK».
        num_supports: Value substituted for «NUM_KERNEL_SUPPORT».

    Returns:
        The generated scenario's file name, or None when no template exists
        for `num_cores`. If the output file already exists it is left
        untouched and its name is returned.
    """
    import os

    scenario_name = f"generated_large_{num_cores}cores_{num_chunk}chunks_{num_supports}supports.scenario"
    template_file = f"{experiment_dir}/templates/parametric_scenario_large_{num_cores}_cores.scenario"
    output_path = os.path.expanduser(f"~/repos/degridder/Scenarios/{scenario_name}")

    # Check if the scenario file already exists
    if os.path.exists(output_path):
        print(f"Scenario {scenario_name} already exists, skipping...")
        return scenario_name

    # Check if template file exists
    if not os.path.exists(template_file):
        print(f"Template file for {num_cores} cores not found: {template_file}")
        return None

    # Read the template file. The placeholders are non-ASCII, so the encoding
    # is pinned instead of depending on the locale default; otherwise the
    # replacements below can silently fail to match.
    # NOTE(review): assumes the templates are stored as UTF-8 — confirm.
    with open(template_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Replace the placeholders
    content = content.replace('«NUM_CHUNK»', str(num_chunk))
    content = content.replace('«NUM_KERNEL_SUPPORT»', str(num_supports))

    # Write the new scenario file with the same encoding it was read in
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"Generated scenario: {scenario_name}")
    return scenario_name

def generate_parametric_scenarios():
  global parameters
  global grid_size
  global num_visibilities
  # Let's start from the borders.
  for name, values in parameters.items():
    parameters[name] = shuffle(values)

  print(parameters)

  ! rm ~/repos/degridder/Scenarios/generated*


  ordered_scenarios = []
  names = list(parameters.keys())
  for num_cores in parameters["NUM_CORES"]:
    for num_chunk in parameters["NUM_CHUNK"]:
      for num_supports in parameters["NUM_KERNEL_SUPPORT"]:
        scenario_name = generate_scenario_file(num_cores, num_chunk, num_supports)
        scenario = Scenario(scenario_name, num_cores, num_chunk, num_supports, grid_size, num_visibilities)
        ordered_scenarios.append(scenario)
  return ordered_scenarios

all_ordered_scenarios =  generate_parametric_scenarios()

In [None]:
# Select scenarios to run
# ordered_scenarios = all_ordered_scenarios[10:]  # Skip first 10 scenarios
ordered_scenarios = all_ordered_scenarios  # Run all scenarios

# Announce the selection, listing only the first few names.
preview_count = 5
print(f"Running {len(ordered_scenarios)} scenarios")
for position, selected in enumerate(ordered_scenarios[:preview_count], start=1):
    print(f"{position}. {selected.name}")
not_listed = len(ordered_scenarios) - preview_count
if not_listed > 0:
    print(f"... and {not_listed} more")

In [None]:
# Compile Preesm versions, with a timeout.

def compile_preesm():

  %cd {experiment_dir}
  # !rm -rf instances
  !mkdir -p instances

  for scenario_obj in ordered_scenarios:

    scenario = scenario_obj.name

    !mkdir -p "{experiment_dir}/instances/{scenario}"
    !rm -rf "{experiment_dir}/instances/{scenario}/*"

    command = f"~/repos/preesm-cli/commandLinePreesm.sh ~/Downloads/preesm-3.21.0.202501251928-linux.gtk.x86_64/ ~/repos/degridder/ Codegen.workflow {scenario} >{experiment_dir}/instances/{scenario}/preesm_stdout.txt 2>{experiment_dir}/instances/{scenario}/preesm_stderr.txt"

    print(command)

    ! \time -v -o "{experiment_dir}/instances/{scenario}/preesm_scheduling_time.txt" timeout 3m {command}

    %cd ~/repos/degridder/Code

    !mkdir -p build
    !rm -rf build/*
    !cmake -DCMAKE_BUILD_TYPE=Release --log-level=VERBOSE -B build
    %cd build
    !make

    ! rm -rf "{experiment_dir}/instances/{scenario}/preesm_build"
    ! cp -r ~/repos/degridder/Code/build "{experiment_dir}/instances/{scenario}/preesm_build"

compile_preesm()

In [None]:
# Retry compilation for scenarios where Preesm failed with increased timeout
for scenario_obj in ordered_scenarios:
  scenario = scenario_obj.name
  preesm_bin_path = f"{experiment_dir}/instances/{scenario}/preesm_build/degridder_pipeline"
  if not os.path.exists(preesm_bin_path):
    print(f"Retrying compilation for scenario: {scenario} with 5m timeout")
    !mkdir -p "{experiment_dir}/instances/{scenario}"
    !rm -rf "{experiment_dir}/instances/{scenario}/*"

    command = f"~/repos/preesm-cli/commandLinePreesm.sh ~/Downloads/preesm-3.21.0.202501251928-linux.gtk.x86_64/ ~/repos/degridder/ Codegen.workflow {scenario} >{experiment_dir}/instances/{scenario}/preesm_stdout.txt 2>{experiment_dir}/instances/{scenario}/preesm_stderr.txt"
    ! \time -v -o "{experiment_dir}/instances/{scenario}/preesm_scheduling_time.txt" timeout 5m {command}

    %cd ~/repos/degridder/Code
    !mkdir -p build
    !rm -rf build/*
    !cmake -DCMAKE_BUILD_TYPE=Release --log-level=VERBOSE -B build
    %cd build
    !make

    ! rm -rf "{experiment_dir}/instances/{scenario}/preesm_build"
    ! cp -r ~/repos/degridder/Code/build "{experiment_dir}/instances/{scenario}/preesm_build"

Now, let's compile our IARA version.

In [None]:
# Build the IARA version of every selected scenario, each in its own build
# directory under the scenario's instance directory.
%cd {experiment_dir}

for scenario in ordered_scenarios:
  # Record the per-scenario build paths on the Scenario object itself.
  scenario.topology_file = f"{experiment_dir}/instances/{scenario.name}/build/topology.mlir"
  scenario.main_file = f"{experiment_dir}/instances/{scenario.name}/build/main.cpp"
  scenario.iara_build_dir = f"{experiment_dir}/instances/{scenario.name}/build"
  # Start from an empty build directory.
  ! mkdir -p {scenario.iara_build_dir}
  ! rm -rf {scenario.iara_build_dir}/*
  %cd {scenario.iara_build_dir}
  # The build script is invoked by relative path, so stop here if the
  # directory change did not land where expected.
  assert os.getcwd() == scenario.iara_build_dir, f"Not in expected build dir: {os.getcwd()} vs {scenario.iara_build_dir}"
  # Three levels up from instances/<name>/build is the experiment directory.
  # NOTE(review): what build_instance.sh reads and produces is not visible in
  # this notebook — presumably it writes the binary and
  # iara_scheduling_time.txt that the collection cell looks for; confirm.
  ! SCHEDULER_MODE=virtual-fifo sh -x ../../../build_instance.sh

In [None]:
# Execute both versions for performance measurement
# NOTE(review): the notebook only shows run_instance.sh being invoked once per
# scenario; that it runs both the Preesm and the IARA binary and writes the
# *_degridder_time.txt files read later is an assumption — confirm in the script.

for scenario in ordered_scenarios:
  # Run from the scenario's build directory; the script sits three levels up.
  %cd {scenario.instance_path}/build
  # The core count is passed to the script through the environment.
  ! NUM_CORES={scenario.num_cores} sh -x ../../../run_instance.sh

In [None]:
# Collect compilation data from Preesm

import os
import re

for scenario in all_ordered_scenarios:
  scenario.preesm_bin_path = f'{experiment_dir}/instances/{scenario.name}/preesm_build/degridder_pipeline'
  if os.path.exists(scenario.preesm_bin_path):
    scenario.preesm_bin_size = os.path.getsize(scenario.preesm_bin_path)
    walltime_str = ! grep "wall clock" '{experiment_dir}/instances/{scenario.name}/preesm_scheduling_time.txt' | cut -f 8 -d ' '
    time_str = walltime_str[0].strip()
    minutes, seconds = time_str.split(':')
    scenario.preesm_scheduling_time = float(minutes) * 60 + float(seconds)

  srdag_output = ! grep "SRDAG" {experiment_dir}/instances/{scenario.name}/preesm_stdout.txt
  if srdag_output and len(srdag_output) > 0:
    match = re.search(r'SRDAG with (\d+) vertices and (\d+) edges', srdag_output[0])
    if match:
      scenario.srdag_nodes = int(match.group(1))
      scenario.srdag_edges = int(match.group(2))
    else:
      scenario.srdag_nodes = None
      scenario.srdag_edges = None

!ls -lah {experiment_dir}/instances/*/preesm_build/degridder_pipeline

print("Preesm compilation results:")
for s in ordered_scenarios:
    status = "✓" if s.preesm_bin_size else "✗"
    size = f"{s.preesm_bin_size/1024:.1f}KB" if s.preesm_bin_size else "N/A"
    edges = str(s.srdag_edges) if s.srdag_edges else "N/A"
    print(f"{status} {s.name[:30]:<30} | Size: {size:<8} | Edges: {edges}")

In [None]:
# Collect compilation data from IARA

for scenario in all_ordered_scenarios:
  scenario.iara_bin_path = f'{experiment_dir}/instances/{scenario.name}/build/degridder_pipeline'
  if os.path.exists(scenario.iara_bin_path):
    scenario.iara_bin_size = os.path.getsize(scenario.iara_bin_path)
    walltime_str = ! grep "wall clock" '{experiment_dir}/instances/{scenario.name}/iara_scheduling_time.txt' | cut -f 8 -d ' '
    time_str = walltime_str[0].strip()
    minutes, seconds = time_str.split(':')
    scenario.iara_scheduling_time = float(minutes) * 60 + float(seconds)

!ls -lah {experiment_dir}/instances/*/build/degridder_pipeline

print("IARA compilation results:")
for s in ordered_scenarios:
    status = "✓" if s.iara_bin_size else "✗"
    size = f"{s.iara_bin_size/1024:.1f}KB" if s.iara_bin_size else "N/A"
    sched_time = f"{s.iara_scheduling_time:.2f}s" if s.iara_scheduling_time else "N/A"
    print(f"{status} {s.name[:30]:<30} | Size: {size:<8} | Scheduling: {sched_time}")

In [None]:
# Experiment completion summary
# NOTE(review): the notebook's final cell prints this same summary; this
# earlier copy is kept, with its string literals repaired (they contained raw
# line breaks, which is a SyntaxError).
print("\n=== EXPERIMENT COMPLETE ===")
print(f"Total scenarios processed: {len(all_ordered_scenarios)}")
print(f"Scenarios with Preesm results: {sum(1 for s in all_ordered_scenarios if s.preesm_bin_size)}")
print(f"Scenarios with IARA results: {sum(1 for s in all_ordered_scenarios if s.iara_bin_size)}")

# Count execution data by checking if files exist
preesm_exec_count = 0
iara_exec_count = 0
for scenario in all_ordered_scenarios:
    preesm_time_file = f'{experiment_dir}/instances/{scenario.name}/preesm_degridder_time.txt'
    iara_time_file = f'{experiment_dir}/instances/{scenario.name}/iara_degridder_time.txt'
    if os.path.exists(preesm_time_file):
        preesm_exec_count += 1
    if os.path.exists(iara_time_file):
        iara_exec_count += 1

print(f"Execution data files - Preesm: {preesm_exec_count}, IARA: {iara_exec_count}")
print(f"\nExperiment data is stored in: {experiment_dir}/instances/")
print("Ready for analysis! Run the analysis.ipynb notebook to generate graphs.")

In [None]:
# Collect execution performance data

def parse_time_file(file_path):
    """Read a `time -v` report and return ``(wall_seconds, max_rss_kb)``.

    Either element is None when its line is absent from the report. A wall
    time with more than three ':'-separated fields is also reported as None.
    A missing file yields ``(None, None)``; any other error is printed and
    likewise yields ``(None, None)``.
    """
    try:
        with open(file_path, 'r') as report_file:
            report = report_file.read()

        # Wall clock time: h:mm:ss, m:ss, or a bare number of seconds.
        elapsed = None
        wall_hit = re.search(r'Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (.+)', report)
        if wall_hit:
            fields = wall_hit.group(1).strip().split(':')
            if len(fields) == 1:
                elapsed = float(fields[0])
            elif len(fields) == 2:
                mins, secs = map(float, fields)
                elapsed = mins * 60 + secs
            elif len(fields) == 3:
                hrs, mins, secs = map(float, fields)
                elapsed = hrs * 3600 + mins * 60 + secs

        # Peak resident set size, in kilobytes as printed by the report.
        rss_hit = re.search(r'Maximum resident set size \(kbytes\): (\d+)', report)
        peak_rss = None if rss_hit is None else int(rss_hit.group(1))

        return elapsed, peak_rss
    except FileNotFoundError:
        return None, None
    except Exception as e:
        print(f"Error parsing {file_path}: {e}")
        return None, None

# Extract execution data for all scenarios: one `time -v` report per tool,
# each parsed into a (wall time, max RSS) pair stored on the scenario.
for scenario in all_ordered_scenarios:
    preesm_report = f'{experiment_dir}/instances/{scenario.name}/preesm_degridder_time.txt'
    iara_report = f'{experiment_dir}/instances/{scenario.name}/iara_degridder_time.txt'

    preesm_wall, preesm_peak = parse_time_file(preesm_report)
    scenario.preesm_wall_time = preesm_wall
    scenario.preesm_max_rss = preesm_peak

    iara_wall, iara_peak = parse_time_file(iara_report)
    scenario.iara_wall_time = iara_wall
    scenario.iara_max_rss = iara_peak

# Summary table for the selected scenarios; missing values print as "N/A".
print("Execution Performance Summary:")
print("Scenario | Preesm Wall Time (s) | IARA Wall Time (s) | Preesm Max RSS (KB) | IARA Max RSS (KB)")
print("-" * 90)
for scenario in ordered_scenarios:
    if scenario.preesm_wall_time:
        preesm_wt = f"{scenario.preesm_wall_time:.2f}"
    else:
        preesm_wt = "N/A"
    if scenario.iara_wall_time:
        iara_wt = f"{scenario.iara_wall_time:.2f}"
    else:
        iara_wt = "N/A"
    if scenario.preesm_max_rss:
        preesm_rss = str(scenario.preesm_max_rss)
    else:
        preesm_rss = "N/A"
    if scenario.iara_max_rss:
        iara_rss = str(scenario.iara_max_rss)
    else:
        iara_rss = "N/A"
    print(f"{scenario.name[:20]:<20} | {preesm_wt:>17} | {iara_wt:>15} | {preesm_rss:>16} | {iara_rss:>14}")

In [None]:
# Experiment completion summary
total_scenarios = len(all_ordered_scenarios)
preesm_built = sum(1 for s in all_ordered_scenarios if s.preesm_bin_size)
iara_built = sum(1 for s in all_ordered_scenarios if s.iara_bin_size)

print("\n=== EXPERIMENT COMPLETE ===")
print(f"Total scenarios processed: {total_scenarios}")
print(f"Scenarios with Preesm results: {preesm_built}")
print(f"Scenarios with IARA results: {iara_built}")

# Count execution data by checking if files exist
preesm_exec_count = sum(
    1 for s in all_ordered_scenarios
    if os.path.exists(f'{experiment_dir}/instances/{s.name}/preesm_degridder_time.txt')
)
iara_exec_count = sum(
    1 for s in all_ordered_scenarios
    if os.path.exists(f'{experiment_dir}/instances/{s.name}/iara_degridder_time.txt')
)

print(f"Execution data files - Preesm: {preesm_exec_count}, IARA: {iara_exec_count}")
print(f"\nExperiment data is stored in: {experiment_dir}/instances/")
print("Ready for analysis! Run the analysis.ipynb notebook to generate graphs.")