We will define our genetic algorithm mapper with the following algorithm. For our algorithm, we use the following definition of fitness.
fitness = 1 / (latency × energy)
n = 5, k = 20, p = 10

1. Generate n = 5 randomly ordered strings of the valid dataflow, that is a random permutation of [R, S, P, Q, C, M, N]. Initialize f to 0.
2. Initialize a goal fitness g, dependent on latency and energy.
3. While f < g,
    Mutation: For each of the n permutations, create k/n mutated copies, giving k mutations in total. To create a mutation, pick two parameters at random and swap them.
    Selection: Calculate latency and energy and evaluate the fitness of each of the k mutations. Take the p = 10 with the highest fitness.
    Crossover: Take pairs of p = 10 mutations and crossover to get n = 5 permutations. Let f = top fitness from these permutations.
4. Return best permutation.

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from loaders import *
import yaml
from yaml import load

In [2]:
# Loop-bound factors for each level of the architecture.
# TODO: need to set this to our config
ARCH_CONFIG = {
    "DRAM_factor_N": 50,
    "DRAM_factor_M": 8,
    "DRAM_factor_C": 4,
    "global_buffer_factor_N": 1,
    "global_buffer_factor_M": 1,
    "global_buffer_factor_C": 1,
    "PE_spatial_factor_M": 1,
    "PE_spatial_factor_C": 1,
    "scratchpad_factor_N": 1,
}

# Architecture factors plus the PE mesh dimensions.
# TODO replace the pe mesh values
CONFIG = dict(ARCH_CONFIG, pe_meshX=1, pe_meshY=16)

In [3]:
# Display the base constraints file that evaluate() later loads and modifies.
# NOTE(review): show_config is not defined in this file; presumably it comes
# from the `loaders` star import -- confirm.
show_config('designs/system/constraints.yaml')

constraints:
  version: 0.4
  targets:
  - target: DRAM
    type: dataspace
    keep: [Inputs, Outputs, Weights]
    bypass: []
  - target: DRAM
    type: temporal
    factors: [R=1, S=1, P=1, Q=1]
    permutation: [R, S, Q, P, C, M, N]
  - target: global_buffer
    type: dataspace
    keep: [Inputs, Outputs, Weights]
    bypass: []
  - target: global_buffer
    type: temporal
    factors: [P=1, Q=1, R=1, S=1]
    permutation: [S, R, Q, P, C, M, N]
  - target: PE
    type: spatial
    factors: 
    - P=1
    - Q=1
    - R=1
    - S=1
    - N=1
    - C<={{pe_meshX}}
    - M<={{pe_meshY}}
    permutation: [C, M, R, S, P, Q, N]
    split: 1
  - target: scratchpad
    type: dataspace
    keep: [Weights]
    bypass: [Inputs, Outputs]
  - target: scratchpad
    type: temporal
    factors: [R=0, S=0, P=0, Q=0, M=1, C=1]
    permutation: [Q, P, N, C, M, S, R]

  - target: weight_reg
    type: dataspace
    keep: [Weights]
    bypass: [Inputs, Outputs]
  - target: weight_reg
    type: temporal


In [16]:
def fitness(dataflow, workload):
    '''
    Score a dataflow as the inverse of its energy-delay product.

    dataflow: computation ordering in list format
    workload: the file path to the workload this is being evaluated on
    returns 1 / (energy * latency), so a larger value is a better dataflow
    '''
    energy, latency = evaluate(dataflow, workload)
    edp = energy * latency
    inverse_EDP = 1 / edp
    print(f"{dataflow} has a fitness of {inverse_EDP}")
    return inverse_EDP


def evaluate(dataflow, workload):
    '''
    Evaluates the given dataflow by running the Timeloop mapper on the
    1x16 PE-mesh system.

    A copy of the base constraints file is written to
    iters/configs/<dataflow letters>.yaml with the permutation of the PE
    spatial target replaced by `dataflow`, and the mapper is run against
    that copy (not the unmodified base file), so the dataflow actually
    influences the result.

    dataflow: computation ordering in list format
    workload: the file path to the workload this is being evaluated on
    returns tuple of (energy, cycles) parsed from the mapper stats file
    raises IndexError if the stats file has no 'Energy:' or 'Cycles:' line
    '''
    # Local import: the notebook's import cell does not bring in os.
    import os

    mapper = 'designs/_include/mapper.yaml'
    base_constraints = 'designs/system/constraints.yaml'
    # Position of the "target: PE, type: spatial" entry in the base
    # constraints file's target list.
    pe_spatial_index = 4

    # Load the base constraints and swap in the candidate PE permutation.
    with open(base_constraints, 'r') as stream:
        dictionary = yaml.safe_load(stream)
    pe_spatial = dictionary['constraints']['targets'][pe_spatial_index]
    print(pe_spatial)
    print(pe_spatial['permutation'])
    # Copy into a plain list so the YAML dump is a normal sequence even if
    # the caller passes a tuple.
    pe_spatial['permutation'] = list(dataflow)
    print(pe_spatial['permutation'])

    # Write the per-dataflow constraints file, creating the directory on
    # first use.
    config_dir = 'iters/configs'
    os.makedirs(config_dir, exist_ok=True)
    filename = ''.join(dataflow)
    constraints = f'{config_dir}/{filename}.yaml'
    with open(constraints, 'w') as file:
        yaml.dump(dictionary, file)

    # Run the mapper against the file just written. Passing the base file
    # here would make every dataflow evaluate identically.
    sys_1x16_result = run_timeloop_mapper(
        {'pe_meshX': 1, 'pe_meshY': 16},
        architecture='designs/system/arch.yaml',
        mapper=mapper,
        problem=workload,
        constraints=constraints,
    )

    with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
        stats = stats_file.read()
    print(sys_1x16_result.mapping)

    # The first line mentioning each metric carries the value as its second
    # space-separated field.
    lines = stats.split('\n')
    energy = float([l for l in lines if 'Energy:' in l][0].split(' ', 2)[1])
    cycles = int([l for l in lines if 'Cycles:' in l][0].split(' ', 1)[1])

    print(energy, cycles)
    return energy, cycles

    

In [17]:
# Genetic search over dataflow permutations for one convolution workload.
dataflow = ['R', 'S', 'P', 'Q', 'C', 'M', 'N']
workload = 'layer_shapes/conv2.yaml'

# n population -> k mutations -> p selection -> n population
# constraints: n | k, p/2 = n
n = 5
k = 20
p = 10

# Generate n base permutations
population = [random.sample(dataflow, len(dataflow)) for _ in range(n)]

print("Initializing")
# Initialize base fitness and goal fitness
dfs_fitnesses = [[df, fitness(df, workload)] for df in population]
best_df, f = max(dfs_fitnesses, key=lambda x: x[1])
g = 1000  # if terminating using goal fitness
# Generation cap, if terminating using timeout. Named max_iterations rather
# than `iter` so the builtin iter() is not shadowed for later cells.
max_iterations = 10

for i in range(max_iterations):
    print("\nITERATION: ", i)

    # Mutation: each individual yields k // n mutants, so the generation
    # holds k mutants in total (not n * k).
    mutations = []  # len(mutations) = k
    for df in population:
        for _ in range(k // n):
            mutation = df.copy()
            # swap two random indices
            idx1, idx2 = random.sample(range(len(dataflow)), 2)
            mutation[idx1], mutation[idx2] = mutation[idx2], mutation[idx1]
            mutations.append(mutation)

    # Selection: keep the p fittest mutants.
    print("selection")
    mutations_fitnesses = [[mutation, fitness(mutation, workload)] for mutation in mutations]
    mutations_fitnesses.sort(key=lambda x: x[1], reverse=True)  # high to low fitness
    selections = [x[0] for x in mutations_fitnesses[:p]]  # len(selections) = p

    # Crossover: pair the shuffled survivors. zip() drops a trailing
    # unpaired survivor instead of raising IndexError when p is odd.
    print("crossover")
    random.shuffle(selections)
    crossovers = []  # len(crossovers) = n
    for s1, s2 in zip(selections[::2], selections[1::2]):
        cut_point = random.randint(1, len(s1) - 1)
        # The child takes a prefix of s1, then the remaining parameters in
        # the order they appear in s2, so it stays a valid permutation.
        first_half = s1[:cut_point]
        second_half = [parameter for parameter in s2 if parameter not in first_half]
        crossovers.append(first_half + second_half)

    crossovers_fitnesses = [[crossover, fitness(crossover, workload)] for crossover in crossovers]
    best_df_trial, f_trial = max(crossovers_fitnesses, key=lambda x: x[1])
    if f_trial > f:
        best_df, f = best_df_trial, f_trial
    if f >= g:
        break

    # The offspring become the next generation. Without this, every
    # iteration would re-mutate the initial population.
    population = crossovers

Initializing
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['R', 'S', 'C', 'Q', 'M', 'N', 'P']
[INFO] 2025-04-30 02:10:15,520 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['R', 'S', 'C', 'Q', 'M', 'N', 'P'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['S', 'N', 'M', 'R', 'P', 'Q', 'C']
[INFO] 2025-04-30 02:10:17,637 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['S', 'N', 'M', 'R', 'P', 'Q', 'C'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['N', 'Q', 'C', 'R', 'S', 'M', 'P']
[INFO] 2025-04-30 02:10:19,633 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['N', 'Q', 'C', 'R', 'S', 'M', 'P'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['C', 'M', 'R', 'S', 'Q', 'N', 'P']
[INFO] 2025-04-30 02:10:21,782 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['C', 'M', 'R', 'S', 'Q', 'N', 'P'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['Q', 'N', 'P', 'S', 'M', 'C', 'R']
[INFO] 2025-04-30 02:10:23,776 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['Q', 'N', 'P', 'S', 'M', 'C', 'R'] has a fitness of 6.477137001811007e-10

ITERATION:  0
selection
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['R', 'M', 'C', 'Q', 'S', 'N', 'P']
[INFO] 2025-04-30 02:10:25,838 - pytimeloop.ac

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['R', 'M', 'C', 'Q', 'S', 'N', 'P'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['R', 'S', 'C', 'P', 'M', 'N', 'Q']
[INFO] 2025-04-30 02:10:27,864 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['R', 'S', 'C', 'P', 'M', 'N', 'Q'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['M', 'S', 'C', 'Q', 'R', 'N', 'P']
[INFO] 2025-04-30 02:10:29,914 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['M', 'S', 'C', 'Q', 'R', 'N', 'P'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['R', 'C', 'S', 'Q', 'M', 'N', 'P']
[INFO] 2025-04-30 02:10:31,913 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['R', 'C', 'S', 'Q', 'M', 'N', 'P'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['R', 'S', 'P', 'Q', 'M', 'N', 'C']
[INFO] 2025-04-30 02:10:33,899 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


DRAM [ Weights:800 (800) Inputs:204800 (204800) Outputs:313600 (313600) ] 
-------------------------------------------------------------------------
| for N in [0:50)

global_buffer [ Weights:800 (800) Inputs:4096 (4096) Outputs:6272 (6272) ] 
--------------------------------------------------------------------------
|   for C in [0:4)

inter_PE_spatial [ ] 
--------------------
|     for M in [0:8) (Spatial-Y)

scratchpad [ Weights:25 (25) ] 
------------------------------
|       for R in [0:5)
|         for S in [0:5)
|           for P in [0:28)
|             for Q in [0:28)

weight_reg [ Weights:1 (1) ] 
input_activation_reg [ Inputs:1 (1) ] 
output_activation_reg [ Outputs:1 (1) ] 
---------------------------------------
|               << Compute >>

393.85 3920000
['R', 'S', 'P', 'Q', 'M', 'N', 'C'] has a fitness of 6.477137001811007e-10
['C', 'M', 'R', 'S', 'P', 'Q', 'N']
['R', 'S', 'C', 'P', 'M', 'N', 'Q']
[INFO] 2025-04-30 02:10:35,920 - pytimeloop.accelergy_interface - Runni

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


KeyboardInterrupt: 