In [1]:
import pandas as pd
import numpy as np
from loaders import *

from scripts.utils import *

In [2]:
# AlexNet has five convolutional layers; the registry exposes one workload
# entry and one Albireo mapping entry per layer, numbered 1 through 5.
_ALEXNET_LAYER_NUMBERS = range(1, 6)

# Workload configs, in network order.
alexnet_layers = [
    getattr(ConfigRegistry, f"ALEXNET_LAYER{n}") for n in _ALEXNET_LAYER_NUMBERS
]

# Hand-written Albireo mappings, index-aligned with `alexnet_layers`.
alexnet_maps = [
    getattr(ConfigRegistry, f"ALBIREO_MAPPING_ALEXNET_L{n}")
    for n in _ALEXNET_LAYER_NUMBERS
]

# Single-entry workload list holding the registry's debugging layer.
debugging_layers = [ConfigRegistry.DEBUGGING_LAYER]

In [3]:
# Shell escape: register the Albireo estimation-table directory as a root for
# Accelergy's table-based plug-in (re-running just reports it is already added).
# NOTE(review): the path is absolute and machine-specific — consider deriving
# it from a configurable project root so the notebook runs elsewhere.
!accelergyTables -r /home/workspace/final-project/example_designs/albireo/albireo_estimation_tables

# Run Accelergy on the Albireo architecture and its component definitions.
# The result's `art` / `ert` attributes are passed to the mapper cell further down.
albireo_accelergy_result = run_accelergy(
    ConfigRegistry.ALBIREO_ARCH,
    ConfigRegistry.ALBIREO_COMPONENTS_DIR
)
# Show the verbose ERT summary so the per-component estimators can be inspected.
print(albireo_accelergy_result.ert_verbose)

/home/workspace/final-project/example_designs/albireo/albireo_estimation_tables is already added as a root for table based plug-in
ERT_summary:
  version: 0.3
  table_summary:
  - name: system_arch.chip.plcg[0..5].plcu[0..2].psumBuffer[0..4]
    actions:
    - name: read
      average_energy: 1.0
      max_energy: 1
      min_energy: 1
    - name: write
      average_energy: 1.0
      max_energy: 1
      min_energy: 1
    - name: idle
      energy: 0
    primitive_estimation(s):
    - system_arch.chip.plcg[0..5].plcu[0..2].psumBuffer[0..4]:
        estimator: dummy_table
  - name: system_arch.DRAM
    actions:
    - name: read
      energy: 512
    - name: write
      energy: 512
    - name: idle
      energy: 0
    primitive_estimation(s):
    - system_arch.DRAM:
        estimator: Cacti
  - name: system_arch.chip.plcg[0..5].plcu[0..2].plcu_dummy_buffer
    actions:
    - name: access
      energy: 0
    primitive_estimation(s):
    - action_name: access
      arguments: null
      en

### Custom mapping of AlexNet

In [None]:
# Evaluate each AlexNet layer under its hand-written Albireo mapping and
# collect the per-layer outputs (stats and loop nest) in network order.
alexnet_stats, alexnet_loops = [], []

# `alexnet_layers` and `alexnet_maps` are index-aligned, so walk them in lockstep.
for i, (layer, mapping) in enumerate(zip(alexnet_layers, alexnet_maps)):
    model_output = run_timeloop_model(
        ConfigRegistry.ALBIREO_ARCH,
        ConfigRegistry.ALBIREO_COMPONENTS_DIR,
        mapping,
        layer,
    )
    print(f"Layer: {i+1}")
    # The helper returns a (stats, loops) pair.
    stats, loops = model_output
    print(loops)

    alexnet_stats.append(stats)
    alexnet_loops.append(loops)

In [None]:
# Output directory prefix for this hand-mapped AlexNet run (note the trailing slash).
fname = './results/alexnet/manual/9/'
stats_path = fname + 'stats.pkl'
loops_path = fname + 'loops.txt'

# Hand the collected per-layer stats to `extract_numbers` with the pickle path,
# then call `load_data` on the same path; the loaded value is discarded
# (presumably a round-trip sanity check — TODO confirm).
extract_numbers(alexnet_stats, stats_path)
_ = load_data(stats_path)

# Store the per-layer loop nests alongside the stats.
save_loop_nests(alexnet_loops, loops_path)

### Mapper of AlexNet

In [4]:
# Let the Timeloop mapper search for a mapping for each AlexNet layer and
# collect the per-layer outputs (stats and loop nest) in network order.
alexnet_stats, alexnet_loops = [], []

# Everything except the workload is the same for every layer.
mapper_inputs = (
    ConfigRegistry.ALBIREO_ARCH,
    ConfigRegistry.ALBIREO_COMPONENTS_DIR,
    albireo_accelergy_result.art,
    albireo_accelergy_result.ert,
    ConfigRegistry.ALBIREO_CONSTRAINTS,
    ConfigRegistry.ALBIREO_MAPPER,
)

for i, layer in enumerate(alexnet_layers):
    mapper_output = run_timeloop_mapper(*mapper_inputs, layer)
    print(f"Layer: {i+1}")
    # The helper returns a (stats, loops) pair.
    stats, loops = mapper_output
    print(loops)

    alexnet_stats.append(stats)
    alexnet_loops.append(loops)

input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
Found Accelergy ERT (energy reference table), replacing internal energy model.
Found Accelergy ART (area reference table), replacing internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 8
  Factorization options along problem dimension M = 6336
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 64
  Factorization options alo

[  0] Utilization = 0.10 | pJ/Compute =    3.333 | L5[IO] M8 - L4[WIO] Q1 - L3[] Q11 M4 - L2[W] Q5 M3X - L1[] P55 C3X - L0[O] S11 R11 
[  0] Utilization = 0.10 | pJ/Compute =    3.331 | L5[IO] M8 - L4[WIO] Q1 - L3[] M4 Q11 - L2[W] Q5 M3X - L1[] P55 C3X - L0[O] S11 R11 
[  1] Utilization = 0.17 | pJ/Compute =    2.956 | L5[IO] P5 M8 - L4[WIO] Q11 M2 - L3[] Q1 - L2[W] P11 M3X - L1[] M2 C3 Q5X - L0[O] S11 R11 
[  3] Utilization = 0.17 | pJ/Compute =    2.965 | L5[IO] P5 M8 - L4[WIO] Q11 M2 - L3[] Q1 - L2[W] P11 C3 M3X - L1[] M2 Q5X - L0[O] S11 R11 
[  0] Utilization = 0.20 | pJ/Compute =    3.101 | L5[IO] Q55 - L4[WIO] M4 - L3[] Q1 M3X - L2[W] P11 M4 M2X - L1[] P5 C3X - L0[O] S11 R11 
[  2] Utilization = 0.03 | pJ/Compute =    4.317 | L5[IO] M12 - L4[WIO] Q11 P5 M4 - L3[] P11 - L2[W] Q5 C3X - L1[] M2 - L0[O] S11 R11 
[  4] Utilization = 0.50 | pJ/Compute =    2.956 | L5[IO] P5 M8 - L4[WIO] Q11 M2 - L3[] Q1 C3X - L2[W] P11 M3X - L1[] M2 Q5X - L0[O] S11 R11 
[  5] Utilization = 0.17 | pJ/Co



Summary stats for best mapping found by mapper:
  Utilization = 0.83 | pJ/Compute =    2.940
Layer: 1

DRAM [ Inputs:154587 (154587) Outputs:290400 (290400) ] 
-------------------------------------------------------
| for Q in [0:11)

shared_glb [ Weights:34848 (34848) Inputs:18387 (18387) Outputs:26400 (26400) ] 
-------------------------------------------------------------------------------
|   for M in [0:4)

dummy_buffer [ ] 
----------------
|     for M in [0:2)
|       for Q in [0:5) (Spatial-X)

weight_cache [ Weights:4356 (4356) ] 
------------------------------------
|         for P in [0:11)
|           for M in [0:3) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|             for M in [0:4)
|               for C in [0:3)
|                 for P in [0:5) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                   for S in [0:11)
|                     for R in [0:11)


input file: tmp.yaml
  _______                __               

[  4] Utilization = 0.18 | pJ/Compute =    2.855 | L5[IO] M32 - L4[WIO] Q3 C12 - L3[] P3 C4 M2X - L2[W] Q3 M2X - L1[] Q3 P9 M2X C2X - L0[O] S5 R5 
[  4] Utilization = 0.18 | pJ/Compute =    2.849 | L5[IO] M32 - L4[WIO] Q3 C12 - L3[] C4 P3 M2X - L2[W] Q3 M2X - L1[] Q3 P9 M2X C2X - L0[O] S5 R5 
[  2] Utilization = 0.18 | pJ/Compute =    2.855 | L5[IO] M32 - L4[WIO] Q3 C12 - L3[] P3 C2 M2X C2X - L2[W] Q3 C2 M2X - L1[] Q3 P9 M2X - L0[O] S5 R5 
[  5] Utilization = 0.09 | pJ/Compute =    2.835 | L5[IO] M32 - L4[WIO] Q3 C12 - L3[] P3 C4 M2X - L2[W] Q3 M2X - L1[] Q3 P9 C2 M2X - L0[O] S5 R5 
[  0] Utilization = 0.18 | pJ/Compute =    2.829 | L5[IO] M32 - L4[WIO] Q3 C12 - L3[] P3 C2 M2X C2X - L2[W] Q3 M2X - L1[] Q3 P9 C2 M2X - L0[O] S5 R5 
[  3] Utilization = 0.10 | pJ/Compute =    3.865 | L5[IO] Q3 P9 M32 - L4[WIO] P3 M4 C4 - L3[] C4 - L2[W] Q3 M2 Q3X - L1[] C2 C3X - L0[O] S5 R5 
[  7] Utilization = 0.09 | pJ/Compute =    2.868 | L5[IO] M32 - L4[WIO] Q3 C12 - L3[] P3 C4 M2X - L2[W] Q3 C2 M2X - 



Summary stats for best mapping found by mapper:
  Utilization = 0.53 | pJ/Compute =    2.440
Layer: 2

DRAM [ Inputs:92256 (92256) Outputs:186624 (186624) ] 
-----------------------------------------------------
| for C in [0:2)
|   for M in [0:8)

shared_glb [ Weights:38400 (38400) Inputs:46128 (46128) Outputs:23328 (23328) ] 
-------------------------------------------------------------------------------
|     for C in [0:2)

dummy_buffer [ ] 
----------------
|       for M in [0:2)
|         for P in [0:9)
|           for M in [0:4) (Spatial-X)

weight_cache [ Weights:2400 (2400) ] 
------------------------------------
|             for P in [0:3)
|               for C in [0:6)
|                 for Q in [0:3) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                   for Q in [0:9)
|                     for C in [0:4)
|                       for M in [0:4) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                         for S in



Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 1413434880 Residue: 0
Mapspace construction complete.
Search configuration complete.


[  3] Utilization = 0.09 | pJ/Compute =    4.194 | L5[IO] Q13 P13 M2 C16 - L4[WIO] M2 - L3[] M6 C8 C2X - L2[W] Q1 M2X - L1[] M4 M2X - L0[O] S3 R3 
[  0] Utilization = 0.04 | pJ/Compute =    4.139 | L5[IO] Q13 P13 M2 C16 - L4[WIO] M2 - L3[] M6 C8 - L2[W] Q1 M2X - L1[] M4 C2 M2X - L0[O] S3 R3 
[  5] Utilization = 0.09 | pJ/Compute =    3.908 | L5[IO] P13 M8 C4 - L4[WIO] Q1 - L3[] M8 M2X C2X - L2[W] Q13 C2X - L1[] M3 C16 - L0[O] S3 R3 
[  4] Utilization = 0.04 | pJ/Compute =    4.230 | L5[IO] Q13 P13 M2 C16 - L4[WIO] M2 - L3[] M6 C16 - L2[W] Q1 M2X - L1[] M4 M2X - L0[O] S3 R3 
[  7] Utilization = 0.09 | pJ/Compute =    4.085 | L5[IO] Q13 P13 M2 C16 - L4[WIO] M2 C2 - L3[] M6 - L2[W] Q1 M2X - L1[] M4 C4 M2X C2X - L0[O] S3 R3 
[  2] Utilization = 0.04 | pJ/Compute =    4.230 | L5[IO] Q13 P13 M2 C16 - L4[WIO] M2 - L3[] M6 C8 - L2[W] C2 M2X - L1[] M4 M2X - L0[O] S3 R3 
[  5] Utilization = 0.09 | pJ/Compute =    3.625 | L5[IO] P13 C4 M8 - L4[WIO] Q1 - L3[] M8 M2X C2X - L2[W] Q13 C2X - L1[] M3 C



Summary stats for best mapping found by mapper:
  Utilization = 0.53 | pJ/Compute =    3.155
Layer: 3

DRAM [ Inputs:57600 (57600) Outputs:64896 (64896) ] 
---------------------------------------------------
| for M in [0:8)
|   for C in [0:2)

shared_glb [ Weights:55296 (55296) Inputs:28800 (28800) Outputs:8112 (8112) ] 
-----------------------------------------------------------------------------
|     for Q in [0:13)
|       for M in [0:2)
|         for C in [0:2)

dummy_buffer [ ] 
----------------
|           for M in [0:2)
|             for C in [0:2)
|               for P in [0:13)
|                 for M in [0:3) (Spatial-X)
|                   for C in [0:2) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|                     for C in [0:2)
|                       for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                         for M in [0:2)
|                           for C in [0:2)
|                   



Mapspace construction complete.
Search configuration complete.


[  1] Utilization = 0.18 | pJ/Compute =    3.857 | L5[IO] M8 - L4[WIO] Q13 C8 - L3[] P13 M2 C4 M2X C2X - L2[W] C3 C2X - L1[] M6 M2X - L0[O] S3 R3 
[  7] Utilization = 0.40 | pJ/Compute =    2.823 | L5[IO] M4 C2 - L4[WIO] C4 - L3[] M4 M2X C3X - L2[W] Q13 P13 M2 C4 M2X - L1[] C4 M3X - L0[O] S3 R3 
[  0] Utilization = 0.09 | pJ/Compute =    3.802 | L5[IO] M8 - L4[WIO] Q13 C8 - L3[] P13 M2 C4 M2X C2X - L2[W] C3 - L1[] M6 C2 M2X - L0[O] S3 R3 
[  2] Utilization = 0.09 | pJ/Compute =    3.876 | L5[IO] M8 - L4[WIO] Q13 C8 - L3[] P13 M2 C4 M2X C2X - L2[W] C6 - L1[] M6 M2X - L0[O] S3 R3 
[  3] Utilization = 0.13 | pJ/Compute =    3.995 | L5[IO] Q13 M4 C8 - L4[WIO] P13 C2 - L3[] C2 M2X C3X - L2[W] M12 M2X - L1[] M2 C4 - L0[O] S3 R3 
[  5] Utilization = 0.13 | pJ/Compute =    3.763 | L5[IO] M8 - L4[WIO] Q13 C8 - L3[] P13 M2 C4 M2X C3X - L2[W] Q1 - L1[] M6 C4 M2X - L0[O] S3 R3 
[  6] Utilization = 0.53 | pJ/Compute =    3.843 | L5[IO] M8 - L4[WIO] Q13 C8 - L3[] P13 M2 C4 M2X C3X - L2[W] Q1 C2X - L



Summary stats for best mapping found by mapper:
  Utilization = 0.53 | pJ/Compute =    3.151
Layer: 4

DRAM [ Inputs:86400 (86400) Outputs:64896 (64896) ] 
---------------------------------------------------
| for M in [0:8)

shared_glb [ Weights:165888 (165888) Inputs:86400 (86400) Outputs:8112 (8112) ] 
-------------------------------------------------------------------------------
|   for Q in [0:13)
|     for C in [0:8)

dummy_buffer [ ] 
----------------
|       for M in [0:2)
|         for C in [0:4)
|           for P in [0:13)
|             for M in [0:2) (Spatial-X)
|               for C in [0:3) (Spatial-X)

weight_cache [ Weights:432 (432) ] 
----------------------------------
|                 for Q in [0:1)
|                   for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                     for M in [0:6)
|                       for M in [0:2) (Spatial-X)
|                         for C in [0:2) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----

[  6] Utilization = 0.02 | pJ/Compute =    4.583 | L5[IO] Q13 P13 M2 C2 - L4[WIO] C2 - L3[] M16 C3 - L2[W] C2 M2X - L1[] M4 C16 - L0[O] S3 R3 
[  4] Utilization = 0.09 | pJ/Compute =    4.603 | L5[IO] Q13 P13 M2 C2 - L4[WIO] C2 - L3[] M16 C3 - L2[W] C2 M2X - L1[] M4 C4 C4X - L0[O] S3 R3 
[  2] Utilization = 0.13 | pJ/Compute =    5.730 | L5[IO] M64 C8 - L4[WIO] C2 - L3[] Q1 - L2[W] Q13 M2 C3X - L1[] P13 C4 M2X C2X - L0[O] S3 R3 
[  3] Utilization = 0.09 | pJ/Compute =    3.617 | L5[IO] P13 M8 C8 - L4[WIO] C2 - L3[] M4 C2 M2X - L2[W] Q13 C4 M2X - L1[] C3 M2X - L0[O] S3 R3 
[  5] Utilization = 0.04 | pJ/Compute =    4.590 | L5[IO] Q13 P13 M2 C2 - L4[WIO] C2 - L3[] M16 C3 - L2[W] C2 M2X - L1[] M4 C8 C2X - L0[O] S3 R3 
[  2] Utilization = 0.13 | pJ/Compute =    3.650 | L5[IO] C8 M64 - L4[WIO] C2 - L3[] Q1 - L2[W] Q13 M2 C3X - L1[] P13 C4 M2X C2X - L0[O] S3 R3 
[  1] Utilization = 0.18 | pJ/Compute =    3.443 | L5[IO] M8 - L4[WIO] P13 M4 C2 - L3[] M2 C2 C4X - L2[W] C3 M2X - L1[] Q13 M2 C4 C



Summary stats for best mapping found by mapper:
  Utilization = 0.36 | pJ/Compute =    2.852
Layer: 5

DRAM [ Inputs:86400 (86400) Outputs:43264 (43264) ] 
---------------------------------------------------
| for M in [0:4)
|   for C in [0:8)

shared_glb [ Weights:27648 (27648) Inputs:10800 (10800) Outputs:10816 (10816) ] 
-------------------------------------------------------------------------------
|     for C in [0:3)
|       for Q in [0:13)

dummy_buffer [ ] 
----------------
|         for P in [0:13)
|           for M in [0:4) (Spatial-X)

weight_cache [ Weights:2304 (2304) ] 
------------------------------------
|             for M in [0:2)
|               for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                 for M in [0:2)
|                   for C in [0:8)
|                     for M in [0:4) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                       for S in [0:3)
|                         for R in [

In [5]:
# Output directory prefix for this mapper-generated AlexNet run (note the trailing slash).
fname = './results/alexnet/mapper/6/'
stats_path = fname + 'stats.pkl'
loops_path = fname + 'loops.txt'

# Hand the collected per-layer stats to `extract_numbers` with the pickle path,
# then call `load_data` on the same path; the loaded value is discarded
# (presumably a round-trip sanity check — TODO confirm).
extract_numbers(alexnet_stats, stats_path)
_ = load_data(stats_path)

# Store the per-layer loop nests alongside the stats.
save_loop_nests(alexnet_loops, loops_path)

In [None]:
# Print, for every layer, the tail of its stats text starting at the line that
# contains "Summary Stats". Iterates `alexnet_stats`, the per-layer stats list
# filled by the run cells earlier in this notebook, so the cell works on a
# fresh kernel after those cells have run.
for i, stats in enumerate(alexnet_stats):
    print(f"Layer: {i+1}")
    log = False  # flips to True once the summary header has been seen
    for line in stats.split("\n"):
        if "Summary Stats" in line:
            log = True
        if log:
            print(line)

In [None]:
# Sum the cycle counts reported in each layer's stats text. Iterates
# `alexnet_stats`, the per-layer stats list filled by the run cells earlier in
# this notebook.
total_cycles = 0
for i, stats in enumerate(alexnet_stats):
    for line in stats.split("\n"):
        if "Cycles: " in line:
            # Take whatever follows the marker, so the parse also works when
            # the marker is not at column 0 (the match above is a substring test).
            cycles = int(line.split("Cycles: ", 1)[1])
            total_cycles += cycles
            print(f"Cycles in layer {i+1}: {cycles}")
            break  # only the first matching line per layer is counted
    else:
        # Make a missing entry visible instead of silently under-counting.
        print(f"No cycle count found for layer {i+1}")
print(f"Albireo total cycles: {total_cycles}")

In [None]:
# Sum the energy (in uJ) reported in each layer's stats text. Iterates
# `alexnet_stats`, the per-layer stats list filled by the run cells earlier in
# this notebook.
total_energy = 0
for i, stats in enumerate(alexnet_stats):
    for line in stats.split("\n"):
        if "Energy: " in line:
            # Take the text between the marker and the " uJ" unit suffix, so the
            # parse also works when the marker is not at column 0.
            energy = float(line.split("Energy: ", 1)[1].split(' uJ')[0])
            total_energy += energy
            print(f"Energy in layer {i+1}: {energy}")
            break  # only the first matching line per layer is counted
    else:
        # Make a missing entry visible instead of silently under-counting.
        print(f"No energy value found for layer {i+1}")
print(f"Total energy: {total_energy} uJ")