In [None]:
import pandas as pd
import numpy as np
from loaders import *

from scripts.utils import *

In [None]:
# ConfigRegistry exposes the VGG entries as numbered attributes
# (VGG_LAYER1..VGG_LAYER13 and ALBIREO_MAPPING_VGG_L1..L13), so both lists are
# built by name from the same 1-based index range; entry k of `vgg_maps` is the
# mapping that is paired with entry k of `vgg_layers` further down.
_VGG_LAYER_NUMBERS = range(1, 14)

vgg_layers = [
    getattr(ConfigRegistry, f"VGG_LAYER{n}") for n in _VGG_LAYER_NUMBERS
]

vgg_maps = [
    getattr(ConfigRegistry, f"ALBIREO_MAPPING_VGG_L{n}") for n in _VGG_LAYER_NUMBERS
]

In [3]:
# Register the Albireo estimation-table directory as a root for Accelergy's
# table-based plug-in (IPython shell escape; re-running it is harmless, the tool
# just reports the root is already added).
# NOTE(review): absolute, machine-specific path — confirm it matches the
# environment this notebook is run in.
!accelergyTables -r /home/workspace/final-project/example_designs/albireo/albireo_estimation_tables

# Run Accelergy on the Albireo architecture and component descriptions. The
# returned object is reused below: `.art` / `.ert` are passed to the Timeloop
# mapper, and `.ert_verbose` is printed here for inspection.
albireo_accelergy_result = run_accelergy(
    ConfigRegistry.ALBIREO_ARCH,
    ConfigRegistry.ALBIREO_COMPONENTS_DIR
)
print(albireo_accelergy_result.ert_verbose)

/home/workspace/final-project/example_designs/albireo/albireo_estimation_tables is already added as a root for table based plug-in
ERT_summary:
  version: 0.3
  table_summary:
  - name: system_arch.chip.plcg[0..8].plcu[0..2].psumBuffer[0..4]
    actions:
    - name: read
      average_energy: 1.0
      max_energy: 1
      min_energy: 1
    - name: write
      average_energy: 1.0
      max_energy: 1
      min_energy: 1
    - name: idle
      energy: 0
    primitive_estimation(s):
    - system_arch.chip.plcg[0..8].plcu[0..2].psumBuffer[0..4]:
        estimator: dummy_table
  - name: system_arch.DRAM
    actions:
    - name: read
      energy: 512
    - name: write
      energy: 512
    - name: idle
      energy: 0
    primitive_estimation(s):
    - system_arch.DRAM:
        estimator: Cacti
  - name: system_arch.chip.plcg[0..8].plcu[0..2].plcu_dummy_buffer
    actions:
    - name: access
      energy: 0
    primitive_estimation(s):
    - action_name: access
      arguments: null
      en

### Custom mapping of VGG16

In [None]:
# Evaluate every VGG16 layer on Albireo with its hand-written mapping and
# collect the per-layer stats and loop nests returned by run_timeloop_model.

# Layers and mappings are paired positionally, so fail fast on a mismatch
# instead of hitting an IndexError half-way through the runs (or silently
# ignoring surplus mappings).
if len(vgg_layers) != len(vgg_maps):
    raise ValueError(
        f"Expected one mapping per layer, got {len(vgg_layers)} layers "
        f"and {len(vgg_maps)} mappings"
    )

vgg16_stats = []
vgg16_loops = []

for i, (layer, mapping) in enumerate(zip(vgg_layers, vgg_maps)):
    results = run_timeloop_model(
        ConfigRegistry.ALBIREO_ARCH,
        ConfigRegistry.ALBIREO_COMPONENTS_DIR,
        mapping,
        layer
    )
    print(f"Layer: {i+1}")
    # Each run yields a (stats, loops) pair.
    stats, loops = results
    print(loops)

    vgg16_stats.append(stats)
    vgg16_loops.append(loops)

In [None]:
# Output directory for the manual-mapping run. The trailing slash matters:
# file paths below are built by plain string concatenation.
# NOTE(review): `vgg16_stats` / `vgg16_loops` are also reassigned by the mapper
# cell further down, so run this cell right after the manual-mapping loop or it
# will store the mapper's results under the "manual" directory.
fname = './results/vgg16/manual/9/'
stats_path = fname + 'stats.pkl'
loops_path = fname + 'loops.txt'

# Presumably writes the numbers parsed from the stats to the pickle — confirm
# against extract_numbers. The file is then read back once as a sanity check;
# the loaded value is discarded.
extract_numbers(vgg16_stats, stats_path)
_ = load_data(stats_path)

save_loop_nests(vgg16_loops, loops_path)

### Mapper of VGG16

In [4]:
# Let the Timeloop mapper search for a mapping of each VGG16 layer on Albireo,
# reusing the ART/ERT from the Accelergy run above, and collect the per-layer
# stats and loop nests. Both accumulators are reset so re-running the cell does
# not append to earlier results.
vgg16_stats, vgg16_loops = [], []

for layer_number, layer in enumerate(vgg_layers, start=1):
    albireo_results = run_timeloop_mapper(
        ConfigRegistry.ALBIREO_ARCH,
        ConfigRegistry.ALBIREO_COMPONENTS_DIR,
        albireo_accelergy_result.art,
        albireo_accelergy_result.ert,
        ConfigRegistry.ALBIREO_CONSTRAINTS,
        ConfigRegistry.ALBIREO_MAPPER,
        layer,
    )

    # Label the result with the 1-based layer number before showing its loop nest.
    print(f"Layer: {layer_number}")
    stats, loops = albireo_results
    print(loops)

    vgg16_stats += [stats]
    vgg16_loops += [loops]

input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
Found Accelergy ERT (energy reference table), replacing internal energy model.
Found Accelergy ART (area reference table), replacing internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 8
  Factorization options along problem dimension M = 1716
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 6336
  Factorization options a

[  2] Utilization = 0.36 | pJ/Compute =    5.490 | L5[IO] Q7 P28 M8 - L4[WIO] Q1 - L3[] Q1 Q4X M2X - L2[W] Q4 P2 M2 C3X - L1[] Q2 P4 M2X - L0[O] S3 R3 
[  5] Utilization = 0.12 | pJ/Compute =    5.616 | L5[IO] Q7 P28 M8 - L4[WIO] Q1 - L3[] C3 Q4X M2X - L2[W] Q4 P2 M2 - L1[] Q2 P4 M2X - L0[O] S3 R3 
[  6] Utilization = 0.12 | pJ/Compute =    5.616 | L5[IO] Q7 P28 M8 - L4[WIO] C3 - L3[] Q1 Q4X M2X - L2[W] Q4 P2 M2 - L1[] Q2 P4 M2X - L0[O] S3 R3 
[  1] Utilization = 0.12 | pJ/Compute =    5.490 | L5[IO] Q7 P28 M8 - L4[WIO] Q1 - L3[] Q1 Q4X M2X - L2[W] Q4 P2 M2 - L1[] Q2 P4 C3 M2X - L0[O] S3 R3 
[  7] Utilization = 0.12 | pJ/Compute =    6.653 | L5[IO] Q7 P28 M8 C3 - L4[WIO] Q1 - L3[] Q1 Q4X M2X - L2[W] Q4 P2 M2 - L1[] Q2 P4 M2X - L0[O] S3 R3 
[  3] Utilization = 0.12 | pJ/Compute =    5.616 | L5[IO] Q7 P28 M8 - L4[WIO] Q1 - L3[] Q1 Q4X M2X - L2[W] Q4 P2 M2 C3 - L1[] Q2 P4 M2X - L0[O] S3 R3 
[  4] Utilization = 0.09 | pJ/Compute =    6.295 | L5[IO] Q2 P7 - L4[WIO] Q2 P2 M4 - L3[] P2 M8 Q2X



Summary stats for best mapping found by mapper:
  Utilization = 0.36 | pJ/Compute =    5.200
Layer: 1

DRAM [ Inputs:153228 (153228) Outputs:3211264 (3211264) ] 
---------------------------------------------------------
| for P in [0:4)
|   for M in [0:4)

shared_glb [ Weights:432 (432) Inputs:39324 (39324) Outputs:200704 (200704) ] 
-----------------------------------------------------------------------------
|     for M in [0:4)
|       for Q in [0:2)
|         for P in [0:4)

dummy_buffer [ ] 
----------------
|           for Q in [0:28)
|             for P in [0:7)
|               for Q in [0:2) (Spatial-X)
|                 for C in [0:3) (Spatial-X)

weight_cache [ Weights:36 (36) ] 
--------------------------------
|                   for Q in [0:1)
|                     for M in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                       for P in [0:2)
|                         for Q in [0:2) (Spatial-X)
|                           for M in [0:2) (

[  5] Utilization = 0.06 | pJ/Compute =    3.052 | L5[IO] P14 - L4[WIO] P2 C8 - L3[] Q56 M2 C2 M2X - L2[W] P2 M8 P2X - L1[] Q2 C4 M2X - L0[O] S3 R3 
[  4] Utilization = 0.12 | pJ/Compute =    3.078 | L5[IO] P14 - L4[WIO] P2 C8 - L3[] Q56 M2 C2 M2X - L2[W] P2 M8 P2X - L1[] Q2 C2 M2X C2X - L0[O] S3 R3 
[  1] Utilization = 0.24 | pJ/Compute =    3.131 | L5[IO] P14 - L4[WIO] P2 C8 - L3[] Q56 M2 M2X C4X - L2[W] P2 M8 C2 P2X - L1[] Q2 M2X - L0[O] S3 R3 
[  1] Utilization = 0.24 | pJ/Compute =    3.039 | L5[IO] P14 - L4[WIO] P2 C8 - L3[] M2 Q56 M2X C4X - L2[W] P2 M8 C2 P2X - L1[] Q2 M2X - L0[O] S3 R3 
[  5] Utilization = 0.06 | pJ/Compute =    2.960 | L5[IO] P14 - L4[WIO] P2 C8 - L3[] M2 C2 Q56 M2X - L2[W] P2 M8 P2X - L1[] Q2 C4 M2X - L0[O] S3 R3 
[  4] Utilization = 0.12 | pJ/Compute =    2.986 | L5[IO] P14 - L4[WIO] P2 C8 - L3[] M2 C2 Q56 M2X - L2[W] P2 M8 P2X - L1[] Q2 C2 M2X C2X - L0[O] S3 R3 
[  3] Utilization = 0.12 | pJ/Compute =    3.991 | L5[IO] P14 M4 - L4[WIO] Q7 P2 - L3[] Q4 P4 C8



Summary stats for best mapping found by mapper:
  Utilization = 0.47 | pJ/Compute =    2.801
Layer: 2

DRAM [ Inputs:831744 (831744) Outputs:802816 (802816) ] 
-------------------------------------------------------
| for Q in [0:28)
|   for M in [0:4)

shared_glb [ Weights:9216 (9216) Inputs:43776 (43776) Outputs:7168 (7168) ] 
---------------------------------------------------------------------------
|     for P in [0:4)
|       for C in [0:2)

dummy_buffer [ ] 
----------------
|         for M in [0:2)
|           for P in [0:14)
|             for M in [0:2) (Spatial-X)
|               for C in [0:4) (Spatial-X)

weight_cache [ Weights:288 (288) ] 
----------------------------------
|                 for P in [0:2)
|                   for M in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                     for Q in [0:2)
|                       for C in [0:8)
|                         for Q in [0:2) (Spatial-X)
|                           for M in [0:2) (Spa

[  3] Utilization = 0.06 | pJ/Compute =    3.988 | L5[IO] Q2 P14 M2 - L4[WIO] Q7 C2 - L3[] P2 M4 C32 - L2[W] Q4 Q2X - L1[] M16 P4X - L0[O] S3 R3 
[  7] Utilization = 0.06 | pJ/Compute =    3.823 | L5[IO] Q2 P14 M2 - L4[WIO] Q7 C2 - L3[] P2 M4 C4 - L2[W] Q4 Q2X - L1[] M16 C8 P4X - L0[O] S3 R3 
[  0] Utilization = 0.46 | pJ/Compute =    3.847 | L5[IO] Q2 P14 M2 - L4[WIO] Q7 C2 - L3[] P2 M4 C2 C8X - L2[W] Q4 Q2X - L1[] M16 C2 P4X - L0[O] S3 R3 
[  2] Utilization = 0.45 | pJ/Compute =    3.908 | L5[IO] Q2 P14 M2 - L4[WIO] Q7 C2 - L3[] P2 M4 C2 C8X - L2[W] Q4 C2 Q2X - L1[] M16 P4X - L0[O] S3 R3 
[  4] Utilization = 0.06 | pJ/Compute =    3.553 | L5[IO] Q4 P4 - L4[WIO] Q28 P2 M2 C2 - L3[] C2 M2X - L2[W] P14 M4 C2X - L1[] M8 C4 C2X - L0[O] S3 R3 
[  5] Utilization = 0.03 | pJ/Compute =    3.542 | L5[IO] Q4 P4 - L4[WIO] Q28 P2 M2 C2 - L3[] C2 M2X - L2[W] P14 M4 C2X - L1[] M8 C8 - L0[O] S3 R3 
[  3] Utilization = 0.06 | pJ/Compute =    3.976 | L5[IO] Q2 P14 M2 - L4[WIO] Q7 C2 - L3[] M4 C32 P2 -



Summary stats for best mapping found by mapper:
  Utilization = 0.47 | pJ/Compute =    3.308
Layer: 3

DRAM [ Inputs:831744 (831744) Outputs:1605632 (1605632) ] 
---------------------------------------------------------
| for Q in [0:7)
|   for M in [0:8)
|     for C in [0:2)

shared_glb [ Weights:4608 (4608) Inputs:65664 (65664) Outputs:28672 (28672) ] 
-----------------------------------------------------------------------------
|       for M in [0:2)
|         for C in [0:4)

dummy_buffer [ ] 
----------------
|           for C in [0:2)
|             for Q in [0:4)
|               for P in [0:2)
|                 for M in [0:2) (Spatial-X)
|                   for C in [0:4) (Spatial-X)

weight_cache [ Weights:36 (36) ] 
--------------------------------
|                     for Q in [0:4)
|                       for P in [0:7)
|                         for M in [0:2)
|                           for M in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|             

[  4] Utilization = 0.12 | pJ/Compute =    3.522 | L5[IO] Q2 P7 M2 - L4[WIO] M2 - L3[] Q4 P4 M4 P2X M2X - L2[W] Q7 M4 C32 C2X - L1[] Q1 C2X - L0[O] S3 R3 
[  3] Utilization = 0.06 | pJ/Compute =    3.432 | L5[IO] Q2 P7 M2 - L4[WIO] P4 M2 C2 - L3[] Q7 M8 P2X - L2[W] Q2 C32 M2X - L1[] M2 C2 Q2X - L0[O] S3 R3 
[  2] Utilization = 0.12 | pJ/Compute =    3.486 | L5[IO] Q2 P7 M2 - L4[WIO] P4 M2 C2 - L3[] Q7 M8 P2X - L2[W] Q2 C32 M2X - L1[] M2 Q2X C2X - L0[O] S3 R3 
[  7] Utilization = 0.12 | pJ/Compute =    3.374 | L5[IO] Q2 P7 M2 - L4[WIO] P4 M2 C2 - L3[] Q7 M8 P2X - L2[W] Q2 C8 M2X - L1[] M2 C4 Q2X C2X - L0[O] S3 R3 
[  5] Utilization = 0.06 | pJ/Compute =    3.522 | L5[IO] Q2 P7 M2 - L4[WIO] M2 - L3[] Q4 P4 M4 P2X M2X - L2[W] Q7 M4 C32 C2X - L1[] C2 - L0[O] S3 R3 
[  2] Utilization = 0.12 | pJ/Compute =    3.352 | L5[IO] Q2 P7 M2 - L4[WIO] P4 M2 C2 - L3[] M8 Q7 P2X - L2[W] Q2 C32 M2X - L1[] M2 Q2X C2X - L0[O] S3 R3 
[  0] Utilization = 0.01 | pJ/Compute =    4.357 | L5[IO] Q4 P2 C4 - L4[W



Summary stats for best mapping found by mapper:
  Utilization = 0.24 | pJ/Compute =    2.908
Layer: 4

DRAM [ Inputs:430592 (430592) Outputs:401408 (401408) ] 
-------------------------------------------------------
| for Q in [0:14)
|   for M in [0:2)

shared_glb [ Weights:73728 (73728) Inputs:44544 (44544) Outputs:14336 (14336) ] 
-------------------------------------------------------------------------------
|     for C in [0:4)

dummy_buffer [ ] 
----------------
|       for M in [0:16)
|         for P in [0:2) (Spatial-X)
|           for M in [0:4) (Spatial-X)

weight_cache [ Weights:288 (288) ] 
----------------------------------
|             for Q in [0:4)
|               for P in [0:2)
|                 for C in [0:8)

plcu_dummy_buffer [ ] 
---------------------
|                   for P in [0:7)
|                     for C in [0:2)
|                       for P in [0:2) (Spatial-X)
|                         for C in [0:2) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
--------

[  1] Utilization = 0.06 | pJ/Compute =    3.502 | L5[IO] Q7 C16 - L4[WIO] Q4 P7 M128 - L3[] Q2 P4 - L2[W] P2 C2X - L1[] C2 M2X C2X - L0[O] S3 R3 
[  2] Utilization = 0.03 | pJ/Compute =    3.476 | L5[IO] Q7 C16 - L4[WIO] Q4 P7 M128 - L3[] Q2 P4 - L2[W] P2 C2X - L1[] C4 M2X - L0[O] S3 R3 
[  0] Utilization = 0.03 | pJ/Compute =    3.224 | L5[IO] Q7 P2 M8 - L4[WIO] P2 C16 - L3[] C2 M2X - L2[W] P2 M4 C4 M2X - L1[] Q8 P7 M2 - L0[O] S3 R3 
[  1] Utilization = 0.06 | pJ/Compute =    3.462 | L5[IO] Q7 C16 - L4[WIO] Q4 M128 P7 - L3[] Q2 P4 - L2[W] P2 C2X - L1[] C2 M2X C2X - L0[O] S3 R3 
[  2] Utilization = 0.03 | pJ/Compute =    3.436 | L5[IO] Q7 C16 - L4[WIO] Q4 M128 P7 - L3[] Q2 P4 - L2[W] P2 C2X - L1[] C4 M2X - L0[O] S3 R3 
[  2] Utilization = 0.03 | pJ/Compute =    3.431 | L5[IO] Q7 C16 - L4[WIO] M128 Q4 P7 - L3[] Q2 P4 - L2[W] P2 C2X - L1[] C4 M2X - L0[O] S3 R3 
[  1] Utilization = 0.06 | pJ/Compute =    3.457 | L5[IO] Q7 C16 - L4[WIO] M128 Q4 P7 - L3[] Q2 P4 - L2[W] P2 C2X - L1[] C2 M2X



Summary stats for best mapping found by mapper:
  Utilization = 0.47 | pJ/Compute =    3.378
Layer: 5

DRAM [ Inputs:430592 (430592) Outputs:802816 (802816) ] 
-------------------------------------------------------
| for P in [0:2)
|   for C in [0:2)
|     for M in [0:32)

shared_glb [ Weights:4608 (4608) Inputs:111360 (111360) Outputs:12544 (12544) ] 
-------------------------------------------------------------------------------
|       for Q in [0:56)
|         for C in [0:2)

dummy_buffer [ ] 
----------------
|           for M in [0:2)
|             for P in [0:2)
|               for M in [0:2) (Spatial-X)
|                 for C in [0:4) (Spatial-X)

weight_cache [ Weights:144 (144) ] 
----------------------------------
|                   for M in [0:2)
|                     for C in [0:2)
|                       for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                         for P in [0:7)
|                           for P in [0:2) (Spatial

[  0] Utilization = 0.06 | pJ/Compute =    3.354 | L5[IO] P7 M4 C8 - L4[WIO] P2 M8 - L3[] Q28 M2 C4 - L2[W] P4 M2X - L1[] M2 C4 Q2X C2X - L0[O] S3 R3 
[  1] Utilization = 0.03 | pJ/Compute =    3.341 | L5[IO] P7 M4 C8 - L4[WIO] P2 M8 - L3[] Q28 M2 C4 - L2[W] P4 M2X - L1[] M2 C8 Q2X - L0[O] S3 R3 
[  3] Utilization = 0.06 | pJ/Compute =    4.260 | L5[IO] Q7 P2 M4 C4 - L4[WIO] Q4 P4 M4 - L3[] Q2 P7 M2 C4 C2X - L2[W] M2 C2X - L1[] M2 C4 M2X - L0[O] S3 R3 
[  0] Utilization = 0.06 | pJ/Compute =    3.264 | L5[IO] P7 M4 C8 - L4[WIO] P2 M8 - L3[] M2 C4 Q28 - L2[W] P4 M2X - L1[] M2 C4 Q2X C2X - L0[O] S3 R3 
[  1] Utilization = 0.03 | pJ/Compute =    3.250 | L5[IO] P7 M4 C8 - L4[WIO] P2 M8 - L3[] M2 C4 Q28 - L2[W] P4 M2X - L1[] M2 C8 Q2X - L0[O] S3 R3 
[  2] Utilization = 0.12 | pJ/Compute =    4.286 | L5[IO] Q7 P2 M4 C4 - L4[WIO] Q4 P4 M4 - L3[] Q2 P7 M2 C4 C2X - L2[W] M2 C2X - L1[] M2 C2 M2X C2X - L0[O] S3 R3 
[  7] Utilization = 0.24 | pJ/Compute =    4.179 | L5[IO] Q7 P2 M8 - L4[WIO] M8 C2



Summary stats for best mapping found by mapper:
  Utilization = 0.47 | pJ/Compute =    4.129
Layer: 6

DRAM [ Inputs:861184 (861184) Outputs:802816 (802816) ] 
-------------------------------------------------------
| for Q in [0:7)
|   for P in [0:2)
|     for M in [0:8)

shared_glb [ Weights:73728 (73728) Inputs:76800 (76800) Outputs:7168 (7168) ] 
-----------------------------------------------------------------------------
|       for M in [0:8)
|         for C in [0:2)

dummy_buffer [ ] 
----------------
|           for M in [0:2)
|             for C in [0:2)
|               for P in [0:7)
|                 for Q in [0:4) (Spatial-X)
|                   for C in [0:2) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|                     for P in [0:4)
|                       for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                         for Q in [0:2)
|                           for M in [0:2)
|             



Mapspace construction complete.
Search configuration complete.


[  5] Utilization = 0.06 | pJ/Compute =    4.216 | L5[IO] Q7 M16 C16 - L4[WIO] Q1 - L3[] Q1 Q2X C2X - L2[W] Q2 P7 M32 C2 P2X - L1[] P2 C4 - L0[O] S3 R3 
[  4] Utilization = 0.12 | pJ/Compute =    4.243 | L5[IO] Q7 M16 C16 - L4[WIO] Q1 - L3[] Q1 Q2X C2X - L2[W] Q2 P7 M32 C2 P2X - L1[] P2 C2 C2X - L0[O] S3 R3 
[  3] Utilization = 0.24 | pJ/Compute =    4.296 | L5[IO] Q7 M16 C16 - L4[WIO] Q1 - L3[] Q1 Q2X C2X - L2[W] Q2 P7 M32 C2 P2X - L1[] P2 C4X - L0[O] S3 R3 
[  7] Utilization = 0.24 | pJ/Compute =    2.751 | L5[IO] P7 C8 - L4[WIO] M4 C4 - L3[] Q2 C4 M4X - L2[W] Q7 M4 C2 Q2X - L1[] M8 P4X - L0[O] S3 R3 
[  0] Utilization = 0.47 | pJ/Compute =    2.637 | L5[IO] P7 C8 - L4[WIO] M4 C4 - L3[] Q2 C2 M4X C2X - L2[W] Q7 M4 Q2X - L1[] M8 C2 P4X - L0[O] S3 R3 
[  2] Utilization = 0.47 | pJ/Compute =    2.712 | L5[IO] P7 C8 - L4[WIO] M4 C4 - L3[] Q2 C2 M4X C2X - L2[W] Q7 M4 C2 Q2X - L1[] M8 P4X - L0[O] S3 R3 
[  0] Utilization = 0.47 | pJ/Compute =    2.630 | L5[IO] P7 C8 - L4[WIO] M4 C4 - L3[] 



Summary stats for best mapping found by mapper:
  Utilization = 0.47 | pJ/Compute =    2.630
Layer: 7

DRAM [ Inputs:230400 (230400) Outputs:401408 (401408) ] 
-------------------------------------------------------
| for P in [0:7)
|   for C in [0:8)

shared_glb [ Weights:147456 (147456) Inputs:5760 (5760) Outputs:57344 (57344) ] 
-------------------------------------------------------------------------------
|     for M in [0:4)
|       for C in [0:4)

dummy_buffer [ ] 
----------------
|         for C in [0:2)
|           for Q in [0:2)
|             for M in [0:4) (Spatial-X)
|               for C in [0:2) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|                 for Q in [0:7)
|                   for M in [0:4)
|                     for Q in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                       for M in [0:8)
|                         for C in [0:2)
|                           for P in [0:4) (Spatial-X

[  7] Utilization = 0.12 | pJ/Compute =    2.791 | L5[IO] Q7 M2 C16 - L4[WIO] Q2 - L3[] M2 C4 P2X - L2[W] Q2 P7 M32 M2X - L1[] C8 P2X M2X - L0[O] S3 R3 
[  3] Utilization = 0.06 | pJ/Compute =    3.442 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C2X - L2[W] P28 M8 M2X - L1[] C2 - L0[O] S3 R3 
[  6] Utilization = 0.12 | pJ/Compute =    3.497 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C4X - L2[W] P28 M8 M2X - L1[] Q1 - L0[O] S3 R3 
[  1] Utilization = 0.03 | pJ/Compute =    3.415 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X - L2[W] P28 M8 C4 M2X - L1[] Q1 - L0[O] S3 R3 
[  5] Utilization = 0.06 | pJ/Compute =    3.442 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C2X - L2[W] P28 M8 C2 M2X - L1[] Q1 - L0[O] S3 R3 
[  2] Utilization = 0.12 | pJ/Compute =    3.497 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C2X - L2[W] P28 M8 M2X - L1[] Q1 C2X - L0[O] S3 R3 
[  4] Utilization = 0.03 | pJ/Compute =    4.132 | L5[IO] Q7 M8 C16 - L4[WIO] Q2 M16 C16 - L3[] P7 M2 C2 - L2[W] Q2 M2 P2X - L



Summary stats for best mapping found by mapper:
  Utilization = 0.24 | pJ/Compute =    2.549
Layer: 8

DRAM [ Inputs:460800 (460800) Outputs:401408 (401408) ] 
-------------------------------------------------------
| for Q in [0:2)
|   for P in [0:7)
|     for M in [0:4)
|       for C in [0:8)

shared_glb [ Weights:73728 (73728) Inputs:6144 (6144) Outputs:7168 (7168) ] 
---------------------------------------------------------------------------
|         for C in [0:4)

dummy_buffer [ ] 
----------------
|           for Q in [0:7)
|             for P in [0:2)
|               for M in [0:4) (Spatial-X)

weight_cache [ Weights:4608 (4608) ] 
------------------------------------
|                 for M in [0:4)
|                   for C in [0:4)
|                     for Q in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                       for M in [0:4)
|                         for C in [0:4)
|                           for P in [0:2) (Spatial-X)
|             

[  7] Utilization = 0.12 | pJ/Compute =    2.791 | L5[IO] Q7 M2 C16 - L4[WIO] Q2 - L3[] M2 C4 P2X - L2[W] Q2 P7 M32 M2X - L1[] C8 P2X M2X - L0[O] S3 R3 
[  3] Utilization = 0.06 | pJ/Compute =    3.442 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C2X - L2[W] P28 M8 M2X - L1[] C2 - L0[O] S3 R3 
[  6] Utilization = 0.12 | pJ/Compute =    3.497 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C4X - L2[W] P28 M8 M2X - L1[] Q1 - L0[O] S3 R3 
[  1] Utilization = 0.03 | pJ/Compute =    3.415 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X - L2[W] P28 M8 C4 M2X - L1[] Q1 - L0[O] S3 R3 
[  5] Utilization = 0.06 | pJ/Compute =    3.442 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C2X - L2[W] P28 M8 C2 M2X - L1[] Q1 - L0[O] S3 R3 
[  2] Utilization = 0.12 | pJ/Compute =    3.497 | L5[IO] Q14 M32 - L4[WIO] C128 - L3[] Q1 Q2X C2X - L2[W] P28 M8 M2X - L1[] Q1 C2X - L0[O] S3 R3 
[  4] Utilization = 0.03 | pJ/Compute =    4.132 | L5[IO] Q7 M8 C16 - L4[WIO] Q2 M16 C16 - L3[] P7 M2 C2 - L2[W] Q2 M2 P2X - L



Summary stats for best mapping found by mapper:
  Utilization = 0.24 | pJ/Compute =    2.549
Layer: 9

DRAM [ Inputs:460800 (460800) Outputs:401408 (401408) ] 
-------------------------------------------------------
| for Q in [0:2)
|   for P in [0:7)
|     for M in [0:4)
|       for C in [0:8)

shared_glb [ Weights:73728 (73728) Inputs:6144 (6144) Outputs:7168 (7168) ] 
---------------------------------------------------------------------------
|         for C in [0:4)

dummy_buffer [ ] 
----------------
|           for Q in [0:7)
|             for P in [0:2)
|               for M in [0:4) (Spatial-X)

weight_cache [ Weights:4608 (4608) ] 
------------------------------------
|                 for M in [0:4)
|                   for C in [0:4)
|                     for Q in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                       for M in [0:4)
|                         for C in [0:4)
|                           for P in [0:2) (Spatial-X)
|             

[  1] Utilization = 0.21 | pJ/Compute =    3.374 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 - L1[] Q2 C4 P2X M2X - L0[O] S3 R3 
[  4] Utilization = 0.06 | pJ/Compute =    3.353 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C16 M2X - L0[O] S3 R3 
[  7] Utilization = 0.24 | pJ/Compute =    3.372 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 C2X - L1[] M16 C4 M2X C2X - L0[O] S3 R3 
[  6] Utilization = 0.21 | pJ/Compute =    3.421 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 C2 - L1[] Q2 C2 P2X M2X - L0[O] S3 R3 
[  3] Utilization = 0.12 | pJ/Compute =    3.359 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C8 M2X C2X - L0[O] S3 R3 
[  0] Utilization = 0.12 | pJ/Compute =    4.093 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C2 C2X - L1[] M8 C2 C2X - L0[O] S3 R3 
[  5] Utilization = 0.12 | pJ/Compute =    4.158 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C4 



Summary stats for best mapping found by mapper:
  Utilization = 0.41 | pJ/Compute =    3.401
Layer: 10

DRAM [ Inputs:131072 (131072) Outputs:100352 (100352) ] 
-------------------------------------------------------
| for P in [0:7)
|   for M in [0:4)
|     for C in [0:16)

shared_glb [ Weights:36864 (36864) Inputs:2048 (2048) Outputs:3584 (3584) ] 
---------------------------------------------------------------------------
|       for M in [0:8)

dummy_buffer [ ] 
----------------
|         for C in [0:8)
|           for Q in [0:7) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|             for M in [0:8)
|               for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                 for Q in [0:2)
|                   for C in [0:2)
|                     for P in [0:2) (Spatial-X)
|                       for M in [0:2) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                         fo

[  7] Utilization = 0.24 | pJ/Compute =    3.372 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 C2X - L1[] M16 C4 M2X C2X - L0[O] S3 R3 
[  4] Utilization = 0.06 | pJ/Compute =    3.353 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C16 M2X - L0[O] S3 R3 
[  1] Utilization = 0.21 | pJ/Compute =    3.374 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 - L1[] Q2 C4 P2X M2X - L0[O] S3 R3 
[  3] Utilization = 0.12 | pJ/Compute =    3.359 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C8 M2X C2X - L0[O] S3 R3 
[  6] Utilization = 0.21 | pJ/Compute =    3.421 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 C2 - L1[] Q2 C2 P2X M2X - L0[O] S3 R3 
[  5] Utilization = 0.12 | pJ/Compute =    4.158 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C4 - L1[] M8 C4X - L0[O] S3 R3 
[  0] Utilization = 0.12 | pJ/Compute =    4.093 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C2 C2X - L



Summary stats for best mapping found by mapper:
  Utilization = 0.41 | pJ/Compute =    3.401
Layer: 11

DRAM [ Inputs:131072 (131072) Outputs:100352 (100352) ] 
-------------------------------------------------------
| for P in [0:7)
|   for M in [0:4)
|     for C in [0:16)

shared_glb [ Weights:36864 (36864) Inputs:2048 (2048) Outputs:3584 (3584) ] 
---------------------------------------------------------------------------
|       for M in [0:8)

dummy_buffer [ ] 
----------------
|         for C in [0:8)
|           for Q in [0:7) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|             for M in [0:8)
|               for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                 for Q in [0:2)
|                   for C in [0:2)
|                     for P in [0:2) (Spatial-X)
|                       for M in [0:2) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                         fo

[  1] Utilization = 0.21 | pJ/Compute =    3.374 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 - L1[] Q2 C4 P2X M2X - L0[O] S3 R3 
[  4] Utilization = 0.06 | pJ/Compute =    3.353 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C16 M2X - L0[O] S3 R3 
[  3] Utilization = 0.12 | pJ/Compute =    3.359 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C8 M2X C2X - L0[O] S3 R3 
[  7] Utilization = 0.24 | pJ/Compute =    3.372 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 C2X - L1[] M16 C4 M2X C2X - L0[O] S3 R3 
[  6] Utilization = 0.21 | pJ/Compute =    3.421 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 C2 - L1[] Q2 C2 P2X M2X - L0[O] S3 R3 
[  5] Utilization = 0.12 | pJ/Compute =    4.158 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C4 - L1[] M8 C4X - L0[O] S3 R3 
[  0] Utilization = 0.12 | pJ/Compute =    4.093 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C2 C2X - L



Summary stats for best mapping found by mapper:
  Utilization = 0.41 | pJ/Compute =    3.401
Layer: 12

DRAM [ Inputs:131072 (131072) Outputs:100352 (100352) ] 
-------------------------------------------------------
| for P in [0:7)
|   for M in [0:4)
|     for C in [0:16)

shared_glb [ Weights:36864 (36864) Inputs:2048 (2048) Outputs:3584 (3584) ] 
---------------------------------------------------------------------------
|       for M in [0:8)

dummy_buffer [ ] 
----------------
|         for C in [0:8)
|           for Q in [0:7) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|             for M in [0:8)
|               for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                 for Q in [0:2)
|                   for C in [0:2)
|                     for P in [0:2) (Spatial-X)
|                       for M in [0:2) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                         fo

[  4] Utilization = 0.06 | pJ/Compute =    3.353 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C16 M2X - L0[O] S3 R3 
[  1] Utilization = 0.21 | pJ/Compute =    3.374 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 - L1[] Q2 C4 P2X M2X - L0[O] S3 R3 
[  7] Utilization = 0.24 | pJ/Compute =    3.372 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 C2X - L1[] M16 C4 M2X C2X - L0[O] S3 R3 
[  3] Utilization = 0.12 | pJ/Compute =    3.359 | L5[IO] C16 - L4[WIO] Q7 P7 - L3[] Q2 M4 M2X C2X - L2[W] P2 M2 - L1[] M16 C8 M2X C2X - L0[O] S3 R3 
[  6] Utilization = 0.21 | pJ/Compute =    3.421 | L5[IO] P7 M4 C16 - L4[WIO] M8 - L3[] C8 Q7X - L2[W] M8 C2 - L1[] Q2 C2 P2X M2X - L0[O] S3 R3 
[  5] Utilization = 0.12 | pJ/Compute =    4.158 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C4 - L1[] M8 C4X - L0[O] S3 R3 
[  0] Utilization = 0.12 | pJ/Compute =    4.093 | L5[IO] Q2 M16 C2 - L4[WIO] Q7 P2 - L3[] P7 C16 M4X - L2[W] C2 C2X - L



Summary stats for best mapping found by mapper:
  Utilization = 0.41 | pJ/Compute =    3.401
Layer: 13

DRAM [ Inputs:131072 (131072) Outputs:100352 (100352) ] 
-------------------------------------------------------
| for P in [0:7)
|   for M in [0:4)
|     for C in [0:16)

shared_glb [ Weights:36864 (36864) Inputs:2048 (2048) Outputs:3584 (3584) ] 
---------------------------------------------------------------------------
|       for M in [0:8)

dummy_buffer [ ] 
----------------
|         for C in [0:8)
|           for Q in [0:7) (Spatial-X)

weight_cache [ Weights:576 (576) ] 
----------------------------------
|             for M in [0:8)
|               for C in [0:2) (Spatial-X)

plcu_dummy_buffer [ ] 
---------------------
|                 for Q in [0:2)
|                   for C in [0:2)
|                     for P in [0:2) (Spatial-X)
|                       for M in [0:2) (Spatial-X)

psumBuffer [ Outputs:1 (1) ] 
----------------------------
|                         fo

In [5]:
# Output directory for the mapper run. The trailing slash matters: file paths
# below are built by plain string concatenation.
fname = './results/vgg16/mapper/9/'
stats_path = fname + 'stats.pkl'
loops_path = fname + 'loops.txt'

# Presumably writes the numbers parsed from the stats to the pickle — confirm
# against extract_numbers. The file is then read back once as a sanity check;
# the loaded value is discarded.
extract_numbers(vgg16_stats, stats_path)
_ = load_data(stats_path)

save_loop_nests(vgg16_loops, loops_path)

In [None]:
# Print, for every layer, the part of its stats text from the "Summary Stats"
# line to the end.
# The per-layer stats are read from `vgg16_stats`, which the model/mapper loops
# fill; the previously used `vgg_albireo_layerwise_results` is not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
# Assumes each entry is the stats text returned by run_timeloop_* — TODO confirm.
for i, stats in enumerate(vgg16_stats):
    print(f"Layer: {i+1}")
    log = False
    for line in stats.split("\n"):
        # Start echoing at the summary header and keep going to the end.
        if "Summary Stats" in line:
            log = True
        if log:
            print(line)

In [None]:
# Sum the cycle count reported in each layer's stats text.
# The per-layer stats are read from `vgg16_stats`, which the model/mapper loops
# fill; the previously used `vgg_albireo_layerwise_results` is not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
# Assumes each entry is the stats text returned by run_timeloop_* — TODO confirm.
total_cycles = 0
for i, stats in enumerate(vgg16_stats):
    for line in stats.split("\n"):
        if "Cycles: " in line:
            # Take whatever follows the marker instead of slicing from column 0,
            # so leading whitespace or a prefix cannot corrupt the number.
            cycles = int(line.split("Cycles: ", 1)[1])
            total_cycles += cycles
            print(f"Cycles in layer {i+1}: {cycles}")
            # Only the first matching line per layer is counted.
            break
print(f"Albireo total cycles: {total_cycles}")

In [None]:
# Sum the energy (in uJ) reported in each layer's stats text.
# The per-layer stats are read from `vgg16_stats`, which the model/mapper loops
# fill; the previously used `vgg_albireo_layerwise_results` is not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
# Assumes each entry is the stats text returned by run_timeloop_* — TODO confirm.
total_energy = 0
for i, stats in enumerate(vgg16_stats):
    for line in stats.split("\n"):
        if "Energy: " in line:
            # Take the text between the marker and the " uJ" unit suffix; splitting
            # on the marker (not slicing from column 0) tolerates leading text.
            energy = float(line.split("Energy: ", 1)[1].split(' uJ')[0])
            total_energy += energy
            print(f"Energy in layer {i+1}: {energy}")
            # Only the first matching line per layer is counted.
            break
print(f"Total energy: {total_energy} uJ")