# Project

In [1]:
import os
import pytimeloop.timeloopfe.v4 as tl

In [2]:
def run_timeloop_model(architecture, mapping, problem, output_dir="./output_dir"):
    """Run the Timeloop model on a specification assembled from YAML files.

    Any existing ``output_dir`` is removed first so the run starts from an
    empty location.

    Args:
        architecture: Path to the architecture YAML file.
        mapping: Path to the mapping YAML file.
        problem: Path to the problem YAML file.
        output_dir: Directory handed to ``tl.call_model``; defaults to
            ``"./output_dir"``, the location the original code hard-coded.

    Returns:
        Whatever ``tl.call_model`` returns for the assembled specification.
    """
    # Imported locally so this function does not depend on the import cell
    # being edited as well.
    import shutil

    # Clear leftovers with the standard library instead of shelling out to
    # `rm -r`: this is portable and raises on failure rather than silently
    # discarding a non-zero exit status.
    if os.path.islink(output_dir) or os.path.isfile(output_dir):
        os.remove(output_dir)
    elif os.path.isdir(output_dir):
        shutil.rmtree(output_dir)

    spec = tl.Specification.from_yaml_files(architecture, mapping, problem)

    return tl.call_model(spec, output_dir=output_dir)

In [3]:
def run_timeloop_mapper(architecture, problem, constraints, mapper, output_dir="./output_dir"):
    """Run the Timeloop mapper on a specification assembled from YAML files.

    Any existing ``output_dir`` is removed first so the run starts from an
    empty location.

    Args:
        architecture: Path to the architecture YAML file.
        problem: Path to the problem YAML file.
        constraints: Path to the constraints YAML file.
        mapper: Path to the mapper configuration YAML file.
        output_dir: Directory handed to ``tl.call_mapper``; defaults to
            ``"./output_dir"``, the location the original code hard-coded.

    Returns:
        Whatever ``tl.call_mapper`` returns for the assembled specification.
    """
    # Imported locally so this function does not depend on the import cell
    # being edited as well.
    import shutil

    # Clear leftovers with the standard library instead of shelling out to
    # `rm -r`: this is portable and raises on failure rather than silently
    # discarding a non-zero exit status.
    if os.path.islink(output_dir) or os.path.isfile(output_dir):
        os.remove(output_dir)
    elif os.path.isdir(output_dir):
        shutil.rmtree(output_dir)

    spec = tl.Specification.from_yaml_files(architecture, problem, constraints, mapper)

    return tl.call_mapper(spec, output_dir=output_dir)

### Data-Parallel

In [5]:
# Alternative: run the model on an explicit mapping file instead of the mapper.
# result = run_timeloop_model(
#     architecture='designs/system/arch.yaml',
#     mapping='designs/system/map_dp.yaml',
#     problem='layer_shapes/prob.yaml'
# )
# stats = open('./output_dir/timeloop-model.stats.txt', 'r').read()

# Run the mapper with the data-parallel constraints file.
result = run_timeloop_mapper(
    architecture='designs/system/arch.yaml',
    problem='layer_shapes/prob.yaml',
    constraints='designs/system/constraints_dp.yaml',
    mapper='designs/_include/mapper.yaml'
)
# Read the stats report inside a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
    stats = stats_file.read()

# print(stats)
print(result.mapping)

# Take each value from the first line that contains its label.
# NOTE(review): assumes "Energy: <value> <unit>" and "Cycles: <value>" layouts
# in the stats file — confirm against the Timeloop output.
lines = stats.split('\n')
energy = float([line for line in lines if 'Energy:' in line][0].split(' ', 2)[1])
cycles = int([line for line in lines if 'Cycles:' in line][0].split(' ', 1)[1])
print("energy: ", energy)
print("cycles: ", cycles)

SystemMemory [ Weights:307200 (307200) Inputs:3936256 (3936256) Outputs:8957952 (8957952) ] 
-------------------------------------------------------------------------------------------
| for N in [0:4)

inter_GPU_spatial [ ] 
---------------------
|   for N in [0:16) (Spatial-X)

LocalMemory [ Weights:307200 (307200) Inputs:61504 (61504) Outputs:139968 (139968) ] 
------------------------------------------------------------------------------------
|     for M in [0:16)
|       for C in [0:16)

LocalBuffer [ Weights:1200 (1200) Inputs:3844 (3844) Outputs:8748 (8748) ] 
inter_PE_spatial [ ] 
--------------------
|         for M in [0:4) (Spatial-Y)
|           for C in [0:4) (Spatial-X)

RegisterFile [ Weights:75 (75) ] 
--------------------------------
|             for Q in [0:27)
|               for P in [0:27)
|                 for S in [0:5)
|                   for R in [0:5)
|                     for M in [0:3)
|                       << Compute >>

energy:  104303.44
cycles:  5598

### Tensor-Parallel

In [12]:
# Alternative: run the model on an explicit mapping file instead of the mapper.
# result = run_timeloop_model(
#     architecture='designs/system/arch.yaml',
#     mapping='designs/system/map_tp.yaml',
#     problem='layer_shapes/prob.yaml'
# )
# stats = open('./output_dir/timeloop-model.stats.txt', 'r').read()

# Run the mapper with the tensor-parallel constraints file.
result = run_timeloop_mapper(
    architecture='designs/system/arch.yaml',
    problem='layer_shapes/prob.yaml',
    constraints='designs/system/constraints_tp.yaml',
    mapper='designs/_include/mapper.yaml'
)
# Read the stats report inside a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
    stats = stats_file.read()

# print(stats)
print(result.mapping)

# Take each value from the first line that contains its label.
# NOTE(review): assumes "Energy: <value> <unit>" and "Cycles: <value>" layouts
# in the stats file — confirm against the Timeloop output.
lines = stats.split('\n')
energy = float([line for line in lines if 'Energy:' in line][0].split(' ', 2)[1])
cycles = int([line for line in lines if 'Cycles:' in line][0].split(' ', 1)[1])
print("energy: ", energy)
print("cycles: ", cycles)

SystemMemory [ Weights:307200 (307200) Inputs:3936256 (3936256) Outputs:8957952 (8957952) ] 
inter_GPU_spatial [ ] 
---------------------
| for M in [0:16) (Spatial-X)

LocalMemory [ Weights:19200 (19200) Inputs:3936256 (3936256) Outputs:559872 (559872) ] 
--------------------------------------------------------------------------------------
|   for N in [0:64)
|     for C in [0:16)

LocalBuffer [ Weights:1200 (1200) Inputs:3844 (3844) Outputs:8748 (8748) ] 
--------------------------------------------------------------------------
|       for M in [0:3)

inter_PE_spatial [ ] 
--------------------
|         for M in [0:4) (Spatial-Y)
|           for C in [0:4) (Spatial-X)

RegisterFile [ Weights:25 (25) ] 
--------------------------------
|             for Q in [0:27)
|               for P in [0:27)
|                 for S in [0:5)
|                   for R in [0:5)
|                     << Compute >>

energy:  111271.22
cycles:  55987200
