# Project

In [51]:
import os
import pytimeloop.timeloopfe.v4 as tl
from pytimeloop.timeloopfe.common.nodes import DictNode
from pytimeloop.timeloopfe.v4.art import Art
from pytimeloop.timeloopfe.v4.ert import Ert
import shutil

In [52]:
def run_timeloop_model(architecture, mapping, problem, config: dict = None):
    """Run the Timeloop model for one architecture / mapping / problem triple.

    Any existing ``./output_dir`` is deleted first so the run starts from an
    empty output directory.

    Args:
        architecture: Architecture YAML file, passed to
            ``tl.Specification.from_yaml_files``.
        mapping: Mapping YAML file, passed to the same loader.
        problem: Problem YAML file, passed to the same loader.
        config: Optional dict forwarded as ``jinja_parse_data`` to the loader.
            ``None`` or an empty dict means the keyword is not passed at all.

    Returns:
        Whatever ``tl.call_model`` returns for the assembled specification.
    """
    # Delete stale results in-process rather than shelling out to `rm -r`,
    # which is not portable and silently ignores a failed delete.
    if os.path.exists("./output_dir"):
        shutil.rmtree("./output_dir")

    # Only pass jinja_parse_data when there is something to substitute.
    load_kwargs = {"jinja_parse_data": config} if config else {}
    spec = tl.Specification.from_yaml_files(architecture, mapping, problem, **load_kwargs)

    return tl.call_model(spec, output_dir="./output_dir")

In [78]:
def run_timeloop_mapper_dp(architecture, problem, constraints, mapper, config: dict = None):
    """Run the Timeloop mapper with three ERT action energies forced to zero.

    Steps, in order:
      1. Delete any existing ``./output_dir``.
      2. Build a specification from the given YAML files.
      3. Run ``tl.accelergy_app`` and duplicate the resulting ``ART.yaml`` /
         ``ERT.yaml`` under ``timeloop-model.*`` and ``timeloop-mapper.*`` names.
      4. Load those tables back into ``spec.ART`` / ``spec.ERT``.
      5. Set the energy of actions 1, 2 and 3 of the first ERT table to 0.0.
      6. Run the mapper on the modified specification.

    Args:
        architecture: Architecture YAML file.
        problem: Problem YAML file.
        constraints: Constraints YAML file.
        mapper: Mapper configuration YAML file.
        config: Optional dict forwarded as ``jinja_parse_data`` when the YAML
            files are loaded. ``None`` or an empty dict means the keyword is
            not passed at all.

    Returns:
        Whatever ``tl.call_mapper`` returns for the modified specification.
    """
    # Delete stale results in-process rather than shelling out to `rm -r`,
    # which is not portable and silently ignores a failed delete.
    if os.path.exists("./output_dir"):
        shutil.rmtree("./output_dir")

    # Only pass jinja_parse_data when there is something to substitute.
    load_kwargs = {"jinja_parse_data": config} if config else {}
    spec = tl.Specification.from_yaml_files(architecture, problem, constraints, mapper, **load_kwargs)

    # Called for its output files only; the return value is not needed here.
    tl.accelergy_app(spec, output_dir="./output_dir")

    # Duplicate each table under the tool-prefixed file names.
    for table in ("ART", "ERT"):
        for tool in ("timeloop-model", "timeloop-mapper"):
            shutil.copy(f"output_dir/{table}.yaml", f"output_dir/{tool}.{table}.yaml")

    spec.ERT = Ert(**DictNode.from_yaml_files("output_dir/ERT.yaml")["ERT"])
    spec.ART = Art(**DictNode.from_yaml_files("output_dir/ART.yaml")["ART"])

    # NOTE(review): these are positional indices into the generated ERT; which
    # component and which actions they refer to depends on the architecture
    # file and is not visible here - confirm against output_dir/ERT.yaml.
    for action_index in (1, 2, 3):
        spec.ERT['tables'][0]['actions'][action_index]['energy'] = 0.0

    return tl.call_mapper(spec, output_dir="./output_dir")

In [79]:
def run_timeloop_mapper_tp(architecture, problem, constraints, mapper, config: dict = None):
    """Run the Timeloop mapper using the unmodified Accelergy tables.

    Steps, in order:
      1. Delete any existing ``./output_dir``.
      2. Build a specification from the given YAML files.
      3. Run ``tl.accelergy_app`` and duplicate the resulting ``ART.yaml`` /
         ``ERT.yaml`` under ``timeloop-model.*`` and ``timeloop-mapper.*`` names.
      4. Load those tables back into ``spec.ART`` / ``spec.ERT``.
      5. Run the mapper.

    Args:
        architecture: Architecture YAML file.
        problem: Problem YAML file.
        constraints: Constraints YAML file.
        mapper: Mapper configuration YAML file.
        config: Optional dict forwarded as ``jinja_parse_data`` when the YAML
            files are loaded. ``None`` or an empty dict means the keyword is
            not passed at all.

    Returns:
        Whatever ``tl.call_mapper`` returns for the specification.
    """
    # Delete stale results in-process rather than shelling out to `rm -r`,
    # which is not portable and silently ignores a failed delete.
    if os.path.exists("./output_dir"):
        shutil.rmtree("./output_dir")

    # Only pass jinja_parse_data when there is something to substitute.
    load_kwargs = {"jinja_parse_data": config} if config else {}
    spec = tl.Specification.from_yaml_files(architecture, problem, constraints, mapper, **load_kwargs)

    # Called for its output files only; the return value is not needed here.
    tl.accelergy_app(spec, output_dir="./output_dir")

    # Duplicate each table under the tool-prefixed file names.
    for table in ("ART", "ERT"):
        for tool in ("timeloop-model", "timeloop-mapper"):
            shutil.copy(f"output_dir/{table}.yaml", f"output_dir/{tool}.{table}.yaml")

    spec.ERT = Ert(**DictNode.from_yaml_files("output_dir/ERT.yaml")["ERT"])
    spec.ART = Art(**DictNode.from_yaml_files("output_dir/ART.yaml")["ART"])

    return tl.call_mapper(spec, output_dir="./output_dir")

In [83]:
# Layer-shape files for AlexNet, numbered 0 through 7.
problems = [f'layer_shapes/alexnet/{layer}.yaml' for layer in range(8)]

# Initial GPU count; the sweep loops further down rebind this name.
num_gpus = 16

### Data-Parallel

In [88]:
# Sweep the GPU count and, for each setting, run the data-parallel mapper on
# every layer in `problems`, printing per-layer and summed energy / cycles.
for num_gpus in [16, 32, 64]:
    print("num_gpus", num_gpus)

    total_energy = 0
    total_cycles = 0

    for problem in problems:
        result = run_timeloop_mapper_dp(
            architecture='designs/system/arch.yaml',
            problem=problem,
            constraints='designs/system/constraints_dp.yaml',
            mapper='designs/_include/mapper.yaml',
            config={'gpu_meshX': num_gpus}
        )

        # Read the stats through a context manager so the handle is closed
        # before the next iteration deletes and recreates output_dir.
        with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
            stats = stats_file.read()

        # Uncomment to inspect the raw stats or the chosen mapping.
        # print(stats)
        # print(result.mapping)

        # Take the first line mentioning each label; the value is the second
        # space-separated token on that line.
        lines = stats.split('\n')
        energy = float([line for line in lines if 'Energy:' in line][0].split(' ', 2)[1])
        cycles = int([line for line in lines if 'Cycles:' in line][0].split(' ', 1)[1])

        total_energy += energy
        total_cycles += cycles

        print(problem, energy, cycles)

    print(total_energy, total_cycles)

num_gpus 16
layer_shapes/alexnet/0.yaml 40881.29 93702400
layer_shapes/alexnet/1.yaml 102613.66 55987200
layer_shapes/alexnet/2.yaml 54237.7 28035072
layer_shapes/alexnet/3.yaml 72205.47 37380096
layer_shapes/alexnet/4.yaml 48252.78 24920064
layer_shapes/alexnet/5.yaml 174536.62 9437184
layer_shapes/alexnet/6.yaml 77452.28 4194304
layer_shapes/alexnet/7.yaml 18935.42 1024000
589115.2200000001 254680320
num_gpus 32
layer_shapes/alexnet/0.yaml 40928.88 46851200
layer_shapes/alexnet/1.yaml 103242.8 27993600
layer_shapes/alexnet/2.yaml 55596.65 14017536
layer_shapes/alexnet/3.yaml 74017.41 18690048
layer_shapes/alexnet/4.yaml 49460.74 12460032
layer_shapes/alexnet/5.yaml 332805.5 4718592
layer_shapes/alexnet/6.yaml 147931.83 2097152
layer_shapes/alexnet/7.yaml 36159.16 512000
840142.97 127340160
num_gpus 64
layer_shapes/alexnet/0.yaml 41024.04 23425600
layer_shapes/alexnet/1.yaml 104501.09 13996800
layer_shapes/alexnet/2.yaml 58996.7 7008768
layer_shapes/alexnet/3.yaml 78550.81 9345024
lay

### Tensor-Parallel

In [89]:
# Sweep the GPU count and, for each setting, run the tensor-parallel mapper on
# every layer in `problems`, printing per-layer and summed energy / cycles.
for num_gpus in [16, 32, 64]:
    print("num_gpus", num_gpus)

    total_energy = 0
    total_cycles = 0

    for problem in problems:
        result = run_timeloop_mapper_tp(
            architecture='designs/system/arch.yaml',
            problem=problem,
            constraints='designs/system/constraints_tp.yaml',
            mapper='designs/_include/mapper.yaml',
            config={'gpu_meshX': num_gpus}
        )

        # Read the stats through a context manager so the handle is closed
        # before the next iteration deletes and recreates output_dir.
        with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
            stats = stats_file.read()

        # Uncomment to inspect the raw stats or the chosen mapping.
        # print(stats)
        # print(result.mapping)

        # Take the first line mentioning each label; the value is the second
        # space-separated token on that line.
        lines = stats.split('\n')
        energy = float([line for line in lines if 'Energy:' in line][0].split(' ', 2)[1])
        cycles = int([line for line in lines if 'Cycles:' in line][0].split(' ', 1)[1])

        total_energy += energy
        total_cycles += cycles

        print(problem, energy, cycles)

    print(total_energy, total_cycles)

num_gpus 16
layer_shapes/alexnet/0.yaml 45686.34 93702400
layer_shapes/alexnet/1.yaml 111271.22 55987200
layer_shapes/alexnet/2.yaml 60696.18 28035072
layer_shapes/alexnet/3.yaml 86205.2 37380096
layer_shapes/alexnet/4.yaml 57704.07 24920064
layer_shapes/alexnet/5.yaml 34039.38 9437184
layer_shapes/alexnet/6.yaml 14995.78 4194304
layer_shapes/alexnet/7.yaml 4444.99 1376256
415043.16000000003 255032576
num_gpus 32
layer_shapes/alexnet/0.yaml 49053.68 46851200
layer_shapes/alexnet/1.yaml 130642.99 37324800
layer_shapes/alexnet/2.yaml 70741.73 14017536
layer_shapes/alexnet/3.yaml 109158.87 18690048
layer_shapes/alexnet/4.yaml 73006.52 12460032
layer_shapes/alexnet/5.yaml 34865.41 4718592
layer_shapes/alexnet/6.yaml 15532.65 2097152
layer_shapes/alexnet/7.yaml 5514.85 524288
488516.70000000007 136683648
num_gpus 64
layer_shapes/alexnet/0.yaml 55591.05 23425600
layer_shapes/alexnet/1.yaml 161996.95 18662400
layer_shapes/alexnet/2.yaml 94498.08 9345024
layer_shapes/alexnet/3.yaml 154112.02 9