# Project

In [1]:
import os
import yaml
import pytimeloop.timeloopfe.v4 as tl
from pytimeloop.timeloopfe.common.nodes import DictNode
from pytimeloop.timeloopfe.v4.art import Art
from pytimeloop.timeloopfe.v4.ert import Ert
import shutil

In [2]:
def run_timeloop_mapper(architecture, problem, constraints, mapper, config: dict = None,
                        output_dir: str = "./output_dir"):
    """Run Accelergy and then the Timeloop mapper for a single workload.

    Any existing ``output_dir`` is deleted first, so every call starts from an
    empty directory. The specification is built from the four YAML inputs,
    Accelergy is invoked to produce ``ART.yaml`` / ``ERT.yaml``, those tables
    are copied to the ``timeloop-mapper.*`` file names and loaded back into the
    specification, selected ERT energies are overridden to zero, and finally
    the mapper is called.

    Args:
        architecture: Path of the architecture YAML file.
        problem: Path of the problem (layer shape) YAML file.
        constraints: Path of the constraints YAML file.
        mapper: Path of the mapper settings YAML file.
        config: Optional dict forwarded as ``jinja_parse_data`` when the YAML
            files are parsed. ``None`` or an empty dict means nothing is
            forwarded.
        output_dir: Directory that Accelergy and the mapper write into.
            Defaults to ``"./output_dir"``, the location used before this
            parameter existed.

    Returns:
        Whatever ``tl.call_mapper`` returns for the prepared specification.
    """
    # Start from a clean slate so stale files from a previous run are never
    # picked up. Done in Python rather than by shelling out to `rm -r`, so a
    # failure to delete raises instead of being silently ignored.
    if os.path.exists(output_dir):
        if os.path.isdir(output_dir) and not os.path.islink(output_dir):
            shutil.rmtree(output_dir)
        else:
            os.remove(output_dir)

    # Only forward jinja_parse_data when there is something to substitute.
    parse_kwargs = {"jinja_parse_data": config} if config else {}
    spec = tl.Specification.from_yaml_files(architecture, problem, constraints, mapper, **parse_kwargs)

    tl.accelergy_app(spec, output_dir=output_dir)

    art_path = os.path.join(output_dir, "ART.yaml")
    ert_path = os.path.join(output_dir, "ERT.yaml")
    shutil.copy(art_path, os.path.join(output_dir, "timeloop-mapper.ART.yaml"))
    shutil.copy(ert_path, os.path.join(output_dir, "timeloop-mapper.ERT.yaml"))

    # Attach the generated tables to the spec so they can be edited below.
    spec.ERT = Ert(**DictNode.from_yaml_files(ert_path)["ERT"])
    spec.ART = Art(**DictNode.from_yaml_files(art_path)["ART"])

    # Force the energy of actions 1..3 of the first ERT table to zero.
    # NOTE(review): the indices are positional; which component and actions
    # they refer to depends on the architecture file -- confirm they still
    # match whenever the architecture changes.
    for action_index in (1, 2, 3):
        spec.ERT['tables'][0]['actions'][action_index]['energy'] = 0.0

    return tl.call_mapper(spec, output_dir=output_dir)

In [13]:
# Layer-shape files for AlexNet: layer_shapes/alexnet/0.yaml through 7.yaml.
LAYERS = [f'layer_shapes/alexnet/{layer_index}.yaml' for layer_index in range(8)]

# GPU counts swept by the experiment loops (each is passed as `gpu_meshX`).
NUM_GPUS = [16]

### Data-Parallel

In [16]:
# Data-parallel sweep: for each GPU count, map every layer with the
# data-parallel constraints and accumulate the energy and cycle figures that
# the mapper writes to its stats report.
for num_gpus in NUM_GPUS:
    print("Number of GPUs: ", num_gpus)

    total_energy = 0
    total_cycles = 0

    for problem in LAYERS:
        # `result` is kept so the last mapper result can be inspected
        # interactively after the cell has run.
        result = run_timeloop_mapper(
            architecture='designs/system/arch.yaml',
            problem=problem,
            constraints='designs/system/constraints_dp.yaml',
            mapper='designs/_include/mapper.yaml',
            config={'gpu_meshX': num_gpus}
        )

        # `with` closes the report promptly; a bare open(...).read() would
        # leave one handle open per layer until garbage collection.
        with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
            stats = stats_file.read()

        lines = stats.split('\n')
        # Take the first line mentioning each label; the number is the second
        # space-separated token on that line.
        energy = float(next(line for line in lines if 'Energy:' in line).split(' ', 2)[1])
        cycles = int(next(line for line in lines if 'Cycles:' in line).split(' ', 1)[1])

        total_energy += energy
        total_cycles += cycles

        print(problem, energy, cycles)

    print(total_energy, total_cycles)

Number of GPUs:  16
layer_shapes/alexnet/0.yaml 40881.29 93702400
layer_shapes/alexnet/1.yaml 102613.66 55987200
layer_shapes/alexnet/2.yaml 54237.7 28035072
layer_shapes/alexnet/3.yaml 72205.47 37380096
layer_shapes/alexnet/4.yaml 48252.78 24920064
layer_shapes/alexnet/5.yaml 174226.51 9437184
layer_shapes/alexnet/6.yaml 77452.28 4194304
layer_shapes/alexnet/7.yaml 18935.42 1024000
588805.1100000001 254680320


### Tensor-Parallel

In [17]:
# Tensor-parallel sweep: same as the data-parallel sweep but with the
# tensor-parallel constraints, plus an analytically estimated network energy
# that is added to the mapper-reported energy of every layer.
for num_gpus in NUM_GPUS:
    print("Number of GPUs: ", num_gpus)

    total_energy = 0
    total_cycles = 0

    for problem in LAYERS:
        # `result` is kept so the last mapper result can be inspected
        # interactively after the cell has run.
        result = run_timeloop_mapper(
            architecture='designs/system/arch.yaml',
            problem=problem,
            constraints='designs/system/constraints_tp.yaml',
            mapper='designs/_include/mapper.yaml',
            config={'gpu_meshX': num_gpus}
        )

        # `with` closes each file promptly; a bare open(...).read() would
        # leave handles open until garbage collection.
        with open('./output_dir/timeloop-mapper.stats.txt', 'r') as stats_file:
            stats = stats_file.read()

        lines = stats.split('\n')
        # Take the first line mentioning each label; the number is the second
        # space-separated token on that line.
        energy = float(next(line for line in lines if 'Energy:' in line).split(' ', 2)[1])
        cycles = int(next(line for line in lines if 'Cycles:' in line).split(' ', 1)[1])

        # The problem dimensions come from the parsed input that the run left
        # in the output directory.
        with open('./output_dir/parsed-processed-input.yaml', 'r') as parsed_input_file:
            parsed_processed_input = parsed_input_file.read()
        instance = yaml.safe_load(parsed_processed_input)['problem']['instance']

        num_hops = num_gpus * (num_gpus - 1)
        tensor_size = instance['N'] * instance['M'] * instance['P'] * instance['Q']

        # NOTE(review): num_gpus * (num_gpus - 1) looks like an all-to-all
        # exchange count and N*M*P*Q like the output tensor size; 1.3e-6 and 16
        # are presumably an energy-per-bit-per-hop figure and the bits per
        # element -- TODO confirm and move them to named constants.
        # The multiplication order is left unchanged so results stay
        # bit-identical to earlier runs.
        network_energy = num_hops * tensor_size * 1.3e-6 * 16

        total_energy += energy + network_energy
        total_cycles += cycles

        print(problem, energy + network_energy, cycles)

    print(total_energy, total_cycles)

Number of GPUs:  16
layer_shapes/alexnet/0.yaml 105846.4368 93702400
layer_shapes/alexnet/1.yaml 154299.536384 55987200
layer_shapes/alexnet/2.yaml 80459.22324800001 28035072
layer_shapes/alexnet/3.yaml 98852.078832 37380096
layer_shapes/alexnet/4.yaml 70624.628832 24920064
layer_shapes/alexnet/5.yaml 30025.182848 9437184
layer_shapes/alexnet/6.yaml 14259.552848 4194304
layer_shapes/alexnet/7.yaml 4194.118 1376256
558560.7577920001 255032576
