In [None]:
import hashlib
import os
import pickle
from fastfusion import Specification
from fastfusion.mapper.FFM.exploration.mapper_multi_einsum import get_sims
from fastfusion.mapper.FFM.joining.sim import SIM
from fastfusion.mapper.FFM.joining.simexplore import join_sims

# Experiment selector: True loads the mha_full workload on the "four_level"
# architecture; False loads the matmuls8_mixed workload on "snowcat".
use_four_level = True

if use_four_level:
    archname = "four_level"
    workload_yamls = (
        "workloads/mha_full.workload.yaml",
        "workloads/mha_full.renames.yaml",
    )
else:
    archname = "snowcat"
    workload_yamls = ("workloads/matmuls8_mixed.workload.yaml",)

# The architecture file is passed first, followed by the workload-side files,
# all as positional arguments.
spec = Specification.from_yaml(f"architecture/{archname}.arch.yaml", *workload_yamls)
# spec.calculate_component_energy_area(0)
# sims, decompress_data = get_single_einsum_sims(spec, "Q")

# workload = spec.workload
# renames = spec.renames

# pr = cProfile.Profile()
# pr.enable()

# sims = get_single_einsum_sims(spec, "Q", rank_variable_bounds)

def cache(filename):
    """Build a decorator that memoizes a function's result in a pickle file.

    The first call to the decorated function runs it and pickles the result
    to ``filename``; every later call (in this or any other process) returns
    the unpickled file contents without running the function.

    Args:
        filename: Path of the cache file. ``".pkl"`` is appended when the
            name does not already end with it.

    Returns:
        A decorator that wraps a callable with the file-backed cache.

    Notes:
        * The cache key is the file name only: the arguments passed to the
          decorated function are NOT part of the key, so calls with different
          arguments all return the first stored result.
        * ``pickle.load`` can execute arbitrary code; only point this at
          cache files you created yourself.
    """
    # Imported locally so this block does not depend on the file's import cell.
    import functools

    filename = filename if filename.endswith(".pkl") else f"{filename}.pkl"

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            if os.path.exists(filename):
                # Context manager closes the handle even if unpickling fails.
                with open(filename, "rb") as cache_file:
                    return pickle.load(cache_file)

            result = func(*args, **kwargs)

            # Write to a sibling temp file and rename it into place, so a
            # failed or interrupted dump never leaves a truncated cache file
            # that later calls would try (and fail) to load.
            tmp_path = f"{filename}.tmp"
            try:
                with open(tmp_path, "wb") as cache_file:
                    pickle.dump(result, cache_file)
                os.replace(tmp_path, filename)
            finally:
                # After a successful replace the temp file is gone; this only
                # cleans up when the dump or the rename failed.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            return result

        return wrapper

    return decorator

# @cache(hashlib.md5(spec._yaml_source.encode()).hexdigest())
def get_sims_with_cache():
    """Run the per-Einsum mapper on the module-level ``spec``.

    Calls ``spec.calculate_component_energy_area()``, flattens the
    architecture, and passes both to ``get_sims``.

    Returns:
        A 3-tuple ``(sims, decompress_data, flattened_architecture)``: the two
        values returned by ``get_sims`` followed by the flattened architecture
        that was passed to it.
    """
    spec.calculate_component_energy_area()
    flat_arch = spec.get_flattened_architecture()
    einsum_sims, decompress_info = get_sims(spec, flat_arch)  # , pkl_cache="sims.pkl")
    return einsum_sims, decompress_info, flat_arch


sims, decompress_data, flattened_architecture = get_sims_with_cache()


# TODO: Pass a job ID to the mapper jobs so we don't have to serialize the numbering after

INFO        Loading yaml file architecture/four_level.arch.yaml
INFO        Found top key variables in architecture/four_level.arch.yaml
INFO        Found top key architecture in architecture/four_level.arch.yaml
INFO        Found top key component_classes in architecture/four_level.arch.yaml
INFO        Loading yaml file workloads/mha_full.workload.yaml
INFO        Found top key workload in workloads/mha_full.workload.yaml
INFO        Loading yaml file workloads/mha_full.renames.yaml
INFO        Found top key renames in workloads/mha_full.renames.yaml
INFO        Calculated "1024*1024*128*8" = 1073741824.
INFO        Calculated "1024*1024*32*8" = 268435456.
INFO        Calculated "0.5" = 0.5.
Generating storage and loop choices for Einsum I: 12it [00:00, 253.91it/s]
Generating storage and loop choices for Einsum V: 120it [00:00, 200.85it/s]
Generating storage and loop choices for Einsum K: 120it [00:00, 154.52it/s]
Generating storage and loop choices for Einsum Q: 120it [00:00, 241.21

ERROR:tornado.general:SEND Error: Host unreachable
Generating Partial Mappings:   5%|▍         | 64/1367 [00:06<02:27,  8.82it/s] 

In [None]:
import copy

# Tensor-storage filter handed to SIM.filter_by_tensor_storage. It is empty
# right now; the candidate entries are kept below for reference.
#   TensorStorage(name="I", above_loop_index="*", resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="V", above_loop_index=1, resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="K", above_loop_index=1, resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="Q", above_loop_index=1, resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="QK", above_loop_index="*", resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="AV", above_loop_index="*", resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="Z", above_loop_index="*", resource_name="GlobalBuffer", size=0),
#   TensorStorage(name="T2", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T3", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T4", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T5", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T6", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T7", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T8", above_loop_index=0, resource_name="MainMemory", size=0),
#   TensorStorage(name="T9", above_loop_index=0, resource_name="MainMemory", size=0),
ts = set()

# Apply the filter to each Einsum's SIMs, keyed by Einsum name.
to_join = {
    name: SIM.filter_by_tensor_storage(einsum_sims, ts)
    for name, einsum_sims in sims.items()
}

# Join the per-Einsum SIMs; valid reservations are dropped for every
# architecture except "snowcat".
mappings = join_sims(
    to_join,
    spec,
    flattened_architecture,
    drop_valid_reservations=(archname != "snowcat"),
)
mappings.decompress(decompress_data)

In [None]:
# Re-execute the visualization module before importing from it.
# NOTE(review): presumably so local edits to the module are picked up without
# restarting the kernel — confirm; `%autoreload 2` would make this unnecessary.
import importlib
import fastfusion.visualization.interactive
importlib.reload(fastfusion.visualization.interactive)
# Imported after the reload so `plotly_show` is bound to the reloaded definition.
from fastfusion.visualization.interactive import plotly_show

# Plot from a deep copy of `mappings` rather than the object itself.
# `copy` is not imported in this cell; it comes from the `import copy` in an
# earlier cell, so that cell must have been run first.
mappings2 = copy.deepcopy(mappings)
# mappings2.make_pareto(columns=["RESOURCE_GlobalBuffer_LEVEL_0", "Total_Energy"])
# plotly_show(mappings2.data, "RESOURCE_GlobalBuffer_LEVEL_0", "Total_Energy", logscales=False, einsum_names=spec.workload.einsum_names)
# Interactive plot of the "Total_Latency" and "Total_Energy" columns, without log scales.
plotly_show(mappings2.data, "Total_Latency", "Total_Energy", logscales=False, einsum_names=spec.workload.einsum_names)
# from fastfusion.visualization.interactive import mapping2svg
# mapping2svg(mappings.data.iloc[0], spec.workload.einsum_names)

In [None]:
from fastfusion.mapper.FFM.deprecate_maybe.visualization import make_mapping
from IPython.display import SVG, display

# Show the first rows of the mappings table ordered by ascending Total_Energy.
# This expression used to be a bare statement in the middle of the cell; a
# notebook only echoes the LAST expression of a cell, so its result was
# silently discarded. Wrapping it in display() makes it visible.
display(mappings.data.sort_values(by="Total_Energy", ascending=True).head())

# Render the mapping stored in the first row of `mappings.data` as an SVG.
# NOTE(review): this is row 0 of the table in its existing order, not of the
# energy-sorted view displayed above — confirm that is the intended mapping.
newmapping = make_mapping(mappings.data.iloc[0], spec.workload.einsum_names)
display(SVG(newmapping.render()))

# {'n1'}-1 || [GlobalBuffer] T1 sz 0 above 1
# TODO: Re-add -1 to the mapper one einsum freeing
# compatibility2sims['Matmul1']["{'n1'}-1 || [GlobalBuffer] T1 sz 0 above 1"]
# Above 1: 8192
# Above 2: 8321
# compatibility2sims['Matmul2']["{'n1'}-1 || [GlobalBuffer] T1 sz 0 above 1, [GlobalBuffer] T2 sz 0 above 0"]

In [None]:
# Two-level lookup: Einsum name -> compatibility string -> SIM.
# If two SIMs of one Einsum share a compatibility string, the later one wins.
compatibility2sims = {
    name: {sim.compatibility_str(): sim for sim in einsum_sims}
    for name, einsum_sims in sims.items()
}
print(compatibility2sims)