In [1]:
# Ask IPython for the kernel's current working directory.
jupyter_pwd = %pwd
# When the kernel starts at the filesystem root, move to /workspace; the
# relative paths used in this notebook (./lib/..., ./csv_files/...) are
# presumably meant to be resolved from there.
if jupyter_pwd == "/":
    %cd /workspace

# Uncomment to drop into the debugger automatically when an exception is raised.
# %pdb on

# Import the library that is kept in .ipynb form (executed via %run).
%run ./lib/lib.ipynb

# ipynb形式のライブラリノートを.py形式に変更したものをインポート
import lib
import lib.lab_lib
from lib.lab_lib import *

# Directory that holds the CSV files containing the raw measurement data.
csvDirPath: str = "./csv_files/"

# Names of the NPB benchmarks.
benchmarkNames: list[str] = "cg ep ft is lu mg".split()

# NPB process counts: the powers of two from 2 up to 256.
npb_process: list[int] = [2 ** exponent for exponent in range(1, 9)]
# The largest process count is held out for testing; the others are training data.
train_npb_process, test_npb_process = npb_process[:-1], npb_process[-1:]

# Input-variable sweeps for the NPB CG benchmark.
# NOTE(review): the last cg_na value (1500000) is 15x the previous one - confirm it is intended.
cg_na: list[int] = [14000, 30000, 75000, 100000, 1500000]
cg_nonzer: list[int] = [11, 12, 13, 14, 15, 18, 21]
cg_niter: list[int] = [15, 30, 75, 90, 100]
cg_shift: list[int] = [20, 40, 60, 80, 110, 200]

# For every CG variable: all values except the last are used for training,
# and the last value alone is the test point.
train_cg_na, test_cg_na = cg_na[:-1], cg_na[-1:]
train_cg_nonzer, test_cg_nonzer = cg_nonzer[:-1], cg_nonzer[-1:]
train_cg_niter, test_cg_niter = cg_niter[:-1], cg_niter[-1:]
train_cg_shift, test_cg_shift = cg_shift[:-1], cg_shift[-1:]

# LULESH benchmark sweeps: process counts (the cubes 2^3 .. 8^3),
# iteration counts (2^3 .. 2^8) and problem sizes.
lulesh_processes: list[int] = [side ** 3 for side in range(2, 9)]
lulesh_iterations: list[int] = [2 ** exponent for exponent in range(3, 9)]
lulesh_sizes: list[int] = [16, 24, 32, 48, 64, 128]

# Training subsets: every process/iteration value except the largest,
# and the four smallest problem sizes.
train_lulesh_processes: list[int] = lulesh_processes[:-1]
train_lulesh_iterations: list[int] = lulesh_iterations[:-1]
train_lulesh_sizes: list[int] = lulesh_sizes[:4]

# Test values. These are listed independently of the sweeps above and
# include values that do not appear there (e.g. 729, 1000, 1024, 96).
test_lulesh_processes: list[int] = [side ** 3 for side in (8, 9, 10)]
test_lulesh_iterations: list[int] = [2 ** exponent for exponent in (8, 9, 10)]
test_lulesh_sizes: list[int] = [64, 96, 128]

# Extra-P options.
# Modeler names that are currently enabled; the commented-out entries are
# alternatives that have been switched off.
modelerNames: list[str] = [
    # "refining",
    "multi-parameter",
    "default",
    # "basic --options poly_exponents=-1,0,1,2,3 log_exponents=0,1 force_combination_exponents=1 allow_negative_exponents=1"
]

# Extra option string handed to the modeler. The text contains literal
# backslashes in front of each '#', so it is written as a raw string: in a
# normal string literal "\#" is an invalid escape sequence, which Python
# reports with a DeprecationWarning / SyntaxWarning. The runtime value is
# unchanged.
modelerOption: str = r" --options \#spm=Basic \#spo=poly_exponents=-1,0,1,2,3,log_exponents=0,1,force_combination_exponents=1,allow_negative_exponents=True"

# Names of the candidate models. Every name is "model" followed by one of the
# suffixes below; the order of the suffixes is the order of the resulting list.
list_modelName: list[str] = [
    f"model{suffix}"
    for suffix in (
        "Ip",
        "Log",
        "LinAndIp",
        "LinAndLog",
        "IpAndLin",
        "IpAndLog",
        "LogAndLin",
        "LogAndIp",
        "ProcessDividedByProblemSize",
        "ProblemSizeDividedByProcess",
        "InfiniteProductOfProblemSizeMultipliedByProcesses",
        "InfiniteProductOfProblemSizeDividedByProcesses",
        "LinearSumOf2elementCombination",
        "LinearSumOfElementCombinations",
        "LinearSumOf2elementCombinationWithSquared",
        "LinearSumOf2elementCombinationWithCubed",
        "SquareRootOfProcess",
        "SquareRootTimesOtherElems",
        "ObeyOneParameter",
        "Lin",
        # "BasicTree",  (currently disabled)
    )
]
# Directories of the three LULESH measurement runs (1st, 2nd, 3rd).
list_csvDir = [f"./csv_files/lulesh_{run}/" for run in ("1st", "2nd", "3rd")]

/workspace


DEBUG:__main__:hello
DEBUG:lib.lab_lib:hello


In [2]:
# モデル構築関数

def returnModelByExtraP(
    input_benchmarkName: str,
    input_expVar: list[str],
    input_resVar: str,
    input_trainDF: pd.DataFrame,
    input_testDF: pd.DataFrame,
    input_dict_symbols: dict[str, any],
    input_modelerName: str,
    input_modelerOption: str,
):
    """Build two models per profiled function via ``get_ExtraP_model``.

    For every distinct value of the ``"Name"`` column in ``input_trainDF``
    (visited in sorted order), the rows of that function are selected and
    ``get_ExtraP_model`` is called twice on them: first with the response
    column ``input_resVar`` and then with ``f"{input_resVar}PerCall"``.

    Args:
        input_benchmarkName: Forwarded as ``benchmarkName``.
        input_expVar: Explanatory variable names, forwarded as ``expVar``.
        input_resVar: Name of the response column for the "all" models;
            the "perCall" models use this name with ``"PerCall"`` appended.
        input_trainDF: Training data; must have a ``"Name"`` column.
        input_testDF: Accepted for interface compatibility but not used here.
        input_dict_symbols: Forwarded as ``dict_symbols``.
        input_modelerName: Forwarded as ``modelerName``.
        input_modelerOption: Forwarded as ``modelerOption``.

    Returns:
        ``{"all": {function name: model}, "perCall": {function name: model}}``
        where each model is whatever ``get_ExtraP_model`` returned.
    """
    # Result key -> response column that the models under that key are fitted to.
    response_by_kind = {
        "all": input_resVar,
        "perCall": f"{input_resVar}PerCall",
    }
    models_by_kind = {kind: {} for kind in response_by_kind}

    for function_name in sorted(set(input_trainDF["Name"])):
        # reset_index() without drop=True: the previous index is kept as an
        # "index" column in the frame handed to the modeler.
        rows_of_function = input_trainDF[
            input_trainDF["Name"] == function_name
        ].reset_index()

        # Same order as before: the "all" model first, then the "perCall" model.
        for kind, response_column in response_by_kind.items():
            models_by_kind[kind][function_name] = get_ExtraP_model(
                benchmarkName=input_benchmarkName,
                inputDF_perFunc=rows_of_function,
                expVar=input_expVar,
                resVar=response_column,
                functionName=function_name,
                modelerName=input_modelerName,
                modelerOption=input_modelerOption,
                dict_symbols=input_dict_symbols,
            )

    return models_by_kind


In [3]:


# Model construction

# Names of the explanatory variables.
expVar: list[str] = ["process", "iteration", "size"]
# Names of the two response variables (exclusive and inclusive).
resVar_ex, resVar_in = "Exclusive", "Inclusive"

# Benchmark that the following cells work on.
benchmarkName = "lulesh"

def _load_lulesh_frame(processes, iterations, sizes, res_var):
    """Load the averaged LULESH data for one parameter grid and reset its index.

    Thin wrapper around ``ret_averaged_rawDF_lulesh`` that always reads from
    ``list_csvDir`` and returns the result of ``reset_index()`` on the frame.
    """
    frame = ret_averaged_rawDF_lulesh(
        list_process=processes,
        list_iteration=iterations,
        list_size=sizes,
        list_csvDir=list_csvDir,
        resVar=res_var,
    )
    return frame.reset_index()


# Inclusive data: the training frame covers the training grid, the test frame
# only the last entry of each test list.
trainDF_lulesh_in = _load_lulesh_frame(
    train_lulesh_processes, train_lulesh_iterations, train_lulesh_sizes, resVar_in
)
testDF_lulesh_in = _load_lulesh_frame(
    test_lulesh_processes[-1:], test_lulesh_iterations[-1:], test_lulesh_sizes[-1:], resVar_in
)

# Exclusive data, loaded for the same training grid and test point.
trainDF_lulesh_ex = _load_lulesh_frame(
    train_lulesh_processes, train_lulesh_iterations, train_lulesh_sizes, resVar_ex
)
testDF_lulesh_ex = _load_lulesh_frame(
    test_lulesh_processes[-1:], test_lulesh_iterations[-1:], test_lulesh_sizes[-1:], resVar_ex
)

# Distinct function names found in the inclusive training data, sorted.
functionNames_lulesh: list[str] = sorted(set(trainDF_lulesh_in["Name"]))

# One symbol per explanatory variable, keyed by the variable's name.
# Built with a comprehension so that no scratch loop variable is left behind
# in the kernel namespace.
# NOTE(review): `symbols` comes from the wildcard import - presumably sympy.symbols; confirm.
dict_symbols_lulesh = {name: symbols(name, real=True) for name in expVar}

# (symbol, value) pairs for the target environment: the last entry of each
# test list, in the order size, iteration, process.
target_env = [
    (dict_symbols_lulesh[name], values[-1])
    for name, values in (
        ("size", test_lulesh_sizes),
        ("iteration", test_lulesh_iterations),
        ("process", test_lulesh_processes),
    )
]

# Modeler that is passed to the model-building calls.
modelerName: str = "multi-parameter"

# Column name "#Call" - presumably the call-count column of the data; confirm against the CSV schema.
resVar_call: str = "#Call"


In [None]:

# Keyword arguments that are identical for the exclusive and inclusive runs.
_shared_modeling_args = dict(
    input_benchmarkName=benchmarkName,
    input_expVar=expVar,
    input_dict_symbols=dict_symbols_lulesh,
    input_modelerName=modelerName,
    input_modelerOption=modelerOption,
)

# Models for the exclusive response variable (built first, as before).
models_dict_ex: dict[str, any] = returnModelByExtraP(
    input_resVar=resVar_ex,
    input_trainDF=trainDF_lulesh_ex,
    input_testDF=testDF_lulesh_ex,
    **_shared_modeling_args,
)

# Models for the inclusive response variable.
models_dict_in: dict[str, any] = returnModelByExtraP(
    input_resVar=resVar_in,
    input_trainDF=trainDF_lulesh_in,
    input_testDF=testDF_lulesh_in,
    **_shared_modeling_args,
)

In [None]:
# TODO
# Loop over the test environments
# Aggregate the data of each environment with mean

# Placeholder: visits every (process, iteration, size) combination of the
# test lists but does nothing yet.
for elem_process in test_lulesh_processes:
    for elem_iteration in test_lulesh_iterations:
        for elem_size in test_lulesh_sizes:
            pass
