In [1]:
# Capture the kernel's current working directory via the %pwd magic.
jupyter_pwd = %pwd
# If the kernel was started at the filesystem root, switch to /workspace
# (presumably the project root, so the relative paths used in later cells
# such as ./lib and ./csv_files resolve -- TODO confirm).
if jupyter_pwd == "/":
    %cd /workspace


In [2]:
# Import the library that is kept in notebook (.ipynb) form
%run ./lib/lib.ipynb

# Name of the directory that holds the CSV files containing the raw data
csvDirPath = "./csv_files/"

# List of NPB benchmark names
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]

# Process counts, iteration counts and problem sizes of the LULESH benchmark program
lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343, 512]
lulesh_iterations: list[int] = [8, 16, 32, 64, 128, 256]
lulesh_sizes: list[int] = [16, 24, 32, 48, 64, 128]

DEBUG:__main__:hello


In [3]:
# ipynb形式のライブラリノートを.py形式に変更したものをインポート
import lib
import lib.lab_lib
from lib.lab_lib import *

DEBUG:lib.lab_lib:hello


In [4]:
# Parameter grid used to fit the models.
train_lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343]
train_lulesh_iterations: list[int] = [8, 16, 32, 64, 128]
train_lulesh_sizes: list[int] = [16, 24, 32, 48, 64]

# Single, larger configuration that the fitted models are evaluated on.
test_lulesh_processes: list[int] = [512]
test_lulesh_iterations: list[int] = [256]
test_lulesh_sizes: list[int] = [128]


def _load_lulesh_df(
    list_process: list[int],
    list_iteration: list[int],
    list_size: list[int],
    csvDir: str,
) -> pd.DataFrame:
    """Load raw LULESH profile rows and prepare the timing columns.

    The raw frame returned by ``return_rawDF_lulesh`` gets its index reset
    (the previous index is kept as a column), its ``Inclusive`` column passed
    through ``convertPprofTime``, and a new ``InclusivePerCall`` column
    (converted ``Inclusive`` divided by ``#Call``) appended. The result is
    sorted by ``#Call``.

    Args:
        list_process: Process counts to load.
        list_iteration: Iteration counts to load.
        list_size: Problem sizes to load.
        csvDir: Directory that holds the raw CSV files.

    Returns:
        The prepared DataFrame, sorted by ``#Call``.
    """
    df: pd.DataFrame = return_rawDF_lulesh(
        list_process=list_process,
        list_iteration=list_iteration,
        list_size=list_size,
        csvDir=csvDir,
    ).reset_index()

    # Convert once and reuse the result for both columns. Building the
    # per-call column directly as a float Series avoids writing floats into an
    # integer placeholder column row by row.
    # NOTE(review): assumes convertPprofTime returns a plain number -- confirm.
    inclusive: pd.Series = df["Inclusive"].map(convertPprofTime)
    # A zero "#Call" yields inf/NaN here instead of raising ZeroDivisionError.
    df["InclusivePerCall"] = inclusive / df["#Call"].astype(float)
    df["Inclusive"] = inclusive

    return df.sort_values("#Call")


trainDF_lulesh: pd.DataFrame = _load_lulesh_df(
    list_process=train_lulesh_processes,
    list_iteration=train_lulesh_iterations,
    list_size=train_lulesh_sizes,
    csvDir=csvDirPath,
)

testDF_lulesh: pd.DataFrame = _load_lulesh_df(
    list_process=test_lulesh_processes,
    list_iteration=test_lulesh_iterations,
    list_size=test_lulesh_sizes,
    csvDir=csvDirPath,
)

# Explanatory variables shared by every model built below.
expVar: list[str] = ["process", "iteration", "size"]
# Response variable currently being modelled (reassigned per stage in the loop).
resVar: str

# One real-valued SymPy symbol per explanatory variable; used to parse the
# Extra-P model expressions.
dict_symbols = {elem: symbols(elem, real=True) for elem in expVar}
# Substitutions that evaluate a parsed model at the test configuration.
target_env = [
    (dict_symbols["size"], test_lulesh_sizes[0]),
    (dict_symbols["iteration"], test_lulesh_iterations[0]),
    (dict_symbols["process"], test_lulesh_processes[0]),
]

# File that is (re)written with the Extra-P input data before every run.
filePath: str = "./extra-p_docker/share/input_lulesh_perFunc.txt"

# Candidate models for predicting the number of function calls.
list_modelName: list[str] = [
    "modelIp",
    "modelLog",
    "modelLinAndIp",
    "modelLinAndLog",
    "modelIpAndLin",
    "modelIpAndLog",
    "modelLogAndLin",
    "modelLogAndIp",
    "modelProcessDividedByProblemSize",
    "modelProblemSizeDividedByProcess",
    "modelInfiniteProductOfProblemSizeMultipliedByProcesses",
    "modelInfiniteProductOfProblemSizeDividedByProcesses",
    "modelLinearSumOf2elementCombination",
    "modelLinearSumOfElementCombinations",
    "modelLinearSumOf2elementCombinationWithSquared",
    "modelLinearSumOf2elementCombinationWithCubed",
    "modelSquareRootOfProcess",
    "modelSquareRootTimesOtherElems",
    "modelObeyOneParameter",
    "modelLin",
    # "modelBasicTree",
]


def _predict_with_extrap(
    inputDF: pd.DataFrame,
    expVar: list[str],
    resVar: str,
    dict_symbols: dict,
    target_env: list,
    filePath: str,
):
    """Fit an Extra-P model for ``resVar`` and evaluate it at ``target_env``.

    Args:
        inputDF: Training rows of a single function.
        expVar: Names of the explanatory variables.
        resVar: Name of the response column to model.
        dict_symbols: Mapping from explanatory-variable name to SymPy symbol.
        target_env: ``(symbol, value)`` pairs substituted into the model.
        filePath: Where the Extra-P input file is written.

    Returns:
        The value of the parsed model expression after substitution and
        ``evalf()``.

    Raises:
        RuntimeError: If the Extra-P output contains no "Model" line.
    """
    # Write the Extra-P input file.
    with open(filePath, mode="w") as f:
        f.write(
            gen_ExtraPinputDataFromDF(inputDF=inputDF, expVar=expVar, resVar=resVar)
        )

    # Run Extra-P and keep only the lines that contain "Model" (the same
    # filtering the former "| grep Model" shell pipe performed).
    completed = subprocess.run(
        ["extrap", "--text", filePath],
        stdout=subprocess.PIPE,
        text=True,
    )
    model_lines = [line for line in completed.stdout.splitlines() if "Model" in line]
    if not model_lines:
        raise RuntimeError(
            f"Extra-P produced no model for {resVar!r} "
            f"(exit code {completed.returncode}, input file {filePath})"
        )

    # Tidy the Extra-P output so that SymPy can parse it.
    res_str: str = "\n".join(model_lines) + "\n"
    res_str = res_str.replace("Model: ", "")
    res_str = convert_log(res_str)

    model_sympy = sympify(res_str, locals=dict_symbols)
    return model_sympy.subs(target_env).evalf()


functionNames: list[str] = sorted(set(trainDF_lulesh["Name"]))
list_series: list[pd.Series] = []
for functionName in functionNames:

    trainDF_perFunc: pd.DataFrame = trainDF_lulesh[
        trainDF_lulesh["Name"] == functionName
    ]
    testDF_perFunc: pd.DataFrame = testDF_lulesh[
        testDF_lulesh["Name"] == functionName
    ].rename({"Name": "functionName"}, axis="columns")

    # A function that never appears in the test configuration has no measured
    # value to compare against, so it cannot be evaluated.
    if testDF_perFunc.empty:
        print(f"skip {functionName}: no rows in the test data")
        continue

    # Total execution time, predicted directly.
    resVar = "Inclusive"
    predicted_all = _predict_with_extrap(
        inputDF=trainDF_perFunc,
        expVar=expVar,
        resVar=resVar,
        dict_symbols=dict_symbols,
        target_env=target_env,
        filePath=filePath,
    )

    # Execution time per call.
    resVar = "InclusivePerCall"
    predicted_perFunc = _predict_with_extrap(
        inputDF=trainDF_perFunc,
        expVar=expVar,
        resVar=resVar,
        dict_symbols=dict_symbols,
        target_env=target_env,
        filePath=filePath,
    )

    # Number of function calls: pick the best of the candidate models.
    resVar = "#Call"
    # The rename happens only here, after the Extra-P input was generated from
    # the frame that still carries the "Name" column.
    trainDF_perFunc = trainDF_perFunc.rename({"Name": "functionName"}, axis="columns")

    bestModelDict: dict = return_bestModelObject(
        inputDF=trainDF_perFunc,
        list_expVar=expVar,
        list_resVar=[resVar],
        list_modelName=list_modelName,
    )
    bestModel = bestModelDict["object"]

    # .item() extracts the single predicted value regardless of the array's
    # number of dimensions.
    predicted = float(
        np.asarray(bestModel.predict(inputDF=testDF_perFunc[expVar])).item()
    )

    # Measured values of the test configuration, and the collected result row.
    _test_row: pd.Series = testDF_perFunc.iloc[0]
    real_time: float = _test_row["Inclusive"]
    real_call: float = _test_row["#Call"]

    _series: pd.Series = pd.Series(
        {
            "functionName": functionName,
            "real_time": real_time,
            "predicted_all": predicted_all,
            "predicted_from_perCall": predicted_perFunc * predicted,
            "real_call": real_call,
            "predicted_call": predicted,
        }
    )

    list_series.append(_series)

Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating experiment]
Generating models: 100%|██████████| [00:00<00:00]
Loading file: 100%|██████████| [00:00<00:00, Validating 

In [5]:
# Place the per-function result Series side by side, then flip the table so
# that each function becomes one row.
pd.concat(list_series, axis="columns").transpose()

Unnamed: 0,functionName,real_time,predicted_all,predicted_from_perCall,real_call,predicted_call
0,.TAU_application,1149.925,1288.20034616662,1288.20034616662,1.0,1.0
1,MPI_Allreduce(),110.861,155.57144884615,184.113257775612,255.0,255.0
2,MPI_Barrier(),0.039,0.0061337046666666,0.0061337046666666,1.0,1.0
3,MPI_Comm_rank(),0.002,0.0034066141963997,0.0024993797267453,2309.0,2309.0
4,MPI_Comm_size(),0.0,4.66073333333333e-07,4.66073333333333e-07,1.0,1.0
5,MPI_Finalize(),0.653,-0.87768774903484,-0.87768774903484,1.0,1.0
6,MPI_Init(),0.593,0.36815920709653,0.36815920709653,1.0,1.0
7,MPI_Irecv(),0.018,0.0237588378914084,0.0173408882000237,8965.8,9907.354054
8,MPI_Isend(),0.248,1.54147758517037,3.31738815983449,8965.8,9907.354054
9,MPI_Reduce(),0.069,0.115818278291975,0.115818278291975,1.0,1.0


In [6]:
# Show the test rows left in testDF_perFunc by the last iteration of the
# per-function loop, with the old index moved into a column.
testDF_perFunc.reset_index(drop=False)

Unnamed: 0,level_0,index,%Time,Exclusive,Inclusive,#Call,#Subrs,functionName,process,iteration,size,InclusivePerCall
0,31,31,0.0,0.000256,2.56e-07,0.001953,0.0,void_VerifyAndWriteFinalOutput(Real_t_Domain,512,256,128,0.000131


In [None]:
# Clear the user-defined names from the interactive namespace.
# NOTE(review): without the -f flag, %reset asks for confirmation -- confirm
# whether an unattended "Run All" is expected to get past this cell.
%reset

***

---

___

# 2022年4月17日～

次のような表を作成する

採用される割合 [%] (MAPE の最大値 [%], MAPE の最小値 [%])

| ベンチマークプログラム名 | 線形モデル               | 対数モデル               | 反比例モデル              |
|--------------|---------------------|---------------------|---------------------|
| str          | float(float, float) | float(float, float) | float(float, float) |


目標は、全ベンチマークプログラム分の表を一気に作成することだが、既存のライブラリ関数などを利用し、まずはベンチマークごとに表を作成可能にする。

メモ

## 実装予定

1. 行方向に最小値を検出
2. 最小値以外をNaNに変更
3. 列方向に最小値と最大値を検出

## 


***

---

___