In [12]:
# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb

# Directory that holds the CSV files containing the raw data
csvDirPath = "./csv_files/"

# Names of the NPB benchmarks
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]

# LULESH benchmark program: process counts, iteration counts and problem sizes
lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343, 512]
lulesh_iterations: list[int] = [8, 16, 32, 64, 128, 256]
lulesh_sizes: list[int] = [16, 24, 32, 48, 64, 128]

DEBUG:__main__:hello


In [13]:
# ipynb形式のライブラリノートを.py形式に変更したものをインポート
import lib
import lib.lab_lib
from lib.lab_lib import *

# 2022年6月8日

重み付きMAPEを算出する関数を実装

In [14]:
def calcWeightedMAPEscore(
    inputDF :pd.DataFrame,
    inputColumnDict :dict,
) -> float:
    """calcWeightedMAPEscore()

    Compute the call-count-weighted mean of the per-function MAPE values,
    i.e. ``sum(call * MAPE) / sum(call)`` over all rows of ``inputDF``.

    Rows are paired by position, so the index labels of ``inputDF`` do not
    matter: a filtered, re-ordered or duplicate-labelled frame gives the same
    result as a freshly built one.

    Args:
        inputDF (pd.DataFrame) : table laid out as
        |<function name>|<call count>|<MAPE>|
        inputColumnDict (dict) : maps fixed keys to the column names of ``inputDF``: {"funcName":<function name column>,"call":<call count column>,"MAPE":<MAPE column>,}

    Returns:
        float: weighted mean MAPE. ``nan`` when the total call count is zero
        (which includes an empty table).

    Raises:
        KeyError: a key is missing from ``inputColumnDict`` or a named column
            is missing from ``inputDF``.
        ValueError: the call-count or MAPE column cannot be converted to float.

    """
    _col_funcName :str = inputColumnDict["funcName"]
    _col_call :str = inputColumnDict["call"]
    _col_MAPE :str = inputColumnDict["MAPE"]

    # The function-name column is not needed for the arithmetic, but a wrong
    # column name must still fail loudly instead of being silently ignored.
    if _col_funcName not in inputDF.columns:
        raise KeyError(_col_funcName)

    # Convert to plain float arrays: this makes the pairing positional and also
    # copes with object-dtype columns (e.g. a frame built via pd.concat(...).T).
    _calls = inputDF[_col_call].to_numpy(dtype=float)
    _mapes = inputDF[_col_MAPE].to_numpy(dtype=float)

    _ret_denominator :float = float(_calls.sum())
    if _ret_denominator == 0:
        # No weight at all: the weighted mean is undefined.
        return float("nan")

    _ret_numerator :float = float((_calls * _mapes).sum())

    return _ret_numerator / _ret_denominator

def test_calcWeightedMAPEscore():
    """Check calcWeightedMAPEscore() against three hand-computed cases.

    Every case uses the same four function names and differs only in the
    call counts (the weights) and the MAPE values.
    """
    names = ["name0", "name1", "name2", "name3"]
    column_map = {"funcName":"関数名", "call":"コール回数", "MAPE":"MAPE"}

    # (call counts, MAPE values, expected weighted MAPE)
    cases = [
        ([1, 2, 3, 4], [1, 2, 3, 4], 3.0),
        ([5, 4, 3, 2], [7, 7, 7, 7], 7.0),
        ([1, 2, 3, 4], [5, 4, 3, 2], 3.0),
    ]

    for calls, mapes, expected_result in cases:
        frame = pd.DataFrame.from_dict({"関数名":names, "コール回数":calls, "MAPE":mapes})
        actually_result = calcWeightedMAPEscore(inputDF=frame, inputColumnDict=column_map)
        assert expected_result == actually_result, f"expected_result={expected_result},actually_result={actually_result}"


In [15]:
# Run the checks for calcWeightedMAPEscore(); an AssertionError is raised on a mismatch
test_calcWeightedMAPEscore()

最適モデルを返す関数を実装

In [16]:
def return_bestModelObject(
    inputDF :pd.DataFrame,
    list_expVar :list[str],
    list_resVar :list[str],
    list_modelName :list[str]
):
    """return_bestModelObject()

    Build every candidate model for the input and return the one whose
    calculated MAPE is the smallest.

    Args:
        inputDF (pd.DataFrame) : table laid out as
        |<explanatory variable 1>|<explanatory variable 2>|...|<response variable>|
        list_expVar (list[str]) : names of the explanatory-variable columns
        list_resVar (list[str]) : names of the response-variable columns
        list_modelName (list[str]) : names of the candidate models

    Returns:
        dict : {"object": the selected model object, "modelName": its model name}

    """
    candidates = Models(
        inputDF=inputDF,
        expVarColNames=list_expVar,
        resVarColNames=list_resVar,
        modelNames=list_modelName,
    )

    # Same three calls, in the same order, as before the MAPE values are read
    # (presumably: prepare data -> fit -> score; see the Models class).
    candidates.setUpDataBeforeCalcLr()
    candidates.calcLr()
    candidates.calcMAPE()

    # Keys are iterated as model names; the value stored for each key is the
    # quantity being minimised. On ties the first key wins.
    mape_by_model = candidates.returnCalculatedMAPE()
    best_name = min(mape_by_model, key=lambda name: mape_by_model.get(name))
    best_object = candidates.returnObject(modelName=best_name)

    return {"object":best_object, "modelName":best_name}

def test_return_bestModelObject():
    """Check that return_bestModelObject() selects the model that generated the data.

    Three synthetic response variables are built from the same explanatory
    variables (linear, inverse-proportional and log10 combinations); for each
    one the model of the matching kind is expected to be chosen.
    """
    exp_1 = np.linspace(1, 10, 10)
    exp_2 = np.linspace(10, 1, 10)
    exp_3 = np.linspace(20, 10, 10)

    candidate_models = ["modelLin", "modelIp", "modelLog"]
    exp_columns = ["process", "exp_2", "exp_3"]
    res_columns = ["res_"]

    # (expected model name, how one coefficient is combined with one variable)
    cases = [
        ("modelLin", lambda coef, var: coef * var),
        ("modelIp", lambda coef, var: coef / var),
        ("modelLog", lambda coef, var: coef * np.log10(var)),
    ]

    for expected, term in cases:
        # Coefficients 7, 5 and -3 are applied to exp_1, exp_2 and exp_3 respectively
        res_ = term(7, exp_1) + term(5, exp_2) + term(-3, exp_3)
        frame = pd.DataFrame.from_dict({"process":exp_1, "exp_2":exp_2, "exp_3":exp_3, "res_":res_})
        frame["functionName"] = "functionName"

        retDict = return_bestModelObject(
            inputDF=frame,
            list_expVar=exp_columns,
            list_resVar=res_columns,
            list_modelName=candidate_models,
        )

        actually = retDict["modelName"]
        assert actually == expected, f"expected={expected}, actually={actually}"
        assert retDict["object"] != None

In [17]:
# Run the checks for return_bestModelObject(); an AssertionError is raised on a mismatch
test_return_bestModelObject()

return_bestModelObject(), calcWeightedMAPEscore() を用いて重み付きMAPEの結果を出す

In [18]:
# LULESH conditions used to fit the models
train_lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343]
train_lulesh_iterations: list[int] = [8, 16, 32, 64, 128]
train_lulesh_sizes: list[int] = [16, 24, 32, 48, 64]

# LULESH condition that is predicted (a single held-out configuration)
test_lulesh_processes: list[int] = [512]
test_lulesh_iterations: list[int] = [256]
test_lulesh_sizes: list[int] = [128]

# Explanatory / response variable columns
input_list_expVarNames: list[str] = ["process", "iteration", "size"]
input_list_resVarNames: list[str] = ["#Call"]

# Candidate model names handed to return_bestModelObject()
list_modelName: list[str] = [
    "modelIp",
    "modelLog",
    "modelLinAndIp",
    "modelLinAndLog",
    "modelIpAndLin",
    "modelIpAndLog",
    "modelLogAndLin",
    "modelLogAndIp",
    "modelProcessDividedByProblemSize",
    "modelProblemSizeDividedByProcess",
    "modelLinearSumOf2elementCombination",
    "modelLinearSumOfElementCombinations",
    "modelLin"
    # "modelBasicTree",
]

input_rawDF_train: pd.DataFrame = return_rawDFinLULESH(
    processes=train_lulesh_processes,
    iterations=train_lulesh_iterations,
    sizes=train_lulesh_sizes,
    csvDirPath=csvDirPath,
)
input_rawDF_test :pd.DataFrame = return_rawDFinLULESH(
    processes=test_lulesh_processes,
    iterations=test_lulesh_iterations,
    sizes=test_lulesh_sizes,
    csvDirPath=csvDirPath
)

# Use the same function-name column name as the NPB data
input_rawDF_train = input_rawDF_train.rename(columns={"Name":"functionName"})
input_rawDF_test = input_rawDF_test.rename(columns={"Name":"functionName"})
functionNames :list[str] = list(set(input_rawDF_train["functionName"]))

list_series :list[pd.Series] = []

# Build one row per function: measured call count, predicted call count and MAPE
for functionName in functionNames:
    input_rawDF_per_function_train :pd.DataFrame = input_rawDF_train[input_rawDF_train["functionName"] == functionName]
    input_rawDF_per_function_test :pd.DataFrame = input_rawDF_test[input_rawDF_test["functionName"] == functionName]

    # Select the best model for this function with return_bestModelObject()
    bestModelDict = return_bestModelObject(inputDF = input_rawDF_per_function_train, list_expVar=input_list_expVarNames, list_resVar=input_list_resVarNames, list_modelName=list_modelName)

    # Predict the call count in the target (test) environment with the best model.
    # .item() extracts the single predicted value; float() on an array with
    # ndim > 0 is deprecated since NumPy 1.25.
    bestModel = bestModelDict["object"]
    predicted = float(np.asarray(bestModel.predict(input_rawDF_per_function_test[input_list_expVarNames])).item())

    # NOTE(review): iloc[0] assumes every training function also appears in the
    # test data, otherwise this raises IndexError - confirm against the CSV files.
    _call :float = float(input_rawDF_per_function_test.iloc[0]["#Call"])
    _MAPE :float = float(returnMapeScore(l1=[_call], l2=[predicted]))
    _series :pd.Series = pd.Series({"functionName":functionName, "call":_call, "MAPE":_MAPE, "predicted_call":predicted})
    list_series.append(_series)
# Build the input table for calcWeightedMAPEscore() (one row per function)
inputDF :pd.DataFrame = pd.concat(list_series, axis=1).T
# calcWeightedMAPEscore() itself is run on inputDF in a later cell



In [19]:
# Display the per-function table (functionName, call, MAPE, predicted_call)
inputDF

Unnamed: 0,functionName,call,MAPE,predicted_call
0,MPI_Init(),1.0,0.0,1.0
1,MPI_Reduce(),1.0,0.0,1.0
2,void_Domain::CreateRegionIndexSets(Int_t_Int_t),1.0,0.0,1.0
3,void_Domain::~Domain(),1.0,0.0,1.0
4,void_Domain::BuildMesh(Int_t_Int_t_Int_t),1.0,0.0,1.0
5,MPI_Barrier(),1.0,0.0,1.0
6,.TAU_application,1.0,0.0,1.0
7,void_CommSyncPosVel(Domain,256.0,0.0,256.0
8,Real_t_CalcElemVolume(const,538968000.0,99.349868,3504002.239722
9,void_Domain::SetupSymmetryPlanes(Int_t),1.0,0.0,1.0


In [20]:
# Weighted MAPE over all rows of inputDF, using the "call" column as the weight
retNum :float = calcWeightedMAPEscore(inputDF=inputDF, inputColumnDict={"funcName":"functionName", "call":"call", "MAPE":"MAPE"})

# Display the result
retNum

99.3443442202314

重み付きMAPEの算出@NPB

In [21]:
# NPB conditions used to fit the models
train_npb_processes :list[int] = [2,4,8,16,32,64,128]
train_npb_sizes :list[str] = ["A", "B", "C"]

# NPB condition that is predicted
test_npb_processes :list[int] = [256]
test_npb_sizes :list[str] = ["D"]

# Candidate model names handed to return_bestModelObject()
list_modelName: list[str] = [
    "modelIp",
    "modelLog",
    "modelLinAndIp",
    "modelLinAndLog",
    "modelIpAndLin",
    "modelIpAndLog",
    "modelLogAndLin",
    "modelLogAndIp",
    "modelProcessDividedByProblemSize",
    "modelProblemSizeDividedByProcess",
    "modelLinearSumOf2elementCombination",
    "modelLinearSumOfElementCombinations",
    "modelLin"
    # "modelBasicTree",
]

# NPB benchmark to evaluate
benchmarkName :str = "cg"

input_rawDF_train :pd.DataFrame = return_rawDF_with_init_param(
    benchmark_name=benchmarkName, classes=train_npb_sizes, processes=train_npb_processes
)
input_rawDF_test :pd.DataFrame = return_rawDF_with_init_param(
    benchmark_name=benchmarkName, classes=test_npb_sizes, processes=test_npb_processes
)

# Explanatory variables: every column except the identifying columns and the
# response variable. list.remove() raises ValueError if a listed column is absent.
input_expVar :list[str] = input_rawDF_train.columns.tolist()
for element_be_removed in [
    "functionName",
    "functionCallNum",
    "intBenchmarkClass",
    "benchmarkName",
    "benchmarkClass",
]:
    input_expVar.remove(element_be_removed)
input_resVar :list[str] = ["functionCallNum"]
functionNames :list[str] = list(set(input_rawDF_train["functionName"]))

list_series :list[pd.Series] = []

# Build one row per function: measured call count, predicted call count and MAPE
for functionName in functionNames:
    input_rawDF_per_function_train :pd.DataFrame = input_rawDF_train[input_rawDF_train["functionName"] == functionName]
    input_rawDF_per_function_test :pd.DataFrame = input_rawDF_test[input_rawDF_test["functionName"] == functionName]

    bestModelDict = return_bestModelObject(inputDF=input_rawDF_per_function_train, list_expVar=input_expVar, list_resVar=input_resVar, list_modelName=list_modelName)

    # .item() extracts the single predicted value; float() on an array with
    # ndim > 0 is deprecated since NumPy 1.25.
    bestModel = bestModelDict["object"]
    predicted = float(np.asarray(bestModel.predict(inputDF=input_rawDF_per_function_test[input_expVar])).item())

    # NOTE(review): iloc[0] assumes every training function also appears in the
    # test data, otherwise this raises IndexError - confirm against the raw data.
    _call :float = float(input_rawDF_per_function_test.iloc[0]["functionCallNum"])
    _MAPE :float = float(returnMapeScore(l1=[_call], l2=[predicted]))
    _series :pd.Series = pd.Series({"functionName":functionName, "call":_call, "MAPE":_MAPE, "predicted_call":predicted})
    list_series.append(_series)

# Input table for calcWeightedMAPEscore() (one row per function)
inputDF :pd.DataFrame = pd.concat(list_series, axis=1).T

retNum :float = calcWeightedMAPEscore(inputDF=inputDF, inputColumnDict={"funcName":"functionName", "call":"call", "MAPE":"MAPE"})

retNum




174.8135168264971

各ベンチマークプログラムの入力に対して重み付きMAPEを算出する関数

In [22]:
def returnWeightedMAPEScoreFromDF(inputDFtrain :pd.DataFrame, inputDFtest :pd.DataFrame, list_expVar :list[str], list_resVar :list[str], list_modelName):
    """Placeholder: ignores every argument and always returns -1.

    NOTE(review): presumably meant to compute the weighted MAPE directly from
    already-loaded train/test DataFrames; that logic is not implemented here.
    Confirm the intent before relying on the return value.
    """
    return -1

def returnWeightedMapeScoreFromCondition(benchmarkName :str, trainCondition :dict, testCondition :dict, csvDirPath :str, modelNames :list[str] = None):
    """Compute the call-count-weighted MAPE for one benchmark program.

    For every function found in the training data, the best model is selected
    with return_bestModelObject(), the call count under the test condition is
    predicted, and the per-function MAPE values are combined with
    calcWeightedMAPEscore().

    Args:
        benchmarkName (str) : "lulesh" or one of "cg", "ep", "ft", "is", "lu", "mg"
        trainCondition (dict) : conditions used for fitting.
            lulesh reads the keys "processes", "iterations" and "sizes";
            the NPB benchmarks read "processes" and "sizes".
        testCondition (dict) : conditions to predict, same keys as trainCondition
        csvDirPath (str) : directory of the raw CSV files (only passed on for lulesh)
        modelNames (list[str]) : candidate model names. When omitted, the
            notebook-level variable ``list_modelName`` is used, as before.

    Returns:
        float : weighted MAPE, or -1 (after a warning) for an unknown benchmark name

    """
    if benchmarkName == "lulesh":
        # Load the raw LULESH data
        rawDF_train :pd.DataFrame = return_rawDFinLULESH(
            processes=trainCondition["processes"],
            iterations=trainCondition["iterations"],
            sizes=trainCondition["sizes"],
            csvDirPath=csvDirPath
        )
        # The test data must go into rawDF_test; storing it in rawDF_train
        # again would overwrite the training data and leave rawDF_test unbound.
        rawDF_test :pd.DataFrame = return_rawDFinLULESH(
            processes=testCondition["processes"],
            iterations=testCondition["iterations"],
            sizes=testCondition["sizes"],
            csvDirPath=csvDirPath
        )
        rawDF_train = rawDF_train.rename(columns={"Name":"functionName"})
        rawDF_test = rawDF_test.rename(columns={"Name":"functionName"})

        # Explanatory and response variables
        list_expVar :list[str] = ["process", "iteration", "size"]
        list_resVar :list[str] = ["#Call"]

    elif (benchmarkName in ["cg", "ep", "ft", "is", "lu", "mg"]):
        # Load the raw NPB data
        rawDF_train :pd.DataFrame = return_rawDF_with_init_param(
            benchmark_name = benchmarkName, classes=trainCondition["sizes"], processes = trainCondition["processes"]
        )
        rawDF_test :pd.DataFrame = return_rawDF_with_init_param(
            benchmark_name = benchmarkName, classes=testCondition["sizes"], processes = testCondition["processes"]
        )

        # Explanatory variables: every column except the identifying columns
        # and the response variable
        list_expVar :list[str] = rawDF_train.columns.tolist()
        for element_be_removed in [
            "functionName",
            "functionCallNum",
            "intBenchmarkClass",
            "benchmarkName",
            "benchmarkClass",
        ]:
            list_expVar.remove(element_be_removed)
        list_resVar :list[str] = ["functionCallNum"]

    else:
        warnings.warn(f"{benchmarkName}は想定外のベンチマークプログラム名です")
        return -1

    functionNames :list[str] = list(set(rawDF_train["functionName"]))

    # Fall back to the notebook-level model list to stay compatible with
    # existing four-argument calls.
    _modelNames = list_modelName if modelNames is None else modelNames

    # Local accumulator: appending to a notebook-level list would mix in rows
    # left over from earlier cells or earlier calls.
    list_series :list[pd.Series] = []

    # Build one row per function
    for functionName in functionNames:
        rawDF_per_function_train :pd.DataFrame = rawDF_train[rawDF_train["functionName"] == functionName]
        rawDF_per_function_test :pd.DataFrame = rawDF_test[rawDF_test["functionName"] == functionName]

        bestModelDict :dict = return_bestModelObject(inputDF=rawDF_per_function_train, list_expVar=list_expVar, list_resVar=list_resVar, list_modelName=_modelNames)
        bestModel = bestModelDict["object"]
        # .item() extracts the single predicted value; float() on an array with
        # ndim > 0 is deprecated since NumPy 1.25.
        predicted = float(np.asarray(bestModel.predict(inputDF=rawDF_per_function_test[list_expVar])).item())
        # NOTE(review): iloc[0] assumes the function also appears in the test data
        _call :float = float(rawDF_per_function_test.iloc[0][list_resVar[0]])
        _MAPE :float = float(returnMapeScore(l1=[_call], l2=[predicted]))
        _series :pd.Series = pd.Series({"functionName":functionName, "call":_call, "MAPE":_MAPE, "predicted_call":predicted})
        list_series.append(_series)

    DF_toCalcWeightedMAPE :pd.DataFrame = pd.concat(list_series, axis=1).T

    retNum :float = calcWeightedMAPEscore(inputDF=DF_toCalcWeightedMAPE, inputColumnDict={"funcName":"functionName", "call":"call", "MAPE":"MAPE"})

    return retNum


# 2022年4月17日～

次のような表を作成する

各セルの書式: 採用される割合 [%] (MAPE の最大値 [%], MAPE の最小値 [%])

| ベンチマークプログラム名 | 線形モデル               | 対数モデル               | 反比例モデル              |
|--------------|---------------------|---------------------|---------------------|
| str          | float(float, float) | float(float, float) | float(float, float) |


最終的な目標は、全ベンチマークプログラム分のこの表を一気に作成することだが、既存のライブラリ関数などを利用し、まずはベンチマークごとに作成可能にする。

メモ

## 実装予定

1. 行方向に最小値を検出
2. 最小値以外をNaNに変更
3. 列方向に最小値と最大値を検出

## 
