In [1]:
# If the kernel starts in the filesystem root, move to /workspace so that the
# relative paths used by the cells below (./lib, ./csv_files, ...) resolve.
# NOTE(review): presumably "/" is the start directory inside the container — confirm.
jupyter_pwd = %pwd
if jupyter_pwd == "/":
    %cd /workspace


In [2]:
# Import the library that is kept in notebook (.ipynb) form
%run ./lib/lib.ipynb

# Name of the directory that holds the CSV files containing the raw data
csvDirPath = "./csv_files/"

# List of NPB benchmark names
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]

# Process counts, iteration counts and problem sizes of the LULESH benchmark program
lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343, 512]
lulesh_iterations: list[int] = [8, 16, 32, 64, 128, 256]
lulesh_sizes: list[int] = [16, 24, 32, 48, 64, 128]

DEBUG:__main__:hello


In [3]:
# ipynb形式のライブラリノートを.py形式に変更したものをインポート
import lib
import lib.lab_lib
from lib.lab_lib import *

DEBUG:lib.lab_lib:hello


# TODO

1. ✅CG, MGで時間をExtra-Pから取得できることを確認
2. ✅時間の単位を「秒」にする
    * 列「Inclusive total msec」に一部存在する「:」で区切られた値は、msec 単位の値ではなく「分:秒」形式の値である
3. ✅相対コストの算出関数の実装
4. ✅Extra-P のDocker環境の構築
5. Extra-P による予測との組み合わせを行う
    * なにをどう組み合わせるのかがわかっていないので、それは確かめる
    * 元データ, Extra-P単体で予測したデータ, <何か> で結果を作成

# 予測結果に必要なもの

* ✅元データ（予測環境の生データ）
* ✅Extra-P単体で予測したデータ
* Extra-Pで各関数の実行時間を予測し、それにコール回数を掛けた値で予測したデータ
    * ✅コール回数で実行時間を割る
    * ✅Extra-Pでモデルを作成 <- コール回数で実行時間を割る
    * ✅👆を利用して予測
    * ✅本プログラムでモデルを作成 <- コール回数で実行時間を割る
    * ✅👆を利用して予測
    * 「コール回数 * 実行時間」を計算して予測

上記のセルよりモデルは次式

$$ 2.5521930556315375 + 0.013737041505459383 * iteration + 3.41165137739477 * 10^7 * iteration * size^3 * \log_2 size $$

In [4]:
# Training grid: the measured values on each axis without the largest one.
# Process counts are the cubes 2**3 .. 7**3; iteration counts are 2**3 .. 2**7.
train_lulesh_processes: list[int] = [side**3 for side in range(2, 8)]
train_lulesh_iterations: list[int] = [2**exponent for exponent in range(3, 8)]
train_lulesh_sizes: list[int] = [16, 24, 32, 48, 64]

# Prediction target: the single largest value on each axis.
test_lulesh_processes: list[int] = [8**3]  # 512
test_lulesh_iterations: list[int] = [2**8]  # 256
test_lulesh_sizes: list[int] = [2**7]  # 128

In [5]:
# Raw profile of the prediction-target configuration, with a fresh 0..n-1 index
# (reset_index keeps the previous index as an "index" column).
targetDF_lulesh: pd.DataFrame = return_rawDF_lulesh(
    list_process=test_lulesh_processes,
    list_iteration=test_lulesh_iterations,
    list_size=test_lulesh_sizes,
    csvDir=csvDirPath,
).reset_index()

# Inclusive time per call = convertPprofTime("Inclusive") / "#Call".
# Computed column-wise so the column is float from the start: writing floats
# into an int column pre-filled with -1 relies on an implicit upcast that
# pandas >= 2.1 deprecates, and a row-by-row iterrows loop is slow.
# NOTE(review): a "#Call" of 0 now gives inf/NaN for that row rather than
# possibly raising — confirm such rows cannot occur.
targetDF_lulesh["InclusivePerCall"] = targetDF_lulesh["Inclusive"].map(
    convertPprofTime
) / targetDF_lulesh["#Call"].astype(float)
targetDF_lulesh

Unnamed: 0,index,%Time,Exclusive,Inclusive,#Call,#Subrs,Name,process,iteration,size,InclusivePerCall
0,0,100.0,0.00348,19:09.925,1.0,1.0,.TAU_application,512,256,128,1149.925
1,1,100.0,13:13.799,19:09.925,1.0,2828.0,int_main(int_char_**),512,256,128,1149.925
2,2,16.8,1:53.429,3:12.675,256.0,536871000.0,void_CalcKinematicsForElems(Domain,512,256,128,0.7526367
3,3,9.6,1:50.861,1:50.861,255.0,0.0,MPI_Allreduce(),512,256,128,0.434749
4,4,6.9,1:19.552,1:19.552,538968000.0,0.0,Real_t_CalcElemVolume(const,512,256,128,1.476006e-07
5,5,2.9,2037,33621,769.0,10503.8,void_CommSend(Domain,512,256,128,0.04372042
6,6,2.7,31334,31334,769.0,0.0,MPI_Waitall(),512,256,128,0.04074642
7,7,1.1,12869,12869,8965.8,0.0,MPI_Wait(),512,256,128,0.001435343
8,8,0.7,2773,7888,256.0,2790.0,void_CommSyncPosVel(Domain,512,256,128,0.0308125
9,9,0.6,660,7184,257.0,5344.8,void_CommSBN(Domain,512,256,128,0.02795331


In [6]:
# Raw profiles of every training configuration, with a fresh 0..n-1 index
# (reset_index keeps the previous index as an "index" column).
trainDF_lulesh: pd.DataFrame = return_rawDF_lulesh(
    list_process=train_lulesh_processes,
    list_iteration=train_lulesh_iterations,
    list_size=train_lulesh_sizes,
    csvDir=csvDirPath,
).reset_index()

# Inclusive time per call = convertPprofTime("Inclusive") / "#Call".
# Computed column-wise so the column is float from the start: writing floats
# into an int column pre-filled with -1 relies on an implicit upcast that
# pandas >= 2.1 deprecates, and a row-by-row iterrows loop over several
# thousand rows is slow.
# NOTE(review): a "#Call" of 0 now gives inf/NaN for that row rather than
# possibly raising — confirm such rows cannot occur.
trainDF_lulesh["InclusivePerCall"] = trainDF_lulesh["Inclusive"].map(
    convertPprofTime
) / trainDF_lulesh["#Call"].astype(float)
trainDF_lulesh

Unnamed: 0,index,%Time,Exclusive,Inclusive,#Call,#Subrs,Name,process,iteration,size,InclusivePerCall
0,0,100.0,0.00387,6478,1.000000,1.000,.TAU_application,8,8,16,6.478000e+00
1,1,100.0,25,6478,1.000000,100.125,int_main(int_char_**),8,8,16,6.478000e+00
2,2,87.4,5663,5663,1.000000,0.000,MPI_Finalize(),8,8,16,5.663000e+00
3,3,11.9,768,768,1.000000,0.000,MPI_Init(),8,8,16,7.680000e-01
4,4,0.2,6,11,8.000000,32768.000,void_CalcKinematicsForElems(Domain,8,8,16,1.375000e-03
...,...,...,...,...,...,...,...,...,...,...,...
4795,27,0.0,0.00875,0.00875,1.000000,0.000,void_Domain::SetupSymmetryPlanes(Int_t),343,128,64,8.750000e-06
4796,28,0.0,0.00158,0.00346,1.000000,2.000,void_ParseCommandLineOptions(int_char_**_Int_t...,343,128,64,3.460000e-06
4797,29,0.0,0.00188,0.00188,2.000000,0.000,StrToInt,343,128,64,9.400000e-07
4798,30,0.0,0.000443,0.000443,1.000000,0.000,MPI_Comm_size(),343,128,64,4.430000e-07


In [7]:
# Raw profile of the test (prediction-target) configuration, with a fresh
# 0..n-1 index (reset_index keeps the previous index as an "index" column).
testDF_lulesh: pd.DataFrame = return_rawDF_lulesh(
    list_process=test_lulesh_processes,
    list_iteration=test_lulesh_iterations,
    list_size=test_lulesh_sizes,
    csvDir=csvDirPath,
).reset_index()

# Inclusive time per call = convertPprofTime("Inclusive") / "#Call".
# Computed column-wise so the column is float from the start: writing floats
# into an int column pre-filled with -1 relies on an implicit upcast that
# pandas >= 2.1 deprecates, and a row-by-row iterrows loop is slow.
# NOTE(review): a "#Call" of 0 now gives inf/NaN for that row rather than
# possibly raising — confirm such rows cannot occur.
testDF_lulesh["InclusivePerCall"] = testDF_lulesh["Inclusive"].map(
    convertPprofTime
) / testDF_lulesh["#Call"].astype(float)
testDF_lulesh

Unnamed: 0,index,%Time,Exclusive,Inclusive,#Call,#Subrs,Name,process,iteration,size,InclusivePerCall
0,0,100.0,0.00348,19:09.925,1.0,1.0,.TAU_application,512,256,128,1149.925
1,1,100.0,13:13.799,19:09.925,1.0,2828.0,int_main(int_char_**),512,256,128,1149.925
2,2,16.8,1:53.429,3:12.675,256.0,536871000.0,void_CalcKinematicsForElems(Domain,512,256,128,0.7526367
3,3,9.6,1:50.861,1:50.861,255.0,0.0,MPI_Allreduce(),512,256,128,0.434749
4,4,6.9,1:19.552,1:19.552,538968000.0,0.0,Real_t_CalcElemVolume(const,512,256,128,1.476006e-07
5,5,2.9,2037,33621,769.0,10503.8,void_CommSend(Domain,512,256,128,0.04372042
6,6,2.7,31334,31334,769.0,0.0,MPI_Waitall(),512,256,128,0.04074642
7,7,1.1,12869,12869,8965.8,0.0,MPI_Wait(),512,256,128,0.001435343
8,8,0.7,2773,7888,256.0,2790.0,void_CommSyncPosVel(Domain,512,256,128,0.0308125
9,9,0.6,660,7184,257.0,5344.8,void_CommSBN(Domain,512,256,128,0.02795331


In [8]:
# Plan for this section (✅ = done):
#   ✅ Loop over the functions
#        * for now this is a trial run with a single function
#   ✅ Create the Extra-P input data (total execution time)
#   ✅ Create the Extra-P input data (time per call)
#   ✅ Build a model with Extra-P (total execution time)
#   ✅ Build a model with Extra-P (time per call)
#   ✅ Predict the time with Extra-P (total execution time)
#   ✅ Predict the time with Extra-P (time per call)
#   Build a model for predicting the number of function calls
#   Predict the number of function calls
#   Combine the predicted execution time and the predicted call count

functionNames: list[str] = sorted(set(trainDF_lulesh["Name"]))

# Trial run on one function only: the last name in sorted order.
# Indexing makes the choice explicit and fails loudly (IndexError) when the
# training frame has no functions, instead of reusing a stale kernel variable.
functionName: str = functionNames[-1]

# Rows of the training / test frames that belong to the selected function
trainDF_perFunc: pd.DataFrame = trainDF_lulesh[trainDF_lulesh["Name"] == functionName]
testDF_perFunc: pd.DataFrame = testDF_lulesh[testDF_lulesh["Name"] == functionName]

# Explanatory variable columns; the response variable is assigned by later cells
expVar: list[str] = ["process", "iteration", "size"]
resVar: str

In [9]:
# 総実行時間

In [10]:
resVar = "Inclusive"
str_ExtraPinputData: str = gen_ExtraPinputDataFromDF(
    inputDF=trainDF_perFunc,
    expVar=expVar,
    resVar=resVar,
)

filePath: str = f"./extra-p_docker/share/input_lulesh_perFunc.txt"

with open(filePath, mode="w") as f:
    f.write(str_ExtraPinputData)

%sx extrap --text ./extra-p_docker/share/input_lulesh_perFunc.txt

['',
 'Loading file: |          | [00:00<?]',
 'Loading file: 100%|██████████| [00:00<00:00, Creating calltree]',
 'Loading file: 100%|██████████| [00:00<00:00, Validating experiment]',
 'Loading file: 100%|██████████| [00:00<00:00, Validating experiment]',
 '',
 'Generating models: |          | [00:00<?]',
 'Generating models: 100%|██████████| [00:00<00:00]',
 'Callpath: reg',
 '\tMetric: time',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,1.60E+01) Mean: 9.12E-03 Median: 9.12E-03',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,2.40E+01) Mean: 1.14E-02 Median: 1.14E-02',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,3.20E+01) Mean: 1.06E-02 Median: 1.06E-02',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,4.80E+01) Mean: 1.06E-02 Median: 1.06E-02',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,6.40E+01) Mean: 1.14E-02 Median: 1.14E-02',
 '\t\tMeasurement point: (1.60E+01,8.00E+00,1.60E+01) Mean: 9.00E-03 Median: 9.00E-03',
 '\t\tMeasurement point: (1.60E+01,8.00E+00,2.40E+01) Mean: 1.09E-02 Med

$$ 総実行時間 =  0.0022275601939364897 + 5.478738185049739 * 10 ^ {-5} * size^{\frac{1}{4}} * \log_2{size} $$

In [11]:
# Evaluate the fitted total-execution-time formula above at the problem size
# of the prediction-target environment.
_target_size = test_lulesh_sizes[0]
print(
    0.0022275601939364897
    + 5.478738185049739 * 10 ** (-5) * _target_size ** (1 / 4) * np.log2(_target_size)
)

0.003517534557914641


In [12]:
# 1回実行あたり

In [13]:
resVar = "InclusivePerCall"
str_ExtraPinputData: str = gen_ExtraPinputDataFromDF(
    inputDF=trainDF_perFunc,
    expVar=expVar,
    resVar=resVar,
)

filePath: str = f"./extra-p_docker/share/input_lulesh_perFunc.txt"

with open(filePath, mode="w") as f:
    f.write(str_ExtraPinputData)

%sx extrap --text ./extra-p_docker/share/input_lulesh_perFunc.txt

['',
 'Loading file: |          | [00:00<?]',
 'Loading file: 100%|██████████| [00:00<00:00, Creating calltree]',
 'Loading file: 100%|██████████| [00:00<00:00, Validating experiment]',
 'Loading file: 100%|██████████| [00:00<00:00, Validating experiment]',
 '',
 'Generating models: |          | [00:00<?]',
 'Generating models: 100%|██████████| [00:00<00:00]',
 'Callpath: reg',
 '\tMetric: time',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,1.60E+01) Mean: 7.30E-05 Median: 7.30E-05',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,2.40E+01) Mean: 9.12E-05 Median: 9.12E-05',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,3.20E+01) Mean: 8.48E-05 Median: 8.48E-05',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,4.80E+01) Mean: 8.48E-05 Median: 8.48E-05',
 '\t\tMeasurement point: (8.00E+00,8.00E+00,6.40E+01) Mean: 9.12E-05 Median: 9.12E-05',
 '\t\tMeasurement point: (1.60E+01,8.00E+00,1.60E+01) Mean: 7.20E-05 Median: 7.20E-05',
 '\t\tMeasurement point: (1.60E+01,8.00E+00,2.40E+01) Mean: 8.72E-05 Med

$$ 一回当たりの実行時間 = 5.330340491459821*10^{-5} + 7.744229122486689*10^{-6} * \log_2{size} $$

In [14]:
# Evaluate the fitted time-per-call formula above at the problem size of the
# prediction-target environment.
_target_size = test_lulesh_sizes[0]
print(
    5.330340491459821 * 10 ** (-5)
    + 7.744229122486689 * 10 ** (-6) * np.log2(_target_size)
)

0.00010751300877200504


In [15]:
# Fit every candidate model on the selected function's training rows and
# compare them by MAPE.
# NOTE(review): this cell was labelled "build a model for predicting the number
# of function calls", but the response variable in effect is the per-call time
# ("InclusivePerCall") — confirm which one is intended.
# Assigned explicitly so the result does not depend on which earlier cell
# happened to set `resVar` last.
resVar = "InclusivePerCall"

# One Series of per-model MAPE values per analysed function
result_series_list: list[pd.Series] = []

list_modelName: list[str] = [
    "modelIp",
    "modelLog",
    "modelLinAndIp",
    "modelLinAndLog",
    "modelIpAndLin",
    "modelIpAndLog",
    "modelLogAndLin",
    "modelLogAndIp",
    "modelProcessDividedByProblemSize",
    "modelProblemSizeDividedByProcess",
    "modelInfiniteProductOfProblemSizeMultipliedByProcesses",
    "modelInfiniteProductOfProblemSizeDividedByProcesses",
    "modelLinearSumOf2elementCombination",
    "modelLinearSumOfElementCombinations",
    "modelLinearSumOf2elementCombinationWithSquared",
    "modelLinearSumOf2elementCombinationWithCubed",
    "modelSquareRootOfProcess",
    "modelSquareRootTimesOtherElems",
    "modelObeyOneParameter",
    "modelLin",
    # "modelBasicTree",
]

# Rename "Name" to "functionName" before handing the frames to the model classes
# (presumably the column name they expect — verify against lib.lab_lib).
trainDF_perFunc = trainDF_perFunc.rename({"Name": "functionName"}, axis="columns")
testDF_perFunc = testDF_perFunc.rename({"Name": "functionName"}, axis="columns")

models = Models(
    inputDF=trainDF_perFunc,
    expVarColNames=expVar,
    resVarColNames=[resVar],
    targetDF=None,
    modelNames=list_modelName,
)

models.setUpDataBeforeCalcLr()
models.calcLr()
models.calcMAPE()

# Per-model MAPE as plain floats, plus the name of the function they belong to
dictCalcedMAPE = {
    modelName: float(mape)
    for modelName, mape in models.returnCalculatedMAPE().items()
}
dict_for_series: dict = {**dictCalcedMAPE, "functionName": functionName}

series: pd.Series = pd.Series(dict_for_series)
result_series_list.append(series)

# One row per function; then append the lowest MAPE and the name of the model
# that achieved it (the "最低値" and "最適モデル" columns in the output).
resultDF: pd.DataFrame = pd.DataFrame(result_series_list)
resultDF = addLowestMAPEColumn(
    inputDF=resultDF, model_name_list=list_modelName, version=2
)
resultDF_after = addLowestMAPEsModelNameColumn(
    inputDF=resultDF, model_name_list=list_modelName, version=2
)

resultDF_after

Unnamed: 0,modelLin,modelIp,modelLog,modelProcessDividedByProblemSize,modelProblemSizeDividedByProcess,modelInfiniteProductOfProblemSizeDividedByProcesses,modelInfiniteProductOfProblemSizeMultipliedByProcesses,modelLinAndIp,modelLinAndLog,modelIpAndLin,...,modelLinearSumOf2elementCombination,modelLinearSumOfElementCombinations,modelLinearSumOf2elementCombinationWithSquared,modelLinearSumOf2elementCombinationWithCubed,modelSquareRootOfProcess,modelSquareRootTimesOtherElems,modelObeyOneParameter,functionName,最低値,最適モデル
0,7.515889,7.567432,7.557196,8.307769,8.86864,8.801621,8.728072,7.583734,7.532287,7.520195,...,8.13089,7.240187,7.677744,7.818955,7.537277,8.606013,7.515889,void_VerifyAndWriteFinalOutput(Real_t_Domain,7.240187,modelLinearSumOfElementCombinations


In [16]:
# Obtain the best-performing model for the current response variable; the
# fitted model object is stored under the "object" key.
bestModelDict: dict = return_bestModelObject(
    inputDF=trainDF_perFunc,
    list_expVar=expVar,
    list_resVar=[resVar],
    list_modelName=list_modelName,
)

bestModel = bestModelDict["object"]

# The prediction is expected to hold exactly one value (one test
# configuration for the selected function). .item() extracts it from an array
# of any dimensionality and raises if there is more than one element; calling
# float() directly on a non-0-d array is deprecated since NumPy 1.25.
predicted = float(
    np.asarray(bestModel.predict(inputDF=testDF_perFunc[expVar])).item()
)

# Measured value of the response variable and the error of the prediction.
# NOTE(review): the "call" / "predicted_call" labels hold whatever `resVar`
# selects; in the saved output that is the per-call time, not a call count —
# confirm the intended naming.
_call: float = float(testDF_perFunc.iloc[0][resVar])
_MAPE: float = float(returnMapeScore(l1=[_call], l2=[predicted]))
_series: pd.Series = pd.Series({
    "functionName": functionName,
    "call": _call,
    "MAPE": _MAPE,
    "predicted_call": predicted,
})

_series

functionName      void_VerifyAndWriteFinalOutput(Real_t_Domain
call                                                  0.000131
MAPE                                                 48.358918
predicted_call                                        0.000068
dtype: object

In [None]:
# Clear all user-defined names from the kernel namespace before the next
# section (without -f, IPython asks for confirmation when run interactively).
%reset

***

---

___

# 2022年4月17日～

次のような表を作成する

採用される割合 (MAPE の最大値 [%] ，MAPE の最小値 [%]) [%]

| ベンチマークプログラム名 | 線形モデル               | 対数モデル               | 反比例モデル              |
|--------------|---------------------|---------------------|---------------------|
| str          | float(float, float) | float(float, float) | float(float, float) |


最終的な目標は全ベンチマークプログラムについてこの表を一気に作成することだが、既存のライブラリ関数などを利用し、まずはベンチマークごとに作成可能にする。

メモ

## 実装予定

1. 行方向に最小値を検出
2. 最小値以外をNaNに変更
3. 列方向に最小値と最大値を検出

## 


***

---

___