In [1]:
# Capture the kernel's current working directory via the %pwd magic.
jupyter_pwd = %pwd
# When the kernel starts at the filesystem root, switch to /workspace —
# presumably so the relative paths used later (./lib, ./csv_files/) resolve.
if jupyter_pwd == "/":
    %cd /workspace


In [2]:
# Import the library that is kept in .ipynb form.
%run ./lib/lib.ipynb

# Name of the directory that holds the CSV files containing the raw data.
csvDirPath = "./csv_files/"

# Names of the NPB benchmarks.
benchmarkNames = [
    "cg",
    "ep",
    "ft",
    "is",
    "lu",
    "mg",
]

# Process counts, iteration counts and problem sizes for the LULESH benchmark program.
# Process counts are the cubes of 2..8: 8, 27, 64, 125, 216, 343, 512.
lulesh_processes: list[int] = [edge**3 for edge in range(2, 9)]
# Iteration counts are the powers of two from 8 to 256.
lulesh_iterations: list[int] = [2**exponent for exponent in range(3, 9)]
lulesh_sizes: list[int] = [
    16,
    24,
    32,
    48,
    64,
    128,
]

DEBUG:__main__:hello


In [3]:
# Import the .py export of the .ipynb library notebook.
import lib
import lib.lab_lib
# NOTE(review): the star import hides the origin of names used below (pd, copy,
# Models, return_rawDF_cg, ...) — presumably they all come from lib.lab_lib; confirm.
from lib.lab_lib import *

DEBUG:lib.lab_lib:hello


In [4]:
# Date label for this experiment.
date: str = "2022年8月12日"

# Names of the candidate models; passed to Models(modelNames=...) further down.
# Every entry ends with a comma so that re-enabling the commented-out entry
# cannot silently concatenate two adjacent string literals.
list_modelName: list[str] = [
    "modelIp",
    "modelLog",
    "modelLinAndIp",
    "modelLinAndLog",
    "modelIpAndLin",
    "modelIpAndLog",
    "modelLogAndLin",
    "modelLogAndIp",
    "modelProcessDividedByProblemSize",
    "modelProblemSizeDividedByProcess",
    "modelLinearSumOf2elementCombination",
    "modelLinearSumOfElementCombinations",
    "modelLinearSumOf2elementCombinationWithSquared",
    "modelLinearSumOf2elementCombinationWithCubed",
    "modelSquareRootOfProcess",
    "modelSquareRootTimesOtherElems",
    "modelLin",
    # "modelBasicTree",
]

# Process counts: powers of two 2..128 for the input data, 256 for the target.
input_list_process: list[int] = [2**exponent for exponent in range(1, 8)]
target_list_process: list[int] = [256]

# CG parameter grid for the input data.
cg_input_list_na: list[int] = [
    14000,
    30000,
    75000,
    100000,
]
cg_input_list_nonzer: list[int] = [
    11,
    12,
    13,
    14,
    15,
    18,
]
cg_input_list_niter: list[int] = [
    15,
    30,
    75,
    90,
]
cg_input_list_shift: list[int] = [
    20,
    40,
    60,
    80,
    110,
]

# CG parameters for the target data (a single value each).
cg_target_list_na: list[int] = [1500000]
cg_target_list_nonzer: list[int] = [21]
cg_target_list_niter: list[int] = [100]
cg_target_list_shift: list[int] = [200]

# MG parameter grid for the input data.
mg_input_list_problem_size: list[int] = [
    32,
    64,
    128,
    256,
]
mg_input_list_nit: list[int] = [
    4,
    10,
    20,
    35,
]

# MG parameters for the target data (a single value each).
mg_target_list_problem_size: list[int] = [512]
mg_target_list_nit: list[int] = [50]

In [5]:
# Raw CG DataFrame for the input parameter grid defined above.
input_rawDF_cg: pd.DataFrame = return_rawDF_cg(
    csvDir=csvDirPath,
    list_process=input_list_process,
    list_na=cg_input_list_na,
    list_nonzer=cg_input_list_nonzer,
    list_niter=cg_input_list_niter,
    list_shift=cg_input_list_shift,
)

# Raw CG DataFrame for the target parameters, read from the same directory.
target_rawDF_cg: pd.DataFrame = return_rawDF_cg(
    csvDir=csvDirPath,
    list_process=target_list_process,
    list_na=cg_target_list_na,
    list_nonzer=cg_target_list_nonzer,
    list_niter=cg_target_list_niter,
    list_shift=cg_target_list_shift,
)



In [6]:
# Explanatory-variable column names handed to Models for the CG benchmark.
# NOTE(review): "na" is varied in the input grid (cg_input_list_na) but is not
# listed here — confirm that leaving it out is intentional.
cg_list_exp: list[str] = [
    "process",
    "nonzer",
    "niter",
    "shift",
]

# Response-variable column names handed to Models.
list_res: list[str] = [
    "#Call",
]

In [7]:
# Pick out the functions whose accuracy is low, then tune for them.
targetFunctionNames: list[str] = [
    "VECSET", "MPI_Wait()", "MPI_Send()", "SPRNVC",
    "RANDLC", "MPI_Irecv()", "ICNVRT",
]

In [8]:
# Fit every candidate model to each function listed in targetFunctionNames and
# collect the resulting MAPE values, one pd.Series (= one row) per function.
result_series_list: list[pd.Series] = []

# Rename before reading the column: DataFrame.rename ignores labels that are
# absent, so re-running this cell after "Name" has already become
# "functionName" is a no-op instead of a KeyError on the "Name" lookup.
input_rawDF_cg = input_rawDF_cg.rename(columns={"Name": "functionName"})
target_rawDF_cg = target_rawDF_cg.rename(columns={"Name": "functionName"})

# unique() keeps first-appearance order, so the row order of resultDF is the
# same on every run (the iteration order of a set of str is not stable across
# interpreter sessions).
function_names: list[str] = input_rawDF_cg["functionName"].unique().tolist()

for function_name in function_names:
    # Only the functions selected for tuning are modelled.
    if function_name not in targetFunctionNames:
        continue
    input_rawDF_per_function: pd.DataFrame = input_rawDF_cg[
        input_rawDF_cg["functionName"] == function_name
    ]

    # No target data is supplied here (targetDF=None).
    models = Models(
        inputDF=input_rawDF_per_function,
        expVarColNames=cg_list_exp,
        resVarColNames=list_res,
        targetDF=None,
        modelNames=list_modelName,
    )

    models.setUpDataBeforeCalcLr()
    models.calcLr()
    models.calcMAPE()

    dictCalcedMAPE = models.returnCalculatedMAPE()

    # Build a fresh dict of plain floats; the dict returned by the library is
    # left untouched, so no deep copy is needed.
    dict_for_series: dict = {
        model_name: float(mape) for model_name, mape in dictCalcedMAPE.items()
    }
    dict_for_series["functionName"] = function_name

    series: pd.Series = pd.Series(dict_for_series)
    result_series_list.append(series)

# One row per function, then append the lowest MAPE and the name of the model
# that achieved it (helpers are defined outside this notebook section).
resultDF: pd.DataFrame = pd.DataFrame(result_series_list)
resultDF = addLowestMAPEColumn(
    inputDF=resultDF, model_name_list=list_modelName, version=2
)
resultDF = addLowestMAPEsModelNameColumn(
    inputDF=resultDF, model_name_list=list_modelName, version=2
)

In [9]:
# Display the per-function MAPE table built in the previous cell.
resultDF

Unnamed: 0,modelLin,modelIp,modelLog,modelProcessDividedByProblemSize,modelProblemSizeDividedByProcess,modelLinAndIp,modelLinAndLog,modelIpAndLin,modelIpAndLog,modelLogAndLin,modelLogAndIp,modelLinearSumOf2elementCombination,modelLinearSumOfElementCombinations,modelLinearSumOf2elementCombinationWithSquared,modelLinearSumOf2elementCombinationWithCubed,modelSquareRootOfProcess,modelSquareRootTimesOtherElems,functionName,最低値,最適モデル
0,34.257466,52.562973,45.284268,62.963228,300.749245,57.396655,44.259878,41.795204,45.84256,41.027576,55.026592,24.25644,11.93839,28.27598,36.185272,37.614516,63.624554,MPI_Send(),11.93839,modelLinearSumOfElementCombinations
1,111.481033,111.481037,111.481028,59.844174,59.844079,111.481027,111.481029,111.481043,111.48104,111.481031,111.481025,111.481049,111.48099,111.480967,111.480967,111.48103,111.481071,VECSET,59.844079,modelProblemSizeDividedByProcess
2,148.513995,148.512478,148.4889,68.457716,68.45567,148.512456,148.488902,148.514017,148.488924,148.513993,148.512452,149.98478,148.513906,149.097089,149.201887,148.513989,151.966288,RANDLC,68.45567,modelProblemSizeDividedByProcess
3,34.257466,52.562973,45.284268,62.963228,300.749245,57.396655,44.259878,41.795204,45.84256,41.027576,55.026592,24.25644,11.93839,28.27598,36.185272,37.614516,63.624554,MPI_Irecv(),11.93839,modelLinearSumOfElementCombinations
4,111.481033,111.481037,111.481028,59.844174,59.844079,111.481027,111.481029,111.481043,111.48104,111.481031,111.481025,111.481049,111.48099,111.480967,111.480967,111.48103,111.481071,SPRNVC,59.844079,modelProblemSizeDividedByProcess
5,148.514237,148.512721,148.489142,68.457742,68.456685,148.512699,148.489145,148.514259,148.489167,148.514235,148.512696,149.985025,148.514148,149.097332,149.202129,148.514231,151.966536,ICNVRT,68.456685,modelProblemSizeDividedByProcess
6,34.257466,52.562973,45.284268,62.963228,300.749245,57.396655,44.259878,41.795204,45.84256,41.027576,55.026592,24.25644,11.93839,28.27598,36.185272,37.614516,63.624554,MPI_Wait(),11.93839,modelLinearSumOfElementCombinations


In [10]:
def _rows_of(function_name: str) -> pd.DataFrame:
    """Return the rows of input_rawDF_cg whose functionName equals function_name."""
    return input_rawDF_cg[input_rawDF_cg["functionName"] == function_name]


# One DataFrame per function of interest.
df_ICNVRT = _rows_of("ICNVRT")
df_Wait = _rows_of("MPI_Wait()")
df_Send = _rows_of("MPI_Send()")
df_RANDLC = _rows_of("RANDLC")
df_VECSET = _rows_of("VECSET")
df_Irecv = _rows_of("MPI_Irecv()")
df_SPRNVC = _rows_of("SPRNVC")

In [11]:
# Display the input rows that belong to the SPRNVC function.
df_SPRNVC

Unnamed: 0,%Time,Exclusive,Inclusive,#Call,#Subrs,functionName,process,na,nonzer,niter,shift
7,1.7,59,126,14000.0,541041.0,SPRNVC,2,14000,11,15,20
7,1.7,58,125,14000.0,541041.0,SPRNVC,2,14000,11,15,40
7,1.7,58,125,14000.0,541041.0,SPRNVC,2,14000,11,15,60
7,1.7,59,125,14000.0,541041.0,SPRNVC,2,14000,11,15,80
7,1.7,57,124,14000.0,541041.0,SPRNVC,2,14000,11,15,110
...,...,...,...,...,...,...,...,...,...,...,...
4,24.6,770,1659,100000.0,7082680.0,SPRNVC,128,100000,18,90,20
4,24.5,770,1658,100000.0,7082680.0,SPRNVC,128,100000,18,90,40
4,31.1,769,1657,100000.0,7082680.0,SPRNVC,128,100000,18,90,60
4,31.1,769,1657,100000.0,7082680.0,SPRNVC,128,100000,18,90,80


In [12]:
# Call a test helper that is not defined in this part of the notebook
# (presumably provided by lib.lab_lib via the star import — confirm).
test_Model_obeyOneParameter_ForMultipleRegression()

In [None]:
# Clear all names from the interactive namespace.
# NOTE(review): without -f, %reset asks for confirmation, which can stall a
# non-interactive "Run All" — consider `%reset -f` if the prompt is not wanted.
%reset

***

---

___

# 2022年4月17日～

次のような表を作成する

各セルの書式：採用される割合 [%] (MAPE の最大値 [%]，MAPE の最小値 [%])

| ベンチマークプログラム名 | 線形モデル               | 対数モデル               | 反比例モデル              |
|--------------|---------------------|---------------------|---------------------|
| str          | float(float, float) | float(float, float) | float(float, float) |


最終的な目標は、すべてのベンチマークプログラムについてこの表を一括で作成することだが、まずは既存のライブラリ関数などを利用し、ベンチマークプログラムごとに作成できるようにする。

メモ

## 実装予定

1. 行方向に最小値を検出
2. 最小値以外をNaNに変更
3. 列方向に最小値と最大値を検出

## 


***

---

___