In [1]:
jupyter_pwd = %pwd
if jupyter_pwd == "/":
    %cd /workspace

# %pdb on

# ipynb形式のライブラリのインポート
%run ./lib/lib.ipynb

# ipynb形式のライブラリノートを.py形式に変更したものをインポート
import lib
import lib.lab_lib
from lib.lab_lib import *

# 生データの入ったCSVファイルの保持されたディレクトリ名を格納している変数
csvDirPath = "./csv_files/"

# NPBのベンチマーク名のリスト
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]

# NPBのプロセス数
npb_process :list[int] = [2, 4, 8, 16, 32, 64, 128, 256]
train_npb_process :list[int] = npb_process[:-1]
test_npb_process :list[int] = npb_process[-1:]
# NPBのCGの初期変数
cg_na: list[int] = [14000, 30000, 75000, 100000, 1500000]
cg_nonzer: list[int] = [11, 12, 13, 14, 15, 18, 21]
cg_niter: list[int] = [15, 30, 75, 90, 100]
cg_shift: list[int] = [20, 40, 60, 80, 110, 200]

train_cg_na: list[int] = cg_na[:-1]
train_cg_nonzer: list[int] = cg_nonzer[:-1]
train_cg_niter: list[int] = cg_niter[:-1]
train_cg_shift: list[int] = cg_shift[:-1]

test_cg_na: list[int] = cg_na[-1:]
test_cg_nonzer: list[int] = cg_nonzer[-1:]
test_cg_niter: list[int] = cg_niter[-1:]
test_cg_shift: list[int] = cg_shift[-1:]
# NPBのMGの初期変数
mg_size :list[int] = [32, 64, 128, 256, 512]
mg_nit: list[int] = [4, 10, 20, 35, 50]

train_mg_size :list[int] = mg_size[:-1]
train_mg_nit :list[int] = mg_nit[:-1]

test_mg_size :list[int] = mg_size[-1:]
test_mg_nit :list[int] = mg_nit[-1:]


# LULESH ベンチマークプログラムのプロセス数・問題サイズ・イテレーション数
lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343, 512]
lulesh_iterations: list[int] = [8, 16, 32, 64, 128, 256]
lulesh_sizes: list[int] = [16, 24, 32, 48, 64, 128]

train_lulesh_processes: list[int] = [8, 27, 64, 125, 216, 343]
train_lulesh_iterations: list[int] = [8, 16, 32, 64, 128]
train_lulesh_sizes: list[int] = [16, 24, 32, 48]

test_lulesh_processes: list[int] = [512, 729, 1000]
test_lulesh_iterations: list[int] = [256, 512, 1024]
test_lulesh_sizes: list[int] = [64, 96, 128]

# Extra-Pのオプション
modelerNames: list[str] = [
    # "refining", 
    "multi-parameter",
    "default", 
    # "basic --options poly_exponents=-1,0,1,2,3 log_exponents=0,1 force_combination_exponents=1 allow_negative_exponents=1"
    ]

modelerOption: str = """ --options \#spm=Basic \#spo=poly_exponents=-1,0,1,2,3,log_exponents=0,1,force_combination_exponents=1,allow_negative_exponents=True"""

list_modelName: list[str] = [
    "modelIp",
    "modelLog",
    "modelLinAndIp",
    "modelLinAndLog",
    "modelIpAndLin",
    "modelIpAndLog",
    "modelLogAndLin",
    "modelLogAndIp",
    "modelProcessDividedByProblemSize",
    "modelProblemSizeDividedByProcess",
    "modelInfiniteProductOfProblemSizeMultipliedByProcesses",
    "modelInfiniteProductOfProblemSizeDividedByProcesses",
    "modelLinearSumOf2elementCombination",
    "modelLinearSumOfElementCombinations",
    "modelLinearSumOf2elementCombinationWithSquared",
    "modelLinearSumOf2elementCombinationWithCubed",
    "modelSquareRootOfProcess",
    "modelSquareRootTimesOtherElems",
    "modelObeyOneParameter",
    "modelLin"
    # "modelBasicTree",
]
list_csvDir = [
    "./csv_files/lulesh_1st/",
    "./csv_files/lulesh_2nd/",
    "./csv_files/lulesh_3rd/",
]

DEBUG:__main__:hello
DEBUG:lib.lab_lib:hello


In [2]:
test_returnConvertedTargetPprofTimeDF()
test_addPerCallCol()


In [3]:
trainDF_mg: pd.DataFrame = return_rawDF_mg(
    list_process=train_npb_process,
    list_problem_size=train_mg_size,
    list_nit=train_mg_nit,
    csvDir=csvDirPath,
)

testDF_mg: pd.DataFrame = return_rawDF_mg(
    list_process=test_npb_process,
    list_nit=test_mg_nit,
    list_problem_size=test_mg_size,
    csvDir=csvDirPath,
)

trainDF_mg = returnConvertedTargetPprofTimeDF(
    inputDF=trainDF_mg, resVars=["Exclusive", "Inclusive"]
)
testDF_mg = returnConvertedTargetPprofTimeDF(
    inputDF=testDF_mg, resVars=["Exclusive", "Inclusive"]
)

trainDF_mg = addPerCallCol(
    inputDF = trainDF_mg,
    targetColNames=["Exclusive", "Inclusive"],
    CallColName="#Call"
)
testDF_mg = addPerCallCol(
    inputDF = testDF_mg,
    targetColNames=["Exclusive", "Inclusive"],
    CallColName="#Call"
)


In [4]:
# 学習用のデータ
trainDF_cg: pd.DataFrame = return_rawDF_cg(
    list_process=train_npb_process,
    list_na=train_cg_na,
    list_nonzer=train_cg_nonzer,
    list_niter=train_cg_niter,
    list_shift=train_cg_shift,
    csvDir=csvDirPath,
)

# 予測対象用のデータ
testDF_cg: pd.DataFrame = return_rawDF_cg(
    list_process=test_npb_process,
    list_na=test_cg_na,
    list_nonzer=test_cg_nonzer,
    list_niter=test_cg_niter,
    list_shift=test_cg_shift,
    csvDir=csvDirPath,
)

trainDF_cg = returnConvertedTargetPprofTimeDF(
    inputDF=trainDF_cg, resVars=["Exclusive", "Inclusive"]
)
testDF_cg = returnConvertedTargetPprofTimeDF(
    inputDF=testDF_cg, resVars=["Exclusive", "Inclusive"]
)

trainDF_cg = addPerCallCol(
    inputDF = trainDF_cg,
    targetColNames=["Exclusive", "Inclusive"],
    CallColName="#Call"
)
testDF_cg = addPerCallCol(
    inputDF = testDF_cg,
    targetColNames=["Exclusive", "Inclusive"],
    CallColName="#Call",
)





In [5]:
"""
functionNames_cg list[str]:
    ベンチマークプログラムCGで実行された関数の関数名を保持した文字列のリスト

functionNames_mg list[str]:
    ベンチマークプログラムMGで実行された関数の関数名を保持した文字列のリスト

expVar_cg list[str]:
    ベンチマークプログラムCGの説明変数

expVar_mg list[str]:
    ベンチマークプログラムMGの説明変数

resVar_call str:
    関数コール回数の目的変数

resVar_excl str:
    Exclusiveの目的変数(all)

resVar_excl_perCall str:
    Exclusiveの目的変数(perCall)

dict_symbols_cg dict[str, any]:
    ベンチマークプログラムCGのシンボルを保持した辞書

dict_symbols_mg dict[str, any]:
    ベンチマークプログラムMGのシンボルを保持した辞書

benchmarkName_cg str:
    ベンチマーク名CG

benchmarkName_mg str:
    ベンチマーク名MG

modelerName str:
    Extra-Pのモデル名を保持した文字列

modelerOption str:
    Extra-Pのモデルオプションを保持した文字列

"""
functionNames_cg: list[str] = sorted(list(set(trainDF_cg["Name"].to_list())))
functionNames_mg: list[str] = sorted(list(set(trainDF_mg["Name"].to_list())))
expVar_cg: list[str] = ["process", "na", "nonzer", "niter", "shift"]
expVar_mg: list[str] = ["process", "problem_size", "nit"]
resVar_call: str = "#Call"
resVar_excl: str = "Exclusive"
resVar_excl_perCall :str = f"{resVar_excl}PerCall"
dict_symbols_cg: dict[str, any] = {}
dict_symbols_mg: dict[str, any] = {}
benchmarkName_cg: str = "cg"
benchmarkName_mg: str = "mg"
modelerName: str = "multi-parameter"
modelerOption: str = """ --options \#spm=Basic \#spo=poly_exponents=-1,0,1,2,3,log_exponents=0,1,force_combination_exponents=1,allow_negative_exponents=True"""

for elem in expVar_cg:
    dict_symbols_cg[elem] = symbols(elem, real=True)
for elem in expVar_mg:
    dict_symbols_mg[elem] = symbols(elem, real=True)


In [6]:
# MGにおけるモデル構築

"""
dict_mg_model_exclusive_all dict[str, any]:
    Exclusive予測用のモデル@all to all
dict_mg_model_exclusive*_perCall dict[str, any]:
    Exclusive予測用のモデル@perCall
dict_cg_model_call dict[str, any]:
    関数コール回数用のモデル
"""
dict_mg_model_exclusive_all: dict[str, any] = {}
dict_mg_model_exclusive_perCall: dict[str, any] = {}
dict_mg_model_call: dict[str, any] = {}

for functionName in functionNames_mg:

    trainDF_mg_perFunc: pd.DataFrame = trainDF_mg[trainDF_mg["Name"] == functionName]

    mg_model_exclusive_all: str = get_ExtraP_model(
        inputDF_perFunc=trainDF_mg_perFunc,
        expVar=expVar_mg,
        resVar=resVar_excl,
        functionName=functionName,
        dict_symbols=dict_symbols_mg,
        benchmarkName=benchmarkName_mg,
        modelerName=modelerName,
        modelerOption=modelerOption,
    )

    mg_model_exclusive_perCall :str = get_ExtraP_model(
        inputDF_perFunc = trainDF_mg_perFunc,
        expVar = expVar_mg,
        resVar = resVar_excl_perCall,
        functionName=functionName,
        dict_symbols=dict_symbols_mg,
        benchmarkName=benchmarkName_mg,
        modelerName = modelerName,
        modelerOption=modelerOption,
    )

    mg_model_call: str = get_ExtraP_model(
        inputDF_perFunc=trainDF_mg_perFunc,
        expVar=expVar_mg,
        resVar=resVar_call,
        functionName=functionName,
        dict_symbols=dict_symbols_mg,
        benchmarkName=benchmarkName_mg,
        modelerName=modelerName,
        modelerOption=modelerOption,
    )

    dict_mg_model_exclusive_all[functionName] = mg_model_exclusive_all
    dict_mg_model_exclusive_perCall[functionName] = mg_model_exclusive_perCall
    dict_mg_model_call[functionName] = mg_model_call


In [7]:
# MGにおける予測

_tmp_list :list[pd.Series] = []
for i, sr in testDF_mg.iterrows():
    functionName :str = sr["Name"]
    target_env :list[set[any]] = []
    for expVar in expVar_mg:
        target_env.append(
            (dict_symbols_mg[expVar], sr[expVar])
        )
    
    """
    _predicted_call:
        予測された関数コール回数
    _predicted_excl_all:
        all to all で予測されたExclusive
    _predicted_excl_perCall:
        関数コール回数を経由して予測されたExclusive
    """
    _predicted_call :float
    _predicted_excl_all :float
    _predicted_excl_perCall :float

    _predicted_call = dict_mg_model_call[functionName].subs(target_env).evalf()
    _predicted_excl_all = dict_mg_model_exclusive_all[functionName].subs(target_env).evalf()
    _predicted_excl_perCall = dict_mg_model_exclusive_perCall[functionName].subs(target_env).evalf() * _predicted_call

    _tmp_series :pd.Series = pd.Series({
        "real_Exclusive": sr["Exclusive"],
        "real_call": sr["#Call"],
        "predicted_Exclusive_all":_predicted_excl_all,
        "predicted_Exclusive_perCall":_predicted_excl_perCall,
        "predicted_call": _predicted_call,
        "process": sr["process"],
        "problem_size": sr["problem_size"],
        "functionName":functionName
    })
    _tmp_list.append(_tmp_series)

print(sr)
DF_result_mg :pd.DataFrame = pd.DataFrame(data = _tmp_list)
DF_result_mg


%Time                      0.0
Exclusive                  0.0
Inclusive                  0.0
#Call                  1.00391
#Subrs                     0.0
Name                TIMER_READ
process                    256
problem_size               512
nit                         50
ExclusivePerCall           0.0
InclusivePerCall           0.0
Name: 40, dtype: object


Unnamed: 0,real_Exclusive,real_call,predicted_Exclusive_all,predicted_Exclusive_perCall,predicted_call,process,problem_size,functionName
0,5.68e-06,1.0,1.148625e-05,1.148625e-05,0.999999999999999,256,512,.TAU_application
1,0.000485,1.0,0.0003191502281525,0.0003191502281525,0.999999999999999,256,512,MG_MPI
2,0.001,51.0,0.0017738762524419,0.001307115882792,51.0,256,512,MG3P
3,0.408,461.0,0.501804318415954,0.184891067091235,333.5,256,512,RESID
4,0.003,1330.0,0.001815227923625,0.0007346710623391,947.5,256,512,COMM3
5,0.543,1.0,0.377098214285714,0.377098214285714,0.999999999999999,256,512,MPI_Init()
6,0.518,1.0,0.270626340619059,0.270626340619059,0.999999999999999,256,512,MPI_Finalize()
7,0.144,459.0,0.233819786934564,0.187496888542182,331.5,256,512,PSINV
8,0.031,6698.62,0.0554269944020884,0.0630274568868383,3553.6608882506,256,512,GIVE3
9,0.222,6902.62,1.69815301136088,1.06895336743973,3962.30907527713,256,512,MPI_Send()


In [8]:
DF_result_mg = add_relativeErrorRateCol(
    inputDF = DF_result_mg,
    real_colName = "real_Exclusive",
    predicted_colName= "predicted_Exclusive_all",
    targetColName= "vs all"
)
DF_result_mg = add_relativeErrorRateCol(
    inputDF = DF_result_mg,
    real_colName = "real_Exclusive",
    predicted_colName= "predicted_Exclusive_perCall",
    targetColName= "vs perCall"
)

print(DF_result_mg.mean())

DF_result_mg

real_Exclusive                    0.059273
real_call                      1532.155896
predicted_Exclusive_all           0.100198
predicted_Exclusive_perCall       0.067069
predicted_call                 2183.102198
process                         256.000000
problem_size                    512.000000
vs all                          113.652767
vs perCall                       81.332123
dtype: float64


  print(DF_result_mg.mean())


Unnamed: 0,real_Exclusive,real_call,predicted_Exclusive_all,predicted_Exclusive_perCall,predicted_call,process,problem_size,functionName,vs all,vs perCall
0,5.68e-06,1.0,1.148625e-05,1.148625e-05,0.999999999999999,256,512,.TAU_application,102.222711267606,102.222711267605
1,0.000485,1.0,0.0003191502281525,0.0003191502281525,0.999999999999999,256,512,MG_MPI,34.1958292468975,34.1958292468976
2,0.001,51.0,0.0017738762524419,0.001307115882792,51.0,256,512,MG3P,77.3876252441991,30.7115882792066
3,0.408,461.0,0.501804318415954,0.184891067091235,333.5,256,512,RESID,22.9912545137143,54.6835619874425
4,0.003,1330.0,0.001815227923625,0.0007346710623391,947.5,256,512,COMM3,39.4924025458321,75.5109645886961
5,0.543,1.0,0.377098214285714,0.377098214285714,0.999999999999999,256,512,MPI_Init(),30.5528150486714,30.5528150486715
6,0.518,1.0,0.270626340619059,0.270626340619059,0.999999999999999,256,512,MPI_Finalize(),47.7555326990233,47.7555326990234
7,0.144,459.0,0.233819786934564,0.187496888542182,331.5,256,512,PSINV,62.3748520378916,30.2061725987374
8,0.031,6698.62,0.0554269944020884,0.0630274568868383,3553.6608882506,256,512,GIVE3,78.7967561357689,103.314377054317
9,0.222,6902.62,1.69815301136088,1.06895336743973,3962.30907527713,256,512,MPI_Send(),664.933788901299,381.510525873751


In [9]:
DF_result_mg = DF_result_mg.astype({
    "predicted_Exclusive_all":"float",
    "predicted_Exclusive_perCall":"float",
    "predicted_call":float,
    "vs all": "float",
    "vs perCall":"float",
})

print(DF_result_mg.dtypes)

print(DF_result_mg.to_csv(float_format="%.3f"))

real_Exclusive                 float64
real_call                      float64
predicted_Exclusive_all        float64
predicted_Exclusive_perCall    float64
predicted_call                 float64
process                          int64
problem_size                     int64
functionName                    object
vs all                         float64
vs perCall                     float64
dtype: object
,real_Exclusive,real_call,predicted_Exclusive_all,predicted_Exclusive_perCall,predicted_call,process,problem_size,functionName,vs all,vs perCall
0,0.000,1.000,0.000,0.000,1.000,256,512,.TAU_application,102.223,102.223
1,0.000,1.000,0.000,0.000,1.000,256,512,MG_MPI,34.196,34.196
2,0.001,51.000,0.002,0.001,51.000,256,512,MG3P,77.388,30.712
3,0.408,461.000,0.502,0.185,333.500,256,512,RESID,22.991,54.684
4,0.003,1330.000,0.002,0.001,947.500,256,512,COMM3,39.492,75.511
5,0.543,1.000,0.377,0.377,1.000,256,512,MPI_Init(),30.553,30.553
6,0.518,1.000,0.271,0.271,1.000,256,512,MPI_Finalize(),47.756,