In [None]:
# Import the shared helper library, which is itself stored as a notebook.
# NOTE(review): a later cell in this notebook runs `./lib/lib.ipynb` instead — confirm which path is current.
%run ./lib.ipynb

# 修正したモデルから卒論時に集計したデータを作成する

1. 表

| ベンチマーク名 | 平均誤差率(%) | コスト比(%) |
|---------|----------|---------|


2. 表

| ベンチマーク名 | 採用割合(最大MAPE(%), 最小MAPE(%)) |
|---------|----------------------------|
|         | モデル(1), モデル(2), ...        |


* 平均誤差率：大規模実行時の関数コール回数との比較
* MAPE：トレーニングデータとの比較

In [None]:
# Create an empty 4x3 inch figure and label its axes:
# x = number of profiles used, y = mean error rate (%).
blank_axes = plt.figure(figsize=(4, 3)).gca()
blank_axes.set_xlabel("使用したプロファイル数")
blank_axes.set_ylabel("平均誤差率(%)")

In [None]:
# Let pandas display up to 200 columns and 200 rows before truncating.
pd.set_option('display.max_columns', 200, 'display.max_rows', 200)

In [None]:
def returnSpecificData(benchmarkName="cg", functionName=".TAU_application", process=256, benchmarkClass="D"):
    """Return the function-call count for one exact configuration.

    Asks ``returnRawDF`` for a single benchmark, function, process count and
    problem class, and returns the value found in the first row and first
    column of the resulting frame.

    Args:
        benchmarkName: Benchmark name, e.g. "cg".
        functionName: Name of the function whose call count is wanted.
        process: Process count; passed both as the fixed process and as the
            only element of the process list.
        benchmarkClass: Problem-size class; passed both as the fixed class and
            as the only element of the class list.

    Returns:
        The top-left cell of the frame returned by ``returnRawDF``.

    Example:
        returnSpecificData(benchmarkName="mg", functionName="BUBBLE", process=256, benchmarkClass="B")
    """
    query = dict(
        Benchmark=benchmarkName,
        functionName=functionName,
        benchmarkClass=[benchmarkClass],
        FixedProcess=process,
        Processes=[process],
        FixedBenchmarkClass=benchmarkClass,
    )
    return returnRawDF(**query).iat[0, 0]

In [None]:
# Drop the "bt" and "sp" benchmarks from the benchmark list.
benchmarks = [name for name in benchmarks if name not in ('bt', 'sp')]
# Widen the line width pandas uses when a DataFrame is printed.
pd.set_option('display.width', 100)

In [None]:
# Per-benchmark plot data; later cells read dictTmp[benchmark]["x"] and
# dictTmp[benchmark]["y"].
dictTmp = returnDictForPlotPerNumOfUsedData(
    Benchmark=benchmarks,
    fix="Class",
    benchmarkClass=["A", "B", "C", "D"],
    FixedProcess=64,
    Processes=[1, 2, 4, 8, 16, 32, 64, 128, 256],
    FixedBenchmarkClass="C",
)

In [None]:
# Render floats with four significant digits wherever pandas formats them.
pd.options.display.float_format = '{:.4g}'.format

# Process counts used for fitting and the process count to be predicted.
# They do not change per benchmark, so they are defined once.
listToLearn = [1, 2, 4, 8, 16, 32, 64, 128]
listToPredict = [256]

summaryColumns = ["ベンチマーク名", "平均絶対誤差率[％]", "相対コスト[％]"]

# One record per benchmark. The frame is built once after the loop because
# DataFrame.append was removed in pandas 2.0 (and was quadratic before that).
summaryRows = []
for benchmark in benchmarks:
    benchmark_x = dictTmp[benchmark]["x"]
    benchmark_y = dictTmp[benchmark]["y"]
    # Take the y value at the position whose x equals the number of training
    # points in listToLearn.
    index = benchmark_x.index(len(listToLearn))
    MAPE = benchmark_y[index]
    relativeCost = returnRelativeCost(benchmark=benchmark, variablesToLearn=listToLearn,
                                      variablesToPredict=listToPredict, fixedClassOrProcess="Class", fixed="C")
    summaryRows.append({
        "ベンチマーク名": benchmark.upper(),
        "平均絶対誤差率[％]": MAPE,
        "相対コスト[％]": relativeCost,
    })

# Passing the columns explicitly keeps the table header even when no
# benchmark was processed.
tmpDF = pd.DataFrame(summaryRows, columns=summaryColumns)
# The benchmark-name column is text, so restrict the mean to numeric columns;
# newer pandas raises instead of silently skipping non-numeric columns.
tmpDFMean = tmpDF.mean(numeric_only=True)
print(tmpDF.to_latex(index=False))

In [None]:
# Plot, for each benchmark in dictTmp, its "y" values against its "x" values.
plt.figure(figsize=(5.72, 4), dpi=200)
for benchmark, series in dictTmp.items():
    plt.plot(series["x"], series["y"], marker='o', label=benchmark.upper())

# The legend and axis labels apply to the whole figure, so they are set once
# after all lines are drawn instead of on every loop iteration.
plt.legend()
plt.xlabel("使用したプロファイル数")
plt.ylabel("平均絶対誤差率[％]")

In [None]:
plt.figure(figsize=(5.72, 4), dpi=200)

# Curve of the model that Extra-P output when given the fixed-process data:
#   2286768.3333333326 + 301997.61904761934 * log2(p)
# NOTE(review): the original cell also recorded the formula
#   -3590464.6990329633 + 3759195.349891038 * p^(1/4)
# which is not the one plotted here.
plot_x = np.linspace(0.8, 256, 500)
plot_y = 2286768.3333333326 + 301997.61904761934 * np.log2(plot_x)
plt.plot(plot_x, plot_y, label="ExtraP")

# Function-call counts used for fitting, stored as column vectors of shape (n, 1).
x = np.array([1, 2, 4, 8, 16, 32, 64, 128]).reshape(-1, 1)
y = np.array([1984770.0, 2263540.0, 2821070.0, 3936140.0,
              3936140.0, 3936140.0, 3936140.0, 3936140.0]).reshape(-1, 1)
plt.scatter(x, y, marker="o", label="予測に用いた関数コール回数")

# Measured function-call count at the core count that is to be predicted.
x_target = [256]
y_target = [3936140]
plt.scatter(x_target, y_target, marker="o", label="予測したい関数コール回数の実測値")

benchmarkName = "CG"
functionName = "ICNVRT"

# The evaluation grid is reshaped to a column vector before being handed to
# the models' predict() calls below.
plot_x = plot_x.reshape(-1, 1)

# Only the inverse-proportional and linear-saturation models are drawn; the
# linear and logarithmic models are not plotted in this figure.

# Inverse-proportional model
modelIpMk2 = ModelIp_mk2(train_x=x, train_y=y, target_x=x_target, target_y=y_target,
                         benchmark_name=benchmarkName, function_name=functionName)
modelIpMk2.calc_lr()
plot_y_IpMk2 = modelIpMk2.predict(plot_x)
plt.plot(plot_x, plot_y_IpMk2, label="反比例モデル")

# Linear-saturation model
modelBranchMk2 = ModelBranch_mk2(train_x=x, train_y=y, target_x=x_target,
                                 target_y=y_target, benchmark_name=benchmarkName, function_name=functionName)
modelBranchMk2.calc_lr()
plot_y_BranchMk2 = modelBranchMk2.predict(plot_x)
plt.plot(plot_x, plot_y_BranchMk2, label="線形飽和モデル")

# Show the legend, then label the axes (y: function-call count, x: number of cores).
# The training points are drawn only once (above, with a legend label); a second
# unlabelled scatter of the same points would overdraw them in a colour that
# does not match the legend.
plt.legend()
plt.ylabel("関数コール回数")
plt.xlabel("実行コア数")

In [None]:
# 実際にプロットする


# print(f"fix={fix}, benchmarkClasses={benchmarkClasses}, fixedProcess={fixedProcess}, Processes={processes}, FixedBenchmarkClass={fixedBenchmarkClass}")
# print(f"targetNumOfProcess={targetNumOfProcess}, targetProblemSize={fixedBenchmarkClass}, fix={fix}")

# DF = returnRawDFperBenchmark(Benchmark="mg", fix="Process", benchmarkClass=["A", "B", "C", "D"], Processes=[
#                              1, 2, 4, 8, 16, 32, 64, 128, 256], FixedBenchmarkClass="B", FixedProcess=64)
# DF.dropna(how='any')
# DF

In [None]:
%reset

In [1]:
# Import the shared helper library, which is itself stored as a notebook.
%run ./lib/lib.ipynb

In [2]:
# Complete lists covering every configuration, used for the overall aggregation.
benchmarkNames = ["cg", "ep", "ft", "is", "lu", "mg"]
classes = list("ABCD")
# Powers of two from 1 to 256.
processes = [2 ** exponent for exponent in range(9)]
# Index of the data point set aside for testing the model.
targetIndex = -1
csvDirPath = "./csv_files/"

In [3]:
# Reduced lists for quick experiments; these replace the full lists assigned
# in the previous cell.
benchmarkNames = ["cg"]
classes = ["B"]
# Powers of two from 1 to 256.
processes = [1 << shift for shift in range(9)]

In [4]:
# Collect one Series per (benchmark, class, function) and join them column-wise.
listOfSeriesData = []

for benchmarkName in benchmarkNames:
    dfPerBenchmark = returnCollectedExistingData(
        benchmarkNames=[benchmarkName], classes=classes, processes=processes, csvDirPath=csvDirPath)
    for benchmarkClass in classes:
        dfPerBenchmarkClass = dfPerBenchmark[dfPerBenchmark["benchmarkClass"] == benchmarkClass]
        functionNames = sorted(set(dfPerBenchmarkClass["functionName"]))
        for functionName in functionNames:
            dfPerFunction = dfPerBenchmarkClass[dfPerBenchmarkClass["functionName"] == functionName]

            # Explanatory variable (process count) and response variable
            # (function-call count) as plain lists.
            rawX = dfPerFunction['process'].tolist()
            rawY = dfPerFunction['functionCallNum'].tolist()

            # Split both lists into a model-building part and a model-testing part.
            # NOTE(review): the split is positional, so it relies on the rows being
            # ordered by process count — confirm returnCollectedExistingData guarantees this.
            trainX, targetX = rawX[:targetIndex], rawX[targetIndex:]
            trainY, targetY = rawY[:targetIndex], rawY[targetIndex:]

            seriesPerFunction = returnSeriesOfData(
                benchmarkName=benchmarkName,
                functionName=functionName,
                rawX=trainX,
                rawY=trainY,
                # The class is held fixed while the process count varies. State
                # that explicitly so the recorded "fixed" value follows the class
                # being processed rather than whatever the helper defaults to.
                fixProcessOrClass="Class",
                fixed=benchmarkClass,
                targetProcess=targetX[0],
                targetBenchmarkClass=benchmarkClass,
                targetFunctionCallNum=targetY[0],
                csvDirPath=csvDirPath)
            listOfSeriesData.append(seriesPerFunction)


DF = pd.concat(listOfSeriesData, axis=1)
DF

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
benchmarkName,cg,cg,cg,cg,cg,cg,cg,cg,cg,cg,...,cg,cg,cg,cg,cg,cg,cg,cg,cg,cg
functionName,.TAU_application,ALLOC_SPACE,CG,CONJ_GRAD,ICNVRT,INITIALIZE_MPI,MAKEA,MPI_Barrier(),MPI_Bcast(),MPI_Comm_rank(),...,MPI_Init(),MPI_Irecv(),MPI_Reduce(),MPI_Send(),MPI_Wait(),SETUP_PROC_INFO,SETUP_SUBMATRIX_INFO,SPARSE,SPRNVC,VECSET
usedDataX,"[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]",...,"[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]","[1, 2, 4, 8, 16, 32, 64, 128]"
usedDataY,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[23.5, 31.0, 46.0, 76.0, 76.0, 76.0, 76.0, 76.0]","[371055.0, 561762.0, 943178.0, 1706010.0, 1706...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]",...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[2576.0, 4203.0, 5462.0, 13984.0, 13984.0, 199...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[2576.0, 4203.0, 5462.0, 13984.0, 13984.0, 199...","[2576.0, 4203.0, 5462.0, 13984.0, 13984.0, 199...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[21625.0, 29250.0, 44500.0, 75000.0, 75000.0, ...","[21625.0, 29250.0, 44500.0, 75000.0, 75000.0, ..."
numOfData,8,8,8,8,8,8,8,8,8,8,...,8,8,8,8,8,8,8,8,8,8
ProcessOrClass,Class,Class,Class,Class,Class,Class,Class,Class,Class,Class,...,Class,Class,Class,Class,Class,Class,Class,Class,Class,Class
fixed,B,B,B,B,B,B,B,B,B,B,...,B,B,B,B,B,B,B,B,B,B
targetProcess,256,256,256,256,256,256,256,256,256,256,...,256,256,256,256,256,256,256,256,256,256
targetProblemSize,B,B,B,B,B,B,B,B,B,B,...,B,B,B,B,B,B,B,B,B,B
targetNumOfFunctionCall,1.0,1.0,1.0,76.0,1706010.0,1.0,1.0,1.0,1.0,1.0,...,1.0,25992.0,1.0,25992.0,25992.0,1.0,1.0,1.0,75000.0,75000.0


In [None]:
# Fetch the raw data for the CG benchmark.
cgDF = returnCollectedExistingData(benchmarkNames=["cg"], classes=["A", "B", "C", "D"], processes=[
                                   1, 2, 4, 8, 16, 32, 64, 128, 256], csvDirPath="./csv_files/")
# Keep only the rows whose benchmark class is A.
cgDFfixedA = cgDF[cgDF["benchmarkClass"] == "A"]
# List the function names that occur in those rows.
functionNames = sorted(set(cgDFfixedA["functionName"]))
print(functionNames)

# Select the rows of the function named "CG".
functionNameCG = cgDFfixedA[cgDFfixedA["functionName"] == "CG"]

# Explanatory variable (process count) and response variable (function-call
# count) as plain lists.
raw_x = functionNameCG['process'].tolist()
raw_y = functionNameCG['functionCallNum'].tolist()

print(f"raw_x={raw_x}")
print(f"raw_y={raw_y}")

# NOTE(review): the aggregation loop earlier in this notebook passes the
# lower-case benchmark name ("cg") to returnSeriesOfData — confirm which
# spelling the helper expects.
benchmarkName = "CG"
functionName = "CG"
fixProcessOrClass = "Class"
fixed = "A"
targetBenchmarkClass = fixed
# The last measurement is held out as the prediction target and the rest are
# used for fitting, mirroring the split used by the aggregation loop.
# The original hard-coded targetProcess = 256; taking it from the data keeps it
# paired with targetFunctionCallNum.
targetProcess = raw_x[-1]
targetFunctionCallNum = raw_y[-1]
# Pass the values prepared above instead of quoted placeholder names and dummy data.
returnSeriesOfData(benchmarkName=benchmarkName, functionName=functionName, rawX=raw_x[:-1], rawY=raw_y[:-1],
                   fixProcessOrClass=fixProcessOrClass, fixed=fixed, targetProcess=targetProcess,
                   targetBenchmarkClass=targetBenchmarkClass, targetFunctionCallNum=targetFunctionCallNum,
                   csvDirPath="./csv_files/")