In [1]:
import pandas as pd
import numpy as np
import math
from os.path import join
from collections import defaultdict
from collections import OrderedDict
from matplotlib import pyplot as plt
from matplotlib import colors
from matplotlib.ticker import MaxNLocator
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default font family for every figure. The plotting functions in this notebook
# use Japanese axis labels, so this presumably has to be a font with Japanese
# glyphs (assumes IPAPGothic is installed on the machine -- TODO confirm).
plt.rcParams['font.family'] = 'IPAPGothic'

In [2]:
def gen_4graph(df, filename, avg=False):
    """Draw a 2x2 overview figure for a single benchmark result table.

    The rows of ``df`` are grouped in four different ways, one per panel:

    * "MPI process"   -- by the integer value of the ``ppn`` column
    * "OpenMP thread" -- by the integer value of ``omp_num_threads``
    * "SIMD"          -- 'Default' when ``bench`` is truthy, otherwise 'SIMD'
    * "SIMD and RoA"  -- 'Default' when ``bench`` is truthy, else
      'Restructure of Array' when ``macro`` is truthy, else 'SIMD'

    Every group is plotted as its run times against the position of the row
    inside the group (row order of ``df`` is preserved, so pre-sort ``df`` if a
    ranking is wanted). The "OpenMP thread" panel uses connected lines, the
    other panels use scatter points.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``bench``, ``macro``, ``ppn``,
        ``omp_num_threads`` and either ``time`` or ``time_avg`` (see ``avg``).
    filename : str
        Currently unused: saving the figure is disabled in this function, so
        the figure is only left open for the notebook to display.
        NOTE(review): re-enable ``fig.savefig`` here if files are wanted.
    avg : bool, optional
        When true the ``time_avg`` column is plotted instead of ``time``.
    """
    times = df['time_avg' if avg else 'time'].values
    is_bench = df['bench'].values
    is_macro = df['macro'].values
    ppn = [int(v) for v in df['ppn'].values]
    omp = [int(v) for v in df['omp_num_threads'].values]

    # Groups keep first-appearance order, which is also the legend order.
    by_ppn = defaultdict(list)
    by_omp = defaultdict(list)
    simd = {'Default': [], 'SIMD': []}
    simd_roa = {'Default': [], 'SIMD': [], 'Restructure of Array': []}
    for i, elapsed in enumerate(times):
        by_ppn[ppn[i]].append(elapsed)
        by_omp[omp[i]].append(elapsed)
        if is_bench[i]:
            simd['Default'].append(elapsed)
            simd_roa['Default'].append(elapsed)
        elif is_macro[i]:
            # The plain "SIMD" panel does not distinguish the RoA variant.
            simd['SIMD'].append(elapsed)
            simd_roa['Restructure of Array'].append(elapsed)
        else:
            simd['SIMD'].append(elapsed)
            simd_roa['SIMD'].append(elapsed)

    panels = [
        ("MPI process", by_ppn),
        ("OpenMP thread", by_omp),
        ("SIMD", simd),
        ("SIMD and RoA", simd_roa),
    ]

    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 20), dpi=900)
    colours = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    markers = ['o', 'x', '^', '*']
    # axes.flat walks the grid row by row: top-left, top-right, bottom-left, ...
    for ax, (panel_title, groups) in zip(axes.flat, panels):
        for idx, (label, values) in enumerate(groups.items()):
            # Cycle through all colours with one marker, then move on to the
            # next marker; wrap around instead of running off the marker list.
            colour = colours[idx % len(colours)]
            mark = markers[(idx // len(colours)) % len(markers)]
            xs = list(range(len(values)))
            if panel_title == "OpenMP thread":
                ax.plot(xs, values, c=colour, marker=mark, label=label)
            else:
                ax.scatter(xs, values, c=colour, marker=mark, label=label)
        ax.set_title(panel_title, fontsize=20)
        # Shrink the axes to 80% width to make room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.set_xlabel("順序", fontsize=18)
        ax.set_ylabel("実行時間(sec)", fontsize=18)
        ax.tick_params(axis='both', which='major', labelsize=15)
        ax.tick_params(axis='both', which='minor', labelsize=15)
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        # Put a legend to the right of the current axis
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=15)
    # No trailing plt.title() call: it only re-titled the last panel with the
    # default font size, undoing the fontsize=20 title set above.

In [None]:
def gen_graph(dfs, filename, outdir="/Users/hashmup/Dropbox/研究室/卒業論文/thesis/images"):
    """Save one multi-panel PDF per tuning category comparing several runs.

    Three figures are produced, one for each category:

    * "MPI process"   -- rows grouped by the integer value of ``ppn``
    * "OpenMP thread" -- rows grouped by the integer value of ``omp_num_threads``
    * "SIMD and RoA"  -- 'Default' when ``bench`` is truthy, else
      'Restructure of Array' when ``macro`` is truthy, else 'SIMD'

    Each figure has one panel per entry of ``dfs``. Inside a panel every group
    is drawn as its ``time_avg`` values against the position of the row within
    the group (row order of the frame is preserved). The "OpenMP thread" figure
    uses connected lines, the others scatter points. A single figure-level
    legend is shared by all panels; a label keeps the same colour/marker in
    every panel of a figure.

    Parameters
    ----------
    dfs : mapping of str -> pandas.DataFrame
        Panel title -> result table. Each table must provide the columns
        ``time_avg``, ``bench``, ``macro``, ``ppn`` and ``omp_num_threads``.
        Up to four tables fit the default 2x2 grid; more tables add rows.
    filename : str
        Prefix of the output files, which are named
        ``<filename>-<category with spaces replaced by '-'>.pdf``.
    outdir : str, optional
        Directory the PDFs are written to. Defaults to the original
        hard-coded thesis image directory.
    """
    roa_labels = ('Default', 'SIMD', 'Restructure of Array')
    grouped = {"MPI process": {}, "OpenMP thread": {}, "SIMD and RoA": {}}
    for panel_title in dfs:
        df = dfs[panel_title]
        times = df['time_avg'].values
        is_bench = df['bench'].values
        is_macro = df['macro'].values
        ppn = [int(v) for v in df['ppn'].values]
        omp = [int(v) for v in df['omp_num_threads'].values]

        by_ppn = grouped["MPI process"][panel_title] = {}
        by_omp = grouped["OpenMP thread"][panel_title] = {}
        # The three variants are always present, even when empty, so that
        # every panel lists the same labels.
        by_variant = grouped["SIMD and RoA"][panel_title] = {name: [] for name in roa_labels}
        for i, elapsed in enumerate(times):
            by_ppn.setdefault(ppn[i], []).append(elapsed)
            by_omp.setdefault(omp[i], []).append(elapsed)
            if is_bench[i]:
                variant = 'Default'
            elif is_macro[i]:
                variant = 'Restructure of Array'
            else:
                variant = 'SIMD'
            by_variant[variant].append(elapsed)

    colours = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    markers = ['o', 'x', '^', '*']
    for category, panels in grouped.items():
        # Two columns; at least the original 2x2 grid, more rows if needed.
        n_rows = max(2, (len(panels) + 1) // 2)
        fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(20, 10 * n_rows), dpi=900)
        style_index = {}  # label -> style slot, fixed at first appearance
        handles = {}      # label -> artist used for the shared legend
        # axes.flat walks the grid row by row: top-left, top-right, ...
        for ax, (panel_title, series) in zip(axes.flat, panels.items()):
            for label, values in sorted(series.items()):
                # A label seen in an earlier panel reuses its slot; a new label
                # takes the next unused slot so that it cannot collide with a
                # label that is already in the legend.
                slot = style_index.setdefault(label, len(style_index))
                colour = colours[slot % len(colours)]
                mark = markers[(slot // len(colours)) % len(markers)]
                xs = list(range(len(values)))
                if category == "OpenMP thread":
                    artist = ax.plot(xs, values, c=colour, marker=mark, label=label)[0]
                else:
                    artist = ax.scatter(xs, values, c=colour, marker=mark, label=label)
                handles[label] = artist
            ax.set_title(panel_title, fontsize=20)
            # Shrink the axes to 80% width to leave room for the legend.
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            ax.set_xlabel("順序", fontsize=18)
            ax.set_ylabel("実行時間(sec)", fontsize=18)
            ax.tick_params(axis='both', which='major', labelsize=15)
            ax.tick_params(axis='both', which='minor', labelsize=15)
            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
            ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        fig.suptitle(category, fontsize=30)
        ordered = sorted(handles)
        # `loc` is passed by keyword, which is the documented calling form.
        lg = fig.legend(
            [handles[label] for label in ordered],
            [str(label) for label in ordered],
            loc='center right',
            title=category,
            fontsize=25,
        )
        lg.get_title().set_fontsize(28)
        fig.savefig(join(outdir, "{0}-{1}.pdf".format(filename, category.replace(' ', '-'))))

In [None]:
def gen_diff_graph(dfs, filename, outdir="/Users/hashmup/Dropbox/研究室/卒業論文/thesis/images"):
    """Save one multi-panel PDF per tuning category showing run-to-run spread.

    For every row of every table the six values
    ``[time_avg, time0, time1, time2, time3, time4]`` are plotted at
    x = 0..5, so each configuration appears as a vertical spread of points.
    NOTE(review): the ``time`` column is not plotted here although the callers
    in this notebook include it in ``time_avg`` -- confirm this is intended.

    Three figures are produced, one for each category:

    * "MPI process"   -- rows grouped by the integer value of ``ppn``
    * "OpenMP thread" -- rows grouped by the integer value of ``omp_num_threads``
    * "SIMD and RoA"  -- 'Default' when ``bench`` is truthy, else
      'Restructure of Array' when ``macro`` is truthy, else 'SIMD'

    Each figure has one panel per entry of ``dfs`` and each panel carries its
    own legend with one entry per non-empty group.

    Parameters
    ----------
    dfs : mapping of str -> pandas.DataFrame
        Panel title -> result table. Each table must provide the columns
        ``time_avg``, ``time0`` .. ``time4``, ``bench``, ``macro``, ``ppn``
        and ``omp_num_threads``. Up to four tables fit the default 2x2 grid;
        more tables add rows.
    filename : str
        Prefix of the output files, which are named
        ``<filename>-<category with spaces replaced by '-'>.pdf``.
    outdir : str, optional
        Directory the PDFs are written to. Defaults to the original
        hard-coded thesis image directory.
    """
    sample_columns = ('time_avg', 'time0', 'time1', 'time2', 'time3', 'time4')
    roa_labels = ('Default', 'SIMD', 'Restructure of Array')
    grouped = {"MPI process": {}, "OpenMP thread": {}, "SIMD and RoA": {}}
    for panel_title in dfs:
        df = dfs[panel_title]
        samples_by_column = [df[name].values for name in sample_columns]
        is_bench = df['bench'].values
        is_macro = df['macro'].values
        ppn = [int(v) for v in df['ppn'].values]
        omp = [int(v) for v in df['omp_num_threads'].values]

        by_ppn = grouped["MPI process"][panel_title] = {}
        by_omp = grouped["OpenMP thread"][panel_title] = {}
        by_variant = grouped["SIMD and RoA"][panel_title] = {name: [] for name in roa_labels}
        for i in range(len(samples_by_column[0])):
            # One row of measurements; it is only read later, so the same list
            # can be shared by the three groupings.
            sample = [column[i] for column in samples_by_column]
            by_ppn.setdefault(ppn[i], []).append(sample)
            by_omp.setdefault(omp[i], []).append(sample)
            if is_bench[i]:
                variant = 'Default'
            elif is_macro[i]:
                variant = 'Restructure of Array'
            else:
                variant = 'SIMD'
            by_variant[variant].append(sample)

    colours = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    markers = ['o', 'x', '^', '*']
    for category, panels in grouped.items():
        # Two columns; at least the original 2x2 grid, more rows if needed.
        n_rows = max(2, (len(panels) + 1) // 2)
        fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(20, 10 * n_rows), dpi=900)
        # axes.flat walks the grid row by row: top-left, top-right, ...
        for ax, (panel_title, series) in zip(axes.flat, panels.items()):
            for slot, (label, samples) in enumerate(sorted(series.items())):
                if not samples:
                    # Empty groups still consume a style slot (keeps colours
                    # stable across panels) but draw nothing.
                    continue
                colour = colours[slot % len(colours)]
                mark = markers[(slot // len(colours)) % len(markers)]
                # Draw the whole group with a single scatter call so that the
                # legend shows the label once instead of once per row.
                xs = [pos for sample in samples for pos in range(len(sample))]
                ys = [value for sample in samples for value in sample]
                ax.scatter(xs, ys, c=colour, marker=mark, label=label)
            ax.set_title(panel_title, fontsize=20)
            # Shrink the axes to 80% width to make room for the legend.
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            ax.set_xlabel("順序", fontsize=18)
            ax.set_ylabel("実行時間(sec)", fontsize=18)
            ax.tick_params(axis='both', which='major', labelsize=15)
            ax.tick_params(axis='both', which='minor', labelsize=15)
            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
            ax.yaxis.set_major_locator(MaxNLocator(integer=True))
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=15)
        fig.suptitle(category, fontsize=30)
        fig.savefig(join(outdir, "{0}-{1}.pdf".format(filename, category.replace(' ', '-'))))

In [None]:
# Rank every candidate configuration of each simulation length by its mean run
# time (mean of the six timing columns) and plot the full rankings.
candidate_csvs = [
    ("シミュレーション時間 50", "cluster/data/2018-01-29_23-05/result_candidate.csv"),
    ("シミュレーション時間 100", "cluster/data/2018-01-28_20-16/result_candidate.csv"),
    ("シミュレーション時間 250", "cluster/data/2018-01-29_04-47/result_candidate.csv"),
    ("シミュレーション時間 500", "cluster/data/2018-01-29_20-46/result_candidate.csv"),
]
ranked = {}
for panel_title, csv_path in candidate_csvs:
    runs = pd.read_csv(csv_path)
    runs['time_avg'] = (
        runs['time'] + runs['time0'] + runs['time1']
        + runs['time2'] + runs['time3'] + runs['time4']
    ) / 6.0
    # Fastest configuration first; the index is renumbered to the rank.
    ranked[panel_title] = runs.sort_values(by="time_avg").reset_index(drop=True)
# Keep the per-length frames available under their usual names.
df_50, df_100, df_250, df_500 = ranked.values()
gen_graph(ranked, "cluster")

In [None]:
# Same ranking as for the full plots, but only the 50 fastest configurations
# of each simulation length are kept.
candidate_csvs = [
    "cluster/data/2018-01-29_23-05/result_candidate.csv",
    "cluster/data/2018-01-28_20-16/result_candidate.csv",
    "cluster/data/2018-01-29_04-47/result_candidate.csv",
    "cluster/data/2018-01-29_20-46/result_candidate.csv",
]
fastest = []
for csv_path in candidate_csvs:
    runs = pd.read_csv(csv_path)
    runs['time_avg'] = (
        runs['time'] + runs['time0'] + runs['time1']
        + runs['time2'] + runs['time3'] + runs['time4']
    ) / 6.0
    # Fastest first, index renumbered to the rank, then cut to the top 50.
    fastest.append(runs.sort_values(by="time_avg").reset_index(drop=True).head(50))
df_50, df_100, df_250, df_500 = fastest
# NOTE(review): the gen_graph(...) call that follows is cut off in the middle
# of its filename string in this copy of the notebook -- restore the full
# argument before running this cell.
gen_graph({"シミュレーション時間 50": df_50, "シミュレーション時間 100": df_100, "シミュレーション時間 250": df_250, "シミュレーション時間 500": df_500}, "cl

In [None]:
# Rank every candidate configuration of each stop time by its mean run time
# and plot the spread of the individual measurements.
candidate_csvs = [
    ("stoptime 50", "cluster/data/2018-01-29_23-05/result_candidate.csv"),
    ("stoptime 100", "cluster/data/2018-01-28_20-16/result_candidate.csv"),
    ("stoptime 250", "cluster/data/2018-01-29_04-47/result_candidate.csv"),
    ("stoptime 500", "cluster/data/2018-01-29_20-46/result_candidate.csv"),
]
ranked = {}
for panel_title, csv_path in candidate_csvs:
    runs = pd.read_csv(csv_path)
    runs['time_avg'] = (
        runs['time'] + runs['time0'] + runs['time1']
        + runs['time2'] + runs['time3'] + runs['time4']
    ) / 6.0
    # Fastest configuration first; the index is renumbered to the rank.
    ranked[panel_title] = runs.sort_values(by="time_avg").reset_index(drop=True)
# Keep the per-length frames available under their usual names.
df_50, df_100, df_250, df_500 = ranked.values()
gen_diff_graph(ranked, "cluster-diff")

In [None]:
# Spread of the individual measurements for only the 20 fastest
# configurations of each stop time.
candidate_csvs = [
    ("stoptime 50", "cluster/data/2018-01-29_23-05/result_candidate.csv"),
    ("stoptime 100", "cluster/data/2018-01-28_20-16/result_candidate.csv"),
    ("stoptime 250", "cluster/data/2018-01-29_04-47/result_candidate.csv"),
    ("stoptime 500", "cluster/data/2018-01-29_20-46/result_candidate.csv"),
]
fastest = {}
for panel_title, csv_path in candidate_csvs:
    runs = pd.read_csv(csv_path)
    runs['time_avg'] = (
        runs['time'] + runs['time0'] + runs['time1']
        + runs['time2'] + runs['time3'] + runs['time4']
    ) / 6.0
    # Fastest first, index renumbered to the rank, then cut to the top 20.
    fastest[panel_title] = runs.sort_values(by="time_avg").reset_index(drop=True).head(20)
# Keep the per-length frames available under their usual names.
df_50, df_100, df_250, df_500 = fastest.values()
gen_diff_graph(fastest, "cluster-diff-top20")

In [None]:
# df_50 = pd.read_csv("cluster/data/2018-01-29_23-05/result_all.csv")
# df_50 = df_50.sort_values(by="time").reset_index(drop=True)
# gen_4graph(df_50, "cluster-50.pdf")

In [None]:
# df_100 = pd.read_csv("cluster/data/2018-01-28_20-16/result_all.csv")
# df_100 = df_100.sort_values(by="time").reset_index(drop=True)
# gen_4graph(df_100, "cluster-100.pdf")

In [None]:
# df_250 = pd.read_csv("cluster/data/2018-01-29_04-47/result_all.csv")
# df_250 = df_250.sort_values(by="time").reset_index(drop=True)
# gen_4graph(df_250, "cluster-250.pdf")

In [None]:
# df_500 = pd.read_csv("cluster/data/2018-01-29_20-46/result_all.csv")
# df_500 = df_500.sort_values(by="time").reset_index(drop=True)
# gen_4graph(df_500, "cluster-500.pdf")

In [None]:
# df_50 = pd.read_csv("cluster/data/2018-01-29_23-05/result_candidate.csv")
# df_50['time_avg'] = (df_50['time'] + df_50['time0'] + df_50['time1'] + df_50['time2'] + df_50['time3'] + df_50['time4']) / 6.0
# df_50 = df_50.sort_values(by="time_avg").reset_index(drop=True)
# gen_4graph(df_50, "cluster-50.pdf", True)

In [None]:
# df_100 = pd.read_csv("cluster/data/2018-01-28_20-16/result_candidate.csv")
# df_100['time_avg'] = (df_100['time'] + df_100['time0'] + df_100['time1'] + df_100['time2'] + df_100['time3'] + df_100['time4']) / 6.0
# df_100 = df_100.sort_values(by="time_avg").reset_index(drop=True)
# gen_4graph(df_100, "cluster-100.pdf", True)

In [None]:
# df_250 = pd.read_csv("cluster/data/2018-01-29_04-47/result_candidate.csv")
# df_250['time_avg'] = (df_250['time'] + df_250['time0'] + df_250['time1'] + df_250['time2'] + df_250['time3'] + df_250['time4']) / 6.0
# df_250 = df_250.sort_values(by="time_avg").reset_index(drop=True)
# gen_4graph(df_250, "cluster-250.pdf", True)

In [None]:
# df_500 = pd.read_csv("cluster/data/2018-01-29_20-46/result_candidate.csv")
# df_500['time_avg'] = (df_500['time'] + df_500['time0'] + df_500['time1'] + df_500['time2'] + df_500['time3'] + df_500['time4']) / 6.0
# df_500 = df_500.sort_values(by="time_avg").reset_index(drop=True)
# gen_4graph(df_500, "cluster-500.pdf", True)