In [1]:
%matplotlib inline

from __future__ import unicode_literals

import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
from matplotlib.colors import LogNorm
from matplotlib import pyplot as plt

In [None]:
# Matches an ESC character followed by everything up to and including the
# next "m"; presumably the ANSI colour codes wrapped around values in the logs.
ansi_escape = re.compile(r'\x1b[^m]*m')


def _clean_line(raw_line):
    """Return ``raw_line`` as stripped text.

    Under Python 2 a file opened with mode "r" yields byte strings, which are
    decoded as UTF-8 here. Under Python 3 the line is already text (and has no
    ``decode`` method), so it is only stripped.
    """
    if isinstance(raw_line, bytes):
        raw_line = raw_line.decode("utf-8")
    return raw_line.strip()


def _number(token, cast=float):
    """Remove escape sequences from ``token`` and convert it with ``cast``."""
    return cast(ansi_escape.sub('', token))


def _name_number(fname):
    """Return the integer stored in the second "_"-separated part of ``fname``."""
    return int(fname.split("_")[1])


# One row per log file; every column starts at 0 and is filled in from the
# file name and from the recognised lines of the log.
dataframe = pd.DataFrame(columns=["Dataset", "MatrixType", "Optimization", "Size",
                                  "Time", "SetupTime", "RefTime", "SetupRefTime",
                                  "Threads", "Blocks", "Parallelization", "Flags", "BlockExperiment"])

# Maps the item count reported in a log to the dataset name.
dataset_size = {
    1682: "ml100k",
    3952: "ml1M",
    10677: "ml10M",
    26744: "ml20M"
}

# Logs under ./kickstart/: always recorded as dense / "flag" / ml100k.
for fname in os.listdir("./kickstart/"):
    if fname == "gprof.txt":
        continue

    with open("./kickstart/{}".format(fname), "r") as f:
        dataset_entry = {col: 0 for col in dataframe.columns}
        dataset_entry["MatrixType"] = "dense"
        dataset_entry["Optimization"] = "flag"
        dataset_entry["Dataset"] = "ml100k"
        # The flags are the dot-separated parts of the file name between the
        # first and the last one.
        dataset_entry["Flags"] = "|".join(fname.split(".")[1:-1])
        for raw_line in f:
            line = _clean_line(raw_line)
            data = line.split()
            if line.startswith("Calculating items"):
                dataset_entry["Size"] = _number(data[6], int)
            elif line.startswith("Computation took"):
                dataset_entry["Time"] = _number(data[2])

    dataframe.loc[dataframe.shape[0]] = dataset_entry

# Logs under ./openmp, ./cuda and ./sparse.
for alg in ["openmp", "cuda", "sparse"]:
    for fname in os.listdir("./{}".format(alg)):
        with open("./{}/{}".format(alg, fname), "r") as f:
            dataset_entry = {col: 0 for col in dataframe.columns}
            dataset_entry["MatrixType"] = "sparse" if alg == "sparse" else "dense"
            # Everything that is not in ./openmp (including ./sparse) is
            # labelled "cuda".
            dataset_entry["Optimization"] = "openmp" if alg == "openmp" else "cuda"
            dataset_entry["Flags"] = "O3|hugepages"

            # Block-size experiments carry the block size in the file name;
            # all other runs are recorded with 32.
            if "block" in fname:
                dataset_entry["Blocks"] = _name_number(fname)
                dataset_entry["BlockExperiment"] = 1
            else:
                dataset_entry["Blocks"] = 32
                dataset_entry["BlockExperiment"] = 0

            # Thread count comes from the file name when present, else 12.
            if "threads" in fname:
                dataset_entry["Threads"] = _name_number(fname)
            else:
                dataset_entry["Threads"] = 12

            # "single"/"double" file names also carry the thread count and
            # override the value set above; anything else counts as "double".
            if "single" in fname:
                dataset_entry["Parallelization"] = "single"
                dataset_entry["Threads"] = _name_number(fname)
            elif "double" in fname:
                dataset_entry["Parallelization"] = "double"
                dataset_entry["Threads"] = _name_number(fname)
            else:
                dataset_entry["Parallelization"] = "double"

            for raw_line in f:
                line = _clean_line(raw_line)
                data = line.split()
                if line.startswith("Calculating items"):
                    dataset_entry["Size"] = _number(data[6], int)
                    # Raises KeyError for an item count not in dataset_size.
                    dataset_entry["Dataset"] = dataset_size[dataset_entry["Size"]]
                elif line.startswith("Computation took"):
                    dataset_entry["Time"] = _number(data[2])
                    dataset_entry["SetupTime"] = _number(data[6])
                elif line.startswith("Reference computation took"):
                    dataset_entry["RefTime"] = _number(data[3])
                    dataset_entry["SetupRefTime"] = _number(data[6])
                elif line.startswith("Optimized computation took"):
                    dataset_entry["Time"] = _number(data[3])
                    dataset_entry["SetupTime"] = _number(data[6])

            dataframe.loc[dataframe.shape[0]] = dataset_entry

In [2]:
# Load the timing table from disk; this rebinds `dataframe`.
dataframe = pd.read_csv("./data_times.csv")

# n*(n+1)/2 for each distinct item count n, in order of first appearance in
# the CSV.
# NOTE(review): other cells index this array by position (0..3), which
# presumably relies on the sizes first appearing in ascending order -- confirm
# against the CSV.
item_counts = dataframe.Size.unique()
problem_sizes = (item_counts**2 + item_counts)/2

In [None]:
def _threads_by_dataset(rows, value_col, datasets):
    """Return ``value_col`` of ``rows`` as a Threads x Dataset table.

    ``rows`` is sorted by Threads and Size, pivoted with Threads as index and
    Dataset as columns, and restricted to the ``datasets`` columns in order.
    """
    return rows.sort(["Threads", "Size"]).reset_index(drop=True)\
        .pivot("Threads", "Dataset", value_col)[datasets]


dense_datasets = ["ml100k", "ml1M", "ml10M"]
sparse_datasets = ["ml100k", "ml1M", "ml10M", "ml20M"]

# (I): OpenMP runs with "double" parallelization.
dense_rows = dataframe[(dataframe.Optimization == "openmp") & (dataframe.Parallelization == "double")]
# (II): sparse rows outside the block-size experiment; their RefTime and
# SetupRefTime columns are the ones plotted here.
sparse_rows = dataframe[(dataframe.Optimization == "cuda") & (dataframe.MatrixType == "sparse") &
                        (dataframe.BlockExperiment == 0)]

openmp_double_time = _threads_by_dataset(dense_rows, "Time", dense_datasets)
openmp_double_setup = _threads_by_dataset(dense_rows, "SetupTime", dense_datasets)
openmp_sparse_time = _threads_by_dataset(sparse_rows, "RefTime", sparse_datasets)
openmp_sparse_setup = _threads_by_dataset(sparse_rows, "SetupRefTime", sparse_datasets)

openmp_double = openmp_double_time + openmp_double_setup
openmp_double.columns = ["ml100k (I)", "ml1M (I)", "ml10M (I)"]
openmp_sparse = openmp_sparse_time + openmp_sparse_setup
openmp_sparse.columns = ["ml100k (II)", "ml1M (II)", "ml10M (II)", "ml20M (II)"]

# Dense rows are taken from thread label 2 onwards, columns are interleaved
# per dataset, and each column is divided by its dataset's problem size.
openmp = pd.concat((openmp_double.loc[2:], openmp_sparse), axis=1)\
    [["ml100k (I)", "ml100k (II)", "ml1M (I)", "ml1M (II)", "ml10M (I)", "ml10M (II)", "ml20M (II)"]]\
    .sort_index(ascending=False).divide(problem_sizes[[0, 0, 1, 1, 2, 2, 3]], axis='columns')

# Seconds -> microseconds. The colour norm is built from the frame that is
# actually plotted and uses scalar bounds: DataFrame.min()/max() alone return
# one value per column, which is not a valid vmin/vmax.
openmp_us = openmp/1e-6
color_norm = LogNorm(vmin=openmp_us.min().min(), vmax=openmp_us.max().max())

fig, ax = plt.subplots()
heatmap_plot = sns.heatmap(openmp_us, annot=True, fmt=".3g", ax=ax, linewidths=.5, cmap="Blues",
                           norm=color_norm, cbar=True)
ax.set_xlabel("Dataset")
ax.set_ylabel("Número de threads")
# NOTE(review): hard-coded tick labels assume the thread counts are exactly
# 2, 4, 6, 8, 12 and depend on the direction in which seaborn draws rows.
ax.set_yticklabels("2 4 6 8 12".split())
fig.set_size_inches((10, 6))
fig.suptitle("Tiempo de ejecución normalizado\npara experimentos con OpenMP (en \u03BCs)",
             fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.9)
fig.savefig("../plots/heatmap_openmp.png", bbox_inches="tight")

In [None]:
# CUDA runs on dense matrices versus 12-thread "double" OpenMP runs.
# NOTE(review): the original CUDA condition ended in a truncated
# `(dataframe.)`, which is a syntax error. The dense-matrix filter is inferred
# from the plot title and from the six rows reordered below -- confirm.
cuda_dense_mask = (dataframe.Optimization == "cuda") & (dataframe.Threads == 12)\
    & (dataframe.MatrixType == "dense")
openmp_mask = (dataframe.Optimization == "openmp") & (dataframe.Parallelization == "double")\
    & (dataframe.Threads == 12)
mask = cuda_dense_mask | openmp_mask

# The positional reorder must line up with problem_sizes[[0, 0, 1, 1, 2, 2]]
# below; it depends on the row order of the loaded table.
cuda_omp_dense = dataframe[mask][["Dataset", "Optimization", "Time", "SetupTime"]]\
    .reset_index(drop=True).loc[[0, 3, 2, 5, 1, 4]].reset_index(drop=True)
cuda_omp_dense.loc[cuda_omp_dense.Optimization == "cuda", "Optimization"] = "CUDA"
cuda_omp_dense.loc[cuda_omp_dense.Optimization == "openmp", "Optimization"] = "OpenMP"
cuda_omp_dense["TotalTime"] = cuda_omp_dense["Time"] + cuda_omp_dense["SetupTime"]

# Normalize by problem size, then convert seconds to microseconds.
time_columns = ["Time", "SetupTime", "TotalTime"]
cuda_omp_dense[time_columns] = cuda_omp_dense[time_columns]\
    .divide(problem_sizes[[0, 0, 1, 1, 2, 2]], axis=0)
cuda_omp_dense[time_columns] = cuda_omp_dense[time_columns]/1e-6

g = sns.factorplot(x="Dataset", y="TotalTime", hue="Optimization", size=6, data=cuda_omp_dense,
                   kind="bar", palette="RdBu_r", legend=False)
g.fig.get_axes()[0].set_yscale('log')
g.despine(left=True)
g.set_xlabels("Dataset")
# Unit is microseconds ("\u03BCs"); the original label was missing the "s".
g.set_ylabels("Tiempo (Escala Logarítmica en \u03BCs)")
g.fig.suptitle("Comparación de tiempos normalizados para\nexperimentos CUDA y OpenMP con matrices densas",
               fontsize=14, fontweight='bold')
g.fig.subplots_adjust(top=.9)
plt.legend(title="Optimización")

g.fig.savefig("../plots/cuda_omp_dense.png")

In [None]:
# 12-thread CUDA runs outside the block-size experiment, plus 12-thread
# "double" OpenMP runs.
cuda_rows = (dataframe.Optimization == "cuda") & (dataframe.Threads == 12) & (dataframe.BlockExperiment == 0)
openmp_rows = (dataframe.Optimization == "openmp") & (dataframe.Parallelization == "double")\
    & (dataframe.Threads == 12)
mask = cuda_rows | openmp_rows

cuda_omp = pd.DataFrame(columns=["Experiment", "Dataset", "Time", "SetupTime", "TotalTime", "Size"])

for _, datarow in dataframe[mask].sort(["Size"]).iterrows():
    if datarow.Optimization == "openmp":
        experiment = "OpenMP con Matriz Densa"
    elif datarow.Optimization == "cuda" and datarow.MatrixType == "dense":
        experiment = "CUDA con Matriz Densa"
    else:
        # A sparse row produces two entries: first the reference timings
        # (RefTime / SetupRefTime), labelled as OpenMP, then the CUDA entry
        # appended below.
        cuda_omp.loc[cuda_omp.shape[0]] = {
            "Experiment": "OpenMP con Matriz Rala",
            "Dataset": datarow.Dataset,
            "Time": datarow.RefTime,
            "SetupTime": datarow.SetupRefTime,
            "TotalTime": datarow.RefTime + datarow.SetupRefTime,
            "Size": datarow.Size
        }
        experiment = "CUDA con Matriz Rala"

    row = {
        "Experiment": experiment,
        "Dataset": datarow.Dataset,
        "Time": datarow.Time,
        "SetupTime": datarow.SetupTime,
        "TotalTime": datarow.Time + datarow.SetupTime,
        "Size": datarow.Size
    }
    cuda_omp.loc[cuda_omp.shape[0]] = row

# Normalize each row by its dataset's problem size (the index list assumes
# 4, 4, 4 and 2 rows per dataset in ascending size), then convert seconds to
# microseconds.
time_columns = ["Time", "SetupTime", "TotalTime"]
row_problem_sizes = problem_sizes[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3]]
cuda_omp[time_columns] = cuda_omp[time_columns].divide(row_problem_sizes, axis=0)
cuda_omp[time_columns] = cuda_omp[time_columns]/1e-6

g = sns.factorplot(x="Dataset", y="TotalTime", hue="Experiment", data=cuda_omp,
                   kind="bar", palette="RdBu_r", legend=False)
log_axis = g.fig.get_axes()[0]
log_axis.set_yscale('log')
g.despine(left=True)
g.set_xlabels("Dataset")
g.set_ylabels("Tiempo (Escala Logarítmica en \u03BCs)")
g.fig.suptitle("Comparación de tiempos normalizados para\nexperimentos CUDA y OpenMP",
               fontsize=14, fontweight='bold')
g.fig.subplots_adjust(top=.9)
g.fig.set_size_inches((10, 6))
plt.legend(title="Tipo de Experimentos")

g.fig.savefig("../plots/cuda_omp.png")

In [None]:
# Block-size experiment rows as a Blocks x Dataset table of total time
# (Time + SetupTime), each column divided by its dataset's problem size.
cuda_sparse_blocks = dataframe[dataframe.BlockExperiment == 1].sort(["Blocks", "Size"])\
    .reset_index(drop=True).pivot("Blocks", "Dataset")[["Time", "SetupTime"]]
cuda_sparse_blocks = cuda_sparse_blocks["Time"] + cuda_sparse_blocks["SetupTime"]
cuda_sparse_blocks = cuda_sparse_blocks[["ml100k", "ml1M", "ml10M", "ml20M"]].sort_index(ascending=False)\
    .divide(problem_sizes[[0, 1, 2, 3]], axis='columns')

# Seconds -> microseconds. The colour norm is built from the frame that is
# actually plotted and uses scalar bounds: DataFrame.min()/max() alone return
# one value per column, which is not a valid vmin/vmax.
cuda_sparse_blocks_us = cuda_sparse_blocks/1e-6
color_norm = LogNorm(vmin=cuda_sparse_blocks_us.min().min(),
                     vmax=cuda_sparse_blocks_us.max().max())

fig, ax = plt.subplots()
heatmap_plot = sns.heatmap(cuda_sparse_blocks_us, annot=True, fmt=".3g", ax=ax, linewidths=.5, cmap="Blues",
                           norm=color_norm, cbar=True)
ax.set_xlabel("Dataset")
ax.set_ylabel("Tamaño del Bloque")
# NOTE(review): hard-coded tick labels assume block sizes 2, 4, 8, 16, 32 and
# depend on the direction in which seaborn draws rows.
ax.set_yticklabels("2x2 4x4 8x8 16x16 32x32".split())
fig.set_size_inches((10, 6))
fig.suptitle("Tiempo de ejecución normalizado para experimentos con CUDA\n " +
             "y Matrices Ralas según el tamaño del bloque de threads (en \u03BCs)",
             fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.9)
fig.savefig("../plots/cuda_sparse_blocks.png")

In [3]:
# Setup time used for the first two rows (idx 0 and 1) in place of their own
# SetupTime column.
setup_time_no_data = 0.595061


def _pick(dataset, positions):
    """Return the rows of ``dataset`` found at ``positions`` after re-indexing."""
    return dataframe[dataframe.Dataset == dataset].reset_index(drop=True).loc[positions]


# NOTE(review): the positions below are tied to the row order of the loaded
# table.
ml100k = _pick("ml100k", [0, 2, 6, 17, 18, 21])
ml1M = _pick("ml1M", [0, 2, 11, 12, 13])
ml10M = _pick("ml10M", [0, 4, 5, 6])
ml20M = _pick("ml20M", [0, 1])

datasets = pd.concat([ml100k, ml1M, ml10M, ml20M]).reset_index(drop=True)

evolution_dataframe = pd.DataFrame(columns=["Experiment", "Dataset", "Time",
                                            "SetupTime", "TotalTime", "Size"])

for idx, datarow in datasets.iterrows():
    experiment = "{}_{}_{}_{}".format(datarow.MatrixType, datarow.Optimization,
                                      datarow.Threads, datarow.Blocks)

    if idx in {4, 9, 13, 15}:
        # These rows report the reference timings and are labelled as OpenMP.
        compute_time = datarow.RefTime
        setup_time = datarow.SetupRefTime
        experiment = "{}_openmp_{}_{}".format(datarow.MatrixType, datarow.Threads, datarow.Blocks)
    elif idx in {0, 1}:
        # The first two rows use the fixed setup time.
        compute_time = datarow.Time
        setup_time = setup_time_no_data
        if idx == 0:
            experiment = "no_optimization"
    else:
        compute_time = datarow.Time
        setup_time = datarow.SetupTime

    evolution_dataframe.loc[evolution_dataframe.shape[0]] = {
        "Dataset": datarow.Dataset,
        "Size": datarow.Size,
        "Experiment": experiment,
        "Time": compute_time,
        "SetupTime": setup_time,
        "TotalTime": compute_time + setup_time
    }

# Swap rows 6 and 7.
evolution_dataframe.loc[[6,7],:] = evolution_dataframe.loc[[7,6],:].values

# Display names, one per row; they replace the Experiment values set above.
flags_label = "Optimización por flags de compilación"
openmp_dense_label = "OpenMP con matriz densa (12 threads)"
cuda_dense_label = "CUDA con matriz densa (32x32 threads por bloque)"
openmp_sparse_label = "OpenMP con matriz rala (12 threads)"
cuda_sparse_32_label = "CUDA con matriz rala (32x32 threads por bloque)"
cuda_sparse_16_label = "CUDA con matriz rala (16x16 threads por bloque)"
experiments_names = (
    # rows 0-5 (ml100k)
    ["Sin optimización", flags_label, openmp_dense_label, cuda_dense_label,
     openmp_sparse_label, cuda_sparse_32_label]
    # rows 6-10 (ml1M)
    + [flags_label, openmp_dense_label, cuda_dense_label, openmp_sparse_label, cuda_sparse_16_label]
    # rows 11-14 (ml10M)
    + [openmp_dense_label, cuda_dense_label, openmp_sparse_label, cuda_sparse_16_label]
    # rows 15-16 (ml20M)
    + [openmp_sparse_label, cuda_sparse_16_label]
)
evolution_dataframe["Experiment"] = experiments_names

# Normalize by problem size (6, 5, 4 and 2 rows per dataset), then convert
# seconds to microseconds.
time_columns = ["Time", "SetupTime", "TotalTime"]
evolution_dataframe[time_columns] = evolution_dataframe[time_columns]\
    .divide(problem_sizes[[0]*6 + [1]*5 + [2]*4 + [3]*2], axis=0)
evolution_dataframe[time_columns] = evolution_dataframe[time_columns]/1e-6
evolution_dataframe

Unnamed: 0,Experiment,Dataset,Time,SetupTime,TotalTime,Size
0,Sin optimización,ml100k,7.379797,0.420418,7.800215,1682
1,Optimización por flags de compilación,ml100k,2.670519,0.420418,3.090937,1682
2,OpenMP con matriz densa (12 threads),ml100k,0.189421,0.357766,0.547188,1682
3,CUDA con matriz densa (32x32 threads por bloque),ml100k,0.052999,1.323722,1.376721,1682
4,OpenMP con matriz rala (12 threads),ml100k,0.199359,0.350301,0.54966,1682
5,CUDA con matriz rala (32x32 threads por bloque),ml100k,0.010322,1.226322,1.236644,1682
6,Optimización por flags de compilación,ml1M,1.058788,0.046591,1.105379,3952
7,OpenMP con matriz densa (12 threads),ml1M,0.283529,0.046162,0.32969,3952
8,CUDA con matriz densa (32x32 threads por bloque),ml1M,0.07781,0.099262,0.177072,3952
9,OpenMP con matriz rala (12 threads),ml1M,0.061241,0.04591,0.107151,3952


In [None]:
dataset_dataframe = evolution_dataframe.loc[:5]

fig, ax = plt.subplots(figsize=(8, 5))
sns.set_color_codes("pastel")
sns.barplot(x="TotalTime", y="Experiment", data=dataset_dataframe,
            label="Tiempo Total", color="b", ax=ax)

sns.set_color_codes("muted")
sns.barplot(x="Time", y="Experiment", data=dataset_dataframe,
            label="Tiempo de Cálculo", color="b", ax=ax)

ax.set(xscale='log')
ax.legend(loc='lower right', frameon=True)
ax.set(xlim=(1e-3, 20), ylabel="", xlabel="Tiempo (Escala Logarítmica en \u03BCs)")
fig
sns.despine(left=True, bottom=True)
fig.suptitle("Evolución de tiempos normalizados para\nexperimentos sobre ml100k",
               fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.85)

fig.savefig("../plots/ml100k.png", bbox_inches="tight")

In [None]:
dataset_dataframe = evolution_dataframe.loc[6:10]

fig, ax = plt.subplots(figsize=(8, 5))
sns.set_color_codes("pastel")
sns.barplot(x="TotalTime", y="Experiment", data=dataset_dataframe,
            label="Tiempo Total", color="b", ax=ax)

sns.set_color_codes("muted")
sns.barplot(x="Time", y="Experiment", data=dataset_dataframe,
            label="Tiempo de Cálculo", color="b", ax=ax)

ax.set(xscale='log')
ax.legend(loc="lower right", frameon=True)
ax.set(xlim=(1e-3, 1.5), ylabel="", xlabel="Tiempo (Escala Logarítmica en \u03BCs)")
fig
sns.despine(left=True, bottom=True)
fig.suptitle("Evolución de tiempos normalizados para\nexperimentos sobre ml1M",
               fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.85)

fig.savefig("../plots/ml1M.png", bbox_inches="tight")

In [None]:
dataset_dataframe = evolution_dataframe.loc[11:14]

fig, ax = plt.subplots(figsize=(8, 5))
sns.set_color_codes("pastel")
sns.barplot(x="TotalTime", y="Experiment", data=dataset_dataframe,
            label="Tiempo Total", color="b", ax=ax)

sns.set_color_codes("muted")
sns.barplot(x="Time", y="Experiment", data=dataset_dataframe,
            label="Tiempo de Cálculo", color="b", ax=ax)

ax.set(xscale='log')
ax.legend(loc="lower right", frameon=True)
ax.set(xlim=(1e-2, 200), ylabel="", xlabel="Tiempo (Escala Logarítmica en \u03BCs)")
fig
sns.despine(left=True, bottom=True)
fig.suptitle("Evolución de tiempos normalizados para\nexperimentos sobre ml10M",
               fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.85)

fig.savefig("../plots/ml10M.png", bbox_inches="tight")

In [None]:
dataset_dataframe = evolution_dataframe.loc[15:16]

fig, ax = plt.subplots(figsize=(8, 5))
sns.set_color_codes("pastel")
sns.barplot(x="TotalTime", y="Experiment", data=dataset_dataframe,
            label="Tiempo Total", color="b", ax=ax)

sns.set_color_codes("muted")
sns.barplot(x="Time", y="Experiment", data=dataset_dataframe,
            label="Tiempo de Cálculo", color="b", ax=ax)

ax.set(xscale='log')
ax.legend(loc="lower right", frameon=True)
ax.set(xlim=(1e-2, 5), ylabel="", xlabel="Tiempo (Escala Logarítmica en \u03BCs)")
fig
sns.despine(left=True, bottom=True)
fig.suptitle("Evolución de tiempos normalizados para\nexperimentos sobre ml20M",
               fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.85)

# fig.tight_layout()
fig.savefig("../plots/ml20M.png", bbox_inches="tight")