In [None]:
from matplotlib import cm
from matplotlib.colors import rgb2hex, colorConverter
from matplotlib import colors
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as mpatches
import pandas as pd
import pymongo

from scipy.optimize import least_squares
from sklearn.cluster import SpectralClustering, KMeans, AffinityPropagation
from sklearn.metrics import silhouette_samples, silhouette_score
from seaborn import heatmap
import scipy.cluster.hierarchy as sch
import networkx as nx

from bson.objectid import ObjectId
from scipy.signal import savgol_filter
from scipy.optimize import least_squares
from scipy import interpolate

from IPython.core.display import display, HTML
from tqdm import tqdm
import sympy

import shutil
import pickle
import os
import json
import glob
from copy import deepcopy
from pathlib import Path
from collections import defaultdict, Counter
from functools import partial
from importlib import reload
from itertools import product as prod_itertools

from pascalanalyzer import PascalData
from pascalanalyzer.pascalmodel import (create_model, LeastSquaresOptmizer, PascalModel)
from profiler import Analyser
import plotdata

import warnings
warnings.filterwarnings("ignore")

%pylab inline

from timeline import *
import matplotlib.dates as mdates

# Connect to the database

The database holds multiple trained models as well as the measurements.

## Model fitting

### Model type
- SVR
- Equation

### Split type
- Random
- Halton

### Number of training points
- 10,20,..,100,all


Each model was fitted 10 times, varying the split type and the number of training points.

The measurement units are GHz (frequency) and kJ (energy).

In [None]:
# Global matplotlib look shared by every figure in the notebook.
plt.rcParams.update({
    "figure.dpi": 76,
    "axes.titlesize": 26,
    "font.size": 26,
    "legend.fontsize": 20,
    "xtick.labelsize": 24,
    "ytick.labelsize": 24,
    "axes.labelsize": 24,
})


def matplotlib_rc_1():
    """Select the wide (15, 9) default figure size."""
    plt.rcParams["figure.figsize"] = (15,9)


def matplotlib_rc_2():
    """Select the compact (10, 8) default figure size."""
    plt.rcParams["figure.figsize"] = (10,8)


matplotlib_rc_1()

# Output folders, created up front so later savefig()/cache writes find them.
for out_dir in (
    "experiments/",
    "fingerprint/",
    "fingerprint/phases/",
    "cache/",
    "models/metrics",
    "models/power/",
    "models/energy/power",
    "models/energy/freq_cores",
    "models/energy/freq_inps",
    "models/energy/cores_inps",
    "models/overhead",
    "models/overhead/lowest_mpe",
    "models/analisys",
    "models/hypothesis/const_intructions/freq",
    "models/hypothesis/const_intructions/cores",
    "models/hypothesis/input_instructions/input_time",
    "models/hypothesis/input_instructions/fp",
    "phases/manual",
    "phases/openmp",
    "phases/fingerprint",
    "phases/signals",
    "phases/critical_points",
):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

# Factor that converts raw RAPL_ENERGY_PKG counter values to joules.
RAPL_ENERGY_PKG = 2.3283064365386962890625e-10


def flat(x):
    """Flatten one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    return [b for a in x for b in a]


# NOTE(review): the MongoDB address is hard-coded; the notebook only runs where
# this host is reachable.
client = pymongo.MongoClient("mongodb://172.17.0.2/")
energydb = client["energy"]

# Lazily-filled caches for create_df_times / create_df_models / create_df_resum.
df_times = None
df_models = None
df_resum = None

# Experiments

# Energy per instruction
## Energy breakdown

### Assembly code
```python
xor rcx, rcx
mov rax, 1
mov rdx, 0

loop:
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    targ_inst(*arg)
    
add rcx, 1
cmp rcx, 9999999
jne loop
```

$E=9999999(\frac{10}{13}inst+\frac{3}{13}loop)$

$E=7692306*inst+2307692*loop$

$E=7692306*inst+constant$

$E_{joules}=RAPL\_ENERGY\_PKG \times 2.3283064365386962890625 \times 10^{-10}$

## Generic x86 energy breakdown

In [None]:
# Energy of the general-purpose x86 instructions; raw RAPL counter values are
# converted to joules with RAPL_ENERGY_PKG.
df_generic = pd.read_csv("databases/general_purpose.csv").sort_values("energy")
df_generic["energy"] *= RAPL_ENERGY_PKG
print(df_generic["inst"].unique())


def _finish_generic_plot(plot_title, pdf_path, y_label="Energy (J)", legend_size=None):
    """Label the current bar chart, then lay it out, save it and show it."""
    xticks(fontsize=10)
    xlabel("Instruction")
    ylabel(y_label)
    if legend_size is not None:
        legend(fontsize=legend_size)
    title(plot_title)
    tight_layout()
    savefig(pdf_path)
    show()


def _generic_rel_std(energy):
    """Coefficient of variation (std / mean) of one instruction's energies."""
    return energy.std()/energy.mean()


# std/mean is a dimensionless ratio, so it must not be labelled in joules.
REL_STD_LABEL = "Relative std. deviation (std/mean)"

matplotlib_rc_1()

# Maximum energy of every (instruction, argument kind) pair.
pd.crosstab(df_generic["inst"], df_generic["args"], df_generic["energy"], aggfunc="max").plot.bar()
_finish_generic_plot("Energy per argument", "experiments/inst_en_args_generic.pdf", legend_size=14)

# Every measurement, ordered by energy, on a log scale.
df_generic.sort_values("energy").plot.bar(x="inst", y="energy", logy=True, legend=False)
_finish_generic_plot("Energy consumption", "experiments/inst_mean_en_generic_all.pdf")

# Mean energy per instruction.
df_generic.groupby("inst").energy.mean().sort_values().plot.bar()
_finish_generic_plot("Mean energy consumption", "experiments/inst_mean_en_generic.pdf")

# Spread of the energy across argument kinds.
df_generic.groupby("inst").energy.apply(_generic_rel_std).plot.bar()
_finish_generic_plot("Energy deviation", "experiments/inst_std_en_generic.pdf", y_label=REL_STD_LABEL)

# NOTE(review): the title says "without m64" but the filter drops rows whose
# args contain "peach" - confirm the intended pattern.
df_generic[~df_generic["args"].str.contains("peach")].groupby("inst").energy.apply(_generic_rel_std).plot.bar()
_finish_generic_plot("Energy deviation without m64", "experiments/inst_std_en_generic_nom64.pdf", y_label=REL_STD_LABEL)

## SSE x86 energy breakdown

In [None]:
# Energy of the instructions listed in mmx.csv; raw RAPL counter values are
# converted to joules with RAPL_ENERGY_PKG.
df_sse = pd.read_csv("databases/mmx.csv").sort_values("energy")
df_sse["energy"] *= RAPL_ENERGY_PKG
print(df_sse["inst"].unique())


def _finish_sse_plot(plot_title, pdf_path, y_label="Energy (J)", legend_size=None):
    """Label the current bar chart, then lay it out, save it and show it."""
    xticks(fontsize=10)
    xlabel("Instruction")
    ylabel(y_label)
    title(plot_title)
    if legend_size is not None:
        legend(fontsize=legend_size)
    tight_layout()
    savefig(pdf_path)
    show()


def _sse_rel_std(energy):
    """Coefficient of variation (std / mean) of one instruction's energies."""
    return energy.std()/energy.mean()


# std/mean is a dimensionless ratio, so it must not be labelled in joules.
REL_STD_LABEL = "Relative std. deviation (std/mean)"

matplotlib_rc_1()

# Maximum energy of every (instruction, argument kind) pair.
pd.crosstab(df_sse["inst"], df_sse["args"], df_sse["energy"], aggfunc="max").plot.bar()
_finish_sse_plot("Energy per argument", "experiments/inst_en_args_sse.pdf", legend_size=14)

# Every measurement, ordered by energy.
df_sse.sort_values("energy").plot.bar(x="inst", y="energy")
_finish_sse_plot("Energy consumption", "experiments/inst_mean_en_sse_all.pdf")

# Mean energy per instruction.
df_sse.groupby("inst").energy.mean().sort_values().plot.bar()
_finish_sse_plot("Mean energy consumption", "experiments/inst_mean_en_sse.pdf")

# Spread of the energy across argument kinds.
df_sse.groupby("inst").energy.apply(_sse_rel_std).plot.bar()
_finish_sse_plot("Energy deviation", "experiments/inst_std_en_sse.pdf", y_label=REL_STD_LABEL)

# NOTE(review): the title says "without m64" but the filter drops rows whose
# args contain "peach" - confirm the intended pattern.
df_sse[~df_sse["args"].str.contains("peach")].groupby("inst").energy.apply(_sse_rel_std).plot.bar()
_finish_sse_plot("Energy deviation without m64", "experiments/inst_std_en_sse_nom64.pdf", y_label=REL_STD_LABEL)

# df_sse["energy"].plot.bar(color=cm.hsv(df_sse["energy"]/df_sse["energy"].max()))
# tight_layout()
# savefig("inst_std_en_generic_nom64.pdf")
# show()

## Generic+SSE x86 energy breakdown

In [None]:
# Mean energy per instruction, generic and SSE side by side, coloured by family.
df_sse = pd.read_csv("databases/mmx.csv").sort_values("energy")
df_sse["energy"] *= RAPL_ENERGY_PKG

df_generic = pd.read_csv("databases/general_purpose.csv").sort_values("energy")
df_generic["energy"] *= RAPL_ENERGY_PKG

df_generic["type"]= "generic"
df_sse["type"]= "sse"
df_all= pd.concat( (df_generic, df_sse) )
display(df_all.head(10))

# numeric_only=True: the frame also carries string columns ("args", "type"),
# which recent pandas refuses to average instead of silently dropping them.
df_all_mean= df_all.groupby("inst").mean(numeric_only=True).reset_index()
# Re-attach the family of each instruction (merge key: "inst").
df_all_mean= pd.merge( df_all[["inst","type"]], df_all_mean ).drop_duplicates()
df_all_mean= df_all_mean.sort_values("energy")

c_dict= {"generic":"b", "sse": "r"}
l_color= [c_dict[kind] for kind in df_all_mean["type"]]

matplotlib_rc_1()
df_all_mean.plot.bar(x="inst",y="energy", color=l_color)
red_patch = mpatches.Patch(color='red', label='sse')
blue_patch = mpatches.Patch(color='blue', label='generic')
title("Energy comparision sse generic")
xlabel("Instruction")
ylabel("Energy (J)")
xticks(fontsize= 10)
# A single legend call: a second bare legend(fontsize=...) would rebuild the
# legend from the plotted artists and discard the sse/generic patches.
legend(handles=[red_patch, blue_patch], fontsize= 14)
tight_layout()
savefig("experiments/inst_en_cmp_sse_generic.pdf")

## Instructions power

In [None]:
# Fit P(f) = a*f^3 + b*f + c to the measured power of every
# (instruction, argument) pair at thr == 3 and plot the fitted curves.
df_generic222 = pd.read_csv("databases/generic222.csv")
matplotlib_rc_2()


def fn(x, f):
    """Power model: cubic and linear terms in the frequency plus a constant."""
    return x[0]*f**3+x[1]*f+x[2]


def fne(x, f, y):
    """Residual between the model prediction and the measured power ``y``."""
    return fn(x,f)-y


# Explicit copy: new columns are written to this frame, not to a view of the
# frame that was read from disk.
df_thr3 = df_generic222[df_generic222["thr"]==3].copy()
# NOTE(review): the fixed divisor 30 is not explained in this notebook - confirm.
df_thr3["pw"] = df_thr3["energy"]*RAPL_ENERGY_PKG/df_thr3["time"]/30
# The x axis below is labelled GHz; presumably freq is stored in kHz - TODO confirm.
df_thr3["freq"] /= 1e6

frs= np.arange(1,3.4,0.1)
for inst in df_generic222["inst"].unique():
    for arg in df_generic222["args"].unique():
        df_aux = df_thr3[ (df_thr3["inst"]==inst)&(df_thr3["args"]==arg) ]
        if df_aux.empty:
            # The inst x args product can contain pairs with no measurement.
            continue

        xs= least_squares(fne,[1,1,1],args=(df_aux["freq"], df_aux["pw"]))
        plot(frs, fn(xs.x,frs), label="{}_{}".format(inst,arg))
xlabel("Frequency (GHz)")
ylabel("Power (W)")
legend(fontsize=10)
tight_layout()
savefig("experiments/inst_pw_args.pdf")

In [None]:
# Power of every instruction at thr == 3 and freq == 3000000, as a bar chart.
df_generic222 = pd.read_csv("databases/generic222.csv")
colors_list = ["r","g","b","y","c","m","k"]
freqs = df_generic222["freq"].unique()
# Cycle through the palette so that any number of frequencies gets a colour.
f_cmap = {fr: colors_list[i % len(colors_list)] for i, fr in enumerate(freqs)}
print(f_cmap)

matplotlib_rc_1()
# NOTE(review): the fixed divisor 30 is not explained in this notebook - confirm.
df_generic222["pw"] = df_generic222["energy"]*RAPL_ENERGY_PKG/df_generic222["time"]/30
dff = df_generic222[ (df_generic222["thr"]==3)&(df_generic222["freq"]==3000000) ]
dff = dff.sort_values("freq", ascending=False)
# A real list, not a one-shot map iterator, so the colours can be read more than once.
bar_colors = [f_cmap[fr] for fr in dff["freq"].values]
dff["pw"].plot.bar(color=bar_colors)
xlabel("Instruction")
xticks(range(dff["inst"].shape[0]),dff["inst"].values)
ylabel("Power (W)")
tight_layout()
savefig("experiments/inst_pw_generic.pdf")

In [None]:
# Mean sensor reading per (frequency, load argument), one curve per argument.
with open("databases/power_cpufrac.pkl","rb") as f:
    data_power_cpufrac = pickle.load(f)

rows = [
    [freq_entry["freq"], load_entry["arg"],
     np.mean([sample["sensor"] for sample in load_entry["rapl"]])]
    for freq_entry in data_power_cpufrac
    for thread_entry in freq_entry['threads']
    for load_entry in thread_entry["lpcpu"]
]
df_power_cpufrac = pd.DataFrame(rows, columns=["freq","arg","pw"])
df_power_cpufrac["arg"] = df_power_cpufrac["arg"].astype(str)

matplotlib_rc_2()
for arg in df_power_cpufrac["arg"].unique():
    df_s = df_power_cpufrac[df_power_cpufrac["arg"]==arg]
    # The stringified argument is cut at the first comma and stripped of its
    # first two and last characters before being parsed as a float.
    first_field = df_s["arg"].iloc[0].split(',')[0]
    pcpu = float(first_field[2:-1])
    print(pcpu)
    plot(df_s["freq"].astype(float)/1e6, df_s["pw"], label=pcpu)
xlabel("Frequency (GHz)")
ylabel("Power (W)")
legend()
tight_layout()
savefig("experiments/pw_freq_load.pdf")

In [None]:
# Fit P(f, %cpu) = a*f^3*%cpu + b*f + c to the mean sensor readings and overlay
# the fitted curves on the measured points.
with open("databases/power_cpufrac.pkl","rb") as f:
    data_power_cpufrac = pickle.load(f)

rows = [
    [freq_entry["freq"], load_entry["arg"],
     np.mean([sample["sensor"] for sample in load_entry["rapl"]])]
    for freq_entry in data_power_cpufrac
    for thread_entry in freq_entry['threads']
    for load_entry in thread_entry["lpcpu"]
]
df_power_cpufrac = pd.DataFrame(rows, columns=["freq","arg","pw"])
# The first element of each argument entry is used as the load value.
df_power_cpufrac["arg"] = df_power_cpufrac["arg"].apply(lambda x: x[0]).astype(float)
df_power_cpufrac["freq"] = df_power_cpufrac["freq"].astype(float)/1e6


def fn(x, f, p):
    """Power model with coefficients ``x``, frequency ``f`` and load ``p``."""
    return x[0]*f**3*p+x[1]*f+x[2]


def err(x, f, p, y):
    """Residual between the model prediction and the measured power ``y``."""
    return fn(x,f,p)-y


res = least_squares(
    err, [1,1,1],
    args=(df_power_cpufrac["freq"], df_power_cpufrac["arg"], df_power_cpufrac["pw"]),
)

matplotlib_rc_2()
for arg in df_power_cpufrac["arg"].unique():
    df_s= df_power_cpufrac[df_power_cpufrac["arg"]==arg]
    plot(df_s["freq"], fn(res.x,df_s["freq"],arg), label=arg)
    plot(df_s["freq"], df_s["pw"], ".", c="b")
xlabel("frequency GHz")
ylabel("Power (W)")
text(1.5,35,"P(f,%cpu)=af^3*%cpu+bf+c")
legend(fontsize=20)
tight_layout()
savefig("experiments/model_pw_freq_load.pdf")

In [None]:
# Per-sample package energy of the a.out run, plotted in chunks of 100 samples.
with open("databases/a.out_freq.dat", "rb") as f:
    # pickle can execute arbitrary code: only load files you trust.
    a_out_freq_dat = pickle.load(f)

kept_cols = ["freq", "SYSTEMWIDE:RAPL_ENERGY_PKG", "MEM_UOPS_RETIRED:ALL_STORES"]

# Collect one array per run and stack once at the end (no sentinel row and no
# repeated re-allocation of the growing array).
chunks = []
for thr in a_out_freq_dat["data"]:
    for freq in thr["threads"]:
        # Same metadata as the whole file, but with the samples of this run only.
        aux= {k:a_out_freq_dat[k] for k in a_out_freq_dat if k != "data"}
        aux["data"]= freq["data"]
        x = Analyser(aux)
        x.df["freq"] = [freq["freq"]]*x.df["SYSTEMWIDE:RAPL_ENERGY_PKG"].shape[0]
        chunks.append(x.df[kept_cols].values)

df_freq_data = np.vstack(chunks) if chunks else np.empty((0, len(kept_cols)))
df_freq_data = pd.DataFrame(df_freq_data, columns=["freq","en_cores","inst"])
df_freq_data["freq"] = df_freq_data["freq"].astype(float)/1e6
df_freq_data["inst"] = df_freq_data["inst"].astype(float)/1e9
# NOTE(review): the column is named en_cores but holds RAPL_ENERGY_PKG - confirm.
df_freq_data["en_cores"] = df_freq_data["en_cores"].astype(float)*RAPL_ENERGY_PKG
df_freq_data = df_freq_data[df_freq_data["en_cores"]!=0]

matplotlib_rc_2()
for f in df_freq_data["freq"].unique()[:1]:
    tp= df_freq_data[df_freq_data["freq"]==f].values
    t= np.arange(0,len(tp),1)

    # Each chunk of 100 samples is labelled with a load that starts at 100 %
    # and drops by 25 points per chunk.
    pcpu= 1
    for i in range(0,len(t)-1,100):
        segment = tp[i:i+100,1]
        # The last chunk may be shorter than 100 samples; keep x and y aligned.
        plot(t[:len(segment)]*0.05, segment, label=f"{pcpu*100}% load")
        pcpu-=0.25

    xlabel("Time (s)")
    # NOTE(review): the plotted values are per-sample energy in joules, not watts - confirm the label.
    ylabel("Power (W)")
    legend(loc= "upper left",fontsize=20)
    tight_layout()
    savefig("experiments/pw_load.pdf")

# Voltage and frequency relationship

In [None]:
# Requested frequency, voltage and the "aperf" value recorded for each step.
fv_rel = pd.DataFrame({
    "freq":  [2.2, 2.1, 2.0, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.3, 1.2],
    "volt":  [0.77, 0.76, 0.75, 0.74, 0.73, 0.72, 0.71, 0.70, 0.69, 0.68, 0.67],
    "aperf": [2.199, 2.099, 2.000, 1.899, 1.799, 1.699, 1.599, 1.500, 1.397, 1.297, 1.200],
})

matplotlib_rc_2()
# Voltage against the "aperf" column, drawn as a marked line.
plot(fv_rel["aperf"], fv_rel["volt"], marker="o", linestyle='-')
xlabel("Frequency (GHz)")
ylabel("Voltage (V)")
tight_layout()
savefig("experiments/freq_volt_rel.pdf")

# Fingerprint

In [None]:
# Default matplotlib colour cycle, repeated so that helpers can pop many colours.
colors_= plt.rcParams['axes.prop_cycle'].by_key()['color']*10

class Clusters(dict):
    """dict keyed by colour names that renders as a coloured HTML table."""

    def _repr_html_(self):
        """Return one table row per key: the key on its own colour, then the value's repr."""
        rows = []
        for color_name in self:
            hex_color = rgb2hex(colorConverter.to_rgb(color_name))
            key_cell = (
                '<tr style="border: 0;">'
                '<td style="background-color: {0}; '
                'border: 0;">'
                '<code style="background-color: {0};">'.format(hex_color)
                + color_name + '</code></td>'
            )
            value_cell = (
                '<td style="border: 0"><code>'
                + repr(self[color_name]) + '</code>'
                + '</td></tr>'
            )
            rows.append(key_cell + value_cell)

        return '<table style="border: 0;">' + ''.join(rows) + '</table>'
    
def plot_corr(df):
    """Draw the correlation matrix of ``df``'s columns on a new figure.

    Ticks on both axes carry the column names; the colour bar marks -1, 0 and 1.
    """
    corr = df.corr()
    names = corr.columns
    positions = range(len(names))

    fig, ax = plt.subplots()
    image = ax.matshow(corr, cmap='RdYlGn')
    plt.xticks(positions, names, rotation=90)
    plt.yticks(positions, names)

    fig.colorbar(image, ticks=[-1, 0, 1], aspect=40, shrink=.8)

def draw_clusters(clusters):
    """Display ``clusters`` (a mapping) as an HTML table, one coloured row per entry.

    Colours are taken from the end of the ``colors_`` palette, one per entry in
    iteration order; the key is shown on its colour and the value as its repr.
    """
    palette = list(colors_)
    row_head = (
        '<tr style="border: 0;">'
        '<td style="background-color: {0}; '
        'border: 0;">'
        '<code style="background-color: {0};">'
    )
    parts = ['<table style="border: 0;">']
    for key, members in clusters.items():
        parts.append(row_head.format(rgb2hex(palette.pop())))
        parts.append(repr(key) + '</code></td>')
        parts.append('<td style="border: 0"><code>')
        parts.append(repr(members) + '</code>')
        parts.append('</td></tr>')
    parts.append('</table>')
    display(HTML(''.join(parts)))

def draw_communities(G, membership, labels):
    """Draw graph ``G`` with nodes coloured and shaped by cluster.

    Parameters
    ----------
    G : networkx graph whose nodes are the integers 0..n-1.
    membership : sequence giving the cluster id of node ``i`` at position ``i``.
    labels : pandas Index (or Series) of node names; the dataset suffix is
        removed and the size names are shortened before they are drawn.
    """
    matplotlib_rc_2()
    pos= nx.spring_layout(G)
    fig, ax = plt.subplots()

    # cluster id -> nodes, in order of first appearance
    club_dict = defaultdict(list)
    for node, club in enumerate(membership):
        club_dict[club].append(node)

    marks= ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X']
    for n, (club, members) in enumerate(club_dict.items()):
        # Markers and colours are consumed from the end of their lists and wrap
        # around, so more clusters than markers no longer raises IndexError.
        nx.draw_networkx_nodes(G, pos,
                               nodelist=members,
                               node_shape=marks[-1 - (n % len(marks))],
                               node_color=[colors_[-1 - (n % len(colors_))]],
                               node_size=500,
                               alpha=0.9,
                               ax=ax)

    # regex=False: the patterns are literal text ('.' must not match any char).
    labels= labels.str.replace('_DATASET_mem.dat','', regex=False)
    # EXTRALARGE must be replaced before LARGE, which it contains.
    labels= labels.str.replace('EXTRALARGE','3', regex=False)
    labels= labels.str.replace('LARGE','2', regex=False)
    labels= labels.str.replace('MEDIUM','1', regex=False)
    labels= dict(enumerate(labels))

    # Lift the label positions slightly above the node markers.
    for p in pos:
        pos[p]+=[0,0.03]
    nx.draw_networkx_labels(G, pos, labels= labels, font_size=5)

def plot_dendogram(Z, labels_):
    """Plot the dendrogram of linkage matrix ``Z`` on a new wide figure.

    ``labels_`` are the leaf labels, drawn vertically in a small font.
    """
    matplotlib_rc_1()
    figure()
    xlabel('Application')
    ylabel('Distance')
    sch.dendrogram(Z, labels=labels_, leaf_rotation=90., leaf_font_size=8.)
    yticks()
    xticks(rotation=-90)
    tight_layout()
    
def plot_graph(metric, method, nc, df):
    """Cluster the rows of ``df`` hierarchically and draw them as a graph.

    Pairwise distances use ``metric``; the linkage uses ``method`` and is cut
    into at most ``nc`` clusters. Every ordered pair of rows (including a row
    with itself) becomes an edge weighted 1 / (distance + 1).
    """
    condensed = sch.distance.pdist(df.values, metric=metric)
    shifted = sch.distance.squareform(condensed)+1  # +1 keeps the weights finite

    graph = nx.Graph()
    for (row, col), dist in np.ndenumerate(shifted):
        graph.add_edge(row, col, weight=1/dist)

    linkage = sch.linkage(condensed, method=method, optimal_ordering=True)
    membership = sch.fcluster(linkage, t=nc, criterion='maxclust')
    draw_communities(graph, membership, df.index)
    tight_layout()

## Performance counters error

In [None]:
# Profiling data of the 3mm LARGE run; its raw per-run samples are used below
# to show how much each monitored counter varies between runs.
prog_error = Analyser("databases/fingerprints/hpc_belgica/v3/3mm_LARGE_DATASET_mem.dat")

def test(self,verbose=False):
    # find the moda shape
    count_shapes= defaultdict(lambda:0)
    for r in self.data['data']:
        count_shapes[np.shape(r)]+=1
    moda_shape= max(count_shapes,key=count_shapes.get)
    data_moda= [d for d in self.data['data'] if np.shape(d) == moda_shape]

    if verbose:
        print("Moda shape counts {:.2f}%".format(count_shapes[moda_shape]/len(count_shapes.values())*100))
        print(count_shapes[moda_shape], sum(count_shapes.values()))

    el= int(count_shapes[moda_shape]*0.3)//2
    data_moda= np.asarray(data_moda)
    med_avg= np.sort(data_moda,axis=0)

    if el != 0:
        med_avg= med_avg[el:-el]

    def diff(x):
        x= np.concatenate( (x[:,0:1,:] , x[:,1:,:]-x[:,:-1,:]), axis=1 )/self.data['sample_period']
        return x

    med_avg= diff(med_avg)
    std_avg= med_avg.std(axis=0)
    med_avg= med_avg.mean(axis=0)

    # create the dataframe
    med_avg= pd.DataFrame(med_avg, columns=flat(self.data['to_monitor']))
    std_avg= pd.DataFrame(std_avg, columns=flat(self.data['to_monitor']))

    # quality of the samples (experimental)
    if verbose:
        q= std_avg.values/med_avg.values
        print("AVG 68% samples error", np.nanmean(q)*100)
        print("AVG 99% samples error", np.nanmean(3*q)*100)

        print("MAX 68% samples error", np.nanmax(q)*100)
        print("MAX 99% samples error", np.nanmax(3*q)*100)

    return med_avg, std_avg


# One figure per monitored counter: every run of the modal shape as a thin
# line, the mean in red and a black band of +/- one standard deviation.

counter_names = flat(prog_error.data["to_monitor"])

# Everything below is the same for every counter, so it is computed once
# instead of once per figure.
count_shapes= defaultdict(lambda:0)
for r in prog_error.data['data']:
    count_shapes[np.shape(r)]+=1
moda_shape= max(count_shapes,key=count_shapes.get)

# Per-run rates: first sample as is, then increments, per sample period.
run_rates = []
for e in prog_error.data['data']:
    if np.shape(e) != moda_shape:
        continue
    aux= np.array(e)
    run_rates.append(
        np.concatenate( (aux[0:1,:] , aux[1:,:]-aux[:-1,:]), axis=0 )/prog_error.data['sample_period']
    )

med_avg, std_avg= test(prog_error,0)

for d, fname in enumerate(counter_names):
    print(fname)
    matplotlib_rc_2()

    for rates in run_rates:
        plot(rates[:,d])

    aux= med_avg[fname].values
    aux_std= std_avg[fname].values

    fill_between(np.arange(aux.shape[0]), aux-1*aux_std, aux+1*aux_std,color='k',zorder=1)
    plot(aux,c='r',linewidth=2)
    ylabel(fname)
    xlabel("Sample")
    savefig(f"fingerprint/error_3mm_{fname}.pdf")
    show()

## Clusterization by input size

In [None]:
# Hierarchical clustering (canberra distance, ward linkage) of the input-size
# signals, cut into at most N_INPUT_CLUSTERS groups.
N_INPUT_CLUSTERS = 5

df_input_all = pd.read_csv("databases/inputs_all.csv",index_col=0).T
d = sch.distance.pdist(df_input_all.values, 'canberra')
Z = sch.linkage(d, 'ward', optimal_ordering=True)
labels = sch.fcluster(Z, t=N_INPUT_CLUSTERS, criterion='maxclust')

# cluster id -> application names (text before the first underscore)
pdic = defaultdict(list)
for cluster_id, run_name in zip(labels, df_input_all.index):
    pdic[cluster_id].append(run_name.split('_')[0])

# Keep only the applications that occur more than once in a cluster.
for cluster_id in pdic:
    occurrences = Counter(pdic[cluster_id])
    pdic[cluster_id] = [app for app, times in occurrences.items() if times > 1]

draw_clusters(pdic)
plot_dendogram(Z, list(df_input_all.index.str.replace('_DATASET_mem.dat','')))
savefig('fingerprint/dendograma_input_size.pdf')
plot_graph('canberra', 'ward', N_INPUT_CLUSTERS, df_input_all)
savefig('fingerprint/graph_input_size.pdf')

In [None]:
# Plot the input-size signals of each cluster in its own figure.
df_input_all = pd.read_csv("databases/inputs_all.csv",index_col=0).T
d = sch.distance.pdist(df_input_all.values, 'canberra')
Z = sch.linkage(d, 'ward', optimal_ordering=True)
labels = sch.fcluster(Z,t=5,criterion='maxclust')

# cluster id -> full run names
pdic = defaultdict(list)
for cluster_id, run_name in zip(labels, df_input_all.index):
    pdic[cluster_id].append(run_name)

# Same data, indexed by the run name without the dataset suffix.
aux = df_input_all.copy()
aux.index = aux.index.str.replace("_DATASET_mem.dat","")

matplotlib_rc_2()
for c, members in enumerate(pdic.values()):
    short_names = [name.replace("_DATASET_mem.dat","") for name in members]
    aux.loc[short_names].T.plot()
    legend(loc="upper left",ncol=3,fontsize=10)
    ylabel("Input size")
    xlabel("Normalized time")
    tight_layout()
    savefig("fingerprint/cluster_input_{}".format(c))
    show()

## Correlation before and after clustering

In [None]:
# Correlation matrix of the input-size signals, in file order and reordered so
# that members of the same cluster are adjacent.
df_input_all = pd.read_csv("databases/inputs_all.csv",index_col=0).T
d = sch.distance.pdist(df_input_all.values, 'canberra')
Z = sch.linkage(d, 'ward', optimal_ordering=True)
labels = sch.fcluster(Z,t=5,criterion='maxclust')

df_aux = df_input_all.T
# Column names ordered by cluster label.
columns = df_aux.columns[np.argsort(labels)].tolist()
df_new = df_aux.reindex(columns, axis=1)

plt.rcParams["figure.figsize"] = (24,24)
for frame, pdf_path in (
    (df_aux, "fingerprint/corr_before_input_size.pdf"),
    (df_new, "fingerprint/corr_after_input_size.pdf"),
):
    plot_corr(frame)
    savefig(pdf_path)

## Clusterization by floating point operation

In [None]:
#plt.style.use('grayscale')
# Hierarchical clustering (canberra distance, ward linkage) of the
# mean-centred, range-scaled floating-point signals.
N_FP_CLUSTERS = 8

df_fp_all = pd.read_csv("databases/fp_all.csv",index_col=0)
fp_range = df_fp_all.max() - df_fp_all.min()
df_norm_fp_all = ((df_fp_all - df_fp_all.mean()) / fp_range).T

d = sch.distance.pdist(df_norm_fp_all.values, 'canberra')
Z = sch.linkage(d, 'ward', optimal_ordering=True)
labels = sch.fcluster(Z, t=N_FP_CLUSTERS, criterion='maxclust')

# cluster id -> application names (text before the first underscore)
pdic = defaultdict(list)
for cluster_id, run_name in zip(labels, df_norm_fp_all.index):
    pdic[cluster_id].append(run_name.split('_')[0])

draw_clusters(pdic)
plot_dendogram(Z, list(df_norm_fp_all.index.str.replace('_DATASET_mem.dat','')))
savefig('fingerprint/dendograma_floating.pdf')
# NOTE(review): the graph is built from the un-normalised df_fp_all while the
# table and dendrogram above use df_norm_fp_all - confirm this is intended.
plot_graph('canberra', 'ward', N_FP_CLUSTERS, df_fp_all.T)
savefig('fingerprint/graph_floating.pdf')

In [None]:
# Plot the normalised floating-point signals of each cluster in its own figure.
df_fp_all = pd.read_csv("databases/fp_all.csv",index_col=0)
fp_range = df_fp_all.max() - df_fp_all.min()
df_norm_fp_all = ((df_fp_all - df_fp_all.mean()) / fp_range).T

d = sch.distance.pdist(df_norm_fp_all.values, 'canberra')
Z = sch.linkage(d, 'ward', optimal_ordering=True)
labels = sch.fcluster(Z, t=8,criterion='maxclust')

# cluster id -> full run names (collected before the index is shortened)
pdic = defaultdict(list)
for cluster_id, run_name in zip(labels, df_norm_fp_all.index):
    pdic[cluster_id].append(run_name)

# Drop the dataset suffix from the run names, then put runs back in columns.
df_norm_fp_all.index = df_norm_fp_all.index.str.replace('_DATASET_mem.dat','')
aux = df_norm_fp_all.T

matplotlib_rc_1()
for c, members in enumerate(pdic.values()):
    short_names = [name.replace('_DATASET_mem.dat','') for name in members]
    aux[short_names].plot(figsize=(10,10))
    legend(loc='upper left',ncol=3,fontsize=10)
    ylabel('Floating point operations')
    xlabel('Normalized time')
    tight_layout()
    savefig('fingerprint/cluster_fp_{}'.format(c))
    show()

## Correlation before and after clustering

In [None]:
# Correlation matrix of the floating-point signals, in file order and reordered
# so that members of the same cluster are adjacent.
df_fp_all = pd.read_csv("databases/fp_all.csv",index_col=0)
fp_range = df_fp_all.max() - df_fp_all.min()
df_norm_fp_all = ((df_fp_all - df_fp_all.mean()) / fp_range).T

d = sch.distance.pdist(df_norm_fp_all.values, 'canberra')
Z = sch.linkage(d, 'ward', optimal_ordering=True)
labels = sch.fcluster(Z, t=8,criterion='maxclust')

# The correlations are computed on the raw (un-normalised) signals.
df_aux = df_fp_all
# Column names ordered by cluster label.
columns = df_aux.columns[np.argsort(labels)].tolist()
df_new = df_aux.reindex(columns, axis=1)

plt.rcParams["figure.figsize"] = (24,24)
for frame, pdf_path in (
    (df_aux, "fingerprint/corr_before_float.pdf"),
    (df_new, "fingerprint/corr_after_float.pdf"),
):
    plot_corr(frame)
    savefig(pdf_path)

## Fingerprint workflow

In [None]:
# Four views of the instruction counter of one run file: raw runs, the
# Analyser dataframe column, and the interpolation with and without filtering.
px = Analyser('databases/fingerprints/hpc_belgica/v3/2mm_EXTRALARGE_DATASET_mem.dat')

# Column 0 of every raw run (PERF_COUNT_HW_INSTRUCTIONS).
p1 = [np.asarray(r)[:,0] for r in px.data["data"]]
p2 = px.df['PERF_COUNT_HW_INSTRUCTIONS'].values
_, p3 = px.interpolate(feature='PERF_COUNT_HW_INSTRUCTIONS', npoints= 100, filter_signal= False)
_, p4 = px.interpolate(feature='PERF_COUNT_HW_INSTRUCTIONS', npoints= 100)

matplotlib_rc_2()

# Step 0: sample-to-sample increments of every raw run on one figure.
for run_counts in p1:
    plot(np.diff(run_counts))
ylabel("Instructions executed")
xlabel("Sample")
tight_layout()
savefig("fingerprint/workflow.pdf")

# Steps 1-3: one new figure per processed signal.
for signal, pdf_path in (
    (p2, "fingerprint/workflow_1.pdf"),
    (p3, "fingerprint/workflow_2.pdf"),
    (p4, "fingerprint/workflow_3.pdf"),
):
    figure()
    plot(signal)
    ylabel("Instructions executed")
    xlabel("Sample")
    tight_layout()
    savefig(pdf_path)

## Critical points with fingerprint

In [None]:
# Approximate every input-size signal by a polyline through a few "critical
# points": k-means centres of the high-curvature samples plus both end points.
N_CRITICAL_CLUSTERS = 7

df= pd.read_csv("databases/inputs_all.csv",index_col=0)
for p in df.columns:
    print(p)
    matplotlib_rc_2()
    df[p].plot(c='g')

    values= df[p].values
    # |second difference|; samples above its mean are the candidate points.
    dif= np.abs(np.diff(values, n=2))
    idx= np.where(dif>dif.mean())[0]+1
    X = np.column_stack((idx, values[idx]))

    if len(X):
        # Never ask for more clusters than there are candidate points.
        km= KMeans(n_clusters=min(N_CRITICAL_CLUSTERS, len(X)), random_state=0).fit(X)
        centers= km.cluster_centers_
    else:
        # A signal without curvature has no interior critical points.
        centers= np.empty((0, 2))

    # Anchor the polyline at the first and last sample of the signal.
    last= len(values)-1
    centers= np.vstack( (centers, [0, values[0]]) )
    centers= np.vstack( (centers, [last, values[last]]) )
    centers= centers[centers[:,0].argsort()]

    plot(centers[:,0], centers[:,1],c='b')
    scatter(centers[:,0], centers[:,1],c='b')
    xlabel("Normalized time")
    ylabel("Input size")
    tight_layout()
    savefig(f"phases/critical_points/fp_phase_{p.replace('.dat','.pdf')}")
    # NOTE(review): the legend is added after savefig, so it is only visible in
    # the inline output, not in the saved PDF - confirm this is intended.
    legend()
    show()

# Models
## Load the data and prepare the dataframes

In [None]:
def get_title(name):
    """
    Map a package name to the title used in plots.

    Returns the title of the first known key (in the order listed below) that
    occurs as a substring of ``name``, or None when no key matches.
    """
    known_apps = (
        ("black", "Blackscholes"),
        ("body", "Bodytrack"),
        ("canneal", "Canneal"),
        ("dedup", "Dedup"),
        ("fluid", "Fluianimate"),
        ("freq", "Freqmine"),
        ("openmc", "Openmc"),
        ("rtview", "Raytrace"),
        ("swap", "Swaptions"),
        ("vips", "Vips"),
        ("xhpl", "HPL"),
        ("x264", "x264"),
        ("ferret", "Ferret"),
    )
    return next((shown for key, shown in known_apps if key in name), None)
        
def map_color(col):
    """Map the values of ``col`` to colours of the inferno palette.

    Values are scaled linearly so that min(col) takes palette entry 0 and
    max(col) entry 255. A column whose values are all equal (zero range) is
    mapped entirely to entry 0 instead of failing on a 0/0 division.

    Parameters
    ----------
    col : pandas Series of numbers (``.unique()`` is called on it).

    Returns
    -------
    (colors, patchs) : the list of colours, one per element of ``col``, and a
        list of legend patches, one per unique value in order of appearance.
    """
    palette = cm.inferno.colors
    lowest = min(col)
    span = max(col) - lowest

    def _color_of(value):
        scaled = (value - lowest)/span*255 if span else 0
        return palette[int(scaled)%256]

    point_colors = [_color_of(value) for value in col]
    patchs = [mpatches.Patch(color=_color_of(value), label=value) for value in col.unique()]

    return point_colors, patchs

def create_df_resum():
    """Build once, then return, the energy summary of every stored run.

    Runs whose sensors descriptor is {'values': ['info', 'time']} are read from
    the "run" collection, grouped by their arguments. Each run document is
    cached as cache/<id>.json and loaded with PascalData; its energy()
    dataframe gets the columns "nsamples" (length of each "info" entry of the
    "sensors" group) and "name" (the configured package).

    The result is kept in the global ``df_resum``; the global is only assigned
    after the whole frame was built, so a failed or interrupted call does not
    leave a partial result behind.
    """
    global df_resum
    if df_resum is not None:
        return df_resum

    run_col = energydb["run"]
    cursor = run_col.aggregate([
        {
            "$match": { "config.data_descriptor.extras.sensors": {'values': ['info', 'time']} },
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "keys": {"$push": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ],  allowDiskUse=True)

    frames = []
    for d in tqdm(cursor):
        for k in d["keys"]:
            cache_path = f"cache/{k}.json"
            if not os.path.isfile(cache_path):
                data = run_col.find_one( {"_id": ObjectId(k) } )
                data["_id"]= str(data["_id"])
                # Write to a temporary file first so that an interrupted dump
                # never leaves a truncated file that later counts as cached.
                tmp_path = cache_path + ".tmp"
                with open(tmp_path, "w") as cache_file:
                    json.dump(data, cache_file)
                os.replace(tmp_path, cache_path)
            data = PascalData(cache_path)
            df= data.energy()
            df["nsamples"]= data.dataframe_group("sensors")["info"].apply(len)
            df["name"]= data.config["pkg"]
            frames.append(df)

    df_resum = pd.concat(frames)
    return df_resum

def create_df_times():
    """Build once, then return, the energy measurements used for modelling.

    Runs keyed by cores/frequency/input/repetitions whose sensors descriptor is
    {'values': ['info', 'time']} are read from the "run" collection, grouped by
    their arguments, cached as cache/<id>.json and loaded with PascalData.
    Runs that do not vary all of cores, frequency and input are skipped. A
    package that occurs more than once gets the suffix "_<n>" in its name.

    The result is sorted by input, cores and frequency and kept in the global
    ``df_times``; the global is only assigned after the whole frame was built,
    so a failed or interrupted call does not leave a partial result behind.
    """
    global df_times
    if df_times is not None:
        return df_times

    def filtering(df):
        """Keep the 5 largest inputs and frequencies below 2.3, and rescale columns."""
        last_ins = np.sort(df["input"].unique())[-5:]
        # .copy(): the columns below are written to, so work on an own frame.
        df = df[df["input"].isin(last_ins)].copy()
        df["frequency"] = df["frequency"].astype(float)/1e6
        df = df[df["frequency"] < 2.3].copy()
        df["ipmi_energy"] /= 1e3
        df["input"] = df["input"].astype(int)
        # renumber the inputs so that the smallest kept one is 1
        df["input"] = df["input"]-df["input"].min()+1
        return df

    run_col = energydb["run"]
    cursor = run_col.aggregate([
        {
            "$match": { "config.data_descriptor.keys": ["cores", "frequency", "input", "repetitions"] },
        },
        {
            "$match": { "config.data_descriptor.extras.sensors": {'values': ['info', 'time']} },  
        },
        {
            "$group":{
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "keys": {"$push": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True)

    frames = []
    prog_names = defaultdict(lambda : 0)
    for d in tqdm(cursor):
        for k in d["keys"]:
            cache_path = f"cache/{k}.json"
            if not os.path.isfile(cache_path):
                data = run_col.find_one( {"_id": ObjectId(k) } )
                data["_id"]= str(data["_id"])
                # Write to a temporary file first so that an interrupted dump
                # never leaves a truncated file that later counts as cached.
                tmp_path = cache_path + ".tmp"
                with open(tmp_path, "w") as cache_file:
                    json.dump(data, cache_file)
                os.replace(tmp_path, cache_path)
            data = PascalData(cache_path)
            df = data.energy()
            if not (len(df["cores"].unique()) > 1 and \
                len(df["frequency"].unique()) > 1 and \
                len(df["input"].unique()) > 1):
                    continue

            df = filtering(df)
            pkg = data.config["pkg"]
            df["name"] = pkg
            prog_names[pkg] += 1
            if prog_names[pkg] > 1:
                df["name"] +=  "_" + str(prog_names[pkg]-1)
            frames.append(df)

    times = pd.concat(frames)
    times[["input","cores","frequency"]] = times[["input","cores","frequency"]].astype(float)
    times = times.sort_values(["input","cores","frequency"])

    df_times = times
    return df_times

def create_df_models():
    """
    Grab the fitted models grouped by application arguments.

    The result is memoised in the module-level ``df_models``: when that global
    is not None it is returned untouched. Otherwise every document of the
    ``model`` collection is fetched (through a JSON file cache under
    ``cache/``), loaded as a ``PascalModel`` and summarised as one row.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``ts``, ``type``, ``split_type``, ``mpe``,
        ``energy``, ``ref`` and ``title``. ``mpe`` is filled from the stored
        ``info.mae`` field, ``energy`` is the summed ``ipmi_energy`` of the
        training rows and ``ref`` is the loaded ``PascalModel``.
    """
    global df_models
    if df_models is not None:
        return df_models
    model_col = energydb["model"]
    models = model_col.aggregate([
            {
                "$group":
                {
                    "_id" : {"config":{"arguments":"$config.arguments"}},
                    #"all": {"$push": "$$ROOT" },
                    "pkgs": {"$push": "$config.pkg" },
                    "keys": {"$push": "$_id" },
                    "maes": {"$push": "$info.mae" },
                    "tss": {"$push": "$train_sz" },
                    "idxs": {"$push": "$train_idx" },
                    "types": {"$push": "$info.type"},
                    "split_types": {"$push": "$split_type" },
                    "nitem": {"$sum": 1}
                }
            }
        ],
        allowDiskUse=True)

    # Accumulate in a local list: the global is only assigned once everything
    # succeeded, so a failure midway cannot leave a half-built value that the
    # early-return above would then hand out as if it were the final frame.
    frames = []
    for app in tqdm(models):
        ens = []
        refs = []
        for kk, ii in zip(app["keys"], app["idxs"]):
            cache_file = f"cache/{kk}.json"
            if not os.path.isfile(cache_file):
                data = model_col.find_one( {"_id": ObjectId(kk) } )
                data["_id"] = str(data["_id"])
                # Context manager guarantees the cache file is flushed and
                # closed before PascalModel reads it back.
                with open(cache_file, "w+") as cache_fh:
                    json.dump(data, cache_fh)
            data = PascalModel(cache_file)
            # Energy of the rows used for training this model.
            ens.append(data.data.loc[ii]["ipmi_energy"].sum())
            refs.append(data)

        X = np.transpose([app["pkgs"], app["tss"], app["types"], app["split_types"], app["maes"], ens, refs])
        df = pd.DataFrame(X, columns=["name","ts", "type", "split_type", "mpe", "energy", "ref"])
        df[["ts", "mpe", "energy"]] = df[["ts", "mpe", "energy"]].astype(float)

        frames.append(df)

    result = pd.concat(frames)
    result["title"] = result["name"].apply(get_title)
    df_models = result
    return df_models

In [None]:
# Overall totals over every row of df_resum.
df_resum = create_df_resum()
display(df_resum)

SECONDS_PER_DAY = 60*60*24
n_executions = df_resum.shape[0]
n_samples = df_resum.nsamples.sum()
energy_total = df_resum.ipmi_energy.sum()/1e6
days_total = df_resum.total_time.sum()/SECONDS_PER_DAY

print("Total number of executions :", n_executions)
print("Total number of samples :", n_samples)
print("Total energy spent (Mj) :", energy_total)
print("Total time spent (Days):", days_total)

In [None]:
# Totals restricted to the rows returned by create_df_times().
df_times = create_df_times()
display(df_times)

SECONDS_PER_DAY = 60*60*24
n_executions = df_times.shape[0]
energy_total = df_times.ipmi_energy.sum()/1e3
days_total = df_times.total_time.sum()/SECONDS_PER_DAY

print("Total from complete executions")
print("Total number of executions : ", n_executions)
print("Total energy spent (Mj) : ", energy_total)
print("Total time spent (Days) : ", days_total)

In [None]:
# Relative spread (std / mean, in percent) of the measured energy per
# application, for the runs with 32 cores and input 5.
runs_32c_in5 = df_times[(df_times["cores"]==32)&(df_times["input"]==5)]
energy_by_app = runs_32c_in5.groupby("name").ipmi_energy
energy_by_app.std()/energy_by_app.mean()*100

In [None]:
# Fetch the per-run measurements for the min/max frequency comparison below
# and apply the matplotlib_rc_2 plot settings.
df_times = create_df_times()
matplotlib_rc_2()

def rel_max_min(df,col):
    """
    Ratio between ``col`` at the lowest and at the highest frequency of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``frequency`` column and the column named by ``col``.
    col : str
        Column whose values are compared.

    Returns
    -------
    The first ``col`` value found at the minimum frequency divided by the
    first ``col`` value found at the maximum frequency.
    """
    freq = df["frequency"]
    at_lowest = df.loc[freq == freq.min(), col].values[0]
    at_highest = df.loc[freq == freq.max(), col].values[0]
    return at_lowest/at_highest

# One ratio per (application, input, cores) configuration.
runs_by_config = df_times.groupby(["name","input","cores"])
df_rel_min_max_freq = runs_by_config.apply(partial(rel_max_min, col="total_time")).reset_index()
df_rel_min_max_en = runs_by_config.apply(partial(rel_max_min, col="ipmi_energy")).reset_index()

# Same bins for both histograms so the two distributions are comparable.
ratio_bins = np.arange(0,5,0.1)
for ratios, caption in ((df_rel_min_max_freq, "Time min freq/Time max freq"),
                        (df_rel_min_max_en, "Energy min freq/Energy max freq")):
    # After reset_index() the unnamed result of apply() lives in column 0.
    hist(ratios[0], label=caption, bins=ratio_bins, alpha=0.5)
tight_layout()
legend()
savefig("models/min_max_freq_cmp.pdf")

In [None]:
df_times = create_df_times()
for app in df_times.name.unique():
    # Explicit copy: the new "edp" column must not be written onto a slice of df_times.
    dfx = df_times[df_times["name"]==app].copy()
    # Energy-delay product of each run.
    dfx["edp"] = dfx["ipmi_energy"]*dfx["total_time"]
    # Keep only the largest input of this application.
    dfx = dfx[dfx["input"]==dfx["input"].max()]

    print(app)
    matplotlib_rc_1()
    fig, axs = subplots(2, 2)
    # One colour per core count; `labels` are the matching legend handles.
    colors, labels= map_color(dfx["cores"])
    dfx.plot.scatter(x="frequency", y="edp", c=colors, ax= axs[0][0])
    dfx.plot.scatter(x="total_time", y="ipmi_energy", c=colors, ax= axs[0][1])
    dfx.plot.scatter(x="frequency", y="total_time", c=colors, ax= axs[1][0])
    dfx.plot.scatter(x="frequency", y="ipmi_energy", c=colors, ax= axs[1][1])
    subplots_adjust(left=0.07, right=0.93, wspace=0.25, hspace=0.35)
    # create_df_times() divides ipmi_energy by 1e3, so energies are in kJ.
    axs[0][0].set_xlabel("Frequency (GHz)")
    axs[0][0].set_ylabel("EDP")
    axs[0][1].set_xlabel("Time (s)")
    axs[0][1].set_ylabel("Energy (kJ)")
    axs[1][0].set_xlabel("Frequency (GHz)")
    axs[1][0].set_ylabel("Time (s)")
    axs[1][1].set_xlabel("Frequency (GHz)")
    axs[1][1].set_ylabel("Energy (kJ)")
    # The legend is attached to the last axes and pushed above the grid.
    legend(handles=labels, loc="upper right", bbox_to_anchor=(1.2, 2.35))
    savefig(f"models/metrics/{app}.pdf")
    show()

In [None]:
# Energy against run time for every run, coloured by core count.
df_times = create_df_times()
matplotlib_rc_2()
colors, labels= map_color(df_times["cores"])
ax_all = df_times.plot.scatter(x="total_time", y="ipmi_energy",c=colors)
ax_all.legend(handles=labels,fontsize=10)
ax_all.figure.tight_layout()
ax_all.figure.savefig("models/metrics/all.pdf")

# Speedups

In [None]:
df_times = create_df_times()

def _speedup_per_group(df):
    """
    Speedup of every run in ``df`` relative to the single-core run of the group.

    ``df`` holds the runs of one (name, frequency, input) group. The speedup
    is the ``total_time`` of the first ``cores == 1`` row divided by each
    row's ``total_time``. When the group has no single-core run the speedup
    is NaN, so the caller's ``dropna()`` discards those rows. The input frame
    is not modified.

    Returns a frame with columns ``cores``, ``speedup`` and ``ipmi_energy``.
    """
    serial_time = df.loc[df["cores"]==1, "total_time"]
    if serial_time.shape[0] > 0:
        values = serial_time.iloc[0]/df["total_time"]
    else:
        # NaN (not None) keeps the column numeric after the groups are concatenated.
        values = np.nan
    out = df[["cores","ipmi_energy"]].assign(speedup=values)
    return out[["cores","speedup","ipmi_energy"]].reset_index(drop=True)

# The helper has its own name so that `speedup` is only ever the result table.
speedup = df_times.groupby(["name","frequency","input"]).apply(_speedup_per_group)
speedup = speedup.reset_index().dropna().drop(columns=["level_3"])

# Parallel fraction w solved from S = 1/((1-w) + w/n):  w = (n*S - n)/((n-1)*S).
# For n == 1 the expression is 0/0 and yields NaN.
n_cores, s_up = speedup["cores"], speedup["speedup"]
speedup["parallel_frac"]= (n_cores*s_up-n_cores)/( (n_cores-1)*s_up )
display(speedup)

ws = speedup["parallel_frac"].dropna()
matplotlib_rc_2()
# Only fractions inside [0, 1] are shown in the histogram.
ws[ (ws>=0)&(ws<=1) ].plot.hist()
xlabel("Parallel fraction")
tight_layout()
savefig("models/speedups.pdf")

# 2D plots energy (model vs measured)

# Variation Cores X Energy

In [None]:
df_models = create_df_models()

# Lowest-error equation model trained with 10 points, one row per application title.
min_mpe = df_models[(df_models["ts"]==10)&(df_models["type"]=="equation")].groupby(["title"]).mpe.min().values
aux2 = df_models[df_models.mpe.isin(min_mpe)].drop_duplicates("title")

matplotlib_rc_2()
mpes = []
for idx, eq in aux2.iterrows():
    model = eq.ref

    # Fix input and frequency at their largest measured values and sweep the cores.
    i = model.data["input"].unique().max()
    f = model.data["frequency"].unique().max()
    crs = sort(model.data["cores"].unique())

    # Rows are (input, cores, frequency). `crs` is already sorted and the other
    # two columns are constant, so no further (column-wise) sorting is needed.
    X = np.array(list(prod_itertools([i],crs,[f])))

    en_pred = model.predict(X)
    en_real = model.data[(model.data["frequency"]==f)&
                         (model.data["input"]==i)].sort_values("cores").ipmi_energy.values

    # Mean absolute percentage error between measurement and prediction.
    mpe = sum(abs(en_real-en_pred)/en_real)/len(en_real)*100
    print(eq["name"])
    print("MSE", sum((en_real-en_pred)**2)/len(en_real))
    print("MPE", mpe)
    name = model.config["pkg"]
    mpes.append([name,mpe])
    plot(crs, en_pred, label="model")
    scatter(crs, en_real, color="black",label="measured values")
    xlabel("Cores")
    ylabel("Energy (kJ)")
    legend()
    savefig(f"models/energy/power/{name}.pdf")
    show()
mpes = pd.DataFrame(mpes, columns=["name","mpe"])
# Display the per-application error table (the original referenced an undefined name here).
mpes

# Variation Cores X Power

In [None]:
# NOTE(review): hard-coded absolute path, not used inside this cell — confirm whether it is still needed.
path = "/home/vitor/Documents/energy_scripts_superpc/create_db"
reload(plotdata)
df_models = create_df_models()

def plot3d(pascalmodel, outputname):
    """
    Plot the power model surface over (frequency, cores) against the measurements.

    Red stars are the rows used for training, black dots are the remaining
    measurements and the blue surface is ``pascalmodel.predict`` evaluated on a
    regular frequency/cores grid. The mean absolute difference between the
    prediction and the non-training measurements is printed and appended, with
    the package name, to the module-level list ``mpes``.

    Parameters
    ----------
    pascalmodel : PascalModel
        Fitted model; its ``data``, ``inputs``, ``train_idx`` and ``config``
        attributes are read. The input columns of ``data`` are cast to float
        in place.
    outputname : str
        Path of the figure file to write.
    """
    df = pascalmodel.data
    df[pascalmodel.inputs] = df[pascalmodel.inputs].astype(float)
    df_train= df.loc[pascalmodel.train_idx]
    # Rows not used for training. A boolean mask is used because pandas >= 2.0
    # rejects a set as a .loc indexer.
    df_test = df.loc[~df.index.isin(pascalmodel.train_idx)]

    freqs = np.arange(1.1, 2.4, 0.1,dtype=float)
    cores = np.arange(1, 32, 1, dtype=float)
    plotdata.new_figure()
    matplotlib_rc_2()
    plt.rcParams["font.size"] = 18
    plt.rcParams["legend.fontsize"] = 14
    plt.rcParams["xtick.labelsize"] = 14
    plt.rcParams["ytick.labelsize"] = 14
    plt.rcParams["axes.labelsize"] = 14

    def update():
        X = np.array(np.meshgrid(*[freqs, cores])).T.reshape(-1, 2)
        Y = pascalmodel.predict(X)
        plotdata.setProps(xlabel='Frequencies (GHz)',
                          ylabel='Active threads',
                          zlabel='Power (W)',
                          fontsize=18)

        dfaux = df_train
        dfaux = dfaux.sort_values(["frequency", "cores"])
        plotdata.ax.scatter(dfaux["frequency"],
                        dfaux["cores"],
                        dfaux["ipmi_power"],
                        antialiased=True, color="red",s=500,marker="*")

        dfaux = df_test.sort_values(["frequency", "cores"])
        plotdata.ax.scatter(dfaux["frequency"],
                        dfaux["cores"],
                        dfaux["ipmi_power"],
                        antialiased=True, color="black",s=100)

        plotdata.plot3D(freqs, cores, Y, points=False, color_="b")
        plotdata.ax.legend(["Trained values","Measured values","Model"])
        #plotdata.ax.legend(["Trained values","Measured values","Min energy","Model"])

        # Error on the non-training rows. Despite the name this is a mean
        # absolute difference (no division by the measured value).
        X = dfaux[["frequency","cores"]].values
        Y = pascalmodel.predict(X)
        mpe = sum(abs(dfaux["ipmi_power"].values-Y))/len(Y)
        name = pascalmodel.config["pkg"]
        mpes.append([name, mpe])
        print(name, mpe)

    plotdata.update_user = update
    update()
    plotdata.ax.view_init(40, -60)
    tight_layout()

    savefig(outputname)
    show()

# Only models whose data covers more than two core counts and more than two frequencies.
aux = df_models[df_models["ref"].apply(lambda x : (x.data["cores"].unique().shape[0]>2)
                                                   &(x.data["frequency"].unique().shape[0]>2) )]
min_mpe = aux[(aux["ts"]==10)&(aux["type"]=="pw_equation")].groupby(["title"]).mpe.min().values
aux2 = aux[aux.mpe.isin(min_mpe)].drop_duplicates("title")
# plot3d() appends one [name, error] pair per plotted model to this list.
mpes = []
for idx,pascalmodel in aux2.iterrows():
    model = pascalmodel.ref
    name = pascalmodel["name"]
    plot3d(model, outputname=f"models/power/{name}.pdf")

mpes = pd.DataFrame(mpes, columns=["name","mpe"])
# numeric_only: the frame also holds the string column "name".
print(mpes.mean(numeric_only=True))
mpes

# 3D plots energy (model vs measured)

# Variation Cores X Frequency

In [None]:
reload(plotdata)
df_models = create_df_models()

def plot3d(pascalmodel, outputname):
    """
    Plot the energy model surface over (frequency, cores) for one input value.

    Red stars are the training rows, black dots the remaining measurements and
    the blue surface is ``pascalmodel.predict`` on a regular cores/frequency
    grid, all restricted to the input passed to the nested ``update`` (3 here).

    Parameters
    ----------
    pascalmodel : PascalModel
        Fitted model; its ``data``, ``inputs``, ``train_idx`` and ``config``
        attributes are read. The input columns of ``data`` are cast to float
        in place.
    outputname : str
        Path of the figure file to write.
    """
    df = pascalmodel.data
    df[pascalmodel.inputs] = df[pascalmodel.inputs].astype(float)
    df_train= df.loc[pascalmodel.train_idx]
    # Rows not used for training. A boolean mask is used because pandas >= 2.0
    # rejects a set as a .loc indexer.
    df_test = df.loc[~df.index.isin(pascalmodel.train_idx)]

    freqs = np.arange(1.1, 2.4, 0.1)
    cores = np.arange(1, 32, 1)

    matplotlib_rc_2()
    plotdata.new_figure()
    plt.rcParams["font.size"] = 18
    plt.rcParams["legend.fontsize"] = 14
    plt.rcParams["xtick.labelsize"] = 14
    plt.rcParams["ytick.labelsize"] = 14
    plt.rcParams["axes.labelsize"] = 14

    def update(val):
        # Model inputs are ordered (input, cores, frequency).
        X = np.array(np.meshgrid(
            [int(val)], cores, freqs)).T.reshape(-1, 3)
        Y = pascalmodel.predict(X)
        plotdata.setProps(xlabel='Frequencies (GHz)',
                          ylabel='Active threads',
                          zlabel='Energy (KJ)',
                          fontsize=18)

        dfaux = df_train[df_train["input"] == val]
        dfaux = dfaux.sort_values(["frequency", "cores"])
        plotdata.ax.scatter(dfaux["frequency"],
                        dfaux["cores"],
                        dfaux["ipmi_energy"],
                        antialiased=True, color="red",s=500,marker="*")

        dfaux = df_test[df_test["input"] == val]
        dfaux = dfaux.sort_values(["frequency", "cores"])
        plotdata.ax.scatter(dfaux["frequency"],
                        dfaux["cores"],
                        dfaux["ipmi_energy"],
                        antialiased=True, color="black",s=100)

#         if len(best) > 2:
#             plotdata.ax.scatter([best[2]],
#                             [best[1]],
#                             [best[3]],
#                             antialiased=True, color="y",s=100)

        plotdata.plot3D(freqs, cores, Y, points=False, color_="b")
        plotdata.ax.legend(["Trained values","Measured values","Model"])
        #plotdata.ax.legend(["Trained values","Measured values","Min energy","Model"])

    # Predicted minimum-energy point for the argument at position 2. It is only
    # consumed by the disabled "min energy" marker inside update().
    best= []
    for i, arg in enumerate(pascalmodel.config["arguments"]):
        X = np.array(np.meshgrid(
            [float(i)], cores, freqs)).T.reshape(-1, 3)
        Y = pascalmodel.predict(X)
        idx = np.argmin(Y)
        if i == 2:
            best= list(X[idx])+[Y[idx]]

    plotdata.update_user = update
    update(3)
    plotdata.ax.view_init(30, 60)
    tight_layout()
    savefig(outputname)
    show()

# Lowest-error equation model trained with 10 points, one row per application title.
min_mpe = df_models[(df_models["ts"]==10)&(df_models["type"]=="equation")].groupby(["title"]).mpe.min().values
aux2 = df_models[df_models.mpe.isin(min_mpe)].drop_duplicates("title")
for row in aux2.values:
    # Positional access: column 0 is "name", column 6 is "ref".
    model= row[6]
    print(row[0])
    plot3d(model,outputname=f"models/energy/freq_cores/{row[0]}.pdf")

# Variation Input X Frequency

In [None]:
df_models = create_df_models()

def plot3d(pascalmodel, outputname):
    """
    Plot the energy model surface over (frequency, input) for one core count.

    Red stars are the training rows, black dots the remaining measurements and
    the blue surface is ``pascalmodel.predict`` on a regular input/frequency
    grid, all restricted to the core count passed to the nested ``update``
    (32 here).

    Parameters
    ----------
    pascalmodel : PascalModel
        Fitted model; its ``data``, ``inputs``, ``train_idx`` and ``config``
        attributes are read. The input columns of ``data`` are cast to float
        in place.
    outputname : str
        Path of the figure file to write.
    """
    df = pascalmodel.data
    df[pascalmodel.inputs] = df[pascalmodel.inputs].astype(float)
    df_train= df.loc[pascalmodel.train_idx]
    # Rows not used for training. A boolean mask is used because pandas >= 2.0
    # rejects a set as a .loc indexer.
    df_test = df.loc[~df.index.isin(pascalmodel.train_idx)]

    freqs = np.arange(1.1, 2.4, 0.1)
    cores = np.arange(1, 32, 1)
    inps = np.arange(1, 6, 1)
    matplotlib_rc_2()
    plotdata.new_figure()

    plt.rcParams["font.size"] = 18
    plt.rcParams["legend.fontsize"] = 14
    plt.rcParams["xtick.labelsize"] = 14
    plt.rcParams["ytick.labelsize"] = 14
    plt.rcParams["axes.labelsize"] = 14

    def update(val):
        # Model inputs are ordered (input, cores, frequency).
        X = np.array(np.meshgrid(
            inps, [int(val)], freqs)).T.reshape(-1, 3)
        Y = pascalmodel.predict(X)
        plotdata.setProps(xlabel='Frequency (GHz)',
                          ylabel='Input size',
                          zlabel='Energy (KJ)',
                          fontsize=18)

        dfaux = df_train[df_train["cores"] == val]
        dfaux = dfaux.sort_values(["frequency", "input"])
        plotdata.ax.scatter(dfaux["frequency"],
                        dfaux["input"],
                        dfaux["ipmi_energy"],
                        antialiased=True, color="red",s=500,marker="*")

        dfaux = df_test[df_test["cores"] == val]
        dfaux = dfaux.sort_values(["frequency","input"])
        plotdata.ax.scatter(dfaux["frequency"],
                        dfaux["input"],
                        dfaux["ipmi_energy"],
                        antialiased=True, color="black",s=100)

#         if len(best) > 2:
#             plotdata.ax.scatter([best[2]],
#                             [best[1]],
#                             [best[3]],
#                             antialiased=True, color="y",s=100)

        plotdata.plot3D(freqs, inps, Y, points=False, color_="b")
        plotdata.ax.legend(["Trained values","Measured values","Model"])
        plotdata.ax.set_xlim(min(freqs), max(freqs))
        plotdata.ax.set_ylim(min(inps), max(inps))
        #plotdata.ax.legend(["train values","measured values","min energy","model"],fontsize=20)

    # Predicted minimum-energy point for the argument at position 2. It is only
    # consumed by the disabled "min energy" marker inside update().
    best= []
    for i, arg in enumerate(pascalmodel.config["arguments"]):
        X = np.array(np.meshgrid(
            [float(i)], cores, freqs)).T.reshape(-1, 3)
        Y = pascalmodel.predict(X)
        idx = np.argmin(Y)
        if i == 2:
            best= list(X[idx])+[Y[idx]]

    plotdata.update_user = update
    update(32)
    plotdata.ax.view_init(30, 240)
    tight_layout()
    savefig(outputname)
    show()


# Lowest-error equation model trained with 10 points, one row per application title.
min_mpe= df_models[(df_models["ts"]==10)&(df_models["type"]=="equation")].groupby(["title"]).mpe.min().values
aux2= df_models[df_models.mpe.isin(min_mpe)].drop_duplicates("title")
for row in aux2.values:
    # Positional access: column 0 is "name", column 6 is "ref".
    model= row[6]
    print(row[0])
    plot3d(model,outputname=f"models/energy/freq_inps/{row[0]}.pdf")

# Variation Input X Cores

In [None]:
df_models = create_df_models()

def plot3d(pascalmodel, outputname):
    """
    Plot the energy model surface over (input, cores) for one frequency.

    Red stars are the training rows, black dots the remaining measurements and
    the blue surface is ``pascalmodel.predict`` on a regular input/cores grid,
    all restricted to the frequency passed to the nested ``update`` (the
    highest frequency present in the training rows).

    Parameters
    ----------
    pascalmodel : PascalModel
        Fitted model; its ``data``, ``inputs``, ``train_idx`` and ``config``
        attributes are read. The input columns of ``data`` are cast to float
        in place.
    outputname : str
        Path of the figure file to write.
    """
    df = pascalmodel.data
    df[pascalmodel.inputs] = df[pascalmodel.inputs].astype(float)
    df_train= df.loc[pascalmodel.train_idx]
    # Rows not used for training. A boolean mask is used because pandas >= 2.0
    # rejects a set as a .loc indexer.
    df_test = df.loc[~df.index.isin(pascalmodel.train_idx)]

    freqs = np.arange(1.1, 2.4, 0.1)
    cores = np.arange(1, 32, 1)
    inps = np.arange(1, 6, 1)
    matplotlib_rc_2()
    plotdata.new_figure()

    plt.rcParams["font.size"] = 18
    plt.rcParams["legend.fontsize"] = 14
    plt.rcParams["xtick.labelsize"] = 14
    plt.rcParams["ytick.labelsize"] = 14
    plt.rcParams["axes.labelsize"] = 14

    def update(val):
        # Model inputs are ordered (input, cores, frequency).
        X = np.array(np.meshgrid(
            inps, cores, [float(val)])).T.reshape(-1, 3)
        Y = pascalmodel.predict(X)
        plotdata.setProps(xlabel='Input size',
                          ylabel='Active threads',
                          zlabel='Energy (KJ)',
                          fontsize=18)

        dfaux = df_train[df_train["frequency"] == val]
        dfaux = dfaux.sort_values(["cores", "input"])
        plotdata.ax.scatter(dfaux["input"],
                        dfaux["cores"],
                        dfaux["ipmi_energy"],
                        antialiased=True, color="red",s=500,marker="*")

        dfaux = df_test[df_test["frequency"] == val]
        dfaux = dfaux.sort_values(["cores","input"])
        plotdata.ax.scatter(dfaux["input"],
                        dfaux["cores"],
                        dfaux["ipmi_energy"],
                        antialiased=True, color="black",s=100)

#         if len(best) > 2:
#             plotdata.ax.scatter([best[2]],
#                             [best[1]],
#                             [best[3]],
#                             antialiased=True, color="y",s=100)

        plotdata.plot3D(inps, cores, Y, points=False, color_="b")
        plotdata.ax.legend(["Trained values","Measured values","Model"])
        #plotdata.ax.legend(["train values","measured values","min energy","model"],fontsize=20)

    # Predicted minimum-energy point for the argument at position 2. It is only
    # consumed by the disabled "min energy" marker inside update().
    best= []
    for i, arg in enumerate(pascalmodel.config["arguments"]):
        X = np.array(np.meshgrid(
            [float(i)], cores, freqs)).T.reshape(-1, 3)
        Y = pascalmodel.predict(X)
        idx = np.argmin(Y)
        if i == 2:
            best= list(X[idx])+[Y[idx]]

    plotdata.update_user = update
    update(max(df_train.frequency.unique()))
    plotdata.ax.view_init(30, 60)
    tight_layout()
    savefig(outputname)
    show()


# Lowest-error equation model trained with 10 points, one row per application title.
min_mpe= df_models[(df_models["ts"]==10)&(df_models["type"]=="equation")].groupby(["title"]).mpe.min().values
aux2= df_models[df_models.mpe.isin(min_mpe)].drop_duplicates("title")
for row in aux2.values:
    # Positional access: column 0 is "name", column 6 is "ref".
    model= row[6]
    print(row[0])
    plot3d(model,outputname=f"models/energy/cores_inps/{row[0]}.pdf")

# ML models comparison

In [None]:
df = pd.read_csv("ml_models/data03_random.csv",index_col=0)
# Result files that are kept in the comparison.
files = ["completo_fluid_2.json", "completo_openmc_kernel_novo.json",
        "completo_rtview_1.json", "completo_swaptions_1.json", 
         "completo_vips_4.json", "completo_x264_4.json",
        "completo_xhpl.json", "completo_black_3.json",
        "completo_bodytrack_3.json", "completo_canneal_3.json",
        "completo_dedup_3.json","completo_ferret_3.json"]
# Alternative exclusion list; only referenced by the disabled filter below.
not_file = ['completo_freqmine_1.json', 'completo_freqmine_2.json','completo_fluid_1.json']
df = df[df["name"]!="SVR_gridsearch"]
df = df[df["file"].isin(files)]
df = df[df["ts"]<100]
#df = df[~df["file"].isin(not_file)]
# Average over the files for each (model, training size). numeric_only skips
# the string column "file", which pandas >= 2.0 would otherwise refuse to average.
df = df.groupby(["name","ts"]).mean(numeric_only=True).reset_index()

ax = None
matplotlib_rc_2()
# One curve per model, all drawn on the axes created by the first plot call.
for name in df["name"].unique():
    aux = df[df["name"]==name]
    ax = aux.plot(x = "ts", y = "mse", ax = ax, label = name, logy=True)
ylabel("Mean squared error")
xlabel("Training data size")

savefig("experiments/ml_models.pdf")

# Overall MPE results

In [None]:
df_models = create_df_models()

df_overhad = []
# Best equation model per title among those trained with 10 points.
min_mpe = df_models[(df_models["ts"]==10)&(df_models["type"]=="equation")].groupby(["title"]).mpe.min().values
aux1 = df_models[df_models.mpe.isin(min_mpe)].drop_duplicates("title")[["title","ts","type","mpe"]]
df_overhad.append(aux1)
print("Table with best equation models using 10 points")
display(aux1)

# Best SVR model per title among those trained with 10 points.
min_mpe = df_models[(df_models["ts"]==10)&(df_models["type"]=="svr")].groupby(["title"]).mpe.min().values
aux2 = df_models[df_models.mpe.isin(min_mpe)].drop_duplicates("title")[["title","ts","type","mpe"]]
df_overhad.append(aux2)
print("Table with best svr models using 10 points")
display(aux2)

# After the merge, "mpe_x" comes from the equation table and "mpe_y" from the SVR table.
df_overhad_comb = pd.merge(df_overhad[0],df_overhad[1],on="title")
matplotlib_rc_1()
df_overhad_comb[["title","mpe_y","mpe_x"]].plot.bar()
# Average only the numeric columns: including the string column "title"
# makes DataFrame.mean() fail on pandas >= 2.0.
mpe_mod, mpe_svr= df_overhad_comb[["mpe_x","mpe_y"]].mean()
plt.plot([-1,13],[mpe_svr,mpe_svr],"--")
plt.plot([-1,13],[mpe_mod,mpe_mod],"--")
locs, labels = xticks()
ylabel("Mean percentage error")
xticks(ticks=locs,labels=df_overhad_comb["title"].values)
legend(["Mean SVR","Mean model","SVR","Model"])
tight_layout()
savefig("models/mpe_svr_eq.pdf")

# MPE vs Train size (10 points)

In [None]:
df_models = create_df_models()

# Best 10-point SVR and equation model per application title.
min_mpe_svr= df_models[(df_models["ts"]==10)&(df_models["type"]=="svr")].groupby(["title"]).mpe.min().values
aux2= df_models[df_models.mpe.isin(min_mpe_svr)].drop_duplicates("title")

min_mpe_eq= df_models[(df_models["ts"]==10)&(df_models["type"]=="equation")].groupby(["title"]).mpe.min().values
aux3= df_models[df_models.mpe.isin(min_mpe_eq)].drop_duplicates("title")

# Per-application curves, collected so that later cells can average them.
avgr_eq= []
avgr_svr= []
avgr_eq_en= []
avgr_svr_en= []
tss= []

for t1,t2 in zip(aux2.name, aux3.name):
    matplotlib_rc_2()
    # SVR: column-wise minimum for every training size.
    aux= df_models[(df_models["name"]==t1)&(df_models["type"]=="svr")&(df_models["ts"]>1)]
    aux= aux.drop(columns="ref").groupby(["ts"]).min().reset_index()
    plt.plot(aux["ts"],aux["mpe"],label="SVR")

    avgr_svr.append(aux["mpe"])
    # SVR energy goes to the SVR list (the two energy lists used to be swapped).
    avgr_svr_en.append(aux["energy"])

    # Equation model: same aggregation.
    aux= df_models[(df_models["name"]==t2)&(df_models["type"]=="equation")&(df_models["ts"]>1)]
    aux= aux.drop(columns="ref").groupby(["ts"]).min().reset_index()
    plt.plot(aux["ts"],aux["mpe"],label="Model")

    avgr_eq.append(aux["mpe"])
    avgr_eq_en.append(aux["energy"])
    # Training sizes of the last processed application; reused as x axis later.
    tss= aux["ts"]

    ylabel("Mean percetage error")
    xlabel("Number of samples")
    legend()

    #title(get_title(app_name))
    tight_layout()
    savefig(f"models/overhead/{t1}.pdf")
    show()

In [None]:
# Error curves averaged over the applications, one value per training size.
mean_mpe_svr = np.mean(avgr_svr,axis=0)
mean_mpe_eq = np.mean(avgr_eq,axis=0)
mean_mpe_svr, mean_mpe_eq

In [None]:
# Application-averaged error of both model families against the training size.
matplotlib_rc_2()
for curves, caption in ((avgr_svr, "Mean error SVR"), (avgr_eq, "Mean error model")):
    plt.plot(tss, np.mean(curves,axis=0), label=caption)
ylabel("Mean percetage error")
xlabel("Number of samples")
legend()
tight_layout()
savefig("models/overhead/overall_mpe_10pts.pdf")

# Energy vs Train size (10 points)

In [None]:
# Application-averaged training energy against the training size.
matplotlib_rc_2()
mean_training_energy = np.mean(avgr_eq_en,axis=0)
plt.plot(tss, mean_training_energy, label="Mean energy model and SVR")
ylabel("Energy (KJ)")
xlabel("Number of samples")
legend()
tight_layout()
savefig("models/overhead/overall_energy_10pts.pdf")

# Error vs train size using the apps with lowest mpe

In [None]:
df_models = create_df_models()
min_mpe = df_models.groupby(["title","type"]).mpe.min().values

# Only used to build the output file name.
split_type= "halton"
best_app_names = df_models[df_models.mpe.isin(min_mpe)].name.unique()
for app_name in best_app_names:
    print(app_name)
    # Column-wise minimum for every (training size, model type) of this application.
    df1 = (df_models[df_models["name"]==app_name]
           .drop(columns="ref")
           .groupby(["ts","type"])
           .min()
           .reset_index())
    df1 = df1[df1["ts"]>1]

    matplotlib_rc_2()
    fig, ax = subplots()

    for kind, caption, colour in (("equation", "Equation", "b"), ("svr", "SVR", "r")):
        curve = df1.loc[df1["type"]==kind, ["ts","mpe"]]
        plot(curve["ts"], curve["mpe"], label=caption, c=colour)

    ylabel("Mean percetage error")
    xlabel("Number of sampels")
    legend()

    #title(get_title(app_name))
    tight_layout()
    savefig(f"models/overhead/lowest_mpe/{app_name}_{split_type}.pdf")
    show()

# Energy vs train size using the apps with lowest mpe

In [None]:
df_models = create_df_models()

split_type= "random"
for app_name in df_models["name"].unique():
    print(app_name)
    # Lowest error of this application for every (training size, type, split type) ...
    best_mpes = df_models[df_models["name"]==app_name].groupby(["ts","type","split_type"]).mpe.min()
    # ... and the rows of df_models whose error matches one of those values.
    df1 = df_models[df_models["mpe"].isin(best_mpes.values)].drop_duplicates()
    df1 = df1[df1["ts"]>1]

    matplotlib_rc_2()
    fig, ax = subplots()

    uses_split = df1["split_type"]==split_type
    for kind, caption, colour in (("equation", "Equation", "b"), ("svr", "Svr", "r")):
        curve = df1.loc[(df1["type"]==kind)&uses_split, ["ts","energy"]].sort_values("ts")
        plot(curve["ts"], curve["energy"], label=caption, c=colour)

    ylabel("Energy (KJ)")
    xlabel("Train size")
    legend()

    title(get_title(app_name))
    tight_layout()
    #savefig(f"figures/overhead/svr_vs_eq/{app_name}_{split_type}.pdf")
    show()

In [None]:
df_models = create_df_models()

# Applications owning at least one per-(title, type) minimum error.
min_mpe = df_models.groupby(["title","type"]).mpe.min().values
best_app_names = df_models[df_models.mpe.isin(min_mpe)].name.unique()
dfz = df_models[df_models["name"].isin(best_app_names)]

matplotlib_rc_2()
# Mean error per training size, one curve per model type.
for kind, caption in (("equation", "Equation"), ("svr", "SVR")):
    x1 = dfz[ (dfz["ts"]>1)&(dfz["type"]==kind) ].groupby("ts").mpe.mean().plot(label=caption)
    xlabel("Train size")
    ylabel("Mean percentage error")
legend()
savefig("models/overall_lowest_mpe.pdf")

In [None]:
df_models = create_df_models()

matplotlib_rc_2()
# Mean training energy of all models for every training size above 1.
mean_energy_by_size = df_models[df_models["ts"]>1].groupby("ts").energy.mean()
x1 = mean_energy_by_size.plot()
xlabel("Train size")
ylabel("Energy (KJ)")
tight_layout()
savefig("models/overhead/overall_energy_lowest_mpe.pdf")

# Ondemand comparison

- Energy saving
- Time penalty
- Table with mean runs to compensate energy spent on training

In [None]:
ond = pd.read_csv("databases/dvfs_ond_cmp.csv",index_col=0)
# Exclude these rows by index label. `drop` replaces `.loc[set(...)]`, which
# pandas >= 2.0 rejects; errors="ignore" tolerates labels that are absent.
ond = ond.drop(index=[0,4,7,13,14], errors="ignore")
display(ond)

In [None]:
ond = pd.read_csv("databases/dvfs_ond_cmp.csv",index_col=0)
# Exclude these rows by index label. `drop` replaces `.loc[set(...)]`, which
# pandas >= 2.0 rejects; errors="ignore" tolerates labels that are absent.
ond = ond.drop(index=[0,4,7,13,14], errors="ignore")

matplotlib_rc_1()
ond.sav_max.plot.bar()
med= ond.sav_max.mean()
print(med)
# Dashed horizontal line at the mean of the plotted column.
plot([-1,13],[med,med],"--",c="r")
locs, labels = xticks()
ylabel("Relative saving")
# Replace the numeric bar positions by the application names.
xticks(ticks=locs,labels=ond["app"].values)
legend(["Mean saving"])
tight_layout()
savefig("models/dvfs_cmp_max.pdf")

In [None]:
ond = pd.read_csv("databases/dvfs_ond_cmp.csv",index_col=0)
# Exclude these rows by index label. `drop` replaces `.loc[set(...)]`, which
# pandas >= 2.0 rejects; errors="ignore" tolerates labels that are absent.
ond = ond.drop(index=[0,4,7,13,14], errors="ignore")

matplotlib_rc_1()
ond.sav_mean.plot.bar()
med= ond.sav_mean.mean()
print(med)
# Dashed horizontal line at the mean of the plotted column.
plot([-1,13],[med,med],"--",c="r")
locs, labels = xticks()
ylabel("Relative saving")
# Replace the numeric bar positions by the application names.
xticks(ticks=locs,labels=ond["app"].values)
legend(["Mean saving"])
tight_layout()
savefig("models/dvfs_cmp_mean.pdf")

In [None]:
ond = pd.read_csv("databases/dvfs_ond_cmp.csv",index_col=0)
# Exclude these rows by index label. `drop` replaces `.loc[set(...)]`, which
# pandas >= 2.0 rejects; errors="ignore" tolerates labels that are absent.
ond = ond.drop(index=[0,4,7,13,14], errors="ignore")

matplotlib_rc_1()
ond.sav_32.plot.bar()
med= ond.sav_32.mean()
print(med)
# Dashed horizontal line at the mean of the plotted column.
plot([-1,13],[med,med],"--",c="r")
locs, labels = xticks()
ylabel("Relative saving")
# Replace the numeric bar positions by the application names.
xticks(ticks=locs,labels=ond["app"].values)
legend(["Mean saving"])
tight_layout()
savefig("models/dvfs_cmp_32.pdf")

# Model Analysis

## Energy vs time

In [None]:
# Colours of the active matplotlib property cycle; the sweeps in the following
# cells pick one colour per parameter value from this list.
prop_cycle = plt.rcParams['axes.prop_cycle']
lcolors = prop_cycle.by_key()['color']

class MyScalarFormatter(ScalarFormatter):
    """ScalarFormatter whose locale branch formats ticks with digit grouping enabled."""

    def __call__(self, x, pos=None):
        """Return the label for tick value ``x`` ('' when no tick locations are set)."""
        if len(self.locs) == 0:
            return ''
        # Remove offset and exponent, as the axis shows them separately.
        scaled = (x - self.offset) / (10. ** self.orderOfMagnitude)
        if abs(scaled) < 1e-8:
            # Snap tiny values to exactly zero.
            scaled = 0
        if self._useLocale:
            text = locale.format_string(self.format, (scaled,), grouping=True)
        else:
            text = self.format % scaled
        return self.fix_minus(text)

# Make locale-aware formatting group digits in threes separated by commas.
locale._override_localeconv = {'thousands_sep': ',', 'grouping': [3,0]}
sf = MyScalarFormatter(useLocale=True)

# Coefficients of the analytic model below. They are read at call time, so the
# later cells can change an entry of `xs` and re-evaluate the functions.
xs = [1,0.29,0.97,198,0.8]

def time_(f, p):
    """Model run time at frequency ``f`` with ``p`` cores."""
    return xs[0]*( xs[4]/p-xs[4]+1 )/f

def power_(f, p):
    """Model power at frequency ``f`` with ``p`` cores."""
    return (xs[1]*f**3+xs[2]*f)*p+xs[3]

def energy_(f, p):
    """Model energy: run time multiplied by power."""
    return time_(f,p)*power_(f,p)


def pareto_frontier_selection(costs, return_mask= True):  # <- Fastest for many points
    """
    Select the Pareto-efficient rows of ``costs`` (lower is better in every column).

    A row is dropped when some other row is less than or equal to it in every
    column and strictly smaller in at least one. Rows that tie on one cost but
    are worse on another are therefore dropped, while exact duplicates of an
    efficient row are all kept.

    Parameters
    ----------
    costs : array-like of shape (n_points, n_costs)
    return_mask : bool, default True
        True returns a boolean mask of length ``n_points``; False returns the
        integer indices of the efficient rows.

    Returns
    -------
    numpy.ndarray
        Boolean mask or index array, depending on ``return_mask``.
    """
    costs = np.asarray(costs)
    n_points = costs.shape[0]
    # Original row indices of the points still considered efficient.
    is_efficient = np.arange(n_points)
    next_point_index = 0  # Next index in the is_efficient array to search for

    # `costs` shrinks on every pass; stop once each survivor has been the pivot.
    while next_point_index < len(costs):
        pivot = costs[next_point_index]
        # A row survives the pivot if it beats it strictly in some column, or if
        # it is identical to it (this also keeps the pivot itself). Using `<=`
        # here would wrongly keep rows that tie in one column and lose in another.
        better_somewhere = np.any(costs < pivot, axis=1)
        same_as_pivot = np.all(costs == pivot, axis=1)
        nondominated_point_mask = better_somewhere | same_as_pivot
        is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
        costs = costs[nondominated_point_mask]
        # Position of the next pivot after the rows in front of it were removed.
        next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1

    if return_mask:
        is_efficient_mask = np.zeros(n_points, dtype = bool)
        is_efficient_mask[is_efficient] = True
        return is_efficient_mask
    else:
        return is_efficient

In [None]:
# Sweep xs[4] on the 1.2-2.2 GHz / 1-31 core grid. For every value, draw all
# (time, energy) points, highlight the Pareto front and annotate the
# configurations with the lowest and the highest energy.
xs = [100,0.29,0.97,198,0.8]
matplotlib_rc_2()

# The grid does not depend on the swept value, so it is built once.
f = np.arange(1.2,2.3,0.1)
p = np.arange(1,32,1)
Y, X = np.meshgrid(p,f)
flat_f, flat_p = X.reshape(-1), Y.reshape(-1)

for i, w in enumerate([0.1,0.3,0.6,0.8,0.9]):
    xs[4] = w
    sweep_color = lcolors[i%len(lcolors)]
    x, y = time_(X,Y), energy_(X,Y)
    for row_time, row_energy in zip(x,y):
        scatter(row_time, row_energy, c=sweep_color, s=1, marker=".")

    # Column 0 is time, column 1 is energy.
    costs = np.column_stack([x.reshape(-1), y.reshape(-1)])
    mask = pareto_frontier_selection(costs)
    front = costs[mask]
    scatter(front[:, 0], front[:, 1], s=40, label=f"{w}", c=sweep_color, marker="x")

    text_style = dict(xycoords='data', textcoords='data',
                      horizontalalignment='right', verticalalignment='top',
                      color=sweep_color, fontsize=14)

    # Configuration with the lowest energy.
    min_idx = np.argmin(costs[:, 1])
    fmin, pmin = flat_f[min_idx], flat_p[min_idx]
    plt.annotate(f"({fmin:.2f} (GHz), #{pmin:.0f})",
                 xy=(costs[min_idx, 0], costs[min_idx, 1]),
                 xytext=(costs[min_idx, 0]+30, costs[min_idx, 1]),
                 arrowprops=dict(facecolor='red', shrink=0.1, width=0.1, headwidth=5),
                 **text_style)

    # Configuration with the highest energy.
    max_idx = np.argmax(costs[:, 1])
    fmax, pmax = flat_f[max_idx], flat_p[max_idx]
    plt.annotate(f"({fmax:.2f} (GHz), #{pmax:.0f})",
                 xy=(costs[max_idx, 0], costs[max_idx, 1]),
                 xytext=(costs[max_idx, 0]-20, costs[max_idx, 1]),
                 arrowprops=dict(facecolor='blue', shrink=0.1, width=0.1, headwidth=5),
                 **text_style)

ylabel("Energy (J)")
xlabel("Time (s)")
ax = plt.gca()
ax.yaxis.set_major_formatter(sf)
#ax.set_yscale('log')
# Collapse repeated labels so each swept value appears once in the legend.
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
legend(by_label.values(), by_label.keys())
ylim(0,50000)
tight_layout()
savefig("models/analisys/pareto_w_low.pdf")

In [None]:
# Same sweep of xs[4] on a wider grid (1.2-4.9 GHz, 1-63 cores).
xs = [100,0.29,0.97,198,0.8]
matplotlib_rc_2()

# The grid does not depend on the swept value, so it is built once.
f = np.arange(1.2,5.0,0.1)
p = np.arange(1,64,1)
Y, X = np.meshgrid(p,f)
flat_f, flat_p = X.reshape(-1), Y.reshape(-1)

for i, w in enumerate([0.1,0.3,0.6,0.8,0.9]):
    xs[4] = w
    sweep_color = lcolors[i%len(lcolors)]
    x, y = time_(X,Y), energy_(X,Y)
    for row_time, row_energy in zip(x,y):
        scatter(row_time, row_energy, c=sweep_color, s=1, marker=".")

    # Column 0 is time, column 1 is energy.
    costs = np.column_stack([x.reshape(-1), y.reshape(-1)])
    mask = pareto_frontier_selection(costs)
    front = costs[mask]
    scatter(front[:, 0], front[:, 1], s=40, label=f"{w}", c=sweep_color, marker="x")

    text_style = dict(xycoords='data', textcoords='data',
                      horizontalalignment='right', verticalalignment='top',
                      color=sweep_color, fontsize=14)

    # Lowest-energy configuration; the text is raised a bit more for every
    # sweep so the annotations do not overlap.
    min_idx = np.argmin(costs[:, 1])
    fmin, pmin = flat_f[min_idx], flat_p[min_idx]
    y_desc = 3000*i
    plt.annotate(f"({fmin:.2f} (GHz), #{pmin:.0f})",
                 xy=(costs[min_idx, 0], costs[min_idx, 1]),
                 xytext=(costs[min_idx, 0]+65, costs[min_idx, 1]+y_desc),
                 arrowprops=dict(facecolor='red', shrink=0.1, width=0.1, headwidth=5),
                 **text_style)

    # Highest-energy configuration; here the offset is applied horizontally.
    max_idx = np.argmax(costs[:, 1])
    fmax, pmax = flat_f[max_idx], flat_p[max_idx]
    if i > 2:
        y_desc = -20
    else:
        y_desc = 40
    plt.annotate(f"({fmax:.2f} (GHz), #{pmax:.0f})",
                 xy=(costs[max_idx, 0], costs[max_idx, 1]),
                 xytext=(costs[max_idx, 0]+y_desc, costs[max_idx, 1]),
                 arrowprops=dict(facecolor='blue', shrink=0.1, width=0.1, headwidth=5),
                 **text_style)

ylabel("Energy (J)")
xlabel("Time (s)")
ax = plt.gca()
ax.yaxis.set_major_formatter(sf)
#ax.set_yscale('log')
# Collapse repeated labels so each swept value appears once in the legend.
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
legend(by_label.values(), by_label.keys())
ylim(0,50000)
tight_layout()
savefig("models/analisys/pareto_w_high.pdf")

In [None]:
# Sweep xs[3] over four values on the reduced grid (frequency 1.2 to below 2.3,
# 1 to 31 cores) and, for each one, plot the time/energy cloud with its Pareto
# frontier and annotate the minimum- and maximum-energy configurations.
# NOTE(review): time_, energy_, pareto_frontier_selection, lcolors and sf are
# defined outside this cell; time_/energy_ presumably read the global `xs` —
# confirm before reusing this cell in isolation.
xx = []
yy = []
i = 0  # position of the current sweep value; picks the colour and the label offset
xs = [100,0.29,0.97,198,0.86]
y_desc = 0
matplotlib_rc_2()
for w in [50,100,200,300]:
    xs[3] = w
    f= np.arange(1.2,2.3,0.1)
    p= np.arange(1,32,1)
    # meshgrid(p, f) returns (p-grid, f-grid): X holds frequencies, Y holds core counts.
    Y, X= np.meshgrid(p,f)
    x, y = time_(X,Y), energy_(X,Y)
    
    # Background cloud: all grid points, drawn one grid row (one frequency) at a time.
    for v,z in zip(x,y):
        scatter(v,z,c=lcolors[i%len(lcolors)],s=0.05)
    
    xx = list(x.reshape(-1))
    yy = list(y.reshape(-1))
    # One row per grid point: column 0 is the time_ value, column 1 the energy_ value.
    costs = np.stack([xx,yy]).T
    # presumably a selector of the non-dominated points — verify against the helper
    mask = pareto_frontier_selection(costs)
    scatter(costs[mask][:, 0], costs[mask][:, 1], s=20, label=f"{w}", c=lcolors[i%len(lcolors)])
    
    # Annotate the grid point with the lowest energy_ value.
    min_idx = np.argmin(costs[:, 1])
    fmin, pmin = X.reshape(-1)[min_idx], Y.reshape(-1)[min_idx]
    #print(energy_(fmin,pmin) == yy[min_idx])
    # Stagger the label vertically per sweep value so the annotations do not overlap.
    y_desc = 3000*i
    plt.annotate(f"({fmin:.2f} (GHz), #{pmin:.0f})", xy=(costs[min_idx, 0],costs[min_idx, 1]),
                xycoords='data', xytext=(costs[min_idx, 0]+30,costs[min_idx, 1]+y_desc), textcoords='data',
                arrowprops=dict(facecolor='red', shrink=0.1, width=0.1, headwidth=5),
                horizontalalignment='right', verticalalignment='top', color=lcolors[i%len(lcolors)], fontsize=14)
    
    # Annotate the grid point with the highest energy_ value.
    max_idx = np.argmax(costs[:, 1])
    fmax, pmax = X.reshape(-1)[max_idx], Y.reshape(-1)[max_idx]
    #print(energy_(fmax,pmax), yy[max_idx])
    plt.annotate(f"({fmax:.2f} (GHz), #{pmax:.0f})", xy=(costs[max_idx, 0],costs[max_idx, 1]),
                xycoords='data', xytext=(costs[max_idx, 0]-10,costs[max_idx, 1]), textcoords='data',
                arrowprops=dict(facecolor='blue', shrink=0.1, width=0.1, headwidth=5),
                horizontalalignment='right', verticalalignment='top', color=lcolors[i%len(lcolors)], fontsize=14)
    
    i += 1

ylabel("Energy (J)")
xlabel("Time (s)")
ax = plt.gca()
#ax.set_yscale('log')
ax.yaxis.set_major_formatter(sf)
# Collapse repeated legend labels so that each sweep value is listed once.
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
legend(by_label.values(), by_label.keys())
ylim(0,50000)
tight_layout()
savefig("models/analisys/pareto_static_low.pdf")

In [None]:
# Sweep xs[3] over four values on the full grid (frequency 1.2 to below 5.0,
# 1 to 63 cores) and, for each one, plot the time/energy cloud with its Pareto
# frontier and annotate the minimum- and maximum-energy configurations.
# NOTE(review): time_, energy_, pareto_frontier_selection, lcolors and sf are
# defined outside this cell; time_/energy_ presumably read the global `xs` —
# confirm before reusing this cell in isolation.
xx = []
yy = []
i = 0  # position of the current sweep value; picks the colour and the label offsets
xs = [100,0.29,0.97,198,0.86]
y_desc = 0
matplotlib_rc_2()
for w in [50,100,200,300]:
    xs[3] = w
    f= np.arange(1.2,5.0,0.1)
    p= np.arange(1,64,1)
    # meshgrid(p, f) returns (p-grid, f-grid): X holds frequencies, Y holds core counts.
    Y, X= np.meshgrid(p,f)
    x, y = time_(X,Y), energy_(X,Y)

    # Background cloud: all grid points, drawn one grid row (one frequency) at a time.
    for v,z in zip(x,y):
        scatter(v,z,c=lcolors[i%len(lcolors)],s=0.05)

    xx = list(x.reshape(-1))
    yy = list(y.reshape(-1))
    # One row per grid point: column 0 is the time_ value, column 1 the energy_ value.
    costs = np.stack([xx,yy]).T
    # presumably a selector of the non-dominated points — verify against the helper
    mask = pareto_frontier_selection(costs)
    scatter(costs[mask][:, 0], costs[mask][:, 1], s=20, label=f"{w}", c=lcolors[i%len(lcolors)])

    # Annotate the grid point with the lowest energy_ value.
    min_idx = np.argmin(costs[:, 1])
    fmin, pmin = X.reshape(-1)[min_idx], Y.reshape(-1)[min_idx]
    # Stagger the label vertically per sweep value so the annotations do not overlap.
    y_desc = 1000*i
    plt.annotate(f"({fmin:.2f} (GHz), #{pmin:.0f})", xy=(costs[min_idx, 0],costs[min_idx, 1]),
                xycoords='data', xytext=(costs[min_idx, 0]+40,costs[min_idx, 1]+y_desc), textcoords='data',
                arrowprops=dict(facecolor='red', shrink=0.1, width=0.1, headwidth=5),
                horizontalalignment='right', verticalalignment='top', color=lcolors[i%len(lcolors)], fontsize=14)

    # Annotate the grid point with the highest energy_ value.
    max_idx = np.argmax(costs[:, 1])
    fmax, pmax = X.reshape(-1)[max_idx], Y.reshape(-1)[max_idx]
    # Here y_desc is reused as a horizontal label offset (added to the x coordinate below).
    y_desc = 25 if i == 0 else -20
    plt.annotate(f"({fmax:.2f} (GHz), #{pmax:.0f})", xy=(costs[max_idx, 0],costs[max_idx, 1]),
                xycoords='data', xytext=(costs[max_idx, 0]+y_desc,costs[max_idx, 1]), textcoords='data',
                arrowprops=dict(facecolor='blue', shrink=0.1, width=0.1, headwidth=5),
                horizontalalignment='right', verticalalignment='top', color=lcolors[i%len(lcolors)], fontsize=14)
    i += 1

ylabel("Energy (J)")
xlabel("Time (s)")
ax = plt.gca()
#ax.set_yscale('log')
# `sf` is the only formatter applied to the y axis; the FuncFormatter that used to
# be constructed here was never attached to anything, so it had no effect.
ax.yaxis.set_major_formatter(sf)

# Collapse repeated legend labels so that each sweep value is listed once.
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
legend(by_label.values(), by_label.keys())
ylim(0,50000)
tight_layout()
savefig("models/analisys/pareto_static_high.pdf")

# Studying the input size parameter

## Total number of instructions is constant

In [None]:
# For every cached run that sweeps the CPU frequency, plot the per-interval
# instruction counts (first difference of the "fingerprint_sample" values) of
# repetition "2": one spline-resampled curve per frequency, one figure per run.
run_col = energydb["run"]
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")
        if len(df["frequency"].unique()) > 1 and "3" in data.config["command"]:
            # presumably kHz -> GHz (frequencies are stored as strings such as "2200000")
            df["frequency"] = df["frequency"].astype(float)/1e6
            # .copy() so the column assignment below does not write into a slice of the parent frame
            df = df[df["sensors"] == "fingerprint_sample"].copy()
            df["info"] = df["info"].apply(flat)
            df = df[df["repetitions"] == "2"]
            matplotlib_rc_2()
            figure()
            fname = data.config["pkg"][2:]  # drop the first two characters of the package name
            print(fname)
            # unique() instead of .values: only the first row of a frequency is ever
            # plotted, so repeated rows would just redraw the same curve and duplicate
            # its legend entry.
            for freq in df["frequency"].unique():
                y = np.diff(list(df[df["frequency"]==freq]["info"].values[0]))

                # A cubic interpolating spline needs more than three samples.
                if len(y) > 3:
                    npoints = 100
                    x0 = np.linspace(0, len(y), len(y))
                    tck = interpolate.splrep(x0, y, s=0)
                    x1 = np.linspace(0, len(y), npoints)
                    y1 = interpolate.splev(x1, tck, der=0)
                    plot(x1, y1, label=freq)

            xlabel("Time")
            ylabel("Instructions/seconds")
            legend()
            tight_layout()
            savefig(f"models/hypothesis/const_intructions/freq/{fname}.pdf")
            show()

In [None]:
# For every cached run that sweeps the number of cores, plot the per-interval
# instruction counts (first difference of the "fingerprint_sample" values) of
# repetition "1" against a normalised [0, 1] time axis, one curve per core count.
run_col = energydb["run"]
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")
        if len(df["cores"].unique()) > 1 and "3" in data.config["command"]:
            # NOTE(review): re-fetches the grouping already computed above; presumably
            # redundant — confirm dataframe_group has no side effects before removing.
            df = data.dataframe_group("sensors")
            # .copy() so the column assignment below does not write into a slice of the parent frame
            df = df[df["sensors"] == "fingerprint_sample"].copy()
            df["info"] = df["info"].apply(flat)

            df = df[df["repetitions"] == "1"]
            matplotlib_rc_2()
            fname = data.config["pkg"][2:]  # drop the first two characters of the package name
            print(fname)
            # unique() instead of .values: only the first row of a core count is ever
            # plotted, so repeated rows would just redraw the same curve and duplicate
            # its legend entry.
            for ncores in df["cores"].unique():
                y0 = np.diff(list(df[df["cores"]==ncores]["info"].values[0]))

                # Very short traces (three samples or fewer) are not plotted.
                if len(y0) > 3:
                    # The raw samples are plotted on a [0, 1] axis so runs of different
                    # length overlay; no resampling or smoothing is applied to the curve.
                    x0 = np.linspace(0, 1, len(y0))
                    plot(x0, y0, label=ncores)

            xlabel("Time")
            ylabel("Instructions/seconds")
            legend()
            tight_layout()
            savefig(f"models/hypothesis/const_intructions/cores/{fname}.pdf")
            show()

In [None]:
# For every cached run that sweeps the number of cores, print how much the final
# "fingerprint_sample" value (taken as the instruction count) varies across core
# counts: mean, standard deviation and coefficient of variation in percent.
run_col = energydb["run"]
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")
        if len(df["cores"].unique()) > 1 and "3" in data.config["command"]:
            fname = data.config["pkg"][2:]  # drop the first two characters of the package name

            df = data.dataframe_generic()
            df["time"] = df["stop_time"]-df["start_time"]
            df = df.drop(columns=["start_time", "stop_time"])
            # The last element of each fingerprint sample list is used as the total.
            df["instructions"] = df["fingerprint_sample"].apply(lambda x : x[-1])
            # Average only the two columns used below; averaging the whole frame trips
            # over the list/string columns on recent pandas versions.
            dfx = df.groupby("cores")[["time", "instructions"]].mean()

            if "fluid" in fname:
                # fluid can only run with power of 2
                dfx = dfx[dfx["time"] > 1]

            if "x264" in fname:
                # x264 with one core ?
                dfx = dfx.loc[dfx.index != "1"]

            # Core counts are strings in the source data; sort them numerically.
            dfx.index = dfx.index.astype(int)
            dfx = dfx.sort_index()
            res_m = dfx["instructions"].mean()
            res_s = dfx["instructions"].std()
            res = res_s/res_m*100
            print(fname.capitalize(), f"{res_m:.2e} {res_s:.2e} {res:.2f}%")

In [None]:
# For every cached run that sweeps the CPU frequency, print how much the final
# "fingerprint_sample" value (taken as the instruction count) varies across
# frequencies: mean, standard deviation and coefficient of variation in percent.
run_col = energydb["run"]
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_generic()
        if len(df["frequency"].unique()) > 1 and "3" in data.config["command"]:
            fname = data.config["pkg"][2:]  # drop the first two characters of the package name

            df["time"] = df["stop_time"]-df["start_time"]
            df = df.drop(columns=["start_time", "stop_time"])
            # The last element of each fingerprint sample list is used as the total.
            df["instructions"] = df["fingerprint_sample"].apply(lambda x : x[-1])
            # Average only the two columns used below; averaging the whole frame trips
            # over the list/string columns on recent pandas versions.
            dfx = df.groupby("frequency")[["time", "instructions"]].mean()

            if "fluid" in fname:
                # fluid can only run with power of 2
                dfx = dfx[dfx["time"] > 1]

            if "x264" in fname:
                # NOTE(review): the index holds frequencies here, so comparing it with
                # "1" looks like a leftover from the per-core variant — confirm.
                dfx = dfx.loc[dfx.index != "1"]

            res_m = dfx["instructions"].mean()
            res_s = dfx["instructions"].std()
            res = res_s/res_m*100
            print(fname.capitalize(), f"{res_m:.2e} {res_s:.2e} {res:.2f}%")

## Input size and time

In [None]:
# For every cached run that sweeps the input size, plot the mean instruction count
# against the mean run time (one point per input) and collect the Pearson
# correlation between the two; the last expression shows the best value per app.
run_col = energydb["run"]
covs = []
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_generic()
        if len(df["input"].unique()) > 1:
            df["time"] = df["stop_time"]-df["start_time"]
            # The last element of each fingerprint sample list is used as the total.
            df["instructions"] = df["fingerprint_sample"].apply(lambda x : x[-1])
            df = df.drop(columns=["start_time", "stop_time", "fingerprint_sample"])
            # Cast to float BEFORE sorting: inputs are stored as strings, and a string
            # sort would put "10" ahead of "2" and scramble the plotted line.
            df = (df.groupby("input")[["time", "instructions"]].mean()
                    .reset_index()
                    .astype(float)
                    .sort_values("input"))

            fname = data.config["pkg"][2:]  # drop the first two characters of the package name
            matplotlib_rc_2()
            plot(df["time"], df["instructions"])
            corr = np.corrcoef(df["time"], df["instructions"])
            print(corr)
            # The off-diagonal element is the time/instructions correlation coefficient.
            covs.append([fname.capitalize(), corr[0][1]])
            scatter(df["time"], df["instructions"])
            xlabel("Time (s)")
            ylabel("Instructions")
            tight_layout()
            savefig(f"models/hypothesis/input_instructions/input_time/{fname}.pdf")
            show()
# NOTE: the "cov" column holds correlation coefficients, not covariances.
covs = pd.DataFrame(covs, columns=["name","cov"])
covs.groupby("name").max().reset_index()

In [None]:
# For every cached run that sweeps both the input size and the CPU frequency, plot
# the mean instruction count against the mean run time, one line per frequency.
run_col = energydb["run"]
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_generic()
        if len(df["input"].unique()) > 1 and len(df["frequency"].unique()) > 1:
            # presumably kHz -> GHz (frequencies are stored as strings such as "2200000")
            df["frequency"] = df["frequency"].astype(float)/1e6
            df["time"] = df["stop_time"]-df["start_time"]
            # The last element of each fingerprint sample list is used as the total.
            df["instructions"] = df["fingerprint_sample"].apply(lambda x : x[-1])
            df = df.drop(columns=["start_time", "stop_time", "fingerprint_sample"])
            # Cast to float BEFORE sorting: inputs are stored as strings, and a string
            # sort would put "10" ahead of "2" and scramble the plotted lines.
            df = (df.groupby(["input", "frequency"])[["time", "instructions"]].mean()
                    .reset_index()
                    .astype(float)
                    .sort_values("input"))

            fname = data.config["pkg"][2:]  # drop the first two characters of the package name
            matplotlib_rc_2()
            for fx in sorted(df["frequency"].unique()):
                dfx = df[df["frequency"]==fx]
                plot(dfx["time"], dfx["instructions"],label=fx)
            xlabel("Time (s)")
            ylabel("Instructions")
            legend()
            tight_layout()
            # NOTE(review): the single-line input/time plot earlier in this notebook
            # writes to this same path, so one figure overwrites the other — confirm
            # whether a separate output directory was intended.
            savefig(f"models/hypothesis/input_instructions/input_time/{fname}.pdf")
            show()

## Fingerprint of PARSEC applications

In [None]:
# For every cached run that sweeps the input size, plot the per-interval
# instruction counts (first difference of the "fingerprint_sample" values) of
# repetition "2", one curve per input. Traces with more than three samples are
# resampled onto 100 points and smoothed; shorter ones are plotted raw.
run_col = energydb["run"]
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True
    ):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")
        if len(df["input"].unique()) > 1 and "1" in data.config["command"]:
            # .copy() so the column assignment below does not write into a slice of the parent frame
            df = df[df["sensors"]=="fingerprint_sample"].copy()
            df["info"] = df["info"].apply(flat)
            df = df[df["repetitions"]=="2"]

            fname = data.config["pkg"][2:]  # drop the first two characters of the package name
            matplotlib_rc_2()
            print(fname)
            for _, v in df.iterrows():
                y = np.diff(v["info"])
                npoints = 100
                # A cubic interpolating spline needs more than three samples.
                if len(y) > 3:
                    x0 = np.linspace(0, len(y), len(y))
                    tck = interpolate.splrep(x0, y, s=0)
                    x1 = np.linspace(0, len(y), npoints)
                    y1 = interpolate.splev(x1, tck, der=0)
                    # Savitzky-Golay smoothing: window of 5 points, cubic polynomial.
                    y = savgol_filter(y1,5,3)
                plot(y, label= "input "+v["input"])

            xlabel("Percentage of execution (%)")
            ylabel("Instructions/seconds")
            legend()
            tight_layout()
            savefig(f"models/hypothesis/input_instructions/fp/{fname}.pdf")
            show()

## Model Gradient 

In [None]:
# --- Settings read by draw() below -------------------------------------------
# ptype: when true the 3D figure also shows the model surface.
# show_contour / show_arrow: overlays drawn on the 2D heat map.
ptype = False
show_contour = True
show_arrow = False
show_p = True

cmap = cm.hot.reversed()

# Symbolic frequency (f) and core count (p), plus the model coefficients.
f, p = sympy.symbols("f p")
xs = [1, 0.29, 0.97, 198, 0.5]
func = xs[0]*(((xs[1]*f**3+xs[2]*f)*p+xs[3])*(xs[4]/p-xs[4]+1))/f

# Upper bounds of the plotted grid: frequency and number of cores.
fmax = 2.2
tmax = 16

def draw(name1, name2):
    """Plot the energy model built from the global ``xs`` and save two figures.

    The model is evaluated on a 10x10 grid spanning frequency 1..``fmax`` and
    cores 1..``tmax``. Two figures are produced:

    * a 3D figure with arrows derived from the negated gradient (plus the model
      surface when the global ``ptype`` is true), saved to ``name1``;
    * a 2D heat map of the model with optional unit descent arrows
      (``show_arrow``) and contour lines (``show_contour``), saved to ``name2``.

    Parameters
    ----------
    name1 : str
        Output path of the 3D figure.
    name2 : str
        Output path of the 2D figure.
    """
    matplotlib_rc_2()
    fig = plt.figure()
    fig2, ax2 = plt.subplots()
    # Figure.gca(projection=...) is no longer accepted by current Matplotlib;
    # add_subplot creates the same 3D axes on every version.
    ax = fig.add_subplot(111, projection='3d')

    x1= np.linspace(1,fmax,10)
    x2= np.linspace(1,tmax,10)
    X,Y= np.meshgrid(x1,x2)

    # Local symbols: other cells rebind the globals `f` and `p` to arrays or
    # numbers, which would silently break the symbolic expression.
    f_sym, p_sym = sympy.symbols("f p")
    func= xs[0]*(((xs[1]*f_sym**3+xs[2]*f_sym)*p_sym+xs[3])*(xs[4]/p_sym-xs[4]+1))/f_sym
    grad= [sympy.diff(func, var) for var in (f_sym, p_sym)]
    t= sympy.lambdify([f_sym, p_sym],func)

    Z= t(X,Y)

    if ptype:
        U, V= np.meshgrid(x1,x2)
        ax.plot_surface(X, Y, Z, antialiased=True, cmap=cmap)
        W= Z
    else:
        # No surface: place the arrows on five z levels between 0 and 1.
        U, V, W= np.meshgrid(x1,x2,np.linspace(0,1,5))

    g= sympy.lambdify([f_sym, p_sym],grad)
    G= g(U,V)
    u = -G[0]
    v = -G[1]
    # NOTE(review): the z component is the model evaluated at the negated gradient
    # components rather than a derivative — confirm this is intended.
    w = -t(u,v)

    ax.quiver(U, V, W, u, v, w, length=0.1, color="r", normalize=True)

    ax2.pcolormesh(x1, x2, Z, cmap=cmap)
    # Unit-length descent direction on the 2D grid.
    G= g(X,Y)
    grad_norm = np.hypot(G[0],G[1])
    u = -G[0]/grad_norm
    v = -G[1]/grad_norm

    if show_arrow:
        ax2.quiver(X, Y, u, v, color="b")
    if show_contour:
        ax2.contour(X,Y,Z,50)

    ax.set_ylabel("Cores")
    ax.set_xlabel("Frequency (GHz)")
    ax.set_zlabel("Energy (KJ)")

    ax2.set_ylabel("Cores")
    ax2.set_xlabel("Frequency (GHz)")

    fig.canvas.draw_idle()
    fig2.canvas.draw_idle()

    fig.tight_layout()
    fig.savefig(name1)
    fig2.tight_layout()
    fig2.savefig(name2)
    show()

# One-coefficient-at-a-time sensitivity study: for each coefficient of `xs`, draw
# the model at two extreme values, then put the baseline value back before moving
# on to the next coefficient. Each entry is
# (index in xs, baseline value, [(value to try, output file stem), ...]).
_sweeps = [
    (4, 0.5,  [(1, "w1"), (0, "w0")]),
    (3, 198,  [(0, "pstatic0"), (3000, "pstatic3000")]),
    (2, 0.97, [(0, "pleak0"), (10, "pleak10")]),
    (1, 0.29, [(0, "pdyn0"), (3, "pdyn3")]),
]
for _coef, _baseline, _cases in _sweeps:
    for _value, _stem in _cases:
        xs[_coef] = _value
        draw(f"models/analisys/{_stem}_3d.pdf", f"models/analisys/{_stem}.pdf")
    xs[_coef] = _baseline

# Phases

In [None]:
def plot3d(pascalmodel):
    """Plot a fitted energy model against its measurements in 3D.

    The model is evaluated on every (cores, frequency) combination present in the
    training split. The figure shows the training points (red), the remaining
    measurements (black), the predicted minimum (yellow) and the model surface.

    Note: the input columns of ``pascalmodel.data`` are converted to float in place.

    Parameters
    ----------
    pascalmodel
        Fitted model exposing ``data``, ``inputs``, ``train_idx`` and ``predict``.

    Returns
    -------
    tuple
        ``(frequency * 1e6 as int, cores as int, measured energy at the predicted
        optimum, predicted energy at the optimum)``.

    Raises
    ------
    IndexError
        If the predicted optimum has no matching measurement in ``pascalmodel.data``.
    """
    df = pascalmodel.data
    df[pascalmodel.inputs] = df[pascalmodel.inputs].astype(float)
    df_train= df.loc[pascalmodel.train_idx]

    freqs = np.sort(df_train.frequency.unique().astype(float))
    cores = np.sort(df_train.cores.unique().astype(int))
    matplotlib_rc_2()
    plotdata.new_figure()

    # Every (cores, frequency) pair of the training grid, one pair per row.
    X = np.array(np.meshgrid(cores, freqs)).T.reshape(-1, 2)
    Y = pascalmodel.predict(X)
    idx = np.argmin(Y)
    best= list(X[idx])+[Y[idx]]
    # Measured energy of the configuration the model considers optimal.
    renergy= df[(df["cores"]==X[idx][0])&
                     (df["frequency"]==X[idx][1])].energy.iloc[0]
    print(Y[idx], X[idx])
    print("Real energy", renergy)

    plotdata.setProps(ylabel='Frequencies (GHz)',
                      xlabel='Active threads',
                      zlabel='Energy (KJ)')

    dfaux = df_train.sort_values(["frequency", "cores"])
    plotdata.ax.scatter(dfaux["cores"],
                    dfaux["frequency"],
                    dfaux["energy"],
                    antialiased=True, color="red",s=100)

    # Rows that were not used for training. A boolean mask is used because
    # indexing .loc with a set is rejected by recent pandas versions.
    dfaux = df.loc[~df.index.isin(pascalmodel.train_idx)]
    dfaux = dfaux.sort_values(["frequency", "cores"])
    plotdata.ax.scatter(dfaux["cores"],
                    dfaux["frequency"],
                    dfaux["energy"],
                    antialiased=True, color="black",s=100)

    plotdata.ax.scatter([best[0]],
                    [best[1]],
                    [best[2]],
                    antialiased=True, color="y",s=100)

    plotdata.plot3D(cores, freqs, Y, points=False, color_="b")
    # Legend entries follow the drawing order of the four artists above.
    plotdata.ax.legend(["train values","measured values","min energy","model"])

    plotdata.ax.view_init(30, 60)
    plt.tight_layout()
    return int(best[1]*1e6), int(best[0]), renergy, Y[idx]

## Manual instrumentation

In [None]:
# Fit the energy model separately on each of the three manually instrumented
# regions (0, 1, 2) and on the whole application (regions summed per
# configuration), plot every fit, and compare the measured energy at the
# per-region optima with the measured energy at the whole-application optimum.
df = pd.read_csv("databases/black_pascal_rapl_10M.csv",index_col=0)

opt = LeastSquaresOptmizer(
"""
pw_eq= lambda x,f,p: (x[0]*f**3+x[1]*f)*p+x[2]
perf_eq= lambda x,f,p: x[1]*(p-x[0]*p+x[0])/(f*p)
model= lambda x,p,f: pw_eq(x,f,p)*perf_eq(x[3:],f,p)
""", 5)


df1= df[df["regions"]==0]
pascalmodel_r1 = create_model(df1,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

df2= df[df["regions"]==1]
pascalmodel_r2 = create_model(df2,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

df3= df[df["regions"]==2]
pascalmodel_r3 = create_model(df3,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

# Whole application: add the regions up for every (cores, frequency) pair.
dfa= df.groupby(["cores","frequency"]).sum().reset_index()
pascalmodel_a= create_model(dfa,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

# Relative prediction error (max, mean, min in percent) of each region's own
# model on that region's own data.
val= abs(pascalmodel_r1.predict(df1[["cores", "frequency"]].astype(float).values)-df1["energy"])/df1["energy"]
print(max(val)*100, np.mean(val)*100, np.min(val)*100)
f1,c1,re1,pe1= plot3d(pascalmodel_r1)
savefig("phases/manual/blackscholes_1.pdf")
show()

val= abs(pascalmodel_r2.predict(df2[["cores", "frequency"]].astype(float).values)-df2["energy"])/df2["energy"]
print(max(val)*100, np.mean(val)*100, np.min(val)*100)
f2,c2,re2,pe2= plot3d(pascalmodel_r2)
savefig("phases/manual/blackscholes_2.pdf")
show()

val= abs(pascalmodel_r3.predict(df3[["cores", "frequency"]].astype(float).values)-df3["energy"])/df3["energy"]
print(max(val)*100, np.mean(val)*100, np.min(val)*100)
f3,c3,re3,pe3= plot3d(pascalmodel_r3)
savefig("phases/manual/blackscholes_3.pdf")
show()

f,c,re,pe= plot3d(pascalmodel_a)
savefig("phases/manual/blackscholes.pdf")
show()

# a: measured energy at the whole-application optimum;
# b: sum of the measured energies at the three per-region optima.
b= re1+re2+re3
a= re
figure(figsize=(8,5))
bar([1,2],[a/a,b/a])
print(a, b, b/a)
savefig("phases/manual/blackscholes_cmp.pdf")
show()

## OpenMP instrumentation

In [None]:
# Fit the energy model separately on each of the three instrumented regions
# (1, 2, 3) and on the whole application (regions summed per configuration),
# plot every fit, and compare the measured energy at the per-region optima with
# the measured energy at the whole-application optimum.
df = pd.read_csv("databases/black_pascal_rapl_10M_inst_new2.csv",index_col=0)

opt = LeastSquaresOptmizer(
"""
pw_eq= lambda x,f,p: (x[0]*f**3+x[1]*f)*p+x[2]
perf_eq= lambda x,f,p: x[1]*(p-x[0]*p+x[0])/(f*p)
model= lambda x,p,f: pw_eq(x,f,p)*perf_eq(x[3:],f,p)
""", 5)

df["regions"] = df["regions"].astype(int)


df1= df[df["regions"]==1]
pascalmodel_r1 = create_model(df1,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

df2= df[df["regions"]==2]
pascalmodel_r2 = create_model(df2,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

df3= df[df["regions"]==3]
pascalmodel_r3 = create_model(df3,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

# Whole application: add the regions up for every (cores, frequency) pair.
dfa= df.groupby(["cores","frequency"]).sum().reset_index()
pascalmodel_a= create_model(dfa,
                           inputs=["cores", "frequency"],
                           output=["energy"], model=deepcopy(opt),
                           config=None, train_sz=0.9,
                           split_type="random")

# Relative prediction error (max, mean, min in percent) of each region's own
# model on that region's own data.
val= abs(pascalmodel_r1.predict(df1[["cores", "frequency"]].astype(float).values)-df1["energy"])/df1["energy"]
print(max(val)*100, np.mean(val)*100, np.min(val)*100)
f1,c1,re1,pe1= plot3d(pascalmodel_r1)
savefig("phases/openmp/blackscholes_1.pdf")
show()

val= abs(pascalmodel_r2.predict(df2[["cores", "frequency"]].astype(float).values)-df2["energy"])/df2["energy"]
print(max(val)*100, np.mean(val)*100, np.min(val)*100)
f2,c2,re2,pe2= plot3d(pascalmodel_r2)
savefig("phases/openmp/blackscholes_2.pdf")
show()

val= abs(pascalmodel_r3.predict(df3[["cores", "frequency"]].astype(float).values)-df3["energy"])/df3["energy"]
print(max(val)*100, np.mean(val)*100, np.min(val)*100)
f3,c3,re3,pe3= plot3d(pascalmodel_r3)
savefig("phases/openmp/blackscholes_3.pdf")
show()

f,c,re,pe= plot3d(pascalmodel_a)
savefig("phases/openmp/blackscholes.pdf")
show()

# a: measured energy at the whole-application optimum;
# b: sum of the measured energies at the three per-region optima.
b= re1+re2+re3
a= re
figure(figsize=(8,5))
bar([1,2],[a/a,b/a])
print(a, b, b/a)
savefig("phases/openmp/blackscholes_cmp.pdf")
show()

In [None]:
#TODO add how the regions are computed

# Pthread instrumentation

In [None]:
#TODO make sure this is pthreads
# For every run that recorded region data, draw a timeline of its regions (one
# horizontal bar row per region) for the fixed configuration
# cores "30", frequency "2200000", input "1".
run_col = energydb["run"]
cursor = run_col.aggregate([
    {
        "$match": { "config.data_descriptor.extras.regions.values": {"$ne":None}},
    },
    {
        "$group":
        {
            "_id" : {"config":{"arguments":"$config.arguments"}},
            "keys": {"$push": "$_id" },
            "nitem": {"$sum": 1}
        }
    }
],  allowDiskUse=True)

matplotlib_rc_1()
for d in cursor:
    for k in d["keys"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Close the file explicitly: it is read back by PascalData right below,
            # so the dump must be flushed to disk first.
            with open(cache_file, "w+") as fh:
                json.dump(doc, fh)
        data = PascalData(cache_file)
        try:
            df = data.regions(regions=True)
            x = df[ (df["cores"]=="30")&(df["frequency"]=="2200000")&(df["input"]=="1")]
            # Rows are numbered from 1; each row may hold several start/stop intervals.
            for p, (_, v) in enumerate(x.iterrows(), start=1):
                starts = np.array(v["start_time"])
                w = np.array(v["stop_time"])-starts
                plt.barh(y=p, width=w, left=starts)
            xlabel("Time (s)")
            ylabel("Region")
            tight_layout()
            fname = data.config["pkg"].strip("./")
            savefig(f"phases/regions_{fname}.pdf")
            show()
        except Exception as e:
            # Best effort: report the failing run and carry on with the next one.
            print(e)

## Simulation

In [None]:
# Summarise the phase simulation: for every app find the configuration with the
# lowest and the highest energy, report the average number of phases of each and
# the average relative energy gap, then plot the distribution of the number of
# phases that minimises energy.
df_phases = pd.read_csv("databases/phases.csv", index_col=0)
max_en = df_phases.groupby("app").energy.max()
min_en = df_phases.groupby("app").energy.min()
# Match on (app, energy), not on energy alone: otherwise a row whose energy
# happens to equal another app's extreme would be picked up as well, and
# drop_duplicates could then keep the wrong row for that app.
df_min = pd.merge(df_phases, min_en.reset_index(), on=["app", "energy"]).drop_duplicates("app")
df_max = pd.merge(df_phases, max_en.reset_index(), on=["app", "energy"]).drop_duplicates("app")

en_diff = (max_en-min_en)/min_en*100
print("Min energy number of phases in avgr", df_min["nphases"].mean())
print("Max energy number of phases in avgr", df_max["nphases"].mean())
print("Energy difference", en_diff.mean(), "%")

print("Min energy phase")
matplotlib_rc_1()
df_min["nphases"].hist()
xlabel("Number of phases for minimal energy")
ylabel("Frequency")
tight_layout()
savefig("phases/min_phases_distribution.pdf")

In [None]:
# Draw the phase boundaries of every app whose row has nphases == 36 as
# overlapping horizontal bars, one bar row per app.
df_phases = pd.read_csv("databases/phases.csv", index_col=0)
matplotlib_rc_1()
print("35 Phase")
dfx = df_phases.loc[df_phases["nphases"]==36].reset_index(drop=True)
for row_pos, record in dfx.iterrows():
    # The "phases" column stores a list literal with single quotes; make it valid JSON.
    boundaries = np.array(json.loads(record["phases"].replace("'", "\"")), dtype=float)
    # Drawn from the last boundary to the first, so later bars are painted on top
    # of earlier ones (presumably the boundaries are ascending — confirm).
    for boundary in boundaries[::-1]:
        barh(row_pos, boundary, 0.8)
    # Restart the colour cycle so every app uses the same colour sequence.
    plt.gca().set_prop_cycle(None)
yticks(dfx.index, labels=dfx["app"])
yticks(fontsize=14)
xlabel("Division")
ylabel("App")
tight_layout()
savefig("phases/phase_division_35.pdf")

In [None]:
# Draw the phase boundaries of every app whose row has nphases == 6 as
# overlapping horizontal bars, one bar row per app.
df_phases = pd.read_csv("databases/phases.csv", index_col=0)
matplotlib_rc_1()
print("3 Phase")
dfx = df_phases.loc[df_phases["nphases"]==6].reset_index(drop=True)
for row_pos, record in dfx.iterrows():
    # The "phases" column stores a list literal with single quotes; make it valid JSON.
    boundaries = np.array(json.loads(record["phases"].replace("'", "\"")), dtype=float)
    # Drawn from the last boundary to the first, so later bars are painted on top
    # of earlier ones (presumably the boundaries are ascending — confirm).
    for boundary in boundaries[::-1]:
        barh(row_pos, boundary, 0.8)
    # Restart the colour cycle so every app uses the same colour sequence.
    plt.gca().set_prop_cycle(None)
yticks(dfx.index, labels=dfx["app"])
yticks(fontsize=14)
xlabel("Division")
ylabel("App")
tight_layout()
savefig("phases/phase_division_3.pdf")

In [None]:
# Plot, for every app whose name does not contain "freq", its energy relative to
# the app's own maximum as a function of the number of phases (all apps share
# one set of axes).
df_phases = pd.read_csv("databases/phases.csv", index_col=0)
ax = None
matplotlib_rc_1()
for appname in df_phases.app.unique():
    if "freq" in appname: continue
    xx = df_phases[df_phases["app"]==appname]
    # assign() returns a new frame instead of writing into a filtered slice of df_phases.
    xx = xx.assign(energy=xx["energy"]/xx["energy"].max())
    # The first call creates the axes; later calls draw onto the same ones.
    ax = xx.plot(x="nphases",y="energy",label=appname, ax = ax)
    ylabel("Relative energy")
    xlabel("Number of phases")
legend(fontsize=10,loc="upper right")
tight_layout()
savefig("fingerprint/energy_per_phase.pdf")

In [None]:
# Histogram of the "real_en" column of the simulated-vs-real phase comparison.
matplotlib_rc_1()
df_real_sim = pd.read_csv("databases/phases_sim_real.csv", index_col=0)
hist_ax = df_real_sim["real_en"].hist()
hist_ax.set_xlabel("Energy (J)")
hist_ax.set_ylabel("Frequency")
hist_ax.figure.tight_layout()
hist_ax.figure.savefig("phases/cpm_real_sim.pdf")

In [None]:
df_phases = pd.read_csv("databases/phases.csv", index_col=0)

def get_timeslice_energy(dfx, beg, end):
    """Integrate the sampled signal of one run over a fraction of its duration.

    Each value in ``dfx["info"]`` is treated as constant from its timestamp in
    ``dfx["time"]`` until the next timestamp, and is accumulated between
    ``beg`` and ``end``.

    Parameters
    ----------
    dfx : mapping or pandas.Series
        Must provide "start_time", "stop_time", "info" (sample values) and
        "time" (sample timestamps).
    beg, end : float
        Slice limits as fractions of the run, applied to
        ``stop_time - start_time``.

    Returns
    -------
    tuple
        ``(slice duration, accumulated value * time over the slice)``.
    """
    run_start, run_stop = dfx["start_time"], dfx["stop_time"]
    samples = np.array(dfx["info"])
    stamps = np.array(dfx["time"])

    t1 = run_start+(run_stop-run_start)*beg
    t2 = run_start+(run_stop-run_start)*end

    def _sample_in_effect(instant):
        # Index just before the first timestamp at or after `instant`, never below 0.
        pos = np.argmax(stamps>=instant)-1
        return pos if pos >= 0 else 0

    first = _sample_in_effect(t1)
    last = _sample_in_effect(t2)

    # Both limits fall inside the same sampling interval: a single rectangle.
    if first == last:
        return t2-t1, (t2-t1)*(samples[first])

    # Partial interval at each end of the slice ...
    en = (stamps[first+1]-t1)*samples[first]+(t2-stamps[last])*samples[last]
    # ... plus every complete interval in between, if there is any.
    if first+1<last:
        inner_stamps = stamps[first+1:last+1]
        widths = np.diff(inner_stamps)
        heights = samples[first+1:last+1][:-1]
        en += sum(heights*widths)
    return t2-t1, en

def plot_best(dfx, bps, app="", cores_freqs=False):
    """Plot the lowest-energy configuration of each phase as a step signal.

    For every pair of consecutive breakpoints in ``bps`` the energy of each
    row of ``dfx`` over that slice is obtained from ``get_timeslice_energy``;
    the (frequency, cores) pair with the smallest non-negative energy wins.
    The winners are drawn on the current axes as a piecewise-constant line
    whose x coordinates are the breakpoints multiplied by 100.

    Parameters
    ----------
    dfx : pandas.DataFrame
        One row per measured configuration; must provide "frequency" and
        "cores" plus the columns ``get_timeslice_energy`` reads.
    bps : sequence of float
        Phase breakpoints as fractions of the execution.
    app : str, optional
        Unused; kept so existing callers keep working.
    cores_freqs : bool, optional
        True plots the winning number of cores (legend suffix ``_cores``),
        False plots the winning frequency (legend suffix ``_freq``).

    Raises
    ------
    ValueError
        If no configuration has a non-negative energy for some phase.
    """
    best_freqs = []
    best_cores = []
    for beg, end in zip(bps[:-1], bps[1:]):
        energy_by_conf = {}
        for _, run in dfx.iterrows():
            _, energy = get_timeslice_energy(run, beg, end)
            if energy >= 0:
                energy_by_conf[(run["frequency"], run["cores"])] = energy
        if not energy_by_conf:
            raise ValueError(f"no configuration with non-negative energy in slice [{beg}, {end}]")

        freq, cores = min(energy_by_conf, key=energy_by_conf.get)
        best_freqs.append(float(freq))
        best_cores.append(int(cores))

    # Every inner breakpoint appears twice so the line is drawn as steps.
    xs = np.repeat(bps, 2)[1:-1]*100

    # The previous version drew the core counts in both branches and had the
    # two legend suffixes swapped; the frequency list was never plotted.
    if cores_freqs:
        plot(xs, np.repeat(best_cores, 2), alpha=1.0, label=f"{len(bps)}_cores")
    else:
        plot(xs, np.repeat(best_freqs, 2), alpha=1.0, label=f"{len(bps)}_freq")


# Phase count compared against each application's largest phase count.
REFERENCE_NPHASES = 37


def _phase_breakpoints(appname, nphases):
    """Return the breakpoints stored in ``df_phases`` for one app and phase count."""
    selected = df_phases[(df_phases["app"] == appname) & (df_phases["nphases"] == nphases)]
    # The column holds a Python-style list repr; swap the quotes so json can parse it.
    raw = selected["phases"].values[0]
    return list(map(float, json.loads(raw.replace("'", "\""))))


run_col = energydb["run"]
for appname in df_phases.app.unique():
    # The last five characters of the app name are dropped to obtain the
    # package name stored in the database.
    pkg = appname[:-5]
    print(pkg)
    data = None
    for d in run_col.aggregate([
        {
            "$match":{
                "config.pkg": pkg
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True
    ):
        for k in d["ids"]:
            cache_file = f"cache/{k}.json"
            if not os.path.isfile(cache_file):
                doc = run_col.find_one( {"_id": ObjectId(k) } )
                doc["_id"]= str(doc["_id"])
                # Context manager so the cache file is flushed and closed.
                with open(cache_file, "w+") as cache_fh:
                    json.dump(doc, cache_fh)
            # NOTE(review): every run is loaded but only the last one is
            # used below -- confirm that this is intended.
            data = PascalData(cache_file)
    if data is None:
        # Without this guard a previous application's data (or an undefined
        # name) would be plotted under this application's file name.
        print(f"no runs found for {pkg}, skipping")
        continue

    dfx = data.dataframe_group("sensors")
    dfx["start_time"] = dfx["time"].apply(lambda x: x[0])
    dfx["stop_time"] = dfx["time"].apply(lambda x: x[-1])
    dfx["total_time"] = dfx["stop_time"]-dfx["start_time"]
    #df= df[df["frequency"]!="2300000"]
    dfx = dfx[dfx["input"] == dfx["input"].unique()[-1]]

    max_phases = df_phases[df_phases["app"]==appname]["nphases"].max()
    # One figure for the core signal, one for the frequency signal; each
    # overlays the reference phase count and the largest available one.
    # The frequency axis uses the values stored in dfx["frequency"] as they
    # are (no rescaling happens in this cell), hence no unit in the label.
    for cores_freqs, axis_label, tag in (
        (True, "Number of cores", "cores"),
        (False, "Frequency", "freq"),
    ):
        for nphases_sel in (REFERENCE_NPHASES, max_phases):
            plot_best(dfx, _phase_breakpoints(appname, nphases_sel), appname, cores_freqs)

        xlabel("Percentage of execution")
        ylabel(axis_label)
        legend()
        savefig(f"phases/signals/{pkg}_{tag}_signals_cmp.pdf")
        show()
    
#     for i,v in df_phases[df_phases["app"]==appname].iterrows():
#         print(v["nphases"])
#         phases_aux = list(map(float,json.loads(v["phases"].replace("'", "\""))))
#         plot_best(dfx, phases_aux, appname, True)
#         xlabel("Percentage of execution")
#         ylabel("Number of cores")
#         legend()
#         savefig(f"phases/{appname[:-5]}_{v['nphases']}_cores_signals.pdf")
#         show()

#         plot_best(dfx, phases_aux, appname, False)
#         xlabel("Percentage of execution")
#         ylabel("Frequency (GHz)")
#         legend()
#         savefig(f"phases/{appname[:-5]}_{v['nphases']}_freq_signals.pdf")
#         show()

## Fingerprint Metrics

In [None]:
# For every PARSEC fingerprint file: plot six normalized counter-derived
# metrics and shade the stored phase division behind them.
df_phases = pd.read_csv("databases/phases.csv", index_col=0)

nphases = 3
lw = 5

# (legend label, how to derive the metric from the counter dataframe).
# The metrics are added and interpolated strictly in this order because the
# dataframe has its NaN rows dropped after each new column.
metric_builders = [
    ("m1", lambda d: d['PERF_COUNT_HW_INSTRUCTIONS']/d['MEM_UOPS_RETIRED:ALL_STORES']),
    ("m2", lambda d: d['PERF_COUNT_HW_INSTRUCTIONS']/d['MEM_UOPS_RETIRED:ALL_LOADS']),
    ("m3", lambda d: d['PERF_COUNT_HW_INSTRUCTIONS']),
    ("m4", lambda d: d['MEM_UOPS_RETIRED:ALL_LOADS']),
    ("m5", lambda d: d['MEM_UOPS_RETIRED:ALL_STORES']),
    ("m6", lambda d: d['MEM_UOPS_RETIRED:ALL_LOADS']/d['PERF_COUNT_HW_INSTRUCTIONS']),
]

for f in glob.glob("databases/fingerprints/parsec/*.dat"):
    matplotlib_rc_1()
    # The application name is the file name without its last eight characters.
    aname = os.path.basename(f)[:-8]
    print(aname)
    app_rows = df_phases[df_phases["app"].str.contains(aname)]
    app_rows = app_rows[app_rows["nphases"] == nphases]
    if app_rows.empty:
        print(f"no {nphases}-phase division stored for {aname}, skipping")
        continue
    phases_repr = app_rows["phases"].values[-1]
    bounds = [float(b) for b in json.loads(phases_repr.replace("'", "\""))]

    fp_data = Analyser(f)
    try:
        for metric_label, build_metric in metric_builders:
            fp_data.df[metric_label] = build_metric(fp_data.df)
            fp_data.df = fp_data.df.dropna()
            x0, y0 = fp_data.interpolate(feature=metric_label, npoints=100)
            y0 = y0/max(y0)
            plot(x0, y0, label=metric_label, linewidth=lw)

        # Shade the phases with alternating greys, starting at x = 0.
        y_lo, y_hi = ylim()
        left = 0
        # Named "shades" so the imported matplotlib "colors" module is not overwritten.
        shades = [(0.1,0.1,0.1), (0.4,0.4,0.4)]
        for k, (start, stop) in enumerate(zip(bounds[:-1], bounds[1:])):
            width = stop-start
            bar(left+width/2, y_hi-y_lo, width, y_lo, alpha=0.8, color=shades[(k+1) % 2])
            left += width
        # bar() may have rescaled the axis; restore the limits of the curves.
        ylim(y_lo, y_hi)

        ylabel("Normalized fingerprint")
        xlabel("Percentage of execution")
        legend()
        savefig(f"phases/metrics_{aname}.pdf")
        show()
    except Exception as err:
        # Still best effort (one bad file must not stop the loop), but the
        # failure is reported and the half-drawn figure is discarded so its
        # curves cannot leak into the next application's plot.
        print(f"skipping {aname}: {err!r}")
        plt.close()

## Best fingerprint

In [None]:
# For each row of the best-fingerprint table: locate the high-curvature points
# of the fingerprint, cluster them into "critical points" and plot them over
# the fingerprint together with the stored phase division.
df = pd.read_csv("databases/best_fps.csv", index_col=0)
df_phases = pd.read_csv("databases/phases.csv", index_col=0)

# Raw counter name -> short name used in the figure title.
counters_dict = {
    "SYSTEMWIDE:RAPL_ENERGY_PKG": "energy_pkg",
    "SYSTEMWIDE:RAPL_ENERGY_DRAM": "energy_ram",
    "PERF_COUNT_HW_INSTRUCTIONS": "instructions",
    "MEM_UOPS_RETIRED:ALL_STORES": "memory_store",
    "MEM_UOPS_RETIRED:ALL_LOADS": "memory_load",
    "PERF_COUNT_SW_PAGE_FAULTS": "page_fault",
    "PERF_COUNT_SW_CPU_CLOCK": "clock",
}

matplotlib_rc_1()
for _, row in df.iterrows():
    # "fp" is stored as the text of an array ("[a b c ...]"); splitting on any
    # whitespace also copes with line breaks inside that text.
    fp = np.array([float(tok) for tok in row["fp"][1:-1].split()])

    # Magnitude of the second difference; samples above its mean are candidates.
    curvature = abs(np.diff(fp, n=2))
    idx = np.where(curvature>curvature.mean())[0]+1
    X = np.column_stack((idx, fp[idx]))

    # Two cluster centers plus the first and last samples give four points.
    # NOTE(review): presumably meant to match the nphases == 4 lookup below -- confirm.
    km = KMeans(n_clusters=4-2, random_state=0).fit(X)
    centers = np.vstack((km.cluster_centers_, [0, fp[0]], [len(fp)-1, fp[-1]]))
    centers = centers[centers[:,0].argsort()]

    aname = row["app"]
    #print(row["app"], row["operations"], row["counters"])
    counter_names = [counters_dict[c.replace("'","").strip()] for c in row["counters"][1:-1].split(",")]
    operations = [op.replace("'","").strip() for op in row["operations"][1:-1].split(",")]
    # Interleave: counter0 op0 counter1 op1 counter2 ...
    formula = counter_names[0]+"".join(op+name for op, name in zip(operations, counter_names[1:]))
    print(aname, formula)

    app_rows = df_phases[df_phases["app"].str.contains(aname)]
    app_rows = app_rows[app_rows["nphases"] == 4]
    if app_rows.empty:
        print(f"no 4-phase division stored for {aname}, skipping")
        continue
    phases_repr = app_rows["phases"].values[-1]
    # Scaled by 100 to share the x axis with the fingerprint sample index.
    bounds = [float(b)*100 for b in json.loads(phases_repr.replace("'", "\""))]

    title(aname+": "+formula)
    plot(centers[:,0], centers[:,1])
    scatter(centers[:,0], centers[:,1],label="critical points")
    plot(fp, label="fingerprint")

    # Shade the phases with alternating greys, starting at x = 0.
    y_lo, y_hi = ylim()
    left = 0
    # Named "shades" so the imported matplotlib "colors" module is not
    # overwritten; the loop no longer reuses the name of the outer row either.
    shades = [(0.1,0.1,0.1), (0.4,0.4,0.4)]
    for k, (start, stop) in enumerate(zip(bounds[:-1], bounds[1:])):
        width = stop-start
        bar(left+width/2, y_hi-y_lo, width, y_lo, alpha=0.8, color=shades[(k+1) % 2])
        left += width
    # bar() may have rescaled the axis; restore the limits of the curves.
    ylim(y_lo, y_hi)

    xlabel("Normalized time")
    ylabel("Fingerprint")
    legend()
    tight_layout()
    savefig(f"phases/fingerprint/{aname}.pdf")
    show()

# Power fingerprint

In [None]:
def _resample_unit_interval(values, npoints=100):
    """Spline-resample ``values`` onto ``npoints`` evenly spaced points in [0, 1].

    The samples are assumed to be evenly spaced; an interpolating (s=0) spline
    of scipy's default degree is fitted and evaluated.
    Returns ``(x, y)`` with ``x = np.linspace(0, 1, npoints)``.
    """
    x0 = np.linspace(0, 1, len(values))
    tck = interpolate.splrep(x0, values, s=0)
    x1 = np.linspace(0, 1, npoints)
    return x1, interpolate.splev(x1, tck, der=0)


# For the "./rtview" runs that carry fingerprint samples: one figure with the
# instruction-count increments per frequency and one with the IPMI power trace.
run_col = energydb["run"]
matplotlib_rc_2()
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$match":{
                "config.pkg": "./rtview"
            }
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Context manager so the cache file is flushed and closed.
            with open(cache_file, "w+") as cache_fh:
                json.dump(doc, cache_fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")
        # Only runs that swept several frequencies and whose command contains "3".
        if not (len(df["frequency"].unique()) > 1 and "3" in data.config["command"]):
            continue
        df["frequency"] = df["frequency"].astype(float)/1e6

        df_pw = df[df["sensors"] == "ipmi"]
        # .copy() so that the column assignment below does not write to a slice.
        df = df[df["sensors"] == "fingerprint_sample"].copy()
        df["info"] = df["info"].apply(flat)
        df = df[df["repetitions"] == "2"]

        fname = data.config["pkg"][2:]#.capitalize()
        print(fname)
        if "rtview" not in fname:
            continue
        print(data.config)

        # --- instruction-count increments, one curve per row/frequency ---
        for freq in df["frequency"].values:
            counts = df[df["frequency"]==freq]["info"].values[0]
            increments = np.diff(list(counts))
            # Too-short series cannot be spline-interpolated.
            if len(increments) > 3:
                x1, y1 = _resample_unit_interval(increments)
                # NOTE(review): a Savitzky-Golay smoothed copy (window 11,
                # order 3) used to be computed here but was never plotted.
                # NOTE(review): 2.2/freq presumably rescales to a 2.2 GHz
                # reference frequency -- confirm.
                plot(x1, y1*2.2/freq, label=freq)
        xlabel("Time")
        ylabel("Instructions/seconds")
        legend()
        tight_layout()
        show()

        # --- power trace, one curve per row/frequency ---
        figure(figsize=(15,10))
        #display(df_pw)
        df_pw = df_pw[df_pw["repetitions"] == "2"]
        for freq in df_pw["frequency"].values:
            power = df_pw[df_pw["frequency"]==freq]["info"].values[0]
            if len(power) > 3:
                x1, y1 = _resample_unit_interval(power)
                plot(x1, y1, label=freq)
        xlabel("Time")
        ylabel("Power (W)")
        legend()
        tight_layout()
        # Show the power figure right after its instruction figure instead of
        # leaving every power figure open until the end of the cell.
        show()

## Correlation between instructions and power

In [None]:
# For every run with fingerprint samples: resample the instruction increments
# and the IPMI power trace onto a common grid, plot both and record their
# Pearson correlation. The mean correlation is printed at the end.
run_col = energydb["run"]
matplotlib_rc_2()
avr_corr = []
# Interpolating with splrep's default cubic degree needs more than 3 samples.
MIN_SPLINE_POINTS = 4
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
#         {
#             "$match":{
#                 "config.pkg": "./rtview"
#             }
#         },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Context manager so the cache file is flushed and closed.
            with open(cache_file, "w+") as cache_fh:
                json.dump(doc, cache_fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")

        df["frequency"] = df["frequency"].astype(float)/1e6

        df_pw = df[df["sensors"] == "ipmi"]
        df_pw = df_pw[df_pw["repetitions"] == "2"]

        # .copy() so that the column assignment below does not write to a slice.
        df = df[df["sensors"] == "fingerprint_sample"].copy()
        # Grab the first of the sampling group
        # For run 4 [INSTRUCTIONS_RETIRED, MEM_UOPS_RETIRED:ALL_LOADS, MEM_UOPS_RETIRED:ALL_STORES]
        df["info"] = df["info"].apply(lambda ss: [aux[0] for aux in ss])
        df = df[df["repetitions"] == "2"]

        # Pair each fingerprint row with the power row of the same configuration.
        df = df.rename(columns={"info":"fp","time":"fp_time"}).drop(columns="sensors")
        df_pw = df_pw.rename(columns={"info":"pw","time":"pw_time"}).drop(columns="sensors")
        df = pd.merge(df,df_pw,on=["cores","frequency","input","repetitions"])
        #display(df.head(5))
        if df.empty:
            # No configuration has both a fingerprint and a power trace.
            continue

        fname = data.config["pkg"][2:]#.capitalize()

        # Taking only one row of the dataframe
        fp = df["fp"].values[0]
        pw = df["pw"].values[0]
        # Cumulative counts -> per-sample increments (length is preserved).
        fp = np.diff(fp,prepend=[0])

        # The former "< 3" test still let 3-point series reach splrep, which
        # rejects them.
        if len(pw) < MIN_SPLINE_POINTS or len(fp) < MIN_SPLINE_POINTS:
            continue

        print(fname,data.config)

        # Resample both signals onto the same grid over [0, 1].
        npoints = 1000
        x_fp, y_fp= np.linspace(0,1,len(fp)), fp
        tck = interpolate.splrep(x_fp, y_fp, s=0)
        x1 = np.linspace(0,1,npoints)
        y1 = interpolate.splev(x1, tck, der=0)

        x_pw, y_pw= np.linspace(0,1,len(pw)), pw
        tck = interpolate.splrep(x_pw, y_pw, s=0)
        x2 = np.linspace(0,1,npoints)
        y2 = interpolate.splev(x2, tck, der=0)

        # Both signals over time, on twin y axes.
        fig, ax_f = plt.subplots(figsize=(15,10))
        ax_c = ax_f.twinx()
        ax_f.scatter(x1,y1,c="blue")
        ax_c.scatter(x2,y2,c="red")

        ax_f.set_ylabel("Inst/sec",color="blue", fontsize=24)
        ax_c.set_ylabel("Power (W)",color="red", fontsize=24)
        ax_f.set_xlabel("Time (%)", fontsize=24)
        show()

        # One signal against the other, plus their correlation coefficient.
        figure(figsize=(15,10))
        scatter(y1,y2)
        avr_corr.append(np.corrcoef(y1,y2)[0, 1])
        print("Correlation", avr_corr[-1])
        ylabel("Power (W)", fontsize=24)
        xlabel("Inst/sec", fontsize=24)
        show()
print(np.mean(avr_corr))

# Application model from fingerprint

In [None]:
# Compare the measured run time per frequency with the estimate I / (a * f),
# where I is the mean final instruction count and a the mean of
# I / (T * frequency) over the fingerprint runs.
energydb = client["energy"]
run_col = energydb["run"]

matplotlib_rc_2()
for d in run_col.aggregate([
        {
            "$match":{
                "config.data_descriptor.values": ["start_time", "stop_time", "fingerprint_sample"]
            }
        },
        {
            "$match": { "config.pkg": "./rtview" },
        },
        {
            "$group":
            {
                "_id" : {"config":{"arguments":"$config.arguments"}},
                "ids": {"$addToSet": "$_id" },
                "nitem": {"$sum": 1}
            }
        }
    ], allowDiskUse=True):
    for k in d["ids"]:
        cache_file = f"cache/{k}.json"
        if not os.path.isfile(cache_file):
            doc = run_col.find_one( {"_id": ObjectId(k) } )
            doc["_id"]= str(doc["_id"])
            # Context manager so the cache file is flushed and closed.
            with open(cache_file, "w+") as cache_fh:
                json.dump(doc, cache_fh)
        data = PascalData(cache_file)
        df = data.dataframe_group("sensors")
        df2 = data.dataframe_generic()
        # Only runs that swept several frequencies and whose command contains "3".
        if not (len(df["frequency"].unique()) > 1 and "3" in data.config["command"]):
            continue
        figure(figsize=(15,10))

        df["frequency"] = df["frequency"].astype(float)
        df = df[df["sensors"] == "fingerprint_sample"]
        df = df[df["repetitions"] == "2"]
        # Exclude the 2300000 setting while the column still holds the raw
        # value. This test used to compare the already rescaled float column
        # with the string "2300000", which never removed a row.
        df = df[df["frequency"] != 2300000.0].copy()
        df["frequency"] = df["frequency"]/1e6
        df["info"] = df["info"].apply(flat)

        fname = data.config["pkg"][2:]#.capitalize()
        print(fname)
        #print(data.config)
        #df["ck"] = df["info"].apply(np.diff)
        #df["ck"] = df["ck"].apply(np.mean)
        df["T"] = df["time"].apply(lambda x : x[-1]-x[0])
        df["I"] = df["info"].apply(lambda x : x[-1])
        df["a"] = df["I"]/(df["T"]*df["frequency"])
        # Divided by 1e6 because x1 below is expressed in the raw frequency
        # unit, a factor 1e6 away from the rescaled df["frequency"].
        a = df["a"].mean()/1e6
        I = df["I"].mean()
        print(a)
        #display(df)

        # Measured mean run time per frequency, same exclusion as above.
        df2["time"] = df2["stop_time"]-df2["start_time"]
        df2 = df2[df2["frequency"].astype(float) != 2300000.0]
        # Only "time" is plotted, so only that column is averaged; this also
        # keeps non-numeric columns (if any) out of the mean.
        df2 = df2.groupby("frequency")["time"].mean().reset_index()
        plot(df2["frequency"].astype(float),df2["time"])

        # NOTE(review): hard-coded frequency grid (1200000 .. 2200000 in steps
        # of 100000) -- confirm it matches the swept frequencies.
        x1 = np.arange(1200000, 2300000, 100000)
        y1 = I/(a*x1)
        #y1 = df["T"].max()/x1*1e6*1.2
        plot(x1, y1, label="aproxx")
        legend()
        show()
        #display(df2)

# CPU  Requests

In [None]:
# Bar chart (log scale) of how many completed jobs requested each core count.
matplotlib_rc_1()
df_requests = pd.read_csv("databases/cpu_requests.csv", sep=" ")
# Keep completed jobs that requested fewer than 1024 CPUs.
keep = (df_requests["NCPUS"] < 1024) & (df_requests["State"] == "COMPLETED")
df_requests = df_requests[keep]
jobs_per_request = df_requests.groupby("NCPUS")["State"].count()
jobs_per_request.sort_values(ascending=False).plot.bar(logy=True)
xlabel("Requested cores")
ylabel("Number of jobs")
savefig("experiments/cpu_requestes.pdf")