In [None]:
# This script tries to approximate the cost of Google's DenseMap

import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import math
import numpy as np
from scipy import optimize
from sklearn.metrics import r2_score
from scipy.stats import linregress
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Size handed to plt.figure(figsize=...) for the final figure (matplotlib
# interprets it as width, height in inches).
figsize = (4.5, 3)

# Hex RGB colours used to style the plotted lines and shaded bands.
TUMBlue = '#3070b3'
TUMGreen = '#a2ad00'
TUMOrange = '#e37222'
TUMCyan = '#00778a'
TUMDarkRed = '#9c0d16'
TUMLightGray = '#e6e6e6'


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    Both arguments are converted with ``np.array`` and must broadcast
    against each other. Each error is taken relative to the corresponding
    entry of ``y_true``, so a zero in ``y_true`` yields ``inf``/``nan``.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = np.abs((actual - predicted) / actual)
    return np.mean(relative_error) * 100

def sym_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (sMAPE) in percent.

    Each absolute error is divided by the mean of the two magnitudes,
    ``(|y_true| + |y_pred|) / 2``. Using magnitudes (rather than the signed
    sum) keeps the denominator from cancelling out when the actual and
    predicted values have opposite signs; for same-sign inputs the result is
    unchanged. A pair where both values are zero still evaluates to ``nan``.

    Both arguments are converted to arrays and must broadcast against each
    other.
    """
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    scale = (np.abs(y_true) + np.abs(y_pred)) / 2
    return np.mean(np.abs(y_true - y_pred) / scale) * 100

In [None]:
# Load the measurements and take a first look at the mean 'setup' value per size
data = pd.read_csv("../data-files/stateTableCost_dut2.csv")

# groupby() below returns its results sorted by 'size', whereas unique() keeps
# the order of first appearance in the CSV. Sort the x values explicitly so
# that steps[i] always lines up with the i-th aggregated value.
steps = np.sort(data['size'].unique())

# Group once and derive min/mean/max of both measured columns per size.
bySize = data.groupby(['size'])

setupMax = bySize['setup'].max()
setupMean = bySize['setup'].mean()
setupMin = bySize['setup'].min()

runMin = bySize['run'].min()
runMean = bySize['run'].mean()
runMax = bySize['run'].max()

# NOTE(review): Matplotlib >= 3.3 spells this keyword `base`; `basex` is kept
# because the notebook targets the older API (it also uses matplotlib2tikz).
plt.semilogx(basex=2)
plt.ylim([0,1200])

plt.plot(steps, setupMean)

# Passed positionally: the keyword was renamed from `b` to `visible` in newer
# Matplotlib releases, the positional form works with both.
plt.grid(True)

plt.show()

In [None]:
# This function models the insertion cost
def setupfun(x):
    """Model of the insertion cost for a given size ``x``.

    With ``k = floor(log2(x))`` the model is
    ``400 + 170 * ((2**(k + 1) - 8) / x - 1)``. The fraction is the closed
    form of ``2 * sum(2**i for i in range(2, k)) / x``, so the value is
    exactly 400 at ``x = 8``, jumps at every power of two and decays until
    the next one.

    The constants 400 and 170 are hard-coded; presumably they were tuned by
    hand against the measured data (TODO confirm).

    ``x`` must be a positive scalar (``math.log2`` rejects arrays and
    non-positive values).
    """
    # math.log2 is exact at powers of two; math.log(x, 2) is computed as
    # log(x) / log(2) and can land just below the integer, which would make
    # floor() pick the wrong exponent right at the step.
    exponent = math.floor(math.log2(x))
    return 400 + 170 * (((2 ** (exponent + 1) - 8) / x) - 1)

# This function models the query cost
def runfun_fit(x, t, o):
    """Query-cost model in the call shape expected by ``curve_fit``.

    The model is currently the constant ``o``; ``t`` is accepted so the
    signature keeps two fit parameters but it does not influence the result.

    Returns a plain list with one value per element when ``x`` is a
    ``numpy.ndarray`` and a single value for any other ``x``.
    """
    def model(sample):
        # Alternatives that were tried and are currently disabled:
        #   math.log(sample, t) + o
        #   o + 50 * (math.log(sample, 2) - math.floor(math.log(sample, 2)))
        return o

    # Anything that is not an ndarray is treated as one sample.
    if not isinstance(x, np.ndarray):
        return model(x)

    # curve_fit hands over the whole x array at once; evaluate element-wise.
    return [model(sample) for sample in x]



In [None]:
#x = np.arange(2**10-1,2**17, 16)
# Evaluate the insertion-cost model at every measured size
y = [setupfun(i) for i in steps]

# Goodness of the insertion model against the per-size mean of 'setup'
r2 = r2_score(setupMean, y)
print("R2 score (setup): " + str(r2))

mae = mean_absolute_error(setupMean, y)
print("MAE score (setup): " + str(mae))

mape = mean_absolute_percentage_error(setupMean, y)
print("MAPE score (setup): " + str(mape))

smape = sym_mean_absolute_percentage_error(setupMean, y)
print("sMAPE score (setup): " + str(smape))

#params = [1.02,2**11,600]
# Fit the query-cost model to the per-size mean of 'run'.
# Bounds are given per parameter in the order (t, o).
# NOTE: runfun_fit currently ignores t, so only the fitted o is meaningful.
mins=[1.01, 0]
maxs=[1.1, 1000]
params, _ = optimize.curve_fit(runfun_fit, steps, runMean, bounds=(mins,maxs))
print(params)
fit = [runfun_fit(s, params[0], params[1]) for s in steps]

# Goodness of the fitted query model
r2 = r2_score(runMean, fit)
print("R2 score (run): " + str(r2))

rmse = math.sqrt(mean_squared_error(runMean, fit))
print("RMSE score (run): " + str(rmse))

mae = mean_absolute_error(runMean, fit)
print("MAE score (run): " + str(mae))

mape = mean_absolute_percentage_error(runMean, fit)
print("MAPE score (run): " + str(mape))

smape = sym_mean_absolute_percentage_error(runMean, fit)
print("sMAPE score (run): " + str(smape))

#lg = linregress(steps, runMean)
#print(lg)
#_, _, r2, _, _ = linregress(steps, runMean)
#r2 = r2 * r2
#print("R2 score (run) (scipy): " + str(r2))

fig = plt.figure(figsize=figsize, dpi=300)

# NOTE(review): Matplotlib >= 3.3 spells this keyword `base`; `basex` is kept
# because the notebook targets the older API (it also uses matplotlib2tikz).
plt.semilogx(basex=2)
plt.ylim([0,1200])

# Insert: model (dashed) vs. measured mean, with the min..max range shaded
plt.plot(steps,y, label="fit Insert", linestyle=(0, (5, 1)), color=TUMOrange, linewidth=2)
plt.plot(steps, setupMean, label="data Insert", color=TUMBlue, linewidth=2)

#plt.plot(steps, setupMin, label="data-setup", color='#00FF7F')
#plt.plot(steps, setupMax, label="data-setup", color='#00FF7F')

plt.fill_between(steps, setupMax, setupMin, where=setupMax >= setupMin, facecolor=TUMLightGray, interpolate=True)

# Lookup: fitted model (dashed) vs. measured mean, with the min..max range shaded
plt.plot(steps, fit, linestyle=(0, (5, 1)), label="fit Lookup", color=TUMCyan, linewidth=2)
plt.plot(steps, runMean, label="data Lookup", color=TUMDarkRed, linewidth=2)

#plt.scatter(data['size'], data['setup'], 1, label="data", color=TUMBlue, zorder=1)

plt.fill_between(steps, runMax, runMin, where=runMax >= runMin, facecolor=TUMLightGray, interpolate=True)

plt.legend()
plt.ylabel("Cycles per Operation")
# The label text is a literal backslash followed by '#' so that the exported
# TikZ/LaTeX source gets an escaped hash; the backslash is doubled here because
# "\#" is not a valid Python escape sequence and triggers a warning.
plt.xlabel("\\#Operations")
# Passed positionally: the keyword was renamed from `b` to `visible` in newer
# Matplotlib releases, the positional form works with both.
plt.grid(True)

#plt.savefig('output/synth-denseMap.pdf',bbox_inches='tight')
#plt.show()

# Export the current figure as TikZ/PGFPlots source for the paper
from matplotlib2tikz import save as tikz_save
tikz_save('../../paper/graphs/denseMap.tex',
          figureheight='3.5cm',
          figurewidth='.9\\columnwidth',
          extra_axis_parameters = {"mark options={scale=.15}",
                                   "legend style={at={(0.5,1.1)}, draw=white!80.0!black, anchor=south}",
                                   "legend columns = 2"}
         )