In [None]:
# This script tries to approximate the cost of Google's DenseMap

import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import math
import numpy as np
from scipy import optimize
from sklearn.metrics import r2_score

# Figure size (width, height) handed to plt.figure() for the exported plot
figsize = (4.5, 3)

# Colour palette (hex RGB strings) used for the plotted lines and markers
TUMBlue = "#3070b3"
TUMGreen = "#a2ad00"
TUMOrange = "#e37222"

In [None]:
# Load the measurements and plot the mean insertion ("setup") cost per size.
# The CSV is read for the columns 'size', 'setup' and 'run'.
data = pd.read_csv("../data-files/stateTableCost_dut2.csv")

# Group once and reuse; the aggregated results are indexed by 'size' in
# ascending order.
by_size = data.groupby(['size'])

# unique() alone yields the sizes in order of first appearance, which only
# matches the sorted groupby results if the CSV happens to be sorted.
# Sorting keeps x and y aligned for every plot and fit that uses `steps`.
steps = np.sort(data['size'].unique())

# Per-size envelope and mean of the insertion cost
setupMax = by_size['setup'].max()
setupMean = by_size['setup'].mean()
setupMin = by_size['setup'].min()

# Per-size envelope and mean of the query cost
runMin = by_size['run'].min()
runMean = by_size['run'].mean()
runMax = by_size['run'].max()

# Logarithmic x axis with base 2.
# NOTE(review): newer matplotlib releases spell this keyword `base`; kept as
# `basex` for the matplotlib version this notebook targets - confirm.
plt.semilogx(basex=2)
plt.ylim([0,1200])

plt.plot(steps, setupMean)

# Positional flag instead of the `b=` keyword, which newer matplotlib
# releases renamed to `visible`; the positional form works with both.
plt.grid(True)

plt.show()

In [None]:
# This function models the insertion cost
def setupfun(x):
    """Return the modelled insertion cost for a single size ``x``.

    The model is a base of ``400 + log_10000(x)`` plus a term of up to
    +/-170 that depends on the ratio between the sum of the powers of two
    ``2**2 .. 2**(floor(log2(x)) - 1)`` and ``x``.

    ``x`` must be a positive scalar (``math.log`` does not accept arrays).
    For ``x < 8`` the sum of powers is empty and counts as zero.
    """
    # Exclusive upper bound for the exponents that are summed up
    exponent_stop = math.floor(math.log(x, 2))
    power_sum = np.sum([2**exp for exp in range(2, exponent_stop)])

    # Ratio of the summed powers to x, re-centred around zero and doubled
    ratio = power_sum / x
    swing = (ratio - 0.5) * 2

    baseline = 400 + math.log(x, 10000)
    return baseline + 170 * swing

# This function models the query cost
def runfun_fit(x,t,o):
    """Evaluate the query-cost model; usable as a ``curve_fit`` model function.

    x -- a single size, or a ``numpy.ndarray`` of sizes
    t -- base of the logarithmic term; unused while that term is disabled
    o -- constant offset, which is currently the whole model

    Returns ``o`` for a non-array ``x`` and a plain list with one entry per
    element for an ``ndarray``.
    """
    def cost(_size):
        # Constant model; the logarithmic variant `math.log(size, t) + o`
        # is switched off.
        return o

    # Anything that is not an ndarray is evaluated as a single value
    if not isinstance(x, np.ndarray):
        return cost(x)

    # curve_fit may hand over all x values at once
    return [cost(size) for size in x]



In [None]:
# Evaluate the insertion-cost model at every measured size and score it
# against the mean insertion cost.
y = [setupfun(i) for i in steps]

r2 = r2_score(setupMean, y)
print("R2 score (setup): " + str(r2))

# Fit the query-cost model to the mean query cost.
# Bounds are given per parameter, in the order (t, o) of runfun_fit.
mins=[1.01, 0]
maxs=[1.1, 1000]
params, _ = optimize.curve_fit(runfun_fit, steps, runMean, bounds=(mins,maxs))
print(params)

# Evaluate the fitted model at every measured size
fit = [runfun_fit(s, params[0], params[1]) for s in steps]

r2 = r2_score(runMean, fit)
print("R2 score (run): " + str(r2))

fig = plt.figure(figsize=figsize, dpi=300)

# Logarithmic x axis with base 2.
# NOTE(review): newer matplotlib releases spell this keyword `base`; kept as
# `basex` for the matplotlib version this notebook targets - confirm.
plt.semilogx(basex=2)
plt.ylim([0,1200])

# Insertion cost: model vs. measured mean, with the min/max band
plt.plot(steps,y, label="fit-setup", color=TUMOrange)
plt.plot(steps, setupMean, label="data-setup", color=TUMBlue)

plt.fill_between(steps, setupMax, setupMin, where=setupMax >= setupMin, facecolor='#00FF7F', interpolate=True)

# Query cost: fitted model vs. measured mean
plt.plot(steps, fit, label="fit-run", color=TUMOrange)
plt.plot(steps, runMean, label="data-run", color=TUMBlue)

# Every individual insertion measurement as a small dot
plt.scatter(data['size'], data['setup'], 1, label="data", color=TUMBlue, zorder=1)

plt.fill_between(steps, runMax, runMin, where=runMax >= runMin, facecolor='#00FF7F', interpolate=True)

plt.legend(bbox_to_anchor=(1, 1))
plt.ylabel("Cycles per Operation")
# The backslash is escaped explicitly: "\#" is an invalid escape sequence
# that Python only tolerates with a warning. The label text is unchanged.
plt.xlabel("\\#Operations")
# Positional flag instead of the `b=` keyword, which newer matplotlib
# releases renamed to `visible`; the positional form works with both.
plt.grid(True)

#plt.savefig('output/synth-denseMap.pdf',bbox_inches='tight')
#plt.show()

# Export the current figure as TikZ/PGFPlots code
from matplotlib2tikz import save as tikz_save
tikz_save('../tex-output/denseMap.tex',
          figureheight='3.5cm',
          figurewidth='.9\\columnwidth',
          extra_axis_parameters = {"mark options={scale=.15}",
                                   "legend style={at={(0.5,1.1)}, draw=white!80.0!black, anchor=south}",
                                   "legend columns = 2"}
         )