In [1]:
import math

import matplotlib as mpl
# Render all figure text through LaTeX, using the Libertine text and math fonts.
mpl.rcParams['text.usetex'] = True
# The preamble is passed as one newline-joined string: current matplotlib expects a
# single string here (list support was deprecated in 3.3), and a comma-free string
# is also accepted by older releases.
mpl.rcParams['text.latex.preamble'] = "\n".join([
    r'\usepackage{libertine}',
    r'\RequirePackage[libertine]{newtxmath}',
])
mpl.rc('font', family='serif')

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib import rc

from cycler import cycler

# Use the xpdf distiller when writing PostScript output.
mpl.rcParams['ps.usedistiller'] = 'xpdf'

# matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*".
# Prefer the new name when it is installed and fall back to the historical one, so
# the notebook runs on both old and new matplotlib releases.
_notebook_style = next(
    (name for name in ('seaborn-v0_8-notebook', 'seaborn-notebook')
     if name in plt.style.available),
    'seaborn-notebook',
)
plt.style.use(_notebook_style)

# Font sizes for titles, axis labels, legends and tick labels.
plt.rcParams['axes.titlesize'] = '25'
plt.rcParams['axes.labelsize'] = '25'
plt.rcParams['legend.fontsize'] = '15'
plt.rcParams['xtick.labelsize'] = '15'
plt.rcParams['ytick.labelsize'] = '15'

# Default property cycle with 16 entries: the 8 "husl" colors repeated twice, paired
# element-wise with the first 16 markers of the repeating v/s/o sequence.
monochrome=(cycler('color', sns.color_palette("husl", 8))*2+(cycler('marker', ['v', 's', "o"])*7)[0:16])
plt.rc('axes', prop_cycle=monochrome)

# Show up to 999 rows before pandas truncates a displayed frame.
pd.options.display.max_rows = 999

In [2]:
# Directory holding the experiment output to analyse; point it at your own run.
# Keep the trailing "/": later cells build file names by plain string concatenation.
path_to_results = "../experiments/2019-11-23-00-07_StreamEstimatorPerformance_/"

In [3]:
# Load the per-run performance measurements written by the experiment.
results_csv = path_to_results + "StreamEstimatorPerformance.csv"
masterdata = pd.read_csv(results_csv)

In [4]:
def _estimator_id_fields(estimator_id):
    """Return (niter, window, step, stream) as strings parsed from an estimatorId.

    The id has the form ``<prefix>||<suffix>`` with "-"-separated fields on each
    side, e.g. ``MWP-500-0||1000-1-0.99-1`` -> ("500", "1000", "1", "1").
    """
    sides = estimator_id.split("||")
    prefix_fields = sides[0].split("-")
    suffix_fields = sides[1].split("-")
    return prefix_fields[1], suffix_fields[0], suffix_fields[1], suffix_fields[-1]


# Parse each id once, then fan the fields out into one (string) column each.
_parsed_ids = [_estimator_id_fields(eid) for eid in masterdata["estimatorId"]]
for _pos, _column in enumerate(("niter", "window", "step", "stream")):
    masterdata[_column] = [fields[_pos] for fields in _parsed_ids]

In [5]:
# The fields parsed from estimatorId are strings; make the ones used for numeric
# filtering and sorting proper integers.
for _int_column in ("niter", "step", "stream"):
    masterdata[_int_column] = masterdata[_int_column].astype(int)

In [6]:
# Inspect the last rows to sanity-check the columns parsed from estimatorId.
masterdata.tail()

Unnamed: 0,estimatorId,cpu,wall,abserror,absmeansqerror,absspeedup,relerror,relmeansqerror,relspeedup,path,rep,niter,window,step,stream
3695,MWP-500-0||1000-1-0.99-0,20242940.0,20674460.0,0.000929,1e-06,0.790262,0.0,0.0,1.0,data/MWP-500-0||1000-1-0.99-0,9,500,1000,1,0
3696,MWP-500-0||1000-1-0.99-1,11120550.0,11374850.0,0.00096,2e-06,1.464056,0.000953,2e-06,1.625108,data/MWP-500-0||1000-1-0.99-1,37,500,1000,1,1
3697,MWP-500-0||1000-1-0.99-0,18072090.0,18458550.0,0.000938,1e-06,0.900898,0.0,0.0,1.0,data/MWP-500-0||1000-1-0.99-0,37,500,1000,1,0
3698,MWP-500-0||1000-1-0.99-1,10163150.0,10382150.0,0.000939,1e-06,1.627404,0.000982,2e-06,1.952545,data/MWP-500-0||1000-1-0.99-1,28,500,1000,1,1
3699,MWP-500-0||1000-1-0.99-0,19844000.0,20281150.0,0.00093,1e-06,0.833478,0.0,0.0,1.0,data/MWP-500-0||1000-1-0.99-0,28,500,1000,1,0


In [7]:
# Replace missing relerror values with 0.
masterdata["relerror"] = masterdata["relerror"].fillna(0)

In [8]:
# Average every numeric metric per (niter, step, stream) configuration.
# numeric_only=True keeps the string columns (estimatorId, path, window) out of the
# aggregation; pandas >= 2.0 raises a TypeError on them otherwise.
masterdata.groupby(["niter","step", "stream"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cpu,wall,abserror,absmeansqerror,absspeedup,relerror,relmeansqerror,relspeedup,rep
niter,step,stream,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1,0,1215720.0,1251175.0,0.014809,0.00036786,13.784764,0.0,0.0,1.0,25.5
1,1,1,618522.7,636552.4,0.014788,0.00036738,27.410371,0.020897,0.000733,2.03362,25.5
1,10,0,1090707.0,1123553.0,0.046069,0.00356296,15.420052,0.0,0.0,1.0,25.5
1,10,1,299493.0,308248.9,0.045815,0.00352324,58.163644,0.064591,0.007096,3.841073,25.5
1,50,0,1174828.0,1211300.0,0.096916,0.0157107,14.222192,0.0,0.0,1.0,25.5
1,50,1,274201.3,282374.5,0.096659,0.01561062,64.021588,0.134549,0.031834,4.595325,25.5
1,100,0,1145597.0,1182598.0,0.127915,0.0271731,14.618737,0.0,0.0,1.0,25.5
1,100,1,273800.1,282321.6,0.126111,0.0264896,65.059235,0.171902,0.053803,4.49393,25.5
1,500,0,1144118.0,1181276.0,0.190021,0.05866666,14.558868,0.0,0.0,1.0,25.5
1,500,1,274812.3,283956.5,0.18921,0.0585354,64.659838,0.245944,0.117653,4.490808,25.5


In [9]:
# Check the column dtypes: niter, step and stream were cast to int in an earlier
# cell, while window is still the raw string parsed from estimatorId.
masterdata.dtypes

estimatorId        object
cpu               float64
wall              float64
abserror          float64
absmeansqerror    float64
absspeedup        float64
relerror          float64
relmeansqerror    float64
relspeedup        float64
path               object
rep                 int64
niter               int64
window             object
step                int64
stream              int64
dtype: object

In [10]:
%matplotlib notebook

stream = 1

for niter in sorted(masterdata["niter"].unique()): 
    fig, axes = plt.subplots(nrows=1, ncols=len(masterdata["step"].unique()), figsize=(10,3), sharey=True, sharex=True)
    axess = axes.reshape(-1)
    i=0
    for step in sorted([int(x) for x in masterdata["step"].unique()]): 
        ax = axess[i]
        plt.sca(axess[i])
        subdata = masterdata[(masterdata["niter"] == niter) & (masterdata["step"] == step) & (masterdata["stream"] == stream)]
        path = subdata["path"].values[0]
        f = open(path_to_results + path) 
        l = f.readline()
        values = pd.Series([float(x) for x in l.split(",")])
        values.plot(ax = ax)
        
        if(i == 0):
            plt.ylabel("%s"%(niter))
        plt.title("%s"%step)
            
        i = i + 1

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# Normalise CPU time by the largest per-path mean CPU time, so a relcpu of 1
# corresponds to the path that is slowest on average.
# "cpu" is selected before aggregating: averaging the whole frame would also try the
# string columns (estimatorId, window), which raises on pandas >= 2.0.
m = masterdata.groupby("path")["cpu"].mean().max()
masterdata["relcpu"] = masterdata["cpu"] / m

In [17]:
# Five-step reversed sequential palettes, one per colour family, used for the bar
# panels of the figure below.
purples5, blues5, reds5 = (
    sns.color_palette(palette_name, 5)
    for palette_name in ("Purples_r", "Blues_r", "Reds_r")
)

In [18]:
# List the distinct niter values present in the results (in order of appearance).
masterdata["niter"].unique()

array([500,   1,   5,  10, 100,  50])

In [19]:
# Peek at the first rows, which now include the derived relcpu column.
masterdata.head()

Unnamed: 0,estimatorId,cpu,wall,abserror,absmeansqerror,absspeedup,relerror,relmeansqerror,relspeedup,path,rep,niter,window,step,stream,relcpu
0,MWP-500-0||1000-1-0.99-1,15403350.0,15766740.0,0.0,0.0,0.0,0.000934,1e-06,1.062882,data/MWP-500-0||1000-1-0.99-1,-34,500,1000,1,1,0.784128
1,MWP-500-0||1000-1-0.99-0,16371950.0,16761770.0,0.0,0.0,0.0,0.0,0.0,1.0,data/MWP-500-0||1000-1-0.99-0,-34,500,1000,1,0,0.833436
2,MWP-500-0||1000-1-0.99-1,15753100.0,16127290.0,0.0,0.0,0.0,0.000929,1e-06,1.018035,data/MWP-500-0||1000-1-0.99-1,-2,500,1000,1,1,0.801933
3,MWP-500-0||1000-1-0.99-0,16037210.0,16416740.0,0.0,0.0,0.0,0.0,0.0,1.0,data/MWP-500-0||1000-1-0.99-0,-2,500,1000,1,0,0.816396
4,MWP-500-0||1000-1-0.99-1,16203690.0,16590020.0,0.0,0.0,0.0,0.000928,1e-06,0.972451,data/MWP-500-0||1000-1-0.99-1,-50,500,1000,1,1,0.824871


## Figure 9: Quality and speed of contrast estimation with concept drift (* ≡ sweet spot)

In [20]:
# Figure 9 layout: one column per niter value (titled "M = ...") and one row per
# panel type:
#   row "data"       - sub-sampled raw values read from the stream file of step 50
#   row "abserror"   - mean +/- std of the absolute error per step (log scale)
#   row "relcpu"     - mean +/- std of the relative CPU time per step (log scale)
#   row "relspeedup" - mean +/- std of the relative speedup per step
# Only rows with stream == 1 are used. For the bar rows, values outside the open
# (5%, 95%) quantile interval of their step group are dropped before aggregating.

measures = ["data", "abserror", "relcpu", "relspeedup"]
niters = sorted(masterdata["niter"].unique())

trace_step = 50        # step whose stream file is drawn in the top row
trace_stride = 500     # keep every 500th value of that stream
sweet_spot_niter = 50  # column that gets the "*" marker

prettytitle={
    "relcpu": "Relative Time",
    "abserror": "Absolute Error",
    "relspeedup": "Index Speedup"
}

# Bar colour and y-position of the "*" marker for each bar row.
bar_colors = {"abserror": reds5[1], "relcpu": blues5[1], "relspeedup": purples5[1]}
sweet_spot_y = {"abserror": 0.03, "relcpu": 0.07, "relspeedup": 4.5}

# The grid is sized from the data instead of assuming exactly six niter values;
# squeeze=False keeps ``axes`` two-dimensional for any grid shape.
fig, axes = plt.subplots(nrows=len(measures), ncols=len(niters), figsize=(11,8),
                         sharey="row", sharex="row", squeeze=False)

for row, measure in enumerate(measures):
    for col, niter in enumerate(niters):
        ax = axes[row, col]
        subdatastream = masterdata[(masterdata["niter"] == niter) & (masterdata["stream"] == 1)]

        if measure == "data":
            path = subdatastream[subdatastream["step"] == trace_step]["path"].values[0]

            # Context manager so the file handle is closed after reading the line.
            with open(path_to_results + path) as f:
                slowvalues = pd.Series([float(x) for x in f.readline().split(",")])

            slowvalues = slowvalues[::trace_stride]
            ax.scatter(x=slowvalues.index, y=slowvalues, edgecolor='black', color="white", alpha=0.6, rasterized=False)
            ax.set_ylim((-0.05,1.05))

            if col == 0:
                ax.set_ylabel("Estimation", fontsize=15, labelpad=20)

            # Keep only the left spine and hide the x tick marks for the raw trace.
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            ax.spines['bottom'].set_visible(False)
            ax.xaxis.set_ticks_position('none')

            ax.text(0,0.05, r'$T \rightarrow  $', fontsize=15)
            ax.text(0,-0.1, "$w'=%s$"%trace_step, fontsize=15)

            ax.set_title("$M = %s$"%niter)
        else:
            # Per-step 5% / 95% quantiles, looked up for every row through its step.
            cutoffs = subdatastream.groupby("step")[measure].quantile([0.05, 0.95]).unstack(level=1)
            lower = cutoffs.loc[subdatastream["step"], 0.05].values
            upper = cutoffs.loc[subdatastream["step"], 0.95].values
            observed = subdatastream[measure].values
            trimmed = subdatastream.loc[(lower < observed) & (observed < upper)]

            # Aggregate only the plotted measure: running mean/std over the whole
            # frame would also hit the string columns, which raises on pandas >= 2.0.
            stats = trimmed.groupby("step")[measure].agg(["mean", "std"]).sort_index(ascending=False)

            x_pos = np.arange(len(stats))
            ax.bar(x_pos, stats["mean"], yerr=stats["std"], alpha=0.6, color=bar_colors[measure], ecolor='black')
            ax.set_xticks(x_pos)
            ax.set_xticklabels(stats.index, rotation=90)

            if measure == "relspeedup":
                # Ticks and labels are set separately and have matching lengths.
                # Passing a 5-label list as the second positional argument is
                # read as ``minor`` on older matplotlib and is rejected on newer
                # releases because there are 6 ticks.
                ax.set_yticks([0,1,2,3,4,5])
                ax.set_yticklabels(["0","","2","","4",""])
            if measure == "abserror":
                ax.set_yscale("log")

            if measure == "relcpu":
                ax.set_yscale("log")
                ax.set_ylim((0.005, 1))
                ax.get_yaxis().set_major_formatter(mpl.ticker.LogFormatterSciNotation())
                ax.tick_params(axis='y', which='major')

            # Only the leftmost panel of a row carries the row label.
            if col == 0:
                labelpad = 30 if measure == "relspeedup" else 10
                ax.set_ylabel("%s"%prettytitle[measure], fontsize=15, labelpad=labelpad)
            else:
                ax.set_ylabel("")

            # Mark the sweet spot (see the figure caption) in the chosen column.
            if niter == sweet_spot_niter:
                ax.text(2.8, sweet_spot_y[measure], "*", fontsize=25)

        # x ticks are cleared on every row but the last, which gets the axis label.
        if row < len(measures) - 1:
            ax.set_xticks([])
        else:
            # Raw string: "\m" and "\D" are not valid Python escape sequences.
            ax.set_xlabel(r"$\mathit{\Delta}$")

fig.tight_layout()

fig.savefig("plots/Fig9.pdf")

<IPython.core.display.Javascript object>