# Application Level Caching Analysis

### Prerequisites

None. Example logs are loaded by default. The experiments were run on:

* Storage: CEPH HDDs
* CPU: Intel Xeon E5-2630 v3 8x@2.4GHz
* Image: ubuntu-18.04-lts/Openstack
* Memory: 80GB DDR4

Plots that are not saved with the `save_fig` function were not used in the paper, but they may provide a closer look at specific values.

### Meta information

* all pipelines are represented

In [1]:
%load_ext autoreload
%autoreload 2
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import matplotlib.patches as mpatches

from matplotlib import ticker
from typing import List
# adding previous directory for easier use of library
import sys
sys.path.append('../')
from notebookhelper import show_values_on_bars, show_values_on_catplot, save_figure, make_big_number_prettier \
                         , make_big_number_prettier_storage_mb

from presto.analysis import StrategyAnalysis \
                       , strat_analysis_from_csv

pd.set_option('display.max_rows', 200)

# Plot styling shared by the whole notebook.
plotting_context = "paper"
default_palette = "colorblind"
epoch_palette = sns.color_palette("YlOrRd", 3)
samples_palette = sns.color_palette("icefire", 15)
threads_palette = sns.color_palette("tab20", 4)
font_scale = 1.4
# Configure seaborn in ONE call. Every sns.set() invocation re-applies its own
# defaults (context="notebook", font_scale=1), so a separate sns.set(rc=...)
# issued afterwards would silently discard the context and font scale above.
sns.set(font_scale=font_scale, context=plotting_context,
        rc={"figure.dpi": 300, "savefig.dpi": 300})

# Handed to save_figure as `local_fig_dir` by save_fig.
local_fig_dir = "misc"

def save_fig(name, file_type='pdf'):
    """Forward to `save_figure` using this notebook's `local_fig_dir`.

    name: passed through unchanged as the first argument of `save_figure`.
    file_type: passed through as `file_type`; defaults to 'pdf'.
    """
    save_figure(
        name,
        local_fig_dir=local_fig_dir,
        file_type=file_type,
    )
    
def print_stats(cum_df, original_sps, caching_sps):
    """Print a throughput summary for an application-level caching run.

    Args:
        cum_df: DataFrame providing the columns `throughput_sps`,
            `shard_cum_size_MB` and `sample_count`.
        original_sps: Baseline throughput without caching
            (presumably samples per second - confirm against the source experiment).
        caching_sps: Baseline throughput with system-level caching (same unit).

    Returns:
        None; the summary is written to stdout.
    """
    throughput = cum_df["throughput_sps"]
    # Series.mean()/Series.std() yield the same numbers as the "mean"/"std"
    # rows of DataFrame.describe(), without summarising every column repeatedly.
    new_sps = throughput.mean()
    new_sps_std = throughput.std()
    # Mean cumulative shard size divided by the first distinct sample count.
    sample_size = cum_df["shard_cum_size_MB"].mean() / cum_df.sample_count.unique()[0]
    org_speedup = new_sps / original_sps
    org_to_sys_speedup = caching_sps / original_sps
    print("---- ")
    print(f"  - sample size: {round(sample_size,2)}MB")
    print(f"  - original throughput:     {original_sps}")
    print(f"  - sys. caching throughput: {caching_sps}")
    print(f"  - app. caching throughput: {round(new_sps,1)} (+/- {round(new_sps_std,1)})")
    print(f"  - org. to app. cache speedup:   {round(org_speedup,2)}x")
    print(f"  - org. to sys. cache speedup:   {round(org_to_sys_speedup,2)}x")
    

## 1. CV - Segfault because of OOM

## 2. CV2-JPG

In [2]:
# Logged CV2-JPG application-caching run (file names indicate 4890 samples, 8 threads).
home_path = "../logs/final-logs/cubeplusplus/jpg/application-caching"
path_to_cum_df = f"{home_path}/cubeplusplus-JPG-application-cache_2021-10-20-21:34:07_cum-df_samples-4890_threads-8-full.csv"
path_to_cum_dstat_df = f"{home_path}/cubeplusplus-JPG-application-cache_2021-10-20-21:34:07_cum-dstat-df_samples-4890_threads-8-full.csv"

# Load both CSVs through strat_analysis_from_csv and pull out the cumulative frames.
analysis = strat_analysis_from_csv(
    path_to_cum_dstat_df=path_to_cum_dstat_df,
    path_to_cum_df=path_to_cum_df,
)
cum_dstat_df = analysis.to_cum_dstat_df()
cum_df = analysis.to_cum_df()

# Distinct strategy names present in this run.
strategies = [*cum_df.split_name.unique()]
print(strategies)

# NOTE(review): the two baseline throughputs are hard-coded here; presumably they
# come from the non-cached and system-cached experiments - confirm the source.
print_stats(cum_df=cum_df, original_sps=643, caching_sps=2147)

['5-center-pixel-values']
---- 
  - sample size: 1.18MB
  - original throughput:     643
  - sys. caching throughput: 2147
  - app. caching throughput: 9766.4 (+/- 1193.6)
  - org. to app. cache speedup:   15.19x
  - org. to sys. cache speedup:   3.34x


## 3. CV2-PNG

In [3]:
# Logged CV2-PNG application-caching run (file names indicate 4890 samples, 8 threads).
home_path = "../logs/final-logs/cubeplusplus/png/application-caching"
path_to_cum_df = f"{home_path}/cubeplusplus-PNG-application-cache_2021-10-20-21:43:37_cum-df_samples-4890_threads-8-full.csv"
path_to_cum_dstat_df = f"{home_path}/cubeplusplus-PNG-application-cache_2021-10-20-21:43:37_cum-dstat-df_samples-4890_threads-8-full.csv"

# Load both CSVs through strat_analysis_from_csv and pull out the cumulative frames.
analysis = strat_analysis_from_csv(
    path_to_cum_dstat_df=path_to_cum_dstat_df,
    path_to_cum_df=path_to_cum_df,
)
cum_dstat_df = analysis.to_cum_dstat_df()
cum_df = analysis.to_cum_df()

# Distinct strategy names present in this run.
strategies = [*cum_df.split_name.unique()]
print(strategies)

# NOTE(review): the two baseline throughputs are hard-coded here; presumably they
# come from the non-cached and system-cached experiments - confirm the source.
print_stats(cum_df=cum_df, original_sps=631, caching_sps=2201)

['5-center-pixel-values']
---- 
  - sample size: 1.18MB
  - original throughput:     631
  - sys. caching throughput: 2201
  - app. caching throughput: 9148.2 (+/- 1110.8)
  - org. to app. cache speedup:   14.5x
  - org. to sys. cache speedup:   3.49x


## 4. NLP - Segfault because of OOM

## 5. NILM

In [4]:
# Logged NILM (CREAM) application-caching run (file names indicate 267840 samples, 8 threads).
home_path = "../logs/final-logs/cream/application-caching"
path_to_cum_df = f"{home_path}/cream-application-cache_2021-10-20-21:55:42_cum-df_samples-267840_threads-8-full.csv"
path_to_cum_dstat_df = f"{home_path}/cream-application-cache_2021-10-20-21:55:42_cum-dstat-df_samples-267840_threads-8-full.csv"

# Load both CSVs through strat_analysis_from_csv and pull out the cumulative frames.
analysis = strat_analysis_from_csv(
    path_to_cum_dstat_df=path_to_cum_dstat_df,
    path_to_cum_df=path_to_cum_df,
)
cum_dstat_df = analysis.to_cum_dstat_df()
cum_df = analysis.to_cum_df()

# Distinct strategy names present in this run.
strategies = [*cum_df.split_name.unique()]
print(strategies)

# NOTE(review): the two baseline throughputs are hard-coded here; presumably they
# come from the non-cached and system-cached experiments - confirm the source.
print_stats(cum_df=cum_df, original_sps=9053, caching_sps=9957)

['3-active-power-+-rms-current-+-cumsum-rms-current']
---- 
  - sample size: 0.01MB
  - original throughput:     9053
  - sys. caching throughput: 9957
  - app. caching throughput: 12403.9 (+/- 621.3)
  - org. to app. cache speedup:   1.37x
  - org. to sys. cache speedup:   1.1x


## 6. Commonvoice

In [5]:
# Logged Commonvoice application-caching run (file names indicate 12717 samples, 8 threads).
home_path = "../logs/final-logs/commonvoice/application-caching"
path_to_cum_df = f"{home_path}/commonvoice-application-cache_2021-10-20-20:52:51_cum-df_samples-12717_threads-8-full.csv"
path_to_cum_dstat_df = f"{home_path}/commonvoice-application-cache_2021-10-20-20:52:51_cum-dstat-df_samples-12717_threads-8-full.csv"

# Load both CSVs through strat_analysis_from_csv and pull out the cumulative frames.
analysis = strat_analysis_from_csv(
    path_to_cum_dstat_df=path_to_cum_dstat_df,
    path_to_cum_df=path_to_cum_df,
)
cum_dstat_df = analysis.to_cum_dstat_df()
cum_df = analysis.to_cum_df()

# Distinct strategy names present in this run.
strategies = [*cum_df.split_name.unique()]
print(strategies)

# NOTE(review): the two baseline throughputs are hard-coded here; presumably they
# come from the non-cached and system-cached experiments - confirm the source.
print_stats(cum_df=cum_df, original_sps=5229, caching_sps=8429)

['3-convert-to-spectrogram']
---- 
  - sample size: 0.08MB
  - original throughput:     5229
  - sys. caching throughput: 8429
  - app. caching throughput: 11373.3 (+/- 173.4)
  - org. to app. cache speedup:   2.18x
  - org. to sys. cache speedup:   1.61x


## 7. Librispeech

In [6]:
# Logged Librispeech application-caching run (file names indicate 28539 samples, 8 threads).
home_path = "../logs/final-logs/librispeech/application-caching"
path_to_cum_df = f"{home_path}/librispeech-application-cache_2021-10-20-17:19:58_cum-df_samples-28539_threads-8-full.csv"
path_to_cum_dstat_df = f"{home_path}/librispeech-application-cache_2021-10-20-17:19:58_cum-dstat-df_samples-28539_threads-8-full.csv"

# Load both CSVs through strat_analysis_from_csv and pull out the cumulative frames.
analysis = strat_analysis_from_csv(
    path_to_cum_dstat_df=path_to_cum_dstat_df,
    path_to_cum_df=path_to_cum_df,
)
cum_dstat_df = analysis.to_cum_dstat_df()
cum_df = analysis.to_cum_df()

# Distinct strategy names present in this run.
strategies = [*cum_df.split_name.unique()]
print(strategies)

# NOTE(review): the two baseline throughputs are hard-coded here; presumably they
# come from the non-cached and system-cached experiments - confirm the source.
print_stats(cum_df=cum_df, original_sps=1436, caching_sps=6021)

['3-convert-to-spectrogram']
---- 
  - sample size: 0.41MB
  - original throughput:     1436
  - sys. caching throughput: 6021
  - app. caching throughput: 11544.5 (+/- 407.0)
  - org. to app. cache speedup:   8.04x
  - org. to sys. cache speedup:   4.19x
