# Analyze Visibility Metrics 

In [1]:
%matplotlib notebook

import os 
import re 
import pytz
import glob 
import json
import unicodedata

import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib as mpl 
import matplotlib.pyplot as plt 

from dateutil.parser import parse as parse_ts

from datetime import datetime, timedelta 
from collections import defaultdict

In [2]:
# Default root directory passed to every loader in this notebook.
DATA = "../data"
# Glob (relative to the data root) matching one directory per run; the
# integer after the last dash is used as the run id.
RUNS = "run-*"
# Figures directory. NOTE(review): not referenced by any visible cell.
FIGS = "../figures"
# File name of the hosts description, read from the data root.
HOSTS = "hosts.json"
# Glob matching the per-host result logs inside <run>/<host>/; the integer
# after the last dash is used as the experiment id.
RESULTS = "visibile_versions-*.log"
# Glob (relative to the data root) matching per-experiment config files;
# the integer after the last dash is the key they are stored under.
CONFIGS = "config-*.json"


def suffix(path):
    """Return the integer id that follows the last dash in ``path``.

    The file extension (if any) is dropped first, so both ``run-3`` and
    ``config-12.json`` work. Raises ``ValueError`` when the trailing
    component is not an integer.
    """
    stem = os.path.splitext(path)[0]
    # rpartition leaves the whole stem in the last slot when no dash exists.
    _, _, tail = stem.rpartition("-")
    return int(tail)


def load_hosts(path=DATA):
    """Parse and return the JSON hosts file found directly under ``path``."""
    hosts_file = os.path.join(path, HOSTS)
    with open(hosts_file, 'r') as fobj:
        hosts = json.load(fobj)
    return hosts


def load_configs(path=DATA):
    """Load every config file under ``path`` that matches ``CONFIGS``.

    Returns a dict mapping the integer suffix of each file name to the
    parsed JSON content of that file.
    """
    pattern = os.path.join(path, CONFIGS)
    loaded = {}
    for config_path in glob.glob(pattern):
        with open(config_path, 'r') as fobj:
            payload = json.load(fobj)
        loaded[suffix(config_path)] = payload
    return loaded


def slugify(name):
    """Return a lowercase, dash-separated ASCII slug for ``name``.

    The text is NFKD-normalised so accented characters decompose into a
    base letter plus combining marks; the non-ASCII marks are then dropped.
    Every run of characters outside ``[a-z0-9]`` becomes a single dash, and
    leading/trailing dashes are removed.
    """
    slug = unicodedata.normalize('NFKD', name)
    # Decode back to str: calling str() on bytes yields "b'...'" on
    # Python 3, which would leak a spurious "b-" prefix into every slug.
    slug = slug.encode('ascii', 'ignore').decode('ascii').lower()
    # The "+" already collapses consecutive separators into one dash.
    slug = re.sub(r'[^a-z0-9]+', '-', slug).strip('-')
    return slug


def load_raw_results(path=DATA):
    """Yield one dict per line of every result log found under ``path``.

    Walks ``<path>/<run>/<host>/<result log>``. Each line is parsed as JSON
    and then tagged with three extra keys: ``name`` (the host directory
    entry), ``runid`` (integer suffix of the run directory) and ``expid``
    (integer suffix of the log file name).
    """
    run_pattern = os.path.join(path, RUNS)
    for run_dir in glob.glob(run_pattern):
        run_id = suffix(run_dir)

        for host_name in os.listdir(run_dir):
            log_pattern = os.path.join(run_dir, host_name, RESULTS)
            for log_path in glob.glob(log_pattern):
                exp_id = suffix(log_path)

                with open(log_path, 'r') as fobj:
                    for raw_line in fobj:
                        record = json.loads(raw_line.strip())
                        record['name'] = host_name
                        record['runid'] = run_id
                        record['expid'] = exp_id
                        yield record

                        
def _load_results(path=DATA):
    """Yield one summary dict per observed (key, version, run, experiment).

    Raw rows are grouped by the label ``"{Key} {Version} {runid}-{expid}"``.
    For each group the matching experiment config is looked up by ``expid``
    and a summary is produced containing the experiment label, the number
    of distinct hosts that reported the version, the first and last
    timestamps, the span between them in seconds (``latency``) and the
    fraction of configured hosts that reported it (``visibility``).
    """
    grouped = defaultdict(list)
    for record in load_raw_results(path):
        label = "{Key} {Version} {runid}-{expid}".format(**record)
        observation = {
            'name': record['name'],
            'expid': record['expid'],
            'runid': record['runid'],
        }
        observation['timestamp'] = parse_ts(record['Timestamp'])
        grouped[label].append(observation)

    configs = load_configs(path)
    for label, observations in grouped.items():
        # All observations in a group share the same runid/expid because
        # both are part of the grouping label.
        first = observations[0]
        exp_id = first['expid']
        conf = configs[exp_id]

        stamps = [obs['timestamp'] for obs in observations]
        seen_hosts = len({obs['name'] for obs in observations})
        total_hosts = len(conf['replicas']['hosts'])

        replica_conf = conf['replicas']['config']
        exp_label = replica_conf['bandit']
        epsilon = replica_conf.get('epsilon', None)
        if epsilon:
            exp_label += " ε={}".format(epsilon)

        latest = max(stamps)
        earliest = min(stamps)

        yield {
            'version': label,
            'exp': exp_label,
            'expid': exp_id,
            'runid': first['runid'],
            'n_hosts': seen_hosts,
            'max_ts': latest,
            'min_ts': earliest,
            'latency': (latest - earliest).total_seconds(),
            'visibility': seen_hosts / total_hosts,
        }
                        

def load_results(data=DATA):
    """Return the visibility summary table as a DataFrame, caching it on disk.

    If ``<data>/visibility.csv`` already exists it is read and returned
    as-is. Otherwise the table is built from the raw logs via
    ``_load_results``, written to that CSV and returned.

    Note that a table loaded from the cache goes through ``pd.read_csv``
    with default options, so column dtypes may differ from a freshly built
    frame.
    """
    cache_path = os.path.join(data, "visibility.csv")
    if os.path.exists(cache_path):
        return pd.read_csv(cache_path)

    df = pd.DataFrame(_load_results(data))
    # index=False keeps the cached file's columns identical to the fresh
    # frame; writing the index would add an "Unnamed: 0" column on reload.
    df.to_csv(cache_path, index=False)
    return df

In [3]:
# Load the per-version visibility table (read from the cached CSV when it
# exists, otherwise built from the raw logs).
data = load_results()
# Sorted distinct experiment labels, used to fix the bar order in the plots.
exps_sorted = sorted(data['exp'].unique())

In [6]:
# Bar plot of the 'latency' column (seconds between the first and last
# observation of a version) grouped by experiment label, in sorted order.
g = sns.barplot(x='exp', y='latency', order=exps_sorted, data=data)
# The experiment labels on the ticks are self-explanatory; drop the axis title.
g.set_xlabel("")
g.set_ylabel("visibility latency (seconds)")
plt.show()

<IPython.core.display.Javascript object>

In [7]:
# Bar plot of replication per experiment label. 'visibility' is the fraction
# of configured hosts that reported a version, scaled here to a percentage.
g = sns.barplot(x='exp', y=data['visibility']*100, order=exps_sorted, data=data)
# The experiment labels on the ticks are self-explanatory; drop the axis title.
g.set_xlabel("")
g.set_ylabel("percent replication (%)")
plt.show()

<IPython.core.display.Javascript object>