In [1]:
import os
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
pd.set_option('precision', 4)
sns.set_style('ticks')
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = [20, 7]
matplotlib.rcParams['font.size'] = 15
matplotlib.rcParams['xtick.labelsize'] = 15
matplotlib.rcParams['ytick.labelsize'] = 15
matplotlib.rcParams['axes.labelsize'] = 15
matplotlib.rcParams['legend.fontsize'] = 15
matplotlib.rcParams['axes.titlesize'] = 'x-large'

In [2]:
# Simulator output is split across ten CSV shards, numbered 1 through 10.
# prefix = "../log"
prefix = "../output/streamer/scheduler/combinations"
filenames = []
for shard_no in range(1, 10 + 1):
    filenames.append("{}/sched-sim-permute-all-{}.csv".format(prefix, shard_no))

# Shards that are missing or zero-length are skipped; the rest are read without
# a header row, so their columns are addressed by position (0, 1, 2, ...).
dfs = []
for path in filenames:
    if not os.path.isfile(path):
        continue
    if not os.path.getsize(path) > 0:
        continue
    dfs.append(pd.read_csv(path, header=None))

# Stack every shard into one frame (raises if no shard was readable).
df_all = pd.concat(dfs)

In [3]:
# Column 0 is the '_'-joined combination label; split it into its members and
# count how many applications the combination contains.
df_all['comb'] = df_all[0].apply(str).str.split('_')
df_all['No of applications'] = df_all['comb'].apply(len)

# Columns 1 and 2 are copied under readable names.
for named_col, raw_col in [('Avg FNR', 1), ('Avg Rel Acc Loss', 2)]:
    df_all[named_col] = df_all[raw_col]

# Columns 3 and 4 are '_'-joined per-application values; split them into lists.
for named_col, raw_col in [('FNRs', 3), ('Rel Acc Losss', 4)]:
    df_all[named_col] = df_all[raw_col].str.split('_')

a = ['mean={}'.format(i) for i in range(10)]

# Baseline rows: those whose label is exactly 'mean=<i>', for curves 0..5.
baselines = {}
for i in range(6):
    baselines[i] = df_all[df_all[0] == 'mean={}'.format(i)]

# Baseline FNR per curve; float() expects each selection to hold a single row.
baselines_fnr = {}
for k, v in baselines.items():
    baselines_fnr[k] = float(v['Avg FNR'])

def aa(x):
    """Coerce a scalar or a list of values into a list of floats.

    Args:
        x: either a list of float-convertible values (e.g. numeric strings)
            or a single float-convertible value, which is treated as a
            one-element list.

    Returns:
        A new list with every element converted via float().

    Raises:
        ValueError / TypeError: if an element cannot be converted by float().
    """
    if not isinstance(x, list):
        x = [x]
    # Build a real list rather than returning map(): on Python 3 map() yields a
    # one-shot iterator, which the later zip()/min()/max() calls would exhaust,
    # and which would not be recognised as a list on a second pass through aa.
    return [float(value) for value in x]

# Where a per-application list is missing, fall back to the row's average,
# then coerce every entry to a list of floats.
for metric in ['Rel Acc Loss', 'FNR']:
    per_app_col = metric + 's'
    filled = df_all[per_app_col].fillna(df_all['Avg ' + metric])
    df_all[per_app_col] = filled.apply(aa)

# Curve ids per row (label with "mean=" stripped, split on "_"); these are the
# keys used to normalise against the baseline FNR.
df_all['Curves'] = df_all[0].apply(
    lambda label: label.replace("mean=", "").split("_")
)
def norm(x):
    """Return each application's FNR divided by its curve's baseline FNR.

    x is a row providing 'Curves' (curve ids, convertible with int()) and
    'FNRs' (one value per curve); baselines are looked up in baselines_fnr.
    """
    ratios = []
    for curve, fnr in zip(x['Curves'], x['FNRs']):
        baseline = baselines_fnr[int(curve)]
        ratios.append(fnr / baseline)
    return ratios
def c_loss(x):
    """Return each application's FNR minus its curve's baseline FNR.

    x is a row providing 'Curves' (curve ids, convertible with int()) and
    'FNRs' (one value per curve); baselines are looked up in baselines_fnr.
    """
    deltas = []
    for curve, fnr in zip(x['Curves'], x['FNRs']):
        baseline = baselines_fnr[int(curve)]
        deltas.append(fnr - baseline)
    return deltas

# For each baseline-relative metric: compute the per-application list row by
# row, then store its mean alongside it.
for per_app_col, avg_col, row_fn in [
    ('Normed FNRs', 'Avg Normed FNR', norm),
    ('FNR Losss', 'Avg FNR Loss', c_loss),
]:
    df_all[per_app_col] = df_all.apply(row_fn, axis=1)
    df_all[avg_col] = df_all[per_app_col].apply(np.mean)

# Summarise every per-application list by its extremes and spread.
for metric in ['Rel Acc Loss', 'FNR', 'Normed FNR', 'FNR Loss']:
    values = df_all[metric + 's'].fillna(df_all['Avg ' + metric]).apply(aa)
    df_all[metric + 's'] = values

    lows = values.apply(min)
    highs = values.apply(max)
    spread = highs - lows

    df_all['Min ' + metric] = lows
    df_all['Max ' + metric] = highs
    df_all['Max-Min ' + metric] = spread
    # The .01 in the denominator presumably guards against a zero maximum.
    df_all['(Max-Min)/Max ' + metric] = spread / (highs + .01)

In [4]:
# Keep only the named, derived columns; the raw positional CSV columns
# (0, 1, 2, ...) are left out of df_all2.
cols = [
    # Combination identity and headline averages.
    u'comb',
    u'No of applications',
    u'Avg FNR',
    u'Avg Rel Acc Loss',
    # Per-application lists and baseline-relative metrics.
    u'FNRs',
    u'Rel Acc Losss',
    u'Curves',
    u'Normed FNRs',
    u'Avg Normed FNR',
    u'FNR Losss',
    u'Avg FNR Loss',
    # Extremes and spread: relative accuracy loss.
    u'Min Rel Acc Loss',
    u'Max Rel Acc Loss',
    u'Max-Min Rel Acc Loss',
    u'(Max-Min)/Max Rel Acc Loss',
    # Extremes and spread: FNR.
    u'Min FNR',
    u'Max FNR',
    u'Max-Min FNR',
    u'(Max-Min)/Max FNR',
    # Extremes and spread: normalised FNR.
    u'Min Normed FNR',
    u'Max Normed FNR',
    u'Max-Min Normed FNR',
    u'(Max-Min)/Max Normed FNR',
    # Extremes and spread: FNR loss.
    u'Min FNR Loss',
    u'Max FNR Loss',
    u'Max-Min FNR Loss',
    u'(Max-Min)/Max FNR Loss',
]
df_all2 = df_all[cols]

def get_n_apps(x):
    """Return the rows of df_all2 whose combination has exactly x applications."""
    has_x_apps = df_all2['No of applications'] == x
    return df_all2[has_x_apps]

# Metric base names. Column names are formed from them by adding a prefix such
# as 'Avg ', 'Min ' or 'Max ', or a trailing 's' for the per-application list.
# `metrics` is not referenced in the cells visible here.
metrics = ['Rel Acc Loss', 'FNR', 'FNR Loss', 'Normed FNR']
# The two metrics plotted by the single-application bar charts ('Avg ' + base).
bases = ['Rel Acc Loss', 'FNR']

## Curves

In [5]:
import sys
# Make the project-local ../data directory importable so app_data can be found.
# The path is relative, so it resolves against the kernel's working directory.
sys.path.append('../data')
import app_data
# One "accuracies" entry per option in app_data.app_options3; the plotting cell
# calls .items() on each, so they are expected to be dict-like.
curves = [a["accuracies"] for a in app_data.app_options3]
# Display names for the curves.
# NOTE(review): assumed to be in the same order as app_data.app_options3 - confirm.
curve_titles = ['Fake-Linear', 'Fake-Log', 'Syn-Flowers_Inception', 'Syn-Cats_Inception', 'Syn-Paris_Inception', 'Fake-Inflection']

ImportError: No module named app_data

In [None]:
# Plot every accuracy curve on one set of axes, with the marker chosen by the
# title prefix: 'o' for Fake-*, 'X' for Syn-*.
for curve, title in zip(curves, curve_titles):
    xs, ys = zip(*sorted(curve.items()))
    if title.startswith('Fake'):
        style = 'o'
    elif title.startswith('Syn'):
        style = 'X'
    else:
        # Without this branch `style` would be unbound on the first curve, or
        # silently reuse the previous curve's marker, for any other prefix.
        style = 's'
    plt.plot(xs, ys, style + '-', label=title)
plt.xlabel('Layers fine-tuned')
plt.ylabel('Accuracy')
plt.legend();

## 1 application

In [None]:
# One bar chart per base metric for the single-application rows.
fig, axes = plt.subplots(1, 2)
single_app = get_n_apps(1)
for base, ax in zip(bases, axes):
    column = 'Avg ' + base
    # Draw on this panel's own axes: plt.bar() targets the current axes, so
    # both metrics were being drawn on the same panel and the other stayed empty.
    ax.bar(single_app[column].index, single_app[column].values)
    ax.set_title(column)

In [None]:
# Display every row whose combination contains exactly one application.
get_n_apps(1)

## 2 applications

In [None]:
# for base, ax in zip(bases, axes):
#     plt.bar(get_n_apps(1)['Avg '+base].index, get_n_apps(1)['Avg '+base].values)
# Two-application rows, restricted to the per-application list columns.
df_ = get_n_apps(2)[['Curves', 'FNRs', 'Rel Acc Losss']]

from collections import defaultdict
# kk[curve][tuple of curve ids] -> FNR of `curve` in that combination;
# filled in by f() below.
kk = defaultdict(dict)
def f(r):
    """Record each application's FNR from row r into the module-level kk.

    For every (curve, FNR) pair in the row, the FNR is stored at
    kk[curve][key], where key is the row's tuple of curve ids, reversed when
    `curve` is not its first element (so that for a pair the key starts with
    `curve` itself).

    Args:
        r: a row providing 'Curves' (curve ids convertible with int()) and
            'FNRs' (one value per curve).

    Returns:
        None; kk is updated in place.
    """
    # Build a real list once per row: on Python 3 the result of map() can be
    # neither indexed nor passed to reversed().
    c_idx = [int(c) for c in r['Curves']]
    for curve, fnr in zip(r['Curves'], r['FNRs']):
        curve = int(curve)
        if curve == c_idx[0]:
            key = tuple(c_idx)
        else:
            key = tuple(reversed(c_idx))
        kk[curve][key] = fnr
# Fill kk as a side effect of visiting every two-application row.
df_.apply(f, axis=1)

# One panel per curve. squeeze=False plus ravel() keeps `axes` a 1-D array even
# when there is only a single panel.
fig, axes = plt.subplots(1, len(kk), sharey=True, sharex=True, squeeze=False)
axes = axes.ravel()

# Panels are assigned in sorted curve order rather than indexed by curve id, so
# non-contiguous ids cannot run past the end of `axes`.
for ax, k in zip(axes, sorted(kk)):
    pts = sorted(kk[k].items())
    xs, ys = zip(*pts)
    # The second element of each key is the id of the partner curve.
    xs = [x[1] for x in xs]
    ax.bar(xs, ys)
    ax.set_title(curve_titles[k])
    ax.axhline(baselines_fnr[k], color='r')
    # Label the partner curves on every panel. The tick labels previously read
    # `xs` before this cell assigned it (i.e. a value left over from another
    # cell) and were applied to the current axes only.
    ax.set_xticks(range(len(curve_titles)))
    ax.set_xticklabels(curve_titles, rotation='vertical')
axes[0].set_ylabel("FNR");


Red line = baseline FNR of each panel's curve (taken from its single-application `mean=<curve>` row).

## 3 applications

In [None]:
# Per-application curves, FNRs and relative accuracy losses for every
# three-application combination.
get_n_apps(3)[['Curves', 'FNRs', 'Rel Acc Losss']]