# Evaluation


In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import warnings
# Silence every warning for the rest of the session. Note that this also
# hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

# Default size ([width, height]) for figures created after this point.
plt.rcParams['figure.figsize'] = [10, 5]

# Continual Learning Metrics

In [2]:
# Because of a mistake in my implementation, the ["no_of_test"] column cannot be used.
# It can be recovered as ["no_of_correct_prediction"] / ["accuracy"], rounded up,
# but not for rows where ["accuracy"] == 0:
#
#     (raw["no_of_correct_prediction"] / raw["accuracy"]).apply(np.ceil)
#
# The mistake has been fixed now, but the data have not been updated yet.

def calculateContinualMetircs(raw):
    """Compute the continual-learning metrics M_base, M_new and M_now.

    Every accuracy is divided by the accuracy of the ``"offline"`` method on
    the same task within the same task order, then averaged:

    * M_base -- over all rows of the method with ``task_index == 1``.
    * M_new  -- over training sessions, using the row whose ``task_index``
      equals the session number.
    * M_now  -- over tasks, using the rows of the session whose number equals
      the number of distinct tasks of the method.

    Parameters
    ----------
    raw : pandas.DataFrame
        Must contain the columns ``"task_order"``, ``"method"``,
        ``"task_index"``, ``"train_session"`` and ``"accuracy"``. The
        ``"train_session"`` values of non-offline rows must be parseable as
        integers (surrounding whitespace is ignored).

    Returns
    -------
    tuple of dict
        ``(all_MBase, all_Mnew, all_Mnow)``. Each dict maps every method name
        found in ``raw`` to a list holding one value per task order. The
        ``"offline"`` key is present but its list stays empty, because offline
        is the reference and is never scored against itself.

    Raises
    ------
    ValueError
        If a lookup that must match exactly one row (offline reference, or the
        method's row for a given session/task) matches zero or several rows.
    """
    def single_accuracy(frame, what):
        # Exactly one row must match; anything else means the results file is
        # incomplete or contains duplicates.
        if len(frame) != 1:
            raise ValueError(
                "expected exactly one row for {0}, found {1}".format(what, len(frame))
            )
        return float(frame["accuracy"].iloc[0])

    def ideal_accuracy(offline, task, order):
        return single_accuracy(
            offline[offline["task_index"] == task],
            "offline accuracy of task {0} in task order {1}".format(task, order),
        )

    task_order = raw["task_order"].unique()
    method = raw["method"].unique()
    print(task_order, method)

    all_MBase = {k: [] for k in method}
    all_Mnew = {k: [] for k in method}
    all_Mnow = {k: [] for k in method}

    for order in task_order:
        rows = raw[raw["task_order"] == order]
        offline = rows[rows["method"] == "offline"]

        for m in method:
            if m == "offline":
                continue

            target = rows[rows["method"] == m]
            # Normalised session labels, so the lookups below do not depend on
            # whether the column was parsed as text or as numbers.
            sessions = target["train_session"].astype(str).str.strip()

            # M_base: accuracy on the first task, averaged over its rows.
            first_task = target[target["task_index"] == 1]["accuracy"]
            ratio_sum = (first_task / ideal_accuracy(offline, 1, order)).sum()
            all_MBase[m].append(float(ratio_sum / len(first_task)))

            # M_new: accuracy on the task matching each session number.
            session_labels = sessions.unique()
            total = 0.0
            for label in session_labels:
                task = int(label)
                match = target[(sessions == label) & (target["task_index"] == task)]
                what = "method {0!r}, session {1}, task {2}".format(m, label, task)
                total += single_accuracy(match, what) / ideal_accuracy(offline, task, order)
            all_Mnew[m].append(total / len(session_labels))

            # M_now: accuracy on every task in the session numbered like the
            # task count.
            task_index = target["task_index"].unique()
            final = target[sessions == str(len(task_index))]
            total = 0.0
            for task in task_index:
                task = int(task)
                match = final[final["task_index"] == task]
                what = "method {0!r}, final session, task {1}".format(m, task)
                total += single_accuracy(match, what) / ideal_accuracy(offline, task, order)
            # The sum runs over tasks, so the mean must divide by the number
            # of tasks (the original divided by the number of sessions).
            all_Mnow[m].append(total / len(task_index))

    return all_MBase, all_Mnew, all_Mnow



In [3]:
from scipy import stats

def printCLMetrics(all_MBase, all_Mnew, all_Mnow):
    """Print mean and standard error of each continual-learning metric.

    Each argument maps a method name to a list of values. For every metric a
    header line is printed, followed by one ``name mean sem`` line per method
    (three decimals), a separator line and an empty line.
    """
    sections = (
        (all_MBase, "M base"),
        (all_Mnew, "M new"),
        (all_Mnow, "M now"),
    )

    for values_by_method, title in sections:
        print("Metric: ", title)
        for method_name in values_by_method:
            values = values_by_method[method_name]
            mean_value = np.mean(values)
            std_error = stats.sem(values)
            print("{0} {1:.3f} {2:.3f}".format(method_name, mean_value, std_error))
        print("=====================")
        print("")
    
    

In [9]:
# Continual-learning metrics for newsrc/result_iter1000-1000_h500-100_all/

folder = "newsrc/result_iter1000-1000_h500-100_all/"
raw = pd.read_csv(folder+"results.txt")
# Column names are stripped of surrounding whitespace so they can be used as keys.
raw.columns = [c.strip() for c in raw.columns]

# Not the last expression of the cell, so the notebook does not display it.
raw.head()
b, n, nw = calculateContinualMetircs(raw)

print("")
printCLMetrics(b, n, nw)

[0 1 2 3 4 5 6 7 8 9] ['offline' 'none' 'exact' 'mp-gan' 'mp-wgan' 'sg-cgan' 'sg-cwgan']

Metric:  M base
offline nan nan
none 0.247 0.025
exact 1.141 0.118
mp-gan 0.687 0.077
mp-wgan 0.701 0.108
sg-cgan 0.592 0.063
sg-cwgan 0.367 0.051

Metric:  M new
offline nan nan
none 1.258 0.073
exact 1.237 0.074
mp-gan 1.257 0.074
mp-wgan 1.256 0.074
sg-cgan 1.258 0.073
sg-cwgan 1.259 0.073

Metric:  M now
offline nan nan
none 0.234 0.018
exact 1.108 0.063
mp-gan 0.803 0.061
mp-wgan 0.823 0.078
sg-cgan 0.652 0.038
sg-cwgan 0.307 0.036



In [26]:
# Result from ../Results/run_offline_acc/

folder = "../Results/run_offline_acc/"
raw = pd.read_csv(folder+"results.txt")
# Column names are stripped of surrounding whitespace so they can be used as keys.
raw.columns = [c.strip() for c in raw.columns]

# Each line of task_orders.txt is a ";"-separated list of labels; the n-th line
# is matched against rows with task_order == n by offlineAccuracy below.
# The context manager closes the file once it has been read.
with open(folder+"task_orders.txt") as fto:
    task_orders = [line.strip().split(";") for line in fto]

def offlineAccuracy(raw, task_orders):
    """Print mean and standard error of accuracy for every label.

    Parameters
    ----------
    raw : pandas.DataFrame
        Must contain the columns ``"task_order"``, ``"task_index"`` and
        ``"accuracy"``. ``task_order`` is the position of the matching entry
        in ``task_orders``; ``task_index`` is 1-based.
    task_orders : list of list of str
        ``task_orders[i][j]`` is the label of task ``j + 1`` in task order
        ``i``.

    Prints one ``label mean sem`` line per label (three decimals) and returns
    nothing.
    """
    # Seed with the first order so labels are reported in that order.
    acc = {k: [] for k in task_orders[0]} if task_orders else {}

    for i, order in enumerate(task_orders):
        m = raw[raw["task_order"] == i]
        # Iterate the two columns directly: DataFrame.iterrows() builds one
        # Series per row and upcasts an all-numeric row to float, which makes
        # task_index unusable as a list index.
        for task_index, accuracy in zip(m["task_index"], m["accuracy"]):
            label = order[int(task_index) - 1]
            # setdefault covers labels that do not occur in the first order.
            acc.setdefault(label, []).append(accuracy)

    for label, values in acc.items():
        avg = np.mean(values)
        err = stats.sem(values)
        print("{0} {1:.3f} {2:.3f}".format(label, avg, err))

# Report the offline accuracy per label for the results loaded in this cell.
offlineAccuracy(raw, task_orders)

R2_work_at_computer 0.994 0.003
R2_sleep 0.984 0.000
R1_work_at_computer 1.000 0.000
R2_prepare_lunch 0.007 0.007
R2_bed_to_toilet 0.762 0.060
R1_work_at_dining_room_table 0.677 0.035
R2_watch_TV 0.898 0.045
R1_bed_to_toilet 0.943 0.057
R1_sleep 1.000 0.000
R2_prepare_dinner 0.986 0.008


In [10]:
# Continual-learning metrics for newsrc/result_iter5000-1000_h500-100_all/

folder = "newsrc/result_iter5000-1000_h500-100_all/"
raw = pd.read_csv(folder+"results.txt")
# Column names are stripped of surrounding whitespace so they can be used as keys.
raw.columns = [c.strip() for c in raw.columns]

# Not the last expression of the cell, so the notebook does not display it.
raw.head()
b, n, nw = calculateContinualMetircs(raw)

print("")
printCLMetrics(b, n, nw)


[0 1 2 3 4 5 6 7 8 9] ['offline' 'none' 'exact' 'mp-gan' 'mp-wgan' 'sg-cgan' 'sg-cwgan']

Metric:  M base
offline nan nan
none 0.300 0.046
exact 1.371 0.170
mp-gan 0.903 0.080
mp-wgan 0.924 0.094
sg-cgan 0.884 0.111
sg-cwgan 0.713 0.047

Metric:  M new
offline nan nan
none 1.222 0.032
exact 1.201 0.029
mp-gan 1.222 0.031
mp-wgan 1.222 0.032
sg-cgan 1.224 0.031
sg-cwgan 1.223 0.031

Metric:  M now
offline nan nan
none 0.265 0.028
exact 1.049 0.010
mp-gan 0.776 0.033
mp-wgan 0.801 0.034
sg-cgan 0.725 0.041
sg-cwgan 0.654 0.037



# GAN Metrics

In [2]:
# NOTE(review): presumably the number of parameters of each generator -- confirm
# where these figures come from; they are hard-coded here.
print("Model size")
# Bare expression: the notebook displays it as the value of the cell.
{'mp-gan': [1490061], 'mp-wgan': [1490061], 'sg-cgan': [151010], 'sg-cwgan': [151010]}

Model size


{'mp-gan': [1490061],
 'mp-wgan': [1490061],
 'sg-cgan': [151010],
 'sg-cwgan': [151010]}

In [8]:
from scipy import stats

def calculateGANMetircs(raw_gan, raw_solver):
    """Collect the generator-quality metrics per method and task order.

    Parameters
    ----------
    raw_gan : pandas.DataFrame
        One row per (task_order, method) with the columns ``"task_order"``,
        ``"method"``, ``"is"``, ``"mmd"``, ``"knn_tp"``, ``"knn_tn"``,
        ``"knn_fp"``, ``"knn_fn"`` and ``"offline_acc_fake"``.
    raw_solver : pandas.DataFrame
        Must contain ``"task_order"``, ``"method"`` and
        ``"generator_training_time"``; the latter is converted with
        ``pandas.to_numeric`` and summed per (task_order, method).

    Returns
    -------
    tuple of dict
        ``(is_score, mmd_score, knn_acc, knn_TPR, knn_TNR, offline_acc,
        training_time)``. Each dict maps a method name to a list with one
        value per task order.

    Raises
    ------
    ValueError
        If ``raw_gan`` does not hold exactly one row for a (task_order,
        method) pair.
    ZeroDivisionError
        If a 1-NN confusion-matrix denominator is zero.
    """
    def scalar(frame, column):
        # float(Series) is deprecated in recent pandas; take the single value
        # explicitly (the caller has already checked there is exactly one row).
        return float(frame[column].iloc[0])

    task_order = raw_gan["task_order"].unique()
    method = raw_gan["method"].unique()
    print(task_order, method)

    is_score = {k: [] for k in method}
    mmd_score = {k: [] for k in method}
    knn_acc = {k: [] for k in method}
    knn_TPR = {k: [] for k in method}
    knn_TNR = {k: [] for k in method}
    offline_acc = {k: [] for k in method}
    training_time = {k: [] for k in method}

    for t in task_order:
        rows = raw_gan[raw_gan["task_order"] == t]
        # Hoisted out of the method loop: it only depends on the task order.
        solver_rows = raw_solver[raw_solver["task_order"] == t]

        for m in method:
            score_row = rows[rows["method"] == m]
            if len(score_row) != 1:
                raise ValueError(
                    "expected exactly one GAN score row for task order {0}, "
                    "method {1!r}, found {2}".format(t, m, len(score_row))
                )

            times = solver_rows[solver_rows["method"] == m]["generator_training_time"]
            total_time = pd.to_numeric(times).sum()

            tp = scalar(score_row, "knn_tp")
            tn = scalar(score_row, "knn_tn")
            fp = scalar(score_row, "knn_fp")
            fn = scalar(score_row, "knn_fn")

            is_score[m].append(scalar(score_row, "is"))
            mmd_score[m].append(scalar(score_row, "mmd"))
            knn_acc[m].append((tp + tn) / (tp + tn + fp + fn))
            knn_TPR[m].append(tp / (tp + fn))
            knn_TNR[m].append(tn / (tn + fp))

            offline_acc[m].append(scalar(score_row, "offline_acc_fake"))
            training_time[m].append(total_time)

    return is_score, mmd_score, knn_acc, knn_TPR, knn_TNR, offline_acc, training_time


def printGANMetrics(metrics):
    """Print mean and standard error of every GAN metric, grouped by method.

    ``metrics`` is a sequence of seven dicts in the order of ``names`` below
    (the order returned by ``calculateGANMetircs``). Each dict maps a method
    name to a list of values. For every method the method name is printed,
    then one ``metric-name mean sem`` line per metric (three decimals), then a
    separator line.

    NOTE: a later cell in this file redefines ``printGANMetrics`` with the
    same behaviour.
    """
    names = ["IS Score", "MMD", "1-NN Acc", "1-NN TPR", "1-NN TNR", "Offline Acc", "Training Time"]

    # The methods are taken from the first metric; all dicts share these keys.
    for m in metrics[0]:
        print(m)
        for i, n in enumerate(names):
            values = metrics[i][m]
            avg = np.mean(values)
            err = stats.sem(values)
            print("{0} {1:.3f} {2:.3f}".format(n, avg, err))
        print("===================")


In [14]:
# GAN metrics for newsrc/result_iter1000-1000_h500-100_all/
folder = "newsrc/result_iter1000-1000_h500-100_all/"
raw_gan = pd.read_csv(folder+"gan_score.txt")
# Column names are stripped of surrounding whitespace so they can be used as keys.
raw_gan.columns = [c.strip() for c in raw_gan.columns]

# The solver results are the source of the generator training times.
raw_solver = pd.read_csv(folder+"results.txt")
raw_solver.columns = [c.strip() for c in raw_solver.columns]

m = calculateGANMetircs(raw_gan, raw_solver)

def printGANMetrics(metrics):
    """Print, per method, the mean and standard error of each GAN metric.

    ``metrics`` is indexed by position, in the order of ``names`` below; each
    entry maps a method name to a list of values. The method names are taken
    from the first entry. Output per method: the method name, one
    ``metric-name mean sem`` line per metric (three decimals), a separator.
    """
    names = ["IS Score", "MMD", "1-NN Acc", "1-NN TPR", "1-NN TNR", "Offline Acc", "Training Time"]

    for method_name in metrics[0]:
        print(method_name)
        for position in range(len(names)):
            samples = metrics[position][method_name]
            mean_value = np.mean(samples)
            std_error = stats.sem(samples)
            print("{0} {1:.3f} {2:.3f}".format(names[position], mean_value, std_error))
        print("===================")
        

# m is the 7-tuple returned by calculateGANMetircs above.
printGANMetrics(m)

[0 1 2 3 4 5 6 7 8 9] ['mp-gan' 'mp-wgan' 'sg-cgan' 'sg-cwgan']
mp-gan
IS Score 9.846 0.096
MMD 0.109 0.005
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.891 0.018
Training Time 191.816 2.281
mp-wgan
IS Score 9.634 0.206
MMD 0.108 0.006
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.882 0.024
Training Time 400.615 3.998
sg-cgan
IS Score 6.769 0.337
MMD 0.266 0.010
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.697 0.047
Training Time 234.455 1.710
sg-cwgan
IS Score 1.819 0.293
MMD 0.955 0.103
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.256 0.119
Training Time 372.805 3.642


In [15]:
# GAN metrics for newsrc/result_iter5000-1000_h500-100_all/
folder = "newsrc/result_iter5000-1000_h500-100_all/"
raw_gan = pd.read_csv(folder+"gan_score.txt")
# Column names are stripped of surrounding whitespace so they can be used as keys.
raw_gan.columns = [c.strip() for c in raw_gan.columns]

# The solver results are the source of the generator training times.
raw_solver = pd.read_csv(folder+"results.txt")
raw_solver.columns = [c.strip() for c in raw_solver.columns]

m = calculateGANMetircs(raw_gan, raw_solver)
printGANMetrics(m)

[0 1 2 3 4 5 6 7 8 9] ['mp-gan' 'mp-wgan' 'sg-cgan' 'sg-cwgan']
mp-gan
IS Score 9.947 0.022
MMD 0.082 0.005
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.860 0.016
Training Time 1137.738 6.542
mp-wgan
IS Score 9.869 0.101
MMD 0.050 0.004
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.866 0.010
Training Time 3343.850 12.237
sg-cgan
IS Score 9.469 0.267
MMD 0.151 0.012
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.875 0.021
Training Time 1434.469 1.498
sg-cwgan
IS Score 7.892 0.438
MMD 0.253 0.010
1-NN Acc 1.000 0.000
1-NN TPR 1.000 0.000
1-NN TNR 1.000 0.000
Offline Acc 0.831 0.042
Training Time 3081.804 14.606
