# **Default Setting**

In [1]:
# TensorFlow is used below to load the saved Keras models and to compute embedding distances.
import tensorflow as tf
tf.__version__  # display the installed TensorFlow version

'2.2.0'

In [2]:
# Mount Google Drive at /content/drive; every dataset, model and result path below lives under it.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# **Load Dataset and Model**

In [3]:
import os
import numpy as np

# Load the Korean and English archives from Drive and print the shape of every array they hold.
base_path = "/content/drive/My Drive/Speaker Recognition"
dataset_dir = os.path.join(base_path, "dataset")

kor_dataset = np.load(os.path.join(dataset_dir, "kor_dataset.npz"))
eng_dataset = np.load(os.path.join(dataset_dir, "eng_dataset.npz"))

# the three input lengths stored in each archive under the key "data_<length>"
len_short, len_mid, len_long = 3 ** 8, 2 * (3 ** 8), 3 ** 9

print("kor_dataset[\"data_{}\"].shape:\t\t{}".format(len_short, kor_dataset["data_{}".format(len_short)].shape))
print("kor_dataset[\"data_{}\"].shape:\t{}".format(len_mid, kor_dataset["data_{}".format(len_mid)].shape))
print("kor_dataset[\"data_{}\"].shape:\t{}".format(len_long, kor_dataset["data_{}".format(len_long)].shape))
print("kor_dataset[\"labels\"].shape:\t\t{}".format(kor_dataset["labels"].shape))

print("eng_dataset[\"data_{}\"].shape:\t\t{}".format(len_short, eng_dataset["data_{}".format(len_short)].shape))
print("eng_dataset[\"data_{}\"].shape:\t{}".format(len_mid, eng_dataset["data_{}".format(len_mid)].shape))
print("eng_dataset[\"data_{}\"].shape:\t{}".format(len_long, eng_dataset["data_{}".format(len_long)].shape))
print("eng_dataset[\"labels\"].shape:\t\t{}".format(eng_dataset["labels"].shape))

kor_dataset["data_6561"].shape:		(50, 100, 6561)
kor_dataset["data_13122"].shape:	(50, 100, 13122)
kor_dataset["data_19683"].shape:	(50, 100, 19683)
kor_dataset["labels"].shape:		(50, 100)
eng_dataset["data_6561"].shape:		(50, 100, 6561)
eng_dataset["data_13122"].shape:	(50, 100, 13122)
eng_dataset["data_19683"].shape:	(50, 100, 19683)
eng_dataset["labels"].shape:		(50, 100)


In [4]:
# Load the saved per-run index selections; later cells read them as
# random_states[i, ks, repeat, 0] (Korean) and random_states[i, ks, repeat, 1] (English).
# NOTE(review): allow_pickle = True unpickles arbitrary Python objects, which can execute code —
# only load this file from a trusted location.
random_states = np.load(os.path.join(base_path, "models", "random_states.npy"), allow_pickle = True)

print("random_states.shape: {}".format(random_states.shape))

random_states.shape: (3, 6, 10, 2)


# **Evaluation**

*   True Positive Rate (TPR)
*   False Positive Rate (FPR)
*   Receiver Operating Characteristic (ROC) Curve
*   Area Under the Curve (AUC)
*   Equal Error Rate (EER)

In [0]:
def calculate_centroid(data, labels, output_dims = 128):
    """Average the rows of `data` separately for every class label.

    Args:
        data: 2-D array with one feature row per sample.
        labels: 1-D array with one class label per row of `data`.
        output_dims: width of the returned centroid rows.

    Returns:
        Array of shape (number of unique labels, output_dims); row int(c) holds
        the mean of all rows of `data` labelled c. Labels are used directly as
        row indices.
    """
    classes = np.unique(labels) # sorted ascending
    centroids = np.zeros((len(classes), output_dims))

    for cls in classes:
        member_rows = np.where(labels == cls)[0]
        centroids[int(cls)] = np.average(data[member_rows], axis = 0)

    return centroids

In [0]:
def split_dataset(kor_data, kor_labels, eng_data, eng_laels, data_length, 
                  kor_shots_labels, eng_shots_labels, way = 5, shot = 5,  max_shot = 20):
    """Build the train (support) and test sets for one few-shot episode.

    The first `way` speakers of each language are used. Training rows are the
    utterances selected by `kor_shots_labels` / `eng_shots_labels`; test rows are
    every utterance from index `max_shot` onwards. In both outputs the Korean rows
    come first, followed by the English rows.

    Args:
        kor_data, eng_data: arrays indexed as [speaker, utterance, sample].
        kor_labels, eng_laels: arrays indexed as [speaker, utterance]. The
            misspelled name `eng_laels` is kept so existing callers keep working.
        data_length: number of samples per utterance (last axis of the data).
        kor_shots_labels, eng_shots_labels: utterance indices used for training.
        way: number of speakers taken from each language.
        shot: unused; kept for interface compatibility.
        max_shot: first utterance index that belongs to the test split.

    Returns:
        (train_data, train_labels, test_data, test_labels)
    """
    # Previously the body read the globals `eng_labels` and `ways`, ignoring the
    # arguments that were actually passed in; use the arguments instead.
    eng_labels = eng_laels

    def _stack(kor_part, eng_part, shape):
        # flatten each language's selection and put the Korean rows first
        return np.concatenate(
            (np.reshape(kor_part, shape), np.reshape(eng_part, shape)), axis = 0)

    train_data = _stack(
        kor_data[:way, kor_shots_labels, :], eng_data[:way, eng_shots_labels, :], (-1, data_length))
    train_labels = _stack(
        kor_labels[:way, kor_shots_labels], eng_labels[:way, eng_shots_labels], (-1))

    test_data = _stack(
        kor_data[:way, max_shot:, :], eng_data[:way, max_shot:, :], (-1, data_length))
    test_labels = _stack(
        kor_labels[:way, max_shot:], eng_labels[:way, max_shot:], (-1))

    return train_data, train_labels, test_data, test_labels

In [0]:
import itertools

def calculate_distance(centroids, test_pred, test_true):
    """Collect Euclidean distances between class centroids and test embeddings.

    For every label l present in `test_true`, the distance from centroids[int(l)]
    to each test embedding is computed. Distances to embeddings whose true label
    is l are "positive"; distances to all other embeddings are "negative".

    Args:
        centroids: array with one centroid row per class, indexed by int(label).
        test_pred: 2-D array with one embedding row per test sample.
        test_true: 1-D array with the true label of every row of `test_pred`.

    Returns:
        (positive_condition_distance, negative_condition_distance) as flat float
        arrays, concatenated in ascending label order.
    """
    test_pred = np.asarray(test_pred)
    test_true = np.asarray(test_true)

    # Seed both lists with an empty float array so the result is always a flat
    # float array, even when a partition is empty.
    positive_parts = [np.zeros((0), dtype = float)]
    negative_parts = [np.zeros((0), dtype = float)]

    for l in np.unique(test_true):
        is_same = test_true == l
        centroid = centroids[int(l)]

        # Broadcasting the centroid against all rows replaces the per-row Python
        # list + tf.norm round trip. An empty selection yields an empty result
        # rather than a stray scalar.
        positive_parts.append(np.linalg.norm(centroid - test_pred[is_same], axis = -1))
        negative_parts.append(np.linalg.norm(centroid - test_pred[~is_same], axis = -1))

    positive_condition_distance = np.concatenate(positive_parts, axis = None)
    negative_condition_distance = np.concatenate(negative_parts, axis = None)

    return positive_condition_distance, negative_condition_distance

In [0]:
def calculate_tpr_fpr(positive_condition_distance, negative_condition_distance):
    """Sweep a distance threshold and return the resulting TPR and FPR curves.

    10 ** 4 thresholds are spaced evenly between the smallest and largest of all
    supplied distances. At each threshold a distance counts as accepted when it
    is <= the threshold.

    Returns:
        (true_positive_rate, false_positive_rate): two float arrays of length
        10 ** 4, ordered by increasing threshold.
    """
    all_distances = np.concatenate(
        [positive_condition_distance, negative_condition_distance], axis = None)
    thresholds = np.linspace(np.min(all_distances), np.max(all_distances), 10 ** 4)

    # accepted positives / negatives at every threshold
    true_positive = np.array(
        [np.count_nonzero(positive_condition_distance <= t) for t in thresholds], dtype = float)
    false_positive = np.array(
        [np.count_nonzero(negative_condition_distance <= t) for t in thresholds], dtype = float)

    return (true_positive / len(positive_condition_distance),
            false_positive / len(negative_condition_distance))

In [0]:
def calculate_eer_thr(tpr, fpr):
    """Estimate the equal error rate from TPR / FPR curves.

    The operating point is the index where the false negative rate (1 - tpr) and
    the false positive rate are closest; NaN entries are ignored when searching.

    Returns:
        (eer, thr): the mean of FPR and FNR at that index, and the index itself.
    """
    miss_rate = 1 - tpr
    gap = np.absolute(miss_rate - fpr)
    best = np.nanargmin(gap)
    return (fpr[best] + miss_rate[best]) / 2, best

In [0]:
%%time
import os
import tensorflow_addons as tfa
from sklearn.metrics import auc

ways = 5
shots = 5
max_shot = 20
repeats = 10
output_dims = 128

eers = np.zeros((3, 6, 10, 2, 3)) # save eers
aucs = np.zeros((3, 6, 10, 2, 3)) # save aucs

for i, data_length in enumerate([3**8, 2 * (3**8), 3**9]):
    kor_data = kor_dataset["data_{}".format(data_length)]
    kor_labels = kor_dataset["labels"]

    eng_data = eng_dataset["data_{}".format(data_length)]
    eng_labels = eng_dataset["labels"]

    for ks in range(ways + 1):
        for repeat in range(repeats):
            print("{}\t{}\t{}".format(data_length, ks, repeat))

            # load random states
            kor_shots_labels = random_states[i, ks, repeat, 0]
            eng_shots_labels = random_states[i, ks, repeat, 1]

            # load train dataset
            train_data, train_labels, test_data, test_labels = split_dataset(
                kor_data, kor_labels, eng_data, eng_labels, data_length, kor_shots_labels, eng_shots_labels)
            
            # load model
            model_NN1_name = "{}_{}_{}_K{}E{}_{}_{}.h5".format(ways, shots, data_length, ks, shots - ks, "NN1", repeat)
            model_NN1 = tf.keras.models.load_model(os.path.join(base_path, "models", model_NN1_name))
            model_NN1 = tf.keras.models.Model(
                inputs = model_NN1.input, outputs = model_NN1.layers[-2].output) # remove outputs

            model_NN2_name = "{}_{}_{}_K{}E{}_{}_{}.h5".format(ways, shots, data_length, ks, shots - ks, "NN2", repeat)
            model_NN2 = tf.keras.models.load_model(os.path.join(base_path, "models", model_NN2_name))

            # calculate centroid
            NN1_centroid_features = calculate_centroid(model_NN1.predict(train_data), train_labels)
            NN2_centroid_features = calculate_centroid(model_NN2.predict(train_data), train_labels)

            # calculate distances
            NN1_KorOnly_positive_condition_distance, NN1_KorOnly_negative_condition_distance = calculate_distance(
                NN1_centroid_features, model_NN1.predict(test_data[:ways * (100 - max_shot)]), test_labels[:ways * (100 - max_shot)])
            NN1_EngOnly_positive_condition_distance, NN1_EngOnly_negative_condition_distance = calculate_distance(
                NN1_centroid_features, model_NN1.predict(test_data[ways * (100 - max_shot):]), test_labels[ways * (100 - max_shot):])
            NN1_Mixed_positive_condition_distance, NN1_Mixed_negative_condition_distance = calculate_distance(
                NN1_centroid_features, model_NN1.predict(test_data), test_labels)

            NN2_KorOnly_positive_condition_distance, NN2_KorOnly_negative_condition_distance = calculate_distance(
                NN2_centroid_features, model_NN2.predict(test_data[:ways * (100 - max_shot)]), test_labels[:ways * (100 - max_shot)])
            NN2_EngOnly_positive_condition_distance, NN2_EngOnly_negative_condition_distance = calculate_distance(
                NN2_centroid_features, model_NN2.predict(test_data[ways * (100 - max_shot):]), test_labels[ways * (100 - max_shot):])
            NN2_Mixed_positive_condition_distance, NN2_Mixed_negative_condition_distance = calculate_distance(
                NN2_centroid_features, model_NN2.predict(test_data), test_labels)
            
            # calculate tpr (true positive rate), fpr (false positive rate)
            NN1_KorOnly_tpr, NN1_KorOnly_fpr = calculate_tpr_fpr(
                NN1_KorOnly_positive_condition_distance, NN1_KorOnly_negative_condition_distance)
            NN1_EngOnly_tpr, NN1_EngOnly_fpr = calculate_tpr_fpr(
                NN1_EngOnly_positive_condition_distance, NN1_EngOnly_negative_condition_distance)
            NN1_Mixed_tpr, NN1_Mixed_fpr = calculate_tpr_fpr(
                NN1_Mixed_positive_condition_distance, NN1_Mixed_negative_condition_distance)
                                    
            NN2_KorOnly_tpr, NN2_KorOnly_fpr = calculate_tpr_fpr(
                NN2_KorOnly_positive_condition_distance, NN2_KorOnly_negative_condition_distance)
            NN2_EngOnly_tpr, NN2_EngOnly_fpr = calculate_tpr_fpr(
                NN2_EngOnly_positive_condition_distance, NN2_EngOnly_negative_condition_distance)
            NN2_Mixed_tpr, NN2_Mixed_fpr = calculate_tpr_fpr(
                NN2_Mixed_positive_condition_distance, NN2_Mixed_negative_condition_distance)

            # calculate eer (equal error rate)
            NN1_KorOnly_eer, _ = calculate_eer_thr(NN1_KorOnly_tpr, NN1_KorOnly_fpr)
            NN1_EngOnly_eer, _ = calculate_eer_thr(NN1_EngOnly_tpr, NN1_EngOnly_fpr)
            NN1_Mixed_eer, _ = calculate_eer_thr(NN1_Mixed_tpr, NN1_Mixed_fpr)

            NN2_KorOnly_eer, _ = calculate_eer_thr(NN2_KorOnly_tpr, NN2_KorOnly_fpr)
            NN2_EngOnly_eer, _ = calculate_eer_thr(NN2_EngOnly_tpr, NN2_EngOnly_fpr)
            NN2_Mixed_eer, _ = calculate_eer_thr(NN2_Mixed_tpr, NN2_Mixed_fpr)

            # save
            eers[i, ks, repeat, 0, 0] = NN1_KorOnly_eer
            eers[i, ks, repeat, 0, 1] = NN1_EngOnly_eer
            eers[i, ks, repeat, 0, 2] = NN1_Mixed_eer

            eers[i, ks, repeat, 1, 0] = NN2_KorOnly_eer
            eers[i, ks, repeat, 1, 1] = NN2_EngOnly_eer
            eers[i, ks, repeat, 1, 2] = NN2_Mixed_eer

            aucs[i, ks, repeat, 0, 0] = auc(NN1_KorOnly_fpr, NN1_KorOnly_tpr)
            aucs[i, ks, repeat, 0, 1] = auc(NN1_EngOnly_fpr, NN1_EngOnly_tpr)
            aucs[i, ks, repeat, 0, 2] = auc(NN1_Mixed_fpr, NN1_Mixed_tpr)

            aucs[i, ks, repeat, 1, 0] = auc(NN2_KorOnly_fpr, NN2_KorOnly_tpr)
            aucs[i, ks, repeat, 1, 1] = auc(NN2_EngOnly_fpr, NN2_EngOnly_tpr)
            aucs[i, ks, repeat, 1, 2] = auc(NN2_Mixed_fpr, NN2_Mixed_tpr)

6561	0	0
6561	0	1
6561	0	2
6561	0	3
6561	0	4
6561	0	5
6561	0	6
6561	0	7
6561	0	8
6561	0	9
6561	1	0
6561	1	1
6561	1	2
6561	1	3
6561	1	4
6561	1	5
6561	1	6
6561	1	7
6561	1	8
6561	1	9
6561	2	0
6561	2	1
6561	2	2
6561	2	3
6561	2	4
6561	2	5
6561	2	6
6561	2	7
6561	2	8
6561	2	9
6561	3	0
6561	3	1
6561	3	2
6561	3	3
6561	3	4
6561	3	5
6561	3	6
6561	3	7
6561	3	8
6561	3	9
6561	4	0
6561	4	1
6561	4	2
6561	4	3
6561	4	4
6561	4	5
6561	4	6
6561	4	7
6561	4	8
6561	4	9
6561	5	0
6561	5	1
6561	5	2
6561	5	3
6561	5	4
6561	5	5
6561	5	6
6561	5	7
6561	5	8
6561	5	9
13122	0	0
13122	0	1
13122	0	2
13122	0	3
13122	0	4
13122	0	5
13122	0	6
13122	0	7
13122	0	8
13122	0	9
13122	1	0
13122	1	1
13122	1	2
13122	1	3
13122	1	4
13122	1	5
13122	1	6
13122	1	7
13122	1	8
13122	1	9
13122	2	0
13122	2	1
13122	2	2
13122	2	3
13122	2	4
13122	2	5
13122	2	6
13122	2	7
13122	2	8
13122	2	9
13122	3	0
13122	3	1
13122	3	2
13122	3	3
13122	3	4
13122	3	5
13122	3	6
13122	3	7
13122	3	8
13122	3	9
13122	4	0
13122	4	1
13122	4	2
13122	4	3
13122	4	4
13122	4	5


In [0]:
# Persist the metric arrays so the visualization section can reload them without re-running the evaluation.
results_dir = os.path.join(base_path, "results")
os.makedirs(results_dir, exist_ok = True) # no check-then-create race; also fine if the folder already exists

np.save(os.path.join(results_dir, "eers.npy"), eers)
np.save(os.path.join(results_dir, "aucs.npy"), aucs)

In [0]:
# Average over the repeat axis, then report the best / worst mean EER and AUC per input length and model.
avg_eers = np.average(eers, axis = 2) # (3, 6, 2, 3)
avg_aucs = np.average(aucs, axis = 2) # (3, 6, 2, 3)

for i, data_length in enumerate([3**8, 2 * (3**8), 3**9]):
    # model axis: 0 -> NN1, 1 -> NN2
    nn1_eers, nn2_eers = avg_eers[i, :, 0, :], avg_eers[i, :, 1, :]
    nn1_aucs, nn2_aucs = avg_aucs[i, :, 0, :], avg_aucs[i, :, 1, :]

    print("data_length: {}".format(data_length))
    print("[NN1] min(eers): {:.4f}, max(eers): {:.4f}".format(nn1_eers.min(), nn1_eers.max()))
    print("[NN2] min(eers): {:.4f}, max(eers): {:.4f}".format(nn2_eers.min(), nn2_eers.max()))

    print("[NN1] min(aucs): {:.4f}, max(aucs): {:.4f}".format(nn1_aucs.min(), nn1_aucs.max()))
    print("[NN2] min(aucs): {:.4f}, max(aucs): {:.4f}".format(nn2_aucs.min(), nn2_aucs.max()))

    print()

data_length: 6561
[NN1] min(eers): 0.3440, max(eers): 0.4788
[NN2] min(eers): 0.2740, max(eers): 0.3351
[NN1] min(aucs): 0.5471, max(aucs): 0.7226
[NN2] min(aucs): 0.7268, max(aucs): 0.7945

data_length: 13122
[NN1] min(eers): 0.2884, max(eers): 0.4643
[NN2] min(eers): 0.2314, max(eers): 0.3164
[NN1] min(aucs): 0.5649, max(aucs): 0.8001
[NN2] min(aucs): 0.7657, max(aucs): 0.8414

data_length: 19683
[NN1] min(eers): 0.2541, max(eers): 0.4289
[NN2] min(eers): 0.2023, max(eers): 0.2931
[NN1] min(aucs): 0.6094, max(aucs): 0.8321
[NN2] min(aucs): 0.7976, max(aucs): 0.8775



In [0]:
# NN1: mean +- std over the repeats, grouped by input length (i) and test subset (j), one row per Korean-shot count (k)
for i in range(3):
    for j in range(3):
        for k in range(6):
            auc_runs = aucs[i, k, :, 0, j]
            eer_runs = eers[i, k, :, 0, j]
            print("[NN1]\tAUC: {:<.4f}+-{:.4f}\tEER: {:.4f}+-{:.4f}".format(
                np.average(auc_runs), np.std(auc_runs),
                np.average(eer_runs), np.std(eer_runs)))
        print()

[NN1]	AUC: 0.5471+-0.0360	EER: 0.4788+-0.0322
[NN1]	AUC: 0.5997+-0.0539	EER: 0.4417+-0.0444
[NN1]	AUC: 0.6418+-0.0537	EER: 0.4049+-0.0417
[NN1]	AUC: 0.6762+-0.0579	EER: 0.3832+-0.0484
[NN1]	AUC: 0.7206+-0.0365	EER: 0.3440+-0.0287
[NN1]	AUC: 0.7226+-0.0284	EER: 0.3443+-0.0257

[NN1]	AUC: 0.6163+-0.0295	EER: 0.4252+-0.0239
[NN1]	AUC: 0.6391+-0.0434	EER: 0.4081+-0.0305
[NN1]	AUC: 0.6682+-0.0322	EER: 0.3879+-0.0272
[NN1]	AUC: 0.6771+-0.0352	EER: 0.3798+-0.0275
[NN1]	AUC: 0.6906+-0.0307	EER: 0.3733+-0.0264
[NN1]	AUC: 0.6814+-0.0163	EER: 0.3832+-0.0177

[NN1]	AUC: 0.5815+-0.0317	EER: 0.4521+-0.0275
[NN1]	AUC: 0.6193+-0.0475	EER: 0.4248+-0.0373
[NN1]	AUC: 0.6549+-0.0411	EER: 0.3957+-0.0337
[NN1]	AUC: 0.6766+-0.0456	EER: 0.3813+-0.0360
[NN1]	AUC: 0.7057+-0.0322	EER: 0.3581+-0.0258
[NN1]	AUC: 0.7021+-0.0182	EER: 0.3644+-0.0152

[NN1]	AUC: 0.5649+-0.0287	EER: 0.4643+-0.0247
[NN1]	AUC: 0.6465+-0.0254	EER: 0.4044+-0.0262
[NN1]	AUC: 0.6969+-0.0376	EER: 0.3707+-0.0279
[NN1]	AUC: 0.7556+-0.0322	EER: 

In [0]:
# NN2: mean +- std over the repeats, grouped by input length (i) and test subset (j), one row per Korean-shot count (k)
for i in range(3):
    for j in range(3):
        for k in range(6):
            auc_runs = aucs[i, k, :, 1, j]
            eer_runs = eers[i, k, :, 1, j]
            print("[NN2]\tAUC: {:<.4f}+-{:.4f}\tEER: {:.4f}+-{:.4f}".format(
                np.average(auc_runs), np.std(auc_runs),
                np.average(eer_runs), np.std(eer_runs)))
        print()

[NN2]	AUC: 0.7268+-0.0490	EER: 0.3351+-0.0424
[NN2]	AUC: 0.7490+-0.0645	EER: 0.3139+-0.0508
[NN2]	AUC: 0.7366+-0.0419	EER: 0.3322+-0.0346
[NN2]	AUC: 0.7818+-0.0395	EER: 0.2921+-0.0362
[NN2]	AUC: 0.7945+-0.0351	EER: 0.2740+-0.0322
[NN2]	AUC: 0.7704+-0.0390	EER: 0.3101+-0.0385

[NN2]	AUC: 0.7506+-0.0414	EER: 0.3127+-0.0416
[NN2]	AUC: 0.7460+-0.0549	EER: 0.3162+-0.0468
[NN2]	AUC: 0.7348+-0.0305	EER: 0.3282+-0.0180
[NN2]	AUC: 0.7591+-0.0310	EER: 0.3125+-0.0269
[NN2]	AUC: 0.7582+-0.0274	EER: 0.2997+-0.0236
[NN2]	AUC: 0.7354+-0.0531	EER: 0.3324+-0.0453

[NN2]	AUC: 0.7390+-0.0447	EER: 0.3236+-0.0413
[NN2]	AUC: 0.7475+-0.0584	EER: 0.3140+-0.0484
[NN2]	AUC: 0.7355+-0.0348	EER: 0.3311+-0.0243
[NN2]	AUC: 0.7701+-0.0343	EER: 0.3014+-0.0311
[NN2]	AUC: 0.7756+-0.0291	EER: 0.2861+-0.0254
[NN2]	AUC: 0.7525+-0.0436	EER: 0.3209+-0.0399

[NN2]	AUC: 0.7657+-0.0442	EER: 0.3164+-0.0389
[NN2]	AUC: 0.7789+-0.0433	EER: 0.2942+-0.0435
[NN2]	AUC: 0.8308+-0.0606	EER: 0.2528+-0.0614
[NN2]	AUC: 0.8374+-0.0363	EER: 

# **Visualization**

## **Load Results(EER, AUC)**

In [0]:
import os
import numpy as np

# Reload the saved metric arrays from the results folder on Drive.
base_path = "/content/drive/My Drive/Speaker Recognition"
results_dir = os.path.join(base_path, "results")

eers = np.load(os.path.join(results_dir, "eers.npy"))
aucs = np.load(os.path.join(results_dir, "aucs.npy"))

In [8]:
# Sanity check: both arrays are indexed as (input length, Korean shots, repeat, model, test subset).
print("eers.shape: {}".format(eers.shape))
print("aucs.shape: {}".format(aucs.shape))

eers.shape: (3, 6, 10, 2, 3)
aucs.shape: (3, 6, 10, 2, 3)


## **Results Visualization**

In [0]:
from bokeh.plotting import output_notebook, figure, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral3
from bokeh.models.ranges import FactorRange
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot

output_notebook()

In [0]:
# Bar charts of the mean EER (averaged over repeats): one figure per model and input length.
test_types = ["Korean Only", "English Only", "Concatenated"]
kor_eng_shots = ["K0E5", "K1E4", "K2E3", "K3E2", "K4E1", "K5E0"]

# nested categorical x axis: outer group = test type, inner factor = shot split
x = [(test_type, kor_eng_shot) for test_type in test_types for kor_eng_shot in kor_eng_shots]

gp = []

for k in range(2): # k = 0 -> NN1, k = 1 -> NN2
    for i, data_length in enumerate([3**8, 2 * (3**8), 3**9]):
        # one bar height per entry of x, in the same (test type, shot split) order
        counts = [np.average(eers[i, s, :, k, j])
                  for j in range(len(test_types)) for s in range(len(kor_eng_shots))]

        source = ColumnDataSource(data = dict(x = x, counts = counts))

        # width / height (as already used by draw_figure) instead of plot_width / plot_height,
        # which newer Bokeh releases no longer accept
        p = figure(
            x_range = FactorRange(*x), height = 250, width = 600,
            title = "Equal Error Rates (EER) of NN{} (input samples = {})".format(k + 1, data_length))

        # colour by the outer factor (test type) only: end = 1 keeps the first level of each x tuple
        index_cmap = factor_cmap("x", palette = ["firebrick", "olive", "navy"], factors = sorted(test_types), end = 1)
        p.vbar(x = "x", top = "counts", width = 0.9, source = source, fill_color = index_cmap, alpha = .5)

        p.y_range.start = 0.15
        p.y_range.end = 0.5
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = .5
        p.xgrid.grid_line_color = None

        p.title.text_font_size = "13pt"
        p.xaxis.major_label_text_font_size = "8pt"
        p.xaxis.group_text_font_size = "12pt"

        gp.append(p)

# gp holds the three NN1 figures followed by the three NN2 figures, laid out two per row
grid = gridplot([[gp[0], gp[1]], [gp[2], gp[3]], [gp[4], gp[5]]])
show(grid)

In [0]:
# Bar charts of the mean AUC (averaged over repeats): one figure per model and input length.
test_types = ["Korean Only", "English Only", "Concatenated"]
kor_eng_shots = ["K0E5", "K1E4", "K2E3", "K3E2", "K4E1", "K5E0"]

# nested categorical x axis: outer group = test type, inner factor = shot split
x = [(test_type, kor_eng_shot) for test_type in test_types for kor_eng_shot in kor_eng_shots]

gp = []

for k in range(2): # k = 0 -> NN1, k = 1 -> NN2
    for i, data_length in enumerate([3**8, 2 * (3**8), 3**9]):
        # one bar height per entry of x, in the same (test type, shot split) order
        counts = [np.average(aucs[i, s, :, k, j])
                  for j in range(len(test_types)) for s in range(len(kor_eng_shots))]

        source = ColumnDataSource(data = dict(x = x, counts = counts))

        # width / height (as already used by draw_figure) instead of plot_width / plot_height,
        # which newer Bokeh releases no longer accept
        p = figure(
            x_range = FactorRange(*x), height = 250, width = 600,
            title = "Area Under the Curve (AUC) of NN{} (input samples = {})".format(k + 1, data_length))

        # colour by the outer factor (test type) only: end = 1 keeps the first level of each x tuple
        index_cmap = factor_cmap("x", palette = ["firebrick", "olive", "navy"], factors = sorted(test_types), end = 1)
        p.vbar(x = "x", top = "counts", width = 0.9, source = source, fill_color = index_cmap, alpha = .5)

        p.y_range.start = 0.5
        p.y_range.end = 0.9
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = .5
        p.xgrid.grid_line_color = None

        p.title.text_font_size = "13pt"
        p.xaxis.major_label_text_font_size = "8pt"
        p.xaxis.group_text_font_size = "12pt"

        gp.append(p)

# gp holds the three NN1 figures followed by the three NN2 figures, laid out two per row
grid = gridplot([[gp[0], gp[1]], [gp[2], gp[3]], [gp[4], gp[5]]])
show(grid)

## **Embedding Visualization (t-SNE)**

In [0]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Legend
from bokeh.colors import RGB
from sklearn.manifold import TSNE

output_notebook()

def draw_figure(data, labels, centroids, title,
                colors = "olive gold turquoise mediumorchid firebrick".split()):
    """Scatter-plot 2-D points coloured by class and overlay the class centroids.

    Args:
        data: array of shape (n_points, 2) with the coordinates to plot.
        labels: 1-D array with one class label per row of `data`; int(label)
            selects the colour from `colors`.
        centroids: array of shape (n_centroids, 2); row k is drawn with colors[k]
            (assumes centroid rows are ordered by class index — confirm with the caller).
        title: figure title.
        colors: colour names, one per class. The default list is never mutated.
    """
    p = figure(width = 900, height = 400, title = title)

    for i in np.unique(labels):
        idx = np.where(labels == i)[0]
        data_ = data[idx]

        p.scatter(data_[:,0], data_[:,1], color = colors[int(i)],
                  line_width = .5, legend_label = "class {}".format(int(i)))

    # Pass exactly one colour per centroid; handing over the whole palette gives
    # mismatched column lengths whenever there are fewer centroids than colours.
    p.circle_x(
        centroids[:, 0], centroids[:, 1], size = 20, fill_color = colors[:len(centroids)],
        line_color = "black", fill_alpha = 0.8, line_width = 1.5)
        
    p.title.text_font_size = "14pt"
    show(p)

### Best Performance Model
*   Data Length = 3**9
*   Korean Shots = 2
*   English Shots = 3
*   Repeats = 9
*   Model : Neural Network \#2
*   Test Data = English Only


In [0]:
# Index (input length, Korean shots, repeat, model, test subset) of the lowest EER over all runs.
np.where(eers == np.min(eers))

(array([2]), array([2]), array([9]), array([1]), array([1]))

In [0]:
# EER at that index: input length 3**9, K2E3, repeat 9, NN2, English-only test subset.
eers[2, 2, 9, 1, 1]

0.13249999999999998

In [0]:
# Index (input length, Korean shots, repeat, model, test subset) of the highest AUC over all runs.
np.where(aucs == np.max(aucs))

(array([2]), array([2]), array([9]), array([1]), array([1]))

In [0]:
# AUC at that index: input length 3**9, K2E3, repeat 9, NN2, English-only test subset.
aucs[2, 2, 9, 1, 1]

0.93780078125

In [13]:
import tensorflow_addons as tfa

# t-SNE view of the best run: input length 3**9, K2E3, repeat 9, NN2, English-only test subset.
i = 2
ks = 2
repeat = 9

ways = 5
shots = 5
max_shot = 20
data_length = 3 ** 9

# load random states
kor_shots_labels = random_states[i, ks, repeat, 0]
eng_shots_labels = random_states[i, ks, repeat, 1]

kor_data = kor_dataset["data_{}".format(data_length)]
kor_labels = kor_dataset["labels"]

eng_data = eng_dataset["data_{}".format(data_length)]
eng_labels = eng_dataset["labels"]

# load train dataset
train_data, train_labels, test_data, test_labels = split_dataset(
    kor_data, kor_labels, eng_data, eng_labels, data_length, kor_shots_labels, eng_shots_labels)

# load model
model_NN2_name = "{}_{}_{}_K{}E{}_{}_{}.h5".format(ways, shots, data_length, ks, shots - ks, "NN2", repeat)
model_NN2 = tf.keras.models.load_model(os.path.join(base_path, "models", model_NN2_name))

# The reported best score (last index 1) is the English-only result, so embed the English test
# rows. split_dataset stacks the Korean test rows first; the English rows start right after them.
# The previous version plotted the Korean rows under a title quoting the English-only metrics.
eng_start = ways * (kor_data.shape[1] - max_shot)
eng_test_data = test_data[eng_start:]
eng_test_labels = test_labels[eng_start:]

# calculate centroid
NN2_centroid_features = calculate_centroid(model_NN2.predict(train_data), train_labels)
NN2_test_features = model_NN2.predict(eng_test_data)

# fixed random_state so the figure is reproducible between runs
z = TSNE(n_components = 2, perplexity = 30, random_state = 0).fit_transform(
    np.concatenate((NN2_test_features, NN2_centroid_features), axis = 0))

draw_figure(
    z[:NN2_test_features.shape[0]], eng_test_labels, z[NN2_test_features.shape[0]:],
    title = "Best Performance Model (EER: {:.4f}, AUC: {:.4f})".format(eers[i, ks, repeat, 1, 1], aucs[i, ks, repeat, 1, 1]))

### Worst Performance
*   Data Length = 3**8
*   Korean Shots = 0
*   English Shots = 5
*   Repeats = 4
*   Model : Neural Network \#1
*   Test Data = Korean Only


In [0]:
# Index (input length, Korean shots, repeat, model, test subset) of the highest EER over all runs.
np.where(eers == np.max(eers))

(array([0]), array([0]), array([4]), array([0]), array([0]))

In [0]:
# EER at that index: input length 3**8, K0E5, repeat 4, NN1, Korean-only test subset.
eers[0, 0, 4, 0, 0]

0.54

In [0]:
# Index (input length, Korean shots, repeat, model, test subset) of the lowest AUC over all runs.
np.where(aucs == np.min(aucs))

(array([0]), array([0]), array([4]), array([0]), array([0]))

In [0]:
# AUC at that index: input length 3**8, K0E5, repeat 4, NN1, Korean-only test subset.
aucs[0, 0, 4, 0, 0]

0.47416953124999994

In [14]:
# t-SNE view of the worst run: input length 3**8, K0E5, repeat 4, NN1, Korean-only test subset.
i = 0
ks = 0
repeat = 4

ways = 5
shots = 5
max_shot = 20
data_length = 3 ** 8

# load random states
kor_shots_labels = random_states[i, ks, repeat, 0]
eng_shots_labels = random_states[i, ks, repeat, 1]

kor_data = kor_dataset["data_{}".format(data_length)]
kor_labels = kor_dataset["labels"]

eng_data = eng_dataset["data_{}".format(data_length)]
eng_labels = eng_dataset["labels"]

# load train dataset
train_data, train_labels, test_data, test_labels = split_dataset(
    kor_data, kor_labels, eng_data, eng_labels, data_length, kor_shots_labels, eng_shots_labels)

# load model
model_NN1_name = "{}_{}_{}_K{}E{}_{}_{}.h5".format(ways, shots, data_length, ks, shots - ks, "NN1", repeat)
model_NN1 = tf.keras.models.load_model(os.path.join(base_path, "models", model_NN1_name))
model_NN1 = tf.keras.models.Model(
    inputs = model_NN1.input, outputs = model_NN1.layers[-2].output) # remove outputs

# Korean-only part of the test set: split_dataset stacks the Korean test rows first.
# The count is derived from the data instead of a hard-coded 100 utterances per speaker.
kor_test_count = ways * (kor_data.shape[1] - max_shot)
kor_test_data = test_data[:kor_test_count]
kor_test_labels = test_labels[:kor_test_count]

# calculate centroid
NN1_centroid_features = calculate_centroid(model_NN1.predict(train_data), train_labels)
NN1_test_features = model_NN1.predict(kor_test_data)

# fixed random_state so the figure is reproducible between runs
z = TSNE(n_components = 2, perplexity = 30, random_state = 0).fit_transform(
    np.concatenate((NN1_test_features, NN1_centroid_features), axis = 0))

draw_figure(
    z[:NN1_test_features.shape[0]], kor_test_labels, z[NN1_test_features.shape[0]:],
    title = "Worst Performance Model (EER: {:.4f}, AUC: {:.4f})".format(eers[i, ks, repeat, 0, 0], aucs[i, ks, repeat, 0, 0]))