In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
    
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import adjusted_rand_score
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from tqdm import tqdm
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
import helpers_preorder as hp
import helpers_datasets as hd
import pandas as pd
from scipy.stats import sem

# Select the matplotlib style sheet for this notebook.
plt.style.use("seaborn-darkgrid")


###### Tested with Tensorflow==1.15.0 and Numpy==1.17.1 ######
# Echo the installed versions so they can be compared with the tested ones.
print(tf.__version__)
print(np.__version__)
############################################################

# Required for learner execution.
tf.enable_eager_execution()


# Raw class id -> boolean target used by the binary task.
# Tshirt = 0 (False), Shirt = 6 (True).
included_classes = {0:False, 6:True}

# Only the classes listed above are requested from the dataset helper.
# NOTE(review): -1 presumably means "no limit on the number of images" --
# confirm against helpers_datasets.get_mnist_dataset.
(X_mnist_train,
 y_mnist_train_raw,
 X_mnist_test,
 y_mnist_test_raw) = hd.get_mnist_dataset(
    included_classes=included_classes.keys(),
    num_train_images=-1,
    num_test_images=-1,
)

# Translate the raw class ids into the boolean labels defined above.
y_mnist_train = np.array(
    [included_classes[raw_label] for raw_label in y_mnist_train_raw])
y_mnist_test = np.array(
    [included_classes[raw_label] for raw_label in y_mnist_test_raw])

# Transformed $Lan_K \circ f$ and $Ran_K \circ f$ Performance

In [None]:
# Experiment configuration. Everything a reader might want to tune lives here.
TRAINING_SAMPLES_PER_BOOTSTRAP = 9000  # training rows drawn per experiment
TESTING_SAMPLES_PER_BOOTSTRAP = 1000   # test rows drawn per experiment
OUTPUT_DIMENSION = 10                  # passed to the learner as output_dimension
LEARNING_RATE = 0.02
EPOCHS = 1000
NUM_EXPERIMENTS = 10                   # number of repeated experiments

# Seed NumPy's global RNG so that the random subsamples drawn with
# np.random.permutation are identical on every Restart & Run All (and every
# re-run of this cell). Successive experiments still get different subsamples
# because the generator state advances between draws.
# NOTE(review): any randomness inside hp.LinearOrderingLossLearner (e.g.
# TensorFlow-side initialisation) is not controlled by this seed -- seed
# TensorFlow as well if fully deterministic results are required.
SEED = 0
np.random.seed(SEED)

def run_bootstrap_experiment():
    """Train and evaluate one learner on a fresh random subsample.

    Draws TRAINING_SAMPLES_PER_BOOTSTRAP training rows and
    TESTING_SAMPLES_PER_BOOTSTRAP test rows (each the prefix of a random
    permutation, so no row is repeated within a draw), fits an
    hp.LinearOrderingLossLearner on the training rows, and scores the
    learner's predictions with both preorder classifiers via hp.evaluate.

    Returns:
        dict: maps each classifier class's ``kind`` attribute to whatever
        hp.evaluate returns for that class.
    """
    num_train_rows = X_mnist_train.shape[0]
    num_test_rows = X_mnist_test.shape[0]

    # Both draws use NumPy's global RNG; the training draw comes first.
    train_rows = np.random.permutation(num_train_rows)[:TRAINING_SAMPLES_PER_BOOTSTRAP]
    test_rows = np.random.permutation(num_test_rows)[:TESTING_SAMPLES_PER_BOOTSTRAP]

    Xtr = X_mnist_train[train_rows]
    ytr = y_mnist_train[train_rows]
    Xte = X_mnist_test[test_rows]
    yte = y_mnist_test[test_rows]

    linear_learner = hp.LinearOrderingLossLearner(
        learning_rate=LEARNING_RATE,
        output_dimension=OUTPUT_DIMENSION,
        num_columns=X_mnist_train.shape[-1])
    # The value returned by fit() is not used by this experiment.
    linear_learner.fit(Xtr, ytr, epochs=EPOCHS, batches_per_epoch=1)

    def predict_as_numpy(x):
        # hp.evaluate receives the learner's predictions converted via .numpy().
        return linear_learner.predict(x).numpy()

    # One evaluation per classifier, Ran first and then Lan, keyed by `kind`.
    return {
        classifier_class.kind: hp.evaluate(
            classifier_class=classifier_class,
            X_train=Xtr, y_train=ytr, X_test=Xte, y_test=yte,
            f=predict_as_numpy)
        for classifier_class in (hp.RanPreorderClassifier, hp.LanPreorderClassifier)
    }


# Repeat the experiment NUM_EXPERIMENTS times, echoing each result as it
# finishes so progress is visible while the cell runs.
results_list = []
for i in range(NUM_EXPERIMENTS):
    result = run_bootstrap_experiment()
    print(result)
    results_list += [result]


# Aggregate every (classifier kind, metric) pair across the runs; each pair
# becomes one row of the table displayed at the end of the cell.
df_dict = {}
for kind in ("ran", "lan"):
    for metric in ("train_tpr", "train_tnr", "test_tpr", "test_tnr"):
        values = [getattr(run[kind], metric) for run in results_list]
        row_label = "{}_{}".format(kind, metric)
        df_dict[row_label] = {
            "mean": np.mean(values),
            # NOTE(review): the value stored under "standard_error" is
            # 2 * SEM, not the SEM itself -- confirm whether the label or
            # the factor of 2 reflects the intended report.
            "standard_error": 2*sem(values),
        }
pd.DataFrame(df_dict).T
