# Analyzing Input-as-Output performance by input class-type

In [None]:
from collections import defaultdict
import numpy as np
import os
import pandas as pd
import warnings

from comparative_viz import ComparativeViz
from equality_experiment import InputAsOutputExperiment

In [None]:
# Shared experiment object; `run` below uses it to build models and datasets.
experiment = InputAsOutputExperiment()

In [None]:
def classify_example(ex):
    """Label an example by whether each side consists of two equal halves.

    `ex` is unpacked into a (left, right) pair. Each member is split at the
    midpoint of `left` (the same split index is reused for `right`) and
    tagged "same" when its two parts are equal under `np.array_equal`,
    otherwise "different".

    Returns the two tags joined as "<left>/<right>", e.g. "same/different".
    """
    left, right = ex
    # The split point is taken from `left` only and applied to both sides.
    midpoint = int(left.shape[0] / 2)

    tags = []
    for vec in (left, right):
        halves_match = np.array_equal(vec[:midpoint], vec[midpoint:])
        tags.append("same" if halves_match else "different")

    return "{}/{}".format(*tags)

In [None]:
def get_per_class_accuracy(ex_classes, preds, y_test):
    """Compute prediction accuracy separately for each example class.

    Parameters
    ----------
    ex_classes : iterable
        Class label of each test example (used as the grouping key).
    preds : iterable
        Predicted label for each test example.
    y_test : iterable
        Gold label for each test example.

    Returns
    -------
    dict
        Maps each class seen in `ex_classes` to the fraction of its
        examples whose prediction equals the gold label. Empty input
        yields an empty dict.

    The three inputs are walked in lockstep with `zip`, so iteration stops
    at the shortest of them.
    """
    hits_by_class = defaultdict(list)
    for cls, pred, gold in zip(ex_classes, preds, y_test):
        hits_by_class[cls].append(int(gold == pred))
    # Every class present has at least one entry, so the division is safe.
    return {cls: sum(hits) / len(hits) for cls, hits in hits_by_class.items()}

In [None]:
def _result_rows(accs, trial, train_size, embed_dim, alpha, lr, batch_pos, batch_neg):
    """Build one result row per input class for a single evaluation."""
    rows = []
    for cls, acc in accs.items():
        row = {
            'trial': trial,
            'train_size': train_size,
            'embed_dim': embed_dim,
            'hidden_dim': embed_dim * 2,
            'alpha': alpha,
            'learning_rate': lr,
            'input_class': cls,
            'accuracy': acc,
            'batch_pos': batch_pos,
            'batch_neg': batch_neg}
        # Each row also carries the accuracy of every class as its own
        # column (keyed by the class label), alongside the long-format
        # 'input_class'/'accuracy' pair.
        row.update(accs)
        rows.append(row)
    return rows


def run(n_trials=20, embed_dim=25, lr=0.01, alpha=0.0001, train_sizes=None):
    """Run the per-class accuracy experiment and collect the results.

    For each trial a fresh model and a fresh train/test split are obtained
    from the module-level `experiment`. The model is evaluated on the test
    set before any call to `fit` (recorded with train_size 0), then fitted
    on successively longer prefixes of the training data, with per-class
    test accuracy recorded after each fit.

    Parameters
    ----------
    n_trials : int
        Number of independent trials.
    embed_dim : int
        Embedding dimension passed to the model and the dataset builder;
        the hidden dimension is set to twice this value.
    lr : float
        Learning rate passed to `experiment.get_model`.
    alpha : float
        `alpha` value passed to `experiment.get_model`.
    train_sizes : iterable of int, optional
        Training-prefix lengths to fit on, in order. Defaults to
        `range(104, 2001, 100)`.

    Returns
    -------
    pandas.DataFrame
        One row per (trial, train_size, input class).
    """
    if train_sizes is None:
        # Built per call rather than as a shared mutable default argument.
        train_sizes = range(104, 2001, 100)

    data = []
    for trial in range(1, n_trials + 1):

        # Model with the best parameters we found experimentally:
        mod = experiment.get_model(
            hidden_dim=embed_dim * 2,
            alpha=alpha,
            lr=lr,
            embed_dim=embed_dim)

        # Dataset (the fifth returned value is not needed here):
        X_train, X_test, y_train, y_test, _ = \
            experiment.get_new_train_and_test_sets(embed_dim)
        ex_classes = [classify_example(ex) for ex in X_test]

        # Zero-shot predictions
        preds = mod.predict(X_test)
        accs = get_per_class_accuracy(ex_classes, preds, y_test)
        data.extend(_result_rows(
            accs, trial, 0, embed_dim, alpha, lr, batch_pos=0, batch_neg=0))

        # With additional training:
        for train_size in train_sizes:

            X_batch = X_train[:train_size]
            y_batch = y_train[:train_size]
            batch_pos = sum(1 for label in y_batch if label == 1)
            # len(X_batch) rather than train_size: slicing may return fewer
            # items than requested if train_size exceeds the training set.
            batch_neg = len(X_batch) - batch_pos

            # NOTE(review): `fit` is called repeatedly on the same model
            # object; whether that continues or restarts training depends
            # on the model returned by `experiment.get_model` - confirm.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                mod.fit(X_batch, y_batch)

                preds = mod.predict(X_test)
                accs = get_per_class_accuracy(ex_classes, preds, y_test)

            data.extend(_result_rows(
                accs, trial, train_size, embed_dim, alpha, lr,
                batch_pos=batch_pos, batch_neg=batch_neg))

    data_df = pd.DataFrame(data)

    return data_df

In [None]:
# Run the experiment with the default settings and keep the per-class results.
data_df = run()

In [None]:
# Persist the per-class results. The output directory is created first so the
# write does not fail when "results" does not exist yet.
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)
data_df.to_csv(
    os.path.join(results_dir, "input-as-output-byclass-results.csv"),
    index=False)