# Experiment visualizations

## Contents

1. [Set-up](#Set-up)
1. [Basic equality](#Basic-equality)
    1. [Basic equality: No pretraining](#Basic-equality:-No-pretraining)
    1. [Basic equality: With pretraining](#Basic-equality:-With-pretraining)
    1. [Basic equality: Compare with and without pretraining](#Basic-equality:-Compare-with-and-without-pretraining)
1. [Flat Premack](#Flat-Premack)
    1. [Flat Premack: One hidden layer, no pretraining](#Flat-Premack:-One-hidden-layer,-no-pretraining)
    1. [Flat Premack: Two hidden layers, no pretraining](#Flat-Premack:-Two-hidden-layers,-no-pretraining)
    1. [Flat Premack: Two hidden layers, with pretraining](#Flat-Premack:-Two-hidden-layers,-with-pretraining)
    1. [Flat Premack: Compare with and without pretraining](#Flat-Premack:-Compare-with-and-without-pretraining)
1. [Inputs as outputs](#Inputs-as-outputs)
    1. [Inputs as outputs: No pretraining](#Inputs-as-outputs:-No-pretraining)
    1. [Inputs as outputs: Zero-shot setting](#Inputs-as-outputs:-Zero-shot-setting)
    1. [Inputs as outputs: With pretraining](#Inputs-as-outputs:-With-pretraining)
    1. [Inputs as outputs: Compare with and without pretraining](#Inputs-as-outputs:-Compare-with-and-without-pretraining)
    1. [Inputs as outputs: By-class results](#Inputs-as-outputs:-By-class-results)
1. [Fuzzy Pattern LMs](#Fuzzy-Pattern-LMs)
    1. [Fuzzy Pattern LMs: No pretraining](#Fuzzy-Pattern-LMs:-No-pretraining)
    1. [Fuzzy Pattern LMs: With pretraining](#Fuzzy-Pattern-LMs:-With-pretraining)
    1. [Fuzzy Pattern LMs: Compare with and without pretraining](#Fuzzy-Pattern-LMs:-Compare-with-and-without-pretraining)
1. [Small pretraining examples for the paper](#Small-pretraining-examples-for-the-paper)
    1. [Original random embedding](#Original-random-embedding)
    1. [Pretrained embedding](#Pretrained-embedding)

## Set-up

In [None]:
from comparative_viz import ComparativeViz
from comparative_viz import compare_with_and_without_pretraining_viz
from comparative_viz import input_as_output_zero_shot_viz
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import torch
import pandas as pd
import re
from torch_rep_learner import RepLearner
import utils

In [None]:
# Seed the random number generators through the project helper before any
# experiment code runs (which generators are covered is defined in
# utils.fix_random_seeds, not visible here).
utils.fix_random_seeds()

## Basic equality

### Basic equality: No pretraining

In [None]:
# Visualize the no-pretraining equality results in "equality.csv";
# create_all() presumably renders every plot ComparativeViz supports.
ComparativeViz("equality.csv").create_all()

### Basic equality: With pretraining

In [None]:
#ComparativeViz("equality-pretrain-3tasks.csv").create_all()

In [None]:
#ComparativeViz("equality-pretrain-5tasks.csv").create_all()

In [None]:
#ComparativeViz("equality-pretrain-10tasks.csv").create_all()

### Basic equality: Compare with and without pretraining

In [None]:
# Compare the no-pretraining equality results with the 3-, 5- and 10-task
# pretraining results at embed_dim=10 / hidden_dim=100. accuracy_col is not
# passed, so the callee's default accuracy column is used.
compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="equality.csv",
    pretrain_base_filenames=(
        "equality-pretrain-3tasks.csv",
        "equality-pretrain-5tasks.csv",
        "equality-pretrain-10tasks.csv"
    ),
    embed_dim=10,
    hidden_dim=100,
    nopretrain_color=utils.STYLE_COLORS[1],
    experiment_type="equality-pretrain-compare")

In [None]:
# Train-set evaluation: the same comparison as the preceding cell, but
# reading the "train_accuracy" column via accuracy_col.
# NOTE(review): experiment_type is identical to the preceding cell's; if it
# is used to derive an output filename the two plots would collide - confirm.

compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="equality.csv",
    pretrain_base_filenames=(
        "equality-pretrain-3tasks.csv",
        "equality-pretrain-5tasks.csv",
        "equality-pretrain-10tasks.csv"
    ),
    embed_dim=10,
    hidden_dim=100,
    accuracy_col="train_accuracy",
    nopretrain_color=utils.STYLE_COLORS[1],
    experiment_type="equality-pretrain-compare")

## Flat Premack

### Flat Premack: One hidden layer, no pretraining

In [None]:
# Visualize the one-hidden-layer flat Premack results ("flatpremack-h1.csv").
ComparativeViz("flatpremack-h1.csv").create_all()

### Flat Premack: Two hidden layers, no pretraining

In [None]:
# Visualize the two-hidden-layer flat Premack results ("flatpremack-h2.csv").
ComparativeViz("flatpremack-h2.csv").create_all()

### Flat Premack: Two hidden layers, with pretraining

In [None]:
#ComparativeViz("flatpremack-h2-pretrain-3tasks.csv").create_all()

In [None]:
#ComparativeViz("flatpremack-h2-pretrain-5tasks.csv").create_all()

In [None]:
#ComparativeViz("flatpremack-h2-pretrain-10tasks.csv").create_all()

### Flat Premack: Compare with and without pretraining

In [None]:
# Compare the two-hidden-layer flat Premack results without pretraining
# against the 3-, 5- and 10-task pretraining results at embed_dim=10 /
# hidden_dim=100. train_size_max=35000 is passed through to the callee
# (presumably an upper bound on the training-set sizes shown - confirm).
compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="flatpremack-h2.csv",
    pretrain_base_filenames=(
        "flatpremack-h2-pretrain-3tasks.csv",
        "flatpremack-h2-pretrain-5tasks.csv",
        "flatpremack-h2-pretrain-10tasks.csv"
    ),
    embed_dim=10,
    hidden_dim=100,
    experiment_type="flatpremack-h2-pretrain-compare",
    nopretrain_color=utils.STYLE_COLORS[1],
    legend_placement="lower right",
    train_size_max=35000)

In [None]:
# Train-set evaluation: the same comparison as the preceding cell, but
# reading the "train_accuracy" column via accuracy_col.
# NOTE(review): experiment_type is identical to the preceding cell's; if it
# is used to derive an output filename the two plots would collide - confirm.

compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="flatpremack-h2.csv",
    pretrain_base_filenames=(
        "flatpremack-h2-pretrain-3tasks.csv",
        "flatpremack-h2-pretrain-5tasks.csv",
        "flatpremack-h2-pretrain-10tasks.csv"
    ),
    embed_dim=10,
    hidden_dim=100,
    accuracy_col="train_accuracy",
    experiment_type="flatpremack-h2-pretrain-compare",
    nopretrain_color=utils.STYLE_COLORS[1],
    legend_placement="lower right",
    train_size_max=35000)

## Inputs as outputs

In [None]:
# Keyword arguments shared by the input-as-output visualizations below
# (unpacked with **io_kwargs): a custom x-axis label plus the
# "max_cols_method" option, whose meaning is defined by the callee.
io_kwargs = {
    "xlabel": "Additional hierarchical Premack training examples",
    "max_cols_method": "smallest"}

### Inputs as outputs: No pretraining

In [None]:
# Visualize the no-pretraining input-as-output results, using the shared
# io_kwargs for the axis label and column-selection option.
ComparativeViz("input-as-output.csv", **io_kwargs).create_all()

### Inputs as outputs: Zero-shot setting

In [None]:
# input_as_output_zero_shot_viz("input-as-output.csv")

### Inputs as outputs: With pretraining

In [None]:
#ComparativeViz("input-as-output-pretrain-3tasks.csv", **io_kwargs).create_all()

In [None]:
#ComparativeViz("input-as-output-pretrain-5tasks.csv", **io_kwargs).create_all()

In [None]:
#ComparativeViz("input-as-output-pretrain-10tasks.csv", **io_kwargs).create_all()

### Inputs as outputs: Compare with and without pretraining

In [None]:
# Compare the input-as-output results without pretraining against the 3-,
# 5- and 10-task pretraining results at embed_dim=25. hidden_dim=None is
# passed explicitly (presumably meaning "do not filter on hidden_dim" -
# confirm against the callee). accuracy_col is left at the callee's default.
compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="input-as-output.csv",
    pretrain_base_filenames=(
        "input-as-output-pretrain-3tasks.csv",
        "input-as-output-pretrain-5tasks.csv",
        "input-as-output-pretrain-10tasks.csv"
    ),
    embed_dim=25,
    hidden_dim=None,
    nopretrain_color=utils.STYLE_COLORS[2],
    experiment_type="input-as-output-pretrain-compare",
    legend_placement="lower right",
    **io_kwargs)

In [None]:
# Train-set evaluation: the same comparison as the preceding cell, but
# reading the "train_accuracy" column via accuracy_col.
# NOTE(review): experiment_type is identical to the preceding cell's; if it
# is used to derive an output filename the two plots would collide - confirm.

compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="input-as-output.csv",
    pretrain_base_filenames=(
        "input-as-output-pretrain-3tasks.csv",
        "input-as-output-pretrain-5tasks.csv",
        "input-as-output-pretrain-10tasks.csv"
    ),
    embed_dim=25,
    hidden_dim=None,
    nopretrain_color=utils.STYLE_COLORS[2],
    accuracy_col="train_accuracy",
    experiment_type="input-as-output-pretrain-compare",
    legend_placement="lower right",
    **io_kwargs)

### Inputs as outputs: By-class results

In [None]:
# By-class input-as-output results: "input_class" is passed as the
# secondary column, with the y-range fixed to [0.0, 1.01]. Unlike the other
# input-as-output cells, io_kwargs is not applied here.
ComparativeViz(
    "input-as-output-byclass-results.csv",
    secondary_col="input_class",
    ylim=[0.0, 1.01],
    legend_placement="lower right").create_all()

## Fuzzy Pattern LMs

In [None]:
# Shared `ylim` value passed to every Fuzzy Pattern LM plot below; the upper
# bound is slightly above 1 (presumably so values at 1.0 are not clipped).
lm_ylim = [0, 1.01]

### Fuzzy Pattern LMs: No pretraining

In [None]:
# Visualize the no-pretraining fuzzy-LM results (vocabulary size 20 per the
# file name), with the shared y-limits.
ComparativeViz("fuzzy-lm-vocab20.csv", ylim=lm_ylim).create_all()

### Fuzzy Pattern LMs: With pretraining

In [None]:
#ComparativeViz("fuzzy-lm-vocab20-pretrain-3tasks.csv", ylim=lm_ylim).create_all()

In [None]:
#ComparativeViz("fuzzy-lm-vocab20-pretrain-5tasks.csv", ylim=lm_ylim).create_all()

In [None]:
#ComparativeViz("fuzzy-lm-vocab20-pretrain-10tasks.csv", ylim=lm_ylim).create_all()

### Fuzzy Pattern LMs: Compare with and without pretraining

In [None]:
# Compare the fuzzy-LM results without pretraining against the 3-, 5- and
# 10-task pretraining results at embed_dim=2 / hidden_dim=100, using the
# shared y-limits. accuracy_col is left at the callee's default.
compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="fuzzy-lm-vocab20.csv",
    pretrain_base_filenames=(
        "fuzzy-lm-vocab20-pretrain-3tasks.csv",
        "fuzzy-lm-vocab20-pretrain-5tasks.csv",
        "fuzzy-lm-vocab20-pretrain-10tasks.csv"
    ),
    embed_dim=2,
    hidden_dim=100,
    nopretrain_color=utils.STYLE_COLORS[0],
    experiment_type="fuzzy-lm-pretrain-compare",
    ylim=lm_ylim,
    legend_placement="lower right")

In [None]:
# Train-set evaluation: the same comparison as the preceding cell, but
# reading the 'train_accuracy' column via accuracy_col.
# NOTE(review): experiment_type is identical to the preceding cell's; if it
# is used to derive an output filename the two plots would collide - confirm.
compare_with_and_without_pretraining_viz(
    nopretrain_base_filename="fuzzy-lm-vocab20.csv",
    pretrain_base_filenames=(
        "fuzzy-lm-vocab20-pretrain-3tasks.csv",
        "fuzzy-lm-vocab20-pretrain-5tasks.csv",
        "fuzzy-lm-vocab20-pretrain-10tasks.csv"
    ),
    embed_dim=2,
    hidden_dim=100,
    nopretrain_color=utils.STYLE_COLORS[0],
    experiment_type="fuzzy-lm-pretrain-compare",
    ylim=lm_ylim,
    accuracy_col='train_accuracy',
    legend_placement="lower right")

## Small pretraining examples for the paper

In [None]:
def toy_examples_for_figure(n_tasks=3, vocab_size=8, embed_dim=2):
    """Train a tiny two-task `RepLearner` and return its embedding
    before and after training, for the toy figure in the paper.

    Parameters
    ----------
    n_tasks : int
        Accepted for backward compatibility but not used: this toy
        set-up always builds exactly two label sequences and always
        constructs the model with `n_tasks=2`.
    vocab_size : int
        Number of vocabulary items; the inputs are `0 .. vocab_size-1`.
    embed_dim : int
        Dimensionality of the random initial embedding.

    Returns
    -------
    original_embedding
        Copy of `mod.embedding` taken before `mod.fit` is called.
    updated_embedding
        Copy of `mod.embedding` taken after `mod.fit` has run.
    ys : list of tuple
        One `(task1_label, task2_label)` pair per vocabulary item.
    """
    X = list(range(vocab_size))

    # Task 1 alternates 0/1; task 2 starts at 3 and steps up by one every
    # two items. For the default vocab_size=8 this is exactly
    # [0,1,0,1,0,1,0,1] and [3,3,4,4,5,5,6,6]. Deriving both from X keeps
    # the labels the same length as the inputs for any vocab_size.
    parity_labels = [i % 2 for i in X]
    step_labels = [3 + i // 2 for i in X]
    ys = list(zip(parity_labels, step_labels))

    # One random start vector per vocabulary item.
    embedding = np.array(
        [utils.randvec(embed_dim) for _ in range(vocab_size)])

    # NOTE(review): output_dims presumably lists the output size of each
    # task, in task order - confirm against RepLearner.
    mod = RepLearner(
        vocab_size,
        embed_dim=embed_dim,
        embedding=embedding,
        hidden_dim=2,
        n_tasks=2,
        output_dims=[2, 1],
        max_iter=5000)

    # Snapshot before training so the caller can show before vs. after.
    original_embedding = mod.embedding.copy()

    mod.fit(X, ys)

    updated_embedding = mod.embedding.copy()

    return original_embedding, updated_embedding, ys

In [None]:
def format_toy_matrix(X, n_rows=6):
    df = pd.DataFrame(X)
    df.columns = [f"d_{{{i}}}" for i in range(1, X.shape[1]+1)]
    return df.head(n_rows).round(1).to_latex(escape=False, index=None)

In [None]:
def visualize_matrix(X, ys, figsize=(3, 3), output_filename=None):
    """Plot a 2d embedding as colored text labels and print the matching
    tikz node commands.

    Parameters
    ----------
    X : 2d array-like
        One row per vocabulary item; columns 0 and 1 are used as the
        x and y coordinates.
    ys : sequence of sequences
        One label combination per row of `X`. Element 0 must be 0 or 1
        (mapped to the tikz fill "red" / "blue"); element 1 is used as
        the number of polygon sides in the tikz output.
    figsize : tuple
        Passed to `plt.subplots`.
    output_filename : str or None
        If given, the figure is saved there; otherwise it is shown.

    Raises
    ------
    KeyError
        If the first label of any item is neither 0 nor 1.
    """
    from itertools import cycle

    df = pd.DataFrame(X)
    vocab = df.index
    label_combos = {tuple(labels) for labels in ys}
    # cycle() reuses palette entries when there are more label combinations
    # than colors, instead of leaving combinations unmapped (KeyError below).
    lookup = dict(zip(label_combos, cycle(utils.STYLE_COLORS)))
    colors = [lookup[tuple(labels)] for labels in ys]

    # Emit one tikz node per item: fill color from the first label, polygon
    # side count from the second.
    tikz_fill = {0: "red", 1: "blue"}
    for i, labels in enumerate(ys):
        color = tikz_fill[labels[0]]
        sides = labels[1]
        line = "\\node[regular polygon, regular polygon sides={}, fill={}!100] at (axis cs:{}, {}) {{}};".format(
            str(int(sides)), color, str(X[i][0]), str(X[i][1]))
        print(line)

    xvals = [row[0] for row in X]
    yvals = [row[1] for row in X]
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    # Plot the points with no marker and no line: nothing is drawn, but the
    # axes limits are fitted to the data before the text labels are added.
    ax.plot(xvals, yvals, marker='', linestyle='')
    # Text labels, one per item, colored by label combination.
    for word, x, y, color in zip(vocab, xvals, yvals, colors):
        ax.annotate(word, (x, y), fontsize=8, color=color)
    ax.set_xticks([-2, -1, 0, 1, 2])
    ax.set_yticks([-2, -1, 0, 1, 2])
    # Output:
    if output_filename:
        plt.savefig(output_filename, bbox_inches='tight')
    else:
        plt.show()

In [None]:
def format_fig_lims(emb, padding=0.6):
    """Provides tikz code to ensure that the canvas
    includes all our data.

    Parameters
    ----------
    emb : 2d array-like with exactly two columns
        The points to enclose; column 0 is x, column 1 is y.
    padding : float
        Margin added outside the data range on every side.

    Returns
    -------
    str
        Two lines, `xmin=..., xmax=...,` and `ymin=..., ymax=...,`,
        each value formatted to two decimals and each line ending in
        a newline.
    """
    emb = np.asarray(emb)
    xmin, ymin = emb.min(axis=0)
    xmax, ymax = emb.max(axis=0)

    # Always widen the range: lower bounds move down, upper bounds move up.
    # Padding by sign instead would move a positive minimum up (or a
    # non-positive maximum down) and cut data off the canvas.
    xmin -= padding
    ymin -= padding
    xmax += padding
    ymax += padding

    s = "xmin={0:0.2f}, xmax={1:0.2f},\n".format(xmin, xmax)
    s += "ymin={0:0.2f}, ymax={1:0.2f},\n".format(ymin, ymax)
    return s

In [None]:
# Train the toy model once with default settings; the cells below reuse the
# before/after embeddings and the label pairs.
original_embedding, updated_embedding, ys  = toy_examples_for_figure()

### Original random embedding

In [None]:
# Plot the untrained embedding and print its tikz nodes. The figure is
# saved under fig/, which must already exist.
visualize_matrix(
    original_embedding, ys,
    output_filename=os.path.join("fig", "toy-example-original.pdf"))

In [None]:
# Print the tikz axis limits (xmin/xmax/ymin/ymax) for the untrained embedding.
print(format_fig_lims(original_embedding))

In [None]:
# Print the leading rows of the untrained embedding as a LaTeX table.
print(format_toy_matrix(original_embedding))

### Pretrained embedding

In [None]:
# Plot the embedding after training and print its tikz nodes. The figure is
# saved under fig/, which must already exist.
visualize_matrix(
    updated_embedding, ys,
    output_filename=os.path.join("fig", "toy-example-trained.pdf"))

In [None]:
# Print the tikz axis limits (xmin/xmax/ymin/ymax) for the trained embedding.
print(format_fig_lims(updated_embedding))

In [None]:
# Print the leading rows of the trained embedding as a LaTeX table.
print(format_toy_matrix(updated_embedding))