# Experiment visualizations

## Contents

1. [Set-up](#Set-up)
1. [Basic equality](#Basic-equality)
  1. [Basic equality: No pretraining](#Basic-equality:-No-pretraining)
  1. [Basic equality: With pretraining](#Basic-equality:-With-pretraining)
  1. [Basic equality: Compare with and without pretraining](#Basic-equality:-Compare-with-and-without-pretraining)
1. [Flat Premack](#Flat-Premack)
  1. [Flat Premack: One hidden layer, no pretraining](#Flat-Premack:-One-hidden-layer,-no-pretraining)
  1. [Flat Premack: Two hidden layers, no pretraining](#Flat-Premack:-Two-hidden-layers,-no-pretraining)
  1. [Flat Premack: Two hidden layers, with pretraining](#Flat-Premack:-Two-hidden-layers,-with-pretraining)
  1. [Flat Premack: Compare with and without pretraining](#Flat-Premack:-Compare-with-and-without-pretraining)
1. [Inputs as outputs](#Inputs-as-outputs)
  1. [Inputs as outputs: No pretraining](#Inputs-as-outputs:-No-pretraining)
  1. [Inputs as outputs: Zero-shot setting](#Inputs-as-outputs:-Zero-shot-setting)
  1. [Inputs as outputs: With pretraining](#Inputs-as-outputs:-With-pretraining)
  1. [Inputs as outputs: Compare with and without pretraining](#Inputs-as-outputs:-Compare-with-and-without-pretraining)
1. [Fuzzy Pattern LMs](#Fuzzy-Pattern-LMs)
  1. [Fuzzy Pattern LMs: No pretraining](#Fuzzy-Pattern-LMs:-No-pretraining)
  1. [Fuzzy Pattern LMs: With pretraining](#Fuzzy-Pattern-LMs:-With-pretraining)
  1. [Fuzzy Pattern LMs: Compare with and without pretraining](#Fuzzy-Pattern-LMs:-Compare-with-and-without-pretraining)
1. [Small pretraining examples for the paper](#Small-pretraining-examples-for-the-paper)

## Set-up

In [None]:
from comparative_viz import ComparativeViz
from comparative_viz import compare_with_and_without_pretraining_viz
from comparative_viz import input_as_output_zero_shot_viz
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
from torch_rep_learner import RepLearner
import utils

In [None]:
# Fix random seeds through the project helper before any later cell draws
# random numbers (e.g. np.random.choice in the toy example). Which generators
# are seeded is decided inside utils -- TODO confirm it covers NumPy and torch.
utils.fix_random_seeds()

## Basic equality

### Basic equality: No pretraining

In [None]:
# Baseline (no pretraining) equality results. create_all() presumably renders
# every plot ComparativeViz supports for this file -- see comparative_viz.
ComparativeViz("equality.csv").create_all()

### Basic equality: With pretraining

In [None]:
# Equality results with pretraining on 3 tasks (per the results file name).
ComparativeViz("equality-pretrain-3tasks.csv").create_all()

In [None]:
# Equality results with pretraining on 5 tasks (per the results file name).
ComparativeViz("equality-pretrain-5tasks.csv").create_all()

In [None]:
# Equality results with pretraining on 10 tasks (per the results file name).
ComparativeViz("equality-pretrain-10tasks.csv").create_all()

### Basic equality: Compare with and without pretraining

In [None]:
# Compare the no-pretraining equality run against the 3-, 5- and 10-task
# pretraining runs. embed_dim/hidden_dim presumably pick the configuration
# to plot -- confirm in comparative_viz.
compare_with_and_without_pretraining_viz(
    experiment_type="equality-pretrain-compare",
    nopretrain_base_filename="equality.csv",
    pretrain_base_filenames=(
        "equality-pretrain-3tasks.csv",
        "equality-pretrain-5tasks.csv",
        "equality-pretrain-10tasks.csv",
    ),
    embed_dim=10,
    hidden_dim=100,
    nopretrain_color=utils.STYLE_COLORS[1],
)

## Flat Premack

### Flat Premack: One hidden layer, no pretraining

In [None]:
# Flat Premack, one hidden layer ("h1" in the file name), no pretraining.
ComparativeViz("flatpremack-h1.csv").create_all()

### Flat Premack: Two hidden layers, no pretraining

In [None]:
# Flat Premack, two hidden layers ("h2" in the file name), no pretraining.
ComparativeViz("flatpremack-h2.csv").create_all()

### Flat Premack: Two hidden layers, with pretraining

In [None]:
# Flat Premack, two hidden layers, pretraining on 3 tasks (per the file name).
ComparativeViz("flatpremack-h2-pretrain-3tasks.csv").create_all()

In [None]:
# Flat Premack, two hidden layers, pretraining on 5 tasks (per the file name).
ComparativeViz("flatpremack-h2-pretrain-5tasks.csv").create_all()

In [None]:
# Flat Premack, two hidden layers, pretraining on 10 tasks (per the file name).
ComparativeViz("flatpremack-h2-pretrain-10tasks.csv").create_all()

### Flat Premack: Compare with and without pretraining

In [None]:
# Compare the two-hidden-layer Flat Premack run without pretraining against
# the 3-, 5- and 10-task pretraining runs. train_size_max presumably caps the
# x-axis at 35000 training examples -- confirm in comparative_viz.
compare_with_and_without_pretraining_viz(
    experiment_type="flatpremack-h2-pretrain-compare",
    nopretrain_base_filename="flatpremack-h2.csv",
    pretrain_base_filenames=(
        "flatpremack-h2-pretrain-3tasks.csv",
        "flatpremack-h2-pretrain-5tasks.csv",
        "flatpremack-h2-pretrain-10tasks.csv",
    ),
    embed_dim=10,
    hidden_dim=100,
    nopretrain_color=utils.STYLE_COLORS[1],
    legend_placement="lower right",
    train_size_max=35000,
)

## Inputs as outputs

In [None]:
# Keyword arguments shared by every "Inputs as outputs" visualization below:
# a custom x-axis label plus the max_cols_method setting.
io_kwargs = dict(
    xlabel="Additional hierarchical Premack training examples",
    max_cols_method="smallest",
)

### Inputs as outputs: No pretraining

In [None]:
# Input-as-output results without pretraining, using the shared io_kwargs
# (custom xlabel and max_cols_method="smallest").
ComparativeViz("input-as-output.csv", **io_kwargs).create_all()

### Inputs as outputs: Zero-shot setting

In [None]:
# Zero-shot view of the same no-pretraining results file; this helper takes
# only the results file name here (no io_kwargs).
input_as_output_zero_shot_viz("input-as-output.csv")

### Inputs as outputs: With pretraining

In [None]:
# Input-as-output results with pretraining on 3 tasks (per the file name).
ComparativeViz("input-as-output-pretrain-3tasks.csv", **io_kwargs).create_all()

In [None]:
# Input-as-output results with pretraining on 5 tasks (per the file name).
ComparativeViz("input-as-output-pretrain-5tasks.csv", **io_kwargs).create_all()

In [None]:
# Input-as-output results with pretraining on 10 tasks (per the file name).
ComparativeViz("input-as-output-pretrain-10tasks.csv", **io_kwargs).create_all()

### Inputs as outputs: Compare with and without pretraining

In [None]:
# Compare the input-as-output run without pretraining against the 3-, 5- and
# 10-task pretraining runs. hidden_dim is None for this experiment, and the
# shared io_kwargs supply the x-axis label and max_cols_method.
compare_with_and_without_pretraining_viz(
    experiment_type="input-as-output-pretrain-compare",
    nopretrain_base_filename="input-as-output.csv",
    pretrain_base_filenames=(
        "input-as-output-pretrain-3tasks.csv",
        "input-as-output-pretrain-5tasks.csv",
        "input-as-output-pretrain-10tasks.csv",
    ),
    embed_dim=25,
    hidden_dim=None,
    nopretrain_color=utils.STYLE_COLORS[2],
    legend_placement="lower right",
    **io_kwargs,
)

## Fuzzy Pattern LMs

In [None]:
# y-axis limits shared by every Fuzzy Pattern LM plot below. The upper bound
# is slightly above 1 -- presumably so values at 1.0 are not drawn on the
# axis edge.
lm_ylim = [0, 1.01]

### Fuzzy Pattern LMs: No pretraining

In [None]:
# Fuzzy Pattern LM results (vocab20 file) without pretraining, plotted with
# the shared y-axis limits.
ComparativeViz("fuzzy-lm-vocab20.csv", ylim=lm_ylim).create_all()

### Fuzzy Pattern LMs: With pretraining

In [None]:
# Fuzzy Pattern LM results with pretraining on 3 tasks (per the file name).
ComparativeViz("fuzzy-lm-vocab20-pretrain-3tasks.csv", ylim=lm_ylim).create_all()

In [None]:
# Fuzzy Pattern LM results with pretraining on 5 tasks (per the file name).
ComparativeViz("fuzzy-lm-vocab20-pretrain-5tasks.csv", ylim=lm_ylim).create_all()

In [None]:
# Fuzzy Pattern LM results with pretraining on 10 tasks (per the file name).
ComparativeViz("fuzzy-lm-vocab20-pretrain-10tasks.csv", ylim=lm_ylim).create_all()

### Fuzzy Pattern LMs: Compare with and without pretraining

In [None]:
# Compare the Fuzzy Pattern LM run without pretraining against the 3-, 5- and
# 10-task pretraining runs, using the same y-axis limits as the per-file
# plots above.
compare_with_and_without_pretraining_viz(
    experiment_type="fuzzy-lm-pretrain-compare",
    nopretrain_base_filename="fuzzy-lm-vocab20.csv",
    pretrain_base_filenames=(
        "fuzzy-lm-vocab20-pretrain-3tasks.csv",
        "fuzzy-lm-vocab20-pretrain-5tasks.csv",
        "fuzzy-lm-vocab20-pretrain-10tasks.csv",
    ),
    embed_dim=2,
    hidden_dim=100,
    nopretrain_color=utils.STYLE_COLORS[0],
    ylim=lm_ylim,
    legend_placement="lower right",
)

## Small pretraining examples for the paper

In [None]:
def toy_examples_for_figure(n_tasks=3, vocab_size=500, embed_dim=5):
    """Fit a small `RepLearner` on random binary tasks and return its
    embedding before and after fitting.

    Parameters
    ----------
    n_tasks : int
        Number of random binary label vectors to draw, one per task.
    vocab_size : int
        Number of items; the inputs are the integers `0 .. vocab_size - 1`.
    embed_dim : int
        Dimensionality passed to `utils.randvec` and to `RepLearner`.

    Returns
    -------
    tuple
        `(original_embedding, updated_embedding, ys)`: copies of
        `mod.embedding` taken before and after `fit`, and `ys`, a list
        with one tuple of `n_tasks` labels per item.

    """
    # Labels are drawn before the initial vectors; keep this order so that
    # seeded runs stay reproducible.
    per_task_labels = [
        np.random.choice((0, 1), size=vocab_size, replace=True)
        for _ in range(n_tasks)]
    # Transpose: from one label array per task to one label tuple per item.
    ys = list(zip(*per_task_labels))

    initial_vectors = np.array(
        [utils.randvec(embed_dim) for _ in range(vocab_size)])

    mod = RepLearner(
        vocab_size,
        embed_dim=embed_dim,
        embedding=initial_vectors,
        hidden_dim=10,
        max_iter=100)

    # Copy before fitting so later changes to `mod.embedding` cannot leak
    # into the "original" snapshot.
    original_embedding = mod.embedding.copy()

    mod.fit(list(range(vocab_size)), ys)

    return original_embedding, mod.embedding.copy(), ys

In [None]:
def format_toy_matrix(X, n_rows=4):
    df = pd.DataFrame(X)
    df.columns = [f"d_{{{i}}}" for i in range(1, X.shape[1]+1)]
    return df.head(n_rows).round(1).to_latex(escape=False, index=None)

In [None]:
def visualize_matrix(X, ys, figsize=(3, 3), output_filename=None):
    """Plot `X` with `utils.tsne_viz`, coloring each row by its label tuple.

    Parameters
    ----------
    X : array-like
        Matrix wrapped in a `pd.DataFrame` and handed to `utils.tsne_viz`.
    ys : iterable of iterables
        One label combination per row of `X`; each is converted to a tuple
        and used as the key that selects the row's color.
    figsize : tuple
        Passed through to `utils.tsne_viz`.
    output_filename : str or None
        Passed through to `utils.tsne_viz`.

    """
    frame = pd.DataFrame(X)

    # One color per distinct label combination, paired in set-iteration order.
    # NOTE: zip stops at the shorter input, so if there are more distinct
    # combinations than entries in utils.STYLE_COLORS, the lookup below
    # raises KeyError for the unpaired combinations.
    distinct_combos = set(map(tuple, ys))
    color_for = dict(zip(distinct_combos, utils.STYLE_COLORS))

    colors = []
    for labels in ys:
        colors.append(color_for[tuple(labels)])

    utils.tsne_viz(
        frame,
        colors=colors,
        use_names=False,
        output_filename=output_filename,
        figsize=figsize,
        random_state=42)

In [None]:
# Run the toy experiment once with its default sizes and keep all three
# results for the plotting and LaTeX cells that follow.
original_embedding, updated_embedding, ys = toy_examples_for_figure()

In [None]:
# Plot the embedding as it was before fitting and save it under fig/.
visualize_matrix(
    original_embedding,
    ys,
    output_filename=os.path.join("fig", "toy-example-original.pdf"),
)

In [None]:
# Plot the embedding as it is after fitting and save it under fig/.
visualize_matrix(
    updated_embedding,
    ys,
    output_filename=os.path.join("fig", "toy-example-trained.pdf"),
)

In [None]:
# Print LaTeX for the leading rows of the pre-fit embedding
# (format_toy_matrix defaults to 4 rows).
print(format_toy_matrix(original_embedding))

In [None]:
# Print LaTeX for the leading rows of the post-fit embedding
# (format_toy_matrix defaults to 4 rows).
print(format_toy_matrix(updated_embedding))