In [16]:
from pathlib import Path

import numpy as np
import torch

import file_handling as fh
from run_scholar import (
    load_word_counts, load_scholar_model, load_labels, get_minibatch, predict_label_probs
)

In [102]:
# Experiment configuration: trained-model directory, processed-data directory,
# and the data split that the rest of the notebook scores.
MODEL_DIR = "imdb-k_50/learned_dev_no_bg-with_label"
DATA_DIR = "../scholar/data/imdb/processed"
SPLIT = "train"

# Read the checkpoint onto the CPU and keep its saved run options at hand.
checkpoint = torch.load(Path(MODEL_DIR) / "torch_model.pt", map_location="cpu")
options = checkpoint["options"]

## Quick look at accuracy results

In [103]:
# Collect the stored training accuracy from every sub-directory of MODEL_DIR
# whose name starts with a digit, then report mean and standard deviation.
train_accuracies = []
for model_seed_dir in Path(MODEL_DIR).glob("[0-9]*"):
    # The accuracy is the first element of whatever fh.read_text returns.
    first_entry = fh.read_text(model_seed_dir / "accuracy.train.txt")[0]
    train_accuracies.append(float(first_entry))

accuracy_mean = np.mean(train_accuracies)
accuracy_std = np.std(train_accuracies)
print(f"{accuracy_mean:0.4}, {accuracy_std:0.4}")

0.9679, 0.002768


## Set up data, model

Load data

In [104]:
# Load the vocabulary saved with the training data, then the word counts and
# "sentiment" labels for the split named by SPLIT.
# NOTE(review): the `test_` prefix is a naming convention only; these variables
# hold whichever split SPLIT selects (the config cell sets it to "train").
vocab = fh.read_json(Path(DATA_DIR, "train.vocab.json"))
test_X, _, test_row_selector, test_ids = load_word_counts(
    DATA_DIR, SPLIT, vocab=vocab
)
# The row selector returned with the word counts is passed on to load_labels,
# presumably so labels stay aligned with the kept documents -- confirm in run_scholar.
test_labels, _, label_names, _ = load_labels(
    DATA_DIR, SPLIT, test_row_selector, "sentiment",
)
test_topic_covars = None # for now
# Number of loaded documents (rows of the count matrix).
n_test = test_X.shape[0]

Loading data
Loaded 25000 documents with 5000 features
Found 25000 non-empty documents
Loading labels from ../scholar/data/imdb/processed/train.sentiment.csv
Found 2 labels


Create two versions of the "prior covars" that will index the deviation embeddings, one per label

In [105]:
# Constant two-column indicator matrices, one row per document: every row is
# [1, 0] for the first indexer and [0, 1] for the second.
deviation_indexer_neg = np.tile([1.0, 0.0], (n_test, 1))
deviation_indexer_pos = np.tile([0.0, 1.0], (n_test, 1))

# Only the covariate names are kept from this call.
# NOTE(review): the prefix is hard-coded to "train" while the row selector comes
# from the SPLIT data -- confirm this is intended if SPLIT is ever changed.
deviation_covar_names = load_labels(
    DATA_DIR, "train", test_row_selector, options.deviation_embedding_covar
)[2]

Loading labels from ../scholar/data/imdb/processed/train.sentiment.csv
Found 2 labels


Make a dummy dictionary for model loading since embeddings will be loaded from the torch checkpoint.

In [106]:
from scholar import Scholar
def load_scholar_model(inpath, embeddings=None, map_location=None):
    """
    Load a Scholar model from a saved torch checkpoint.

    Parameters
    ----------
    inpath
        Location of the checkpoint; handed to `torch.load` unchanged.
    embeddings
        Value stored under the `init_embeddings` constructor argument before
        the `Scholar` object is built.
    map_location
        Forwarded to `torch.load`. When it designates the CPU (a string such
        as "cpu" / "cpu:0", or a `torch.device` of type "cpu"), the saved
        `device` constructor argument is replaced with None.

    Returns
    -------
    tuple
        `(model, checkpoint)`: the rebuilt `Scholar` instance with model and
        optimizer state restored, and the loaded checkpoint dictionary.

    Notes
    -----
    `checkpoint["scholar_kwargs"]` is modified in place, so the returned
    checkpoint reflects the overridden `init_embeddings` (and `device`).
    """
    checkpoint = torch.load(inpath, map_location=map_location)
    scholar_kwargs = checkpoint["scholar_kwargs"]
    scholar_kwargs["init_embeddings"] = embeddings

    # Recognise every spelling of "CPU", not just the literal string 'cpu'.
    # Callables / dicts are also valid map_location values for torch.load and
    # are left alone because they cannot be turned into a torch.device.
    targets_cpu = (
        isinstance(map_location, (str, torch.device))
        and torch.device(map_location).type == "cpu"
    )
    if targets_cpu:
        # Drop the device recorded at training time so it is not reused here.
        # NOTE(review): presumably device=None makes Scholar stay on the CPU --
        # confirm against Scholar.__init__.
        scholar_kwargs["device"] = None

    model = Scholar(**scholar_kwargs)
    model._model.load_state_dict(checkpoint["model_state_dict"])
    model.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    return model, checkpoint

In [107]:
# Build the embeddings argument expected by load_scholar_model: one
# (None, True) entry per embedding enabled in the saved options. The first
# element is None because no embedding values are supplied here.
# NOTE(review): the meaning of the second element (True) is defined by Scholar -- confirm.
placeholder_spec = (None, True)
embeddings = {}
if options.background_embeddings:
    embeddings["background"] = placeholder_spec
if options.deviation_embeddings:
    embeddings.update(dict.fromkeys(deviation_covar_names, placeholder_spec))

# Rebuild the model on the CPU from the checkpoint and switch it to eval mode.
model, _ = load_scholar_model(
    Path(MODEL_DIR) / "torch_model.pt", embeddings=embeddings, map_location="cpu"
)
model.eval()

## Evaluate reconstruction errors

In [108]:
def get_recon_errors(model, X, PC, TC, batch_size):
    """
    Get the reconstruction errors for each doc in the dataset.

    For every row, the error is `-(x * log(x_recon + 1e-10)).sum()`, where
    `x_recon` is the second value returned by `model._model(...)` called with
    `compute_loss=False`, `var_scale=1.0` and `eta_bn_prop=0.0`. Labels are
    replaced by all-zero dummies, so they carry no information.

    Parameters
    ----------
    model
        Object exposing `_model` (the callable network with `n_labels`),
        `device` and `get_batch_size`.
    X
        Two-dimensional document/feature matrix; rows are scored in batches.
    PC, TC
        Covariate inputs forwarded to `get_minibatch`; either may be None.
    batch_size
        Number of rows requested from `get_minibatch` per batch.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with one error per row of `X`, in batch order.
        Empty when `X` has no rows.

    Notes
    -----
    The model's train/eval mode is not changed here; callers are expected to
    have set it beforehand.
    """
    n_items, _ = X.shape
    if n_items == 0:
        # np.concatenate would raise on an empty list of batches.
        return np.zeros(0, dtype=np.float32)

    n_batches = int(np.ceil(n_items / batch_size))
    Y = None
    recon_errors = []

    # Pure inference: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for i in range(n_batches):
            batch_xs, _, batch_pcs, batch_tcs = get_minibatch(
                X, Y, PC, TC, i, batch_size
            )

            # Keep the size of *this* batch in its own variable so the
            # requested `batch_size` used for slicing is never overwritten.
            n_in_batch = model.get_batch_size(batch_xs)
            if n_in_batch == 1:
                # Restore the batch dimension for a single-row batch.
                batch_xs = np.expand_dims(batch_xs, axis=0)
                if batch_pcs is not None:
                    batch_pcs = np.expand_dims(batch_pcs, axis=0)
                if batch_tcs is not None:
                    batch_tcs = np.expand_dims(batch_tcs, axis=0)

            batch_xs = torch.Tensor(batch_xs).to(model.device)
            if batch_pcs is not None:
                batch_pcs = torch.Tensor(batch_pcs).to(model.device)
            if batch_tcs is not None:
                batch_tcs = torch.Tensor(batch_tcs).to(model.device)

            # dummy Ys
            batch_ys = np.zeros((n_in_batch, model._model.n_labels), np.float32)
            batch_ys = torch.Tensor(batch_ys).to(model.device)

            _, X_recon, _ = model._model(
                batch_xs,
                batch_ys,
                batch_pcs,
                batch_tcs,
                compute_loss=False,
                var_scale=1.0,
                eta_bn_prop=0.0,
            )
            # The small constant keeps log() finite where the reconstruction is 0.
            error = -(batch_xs * (X_recon + 1e-10).log()).sum(1)
            # .cpu() makes the conversion valid when the model is not on the CPU.
            recon_errors.append(error.detach().cpu().numpy())

    return np.concatenate(recon_errors)

In [109]:
# Score every document twice: once with all rows pointing at the first
# deviation covariate and once with all rows pointing at the second.
recon_settings = dict(TC=None, batch_size=512)
error_neg = get_recon_errors(model, test_X, PC=deviation_indexer_neg, **recon_settings)
error_pos = get_recon_errors(model, test_X, PC=deviation_indexer_pos, **recon_settings)

In [110]:
# Show the label names to confirm which label column is which.
label_names

Index(['neg', 'pos'], dtype='object')

In [111]:
# For each document group (selected via label column 0), print the mean
# reconstruction error under the "neg" indexer, then under the "pos" indexer.
for heading, in_group in (
    ("Error on negative documents", test_labels[:, 0] == 1),
    ("Error on positive documents", test_labels[:, 0] == 0),
):
    print(heading)
    print(error_neg[in_group].mean())
    print(error_pos[in_group].mean())

Error on negative documents
680.2247
688.6816
Error on positive documents
703.16034
694.81805


In [112]:
# Element-wise mask: documents whose label column 0 is set AND whose error
# under the "pos" indexer exceeds the error under the "neg" indexer.
(error_pos > error_neg) & (test_labels[:, 0] == 1)

array([ True,  True,  True, ..., False, False, False])

In [113]:
# Recall of the "lower reconstruction error wins" rule for each class.
# Ties (equal errors) count as misses for both classes.
is_negative_doc = test_labels[:, 0] == 1
is_positive_doc = test_labels[:, 0] == 0
neg_error_lower = error_neg < error_pos
pos_error_lower = error_neg > error_pos

negative_recall = (neg_error_lower & is_negative_doc).sum() / is_negative_doc.sum()
positive_recall = (pos_error_lower & is_positive_doc).sum() / is_positive_doc.sum()

print("Negative recall")
print(negative_recall)
print("Positive recall")
print(positive_recall)

Negative recall
0.90216
Positive recall
0.88176


## Use reconstruction errors in prediction

In [114]:
# Two-column 0/1 indicator per document: column 0 is set when the "neg" error
# is strictly lower, column 1 when the "pos" error is strictly lower.
# A tie leaves both columns at 0.
neg_wins = 1 * (error_neg < error_pos)
pos_wins = 1 * (error_neg > error_pos)
deviation_indexer_from_recon_loss = np.column_stack([neg_wins, pos_wins])

In [115]:
# Predict label probabilities using the indexer derived from the
# reconstruction errors as the PC input.
pred_probs = predict_label_probs(
    model, test_X, PC=deviation_indexer_from_recon_loss, TC=None, batch_size=512
)

In [116]:
# Accuracy: share of documents whose highest-probability class index equals
# the value in label column 1.
predicted_class = pred_probs.argmax(1)
(predicted_class == test_labels[:, 1]).mean()

0.8866

In [118]:
# Show which model directory produced the numbers above.
MODEL_DIR

'imdb-k_50/learned_dev_no_bg-with_label'

- `imdb-k_50/baseline-no_dev_learned_bg-with_label`: 0.897 train, 0.849 test
- `imdb-k_50/learned_dev_no_bg-with_label`: 0.8867 train (test accuracy not recorded)



x