In [1]:
from common.tokenizer import TokenizerConfig, Tokenizer
from common.encoder import TransformerEncoderConfig, TransformerEncoder
from posttraining.model import ModelConfig, Model
from tqdm.notebook import tqdm
from posttraining.dataset import PosttrainingDatasetConfig, PosttrainingDataset

from torch.utils.data import DataLoader
import torch
import json

# Loading the model

We start by loading the model with the default configuration.

In [2]:
# Set up default configuration

# Values that appear in more than one config are named once so that the
# tokenizer and the encoder cannot silently drift apart.
VOCAB_SIZE = 100  # passed to both the tokenizer and the encoder config
PAD_TOKEN = 0     # padding token id, passed to both the tokenizer and the encoder config
PAD_KIND = 0      # kind id given to the encoder for padding positions

# Kind ids marking which side of an equation a token belongs to
# (used when building the `kinds` tensor for the model).
lhs_kind = 1
rhs_kind = 2

tokenizer_cfg = TokenizerConfig(
    var_names="xyzwuvrst",
    pad_token=PAD_TOKEN,
    mask_token=1,
    mul_token=2,
    num_special_token=3,
    mask_rate=0.15,
    vocab_size=VOCAB_SIZE,
)

encoder_cfg = TransformerEncoderConfig(
    num_layer=4,
    num_head=2,
    d_model=64,
    d_ff=128,
    dropout=0.1,
    max_len=512,
    vocab_size=VOCAB_SIZE,
    num_kind=3,  # presumably one per kind id: PAD_KIND, lhs_kind, rhs_kind -- confirm
    pad_token=PAD_TOKEN,
    pad_kind=PAD_KIND,
    kind_weight=1e-3,
)

model_cfg = ModelConfig(
    proj_dim=32,
)

In [3]:
# Initialize the tokenizer, encoder and model, and load the default trained model.
tokenizer = Tokenizer(tokenizer_cfg)
encoder = TransformerEncoder(encoder_cfg)
model = Model(encoder, model_cfg)

# map_location='cpu': the model is constructed on the CPU and never moved, so the
# checkpoint has to deserialize on a CPU-only machine even if it was saved from a GPU.
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load('default_trained_model.pth', map_location='cpu', weights_only=True)
model.load_state_dict(state_dict)

# Switch to evaluation mode (turns the dropout layers off). As the last
# expression of the cell this also displays the module tree.
model.eval()

Model(
  (encoder): TransformerEncoder(
    (positional_encoding): Embedding(512, 64)
    (kind_embedding): Embedding(3, 64)
    (embedding): Embedding(100, 64)
    (encoder_layer): TransformerEncoderLayer(
      (self_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
      )
      (linear1): Linear(in_features=64, out_features=128, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (linear2): Linear(in_features=128, out_features=64, bias=True)
      (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (dropout1): Dropout(p=0.1, inplace=False)
      (dropout2): Dropout(p=0.1, inplace=False)
    )
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-3): 4 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_f

# Making predictions

The next few cells illustrate how to make predictions with the model.

In [4]:
# Set up tokenized equations
# Each line of the JSONL file is a JSON object with 'name', 'lhs' and 'rhs' keys;
# the equations are indexed by name so they can be looked up for prediction.
equations = {}
with open("tokenized_equations.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file),
        # which json.loads would otherwise reject.
        if not line.strip():
            continue
        record = json.loads(line)
        equations[record['name']] = {'lhs': record['lhs'], 'rhs': record['rhs']}

In [5]:
# Example illustrating how to use the model for prediction.
# Note that the model outputs logits, so sigmoid must be used to get a probability.
# Thresholding the logit at 0 is equivalent to thresholding that probability at 0.5.
def _encode_equation(name):
    """Build the single-example model input for the equation called ``name``.

    Returns a dict with 'src' (token ids) and 'kinds' (``lhs_kind`` for every
    left-hand-side token, ``rhs_kind`` for every right-hand-side token), each
    with a leading batch dimension of size 1.

    Raises KeyError if ``name`` is not a key of ``equations``.
    """
    equation = equations[name]
    lhs_tokens, rhs_tokens = equation['lhs'], equation['rhs']
    src = torch.tensor(tokenizer.tokenize(lhs_tokens + rhs_tokens)['tokens']).unsqueeze(0)
    kinds = torch.tensor([lhs_kind] * len(lhs_tokens) + [rhs_kind] * len(rhs_tokens)).unsqueeze(0)
    return {'src': src, 'kinds': kinds}


def predict(lhs, rhs):
    """Return the model's boolean prediction for a pair of equation names.

    Presumably this is "does equation ``lhs`` imply equation ``rhs``" (the
    dataset files are named ``*_impls.jsonl``) -- confirm against the training code.

    Args:
        lhs: name of the first equation, a key of ``equations``.
        rhs: name of the second equation, a key of ``equations``.

    Returns:
        True when the output logit is >= 0, False otherwise.
    """
    # Inference only: skip autograd bookkeeping, and call the module itself
    # rather than .forward() so that any registered hooks still run.
    with torch.no_grad():
        output = model(_encode_equation(lhs), _encode_equation(rhs))['output']
    return (output >= 0.0).item()

In [6]:
# Try the predictor on two example pairs of equation names.
example_pairs = [
    ("Equation600", "Equation518"),
    ("Equation1718", "Equation3965"),
]
for lhs_name, rhs_name in example_pairs:
    print(predict(lhs_name, rhs_name))

True
False


# Testing the model

In the cells below we evaluate the model on the test dataset and report its accuracy.
This is exactly what the script `posttrain.py` does as its last step.

In [7]:
# Dataset configuration; the file names are presumably resolved relative to `datadir`.
dataset_cfg = PosttrainingDatasetConfig(
    datadir='posttraining/data',
    equations='tokenized_equations.jsonl',
    train='train_impls.jsonl',
    val='val_impls.jsonl',
    test='test_impls.jsonl',
    pad_kind=0,  # same value as pad_kind in the encoder configuration
    # Reuse the notebook-level kind ids so the dataset cannot drift from
    # what predict() feeds the model.
    lhs_kind=lhs_kind,
    rhs_kind=rhs_kind,
    max_datapoints=None,  # presumably "no cap on the number of datapoints" -- confirm
)

# Select the 'test' split.
dataset = PosttrainingDataset(dataset_cfg, tokenizer, 'test')

Loading corpus. This may take a while...


2756786it [00:08, 342931.87it/s]


In [8]:
# Batched loader over the test set.
# shuffle=False: this loader is only used for evaluation, where sample order
# does not matter; a fixed order keeps the running ratio printed during
# evaluation reproducible between runs.
dataloader = DataLoader(
    dataset,
    batch_size=1024,
    num_workers=8,
    shuffle=False,
    collate_fn=dataset.collate_fn,
)

In [9]:
# This will take a while to run
correct_pred = 0
total = 0
# Evaluation only: disable autograd so no graph is built for each batch.
with torch.no_grad():
    for batch in tqdm(dataloader):
        # The model outputs logits; a logit >= 0 counts as a positive prediction.
        predictions = (model(**batch)['output'] >= 0.0).int()
        # .item() keeps the counters as plain Python ints, so the ratio below is an
        # exact int/int division instead of a tensor in the default float dtype.
        correct_pred += (predictions == batch['labels']).int().sum().item()
        total += predictions.size(0)
        print(f'\rCorrect ratio so far: {correct_pred/total}', end='', flush=True)

  0%|          | 0/2693 [00:00<?, ?it/s]

Correct ratio so far: 0.9834386706352234

In [10]:
# Report the final accuracy accumulated over the whole test set.
accuracy = correct_pred / total
print(f"Testing accuracy: {accuracy}")

Testing accuracy: 0.9834386706352234
