# Light Attention model

There are two different sets of checkpoints for this model: one for an output dimension of 10 (called subcell) and one for all other output dimensions (in this case, the only other output dimension used is 2).

-> Two ONNX exports are necessary, one per checkpoint set.

In [1]:
from pathlib import Path
from prott5_batch_predictor import LA


# The checkpoints are looked up in the "checkpoints" folder located in the
# parent of the current working directory.
root_dir = Path.cwd().parent
model_dir = root_dir.joinpath("checkpoints")

# One model per checkpoint set: output_dim=2 and output_dim=10 ("subcell").
la_model = LA(output_dim=2, model_dir=model_dir).model
la_subcell_model = LA(output_dim=10, model_dir=model_dir).model

In [2]:
import os
import torch
from pathlib import Path

root_dir = Path.cwd().parent
model_dir = root_dir / "checkpoints"


def export_LA_to_onnx(la_model, onnx_file_path=f'{root_dir}/checkpoints/light_attention_onnx', is_subcell_model=False):
    """Export a Light Attention model to an ONNX file.

    The model is traced with a random dummy input of shape (2, 1024, 5) and an
    all-ones mask of shape (2, 5). The result is written to
    ``<onnx_file_path>/la_subcell.onnx`` when ``is_subcell_model`` is true and
    to ``<onnx_file_path>/la.onnx`` otherwise.

    Args:
        la_model: The torch model to export. It is called with the tuple
            ``(input, mask)``.
            NOTE(review): assumes the model expects ``input`` laid out as
            (batch, embedding_dim, sequence_length), which is how the dummy
            input is built here -- confirm against the model definition.
        onnx_file_path: Directory that receives the ONNX file. It is created
            (including missing parents) if it does not exist yet.
        is_subcell_model: Selects the output file name (see above).
    """
    # makedirs(exist_ok=True) also creates missing parent directories and does
    # not fail when the directory already exists (e.g. on a notebook rerun).
    os.makedirs(onnx_file_path, exist_ok=True)

    # Dimensions of the dummy tensors used for tracing.
    batch_size = 2
    sequence_length = 5
    embedding_dim = 1024  # number of input channels/features

    x = torch.randn(batch_size, sequence_length, embedding_dim)
    # Swap the last two axes -> (batch_size, embedding_dim, sequence_length)
    x_transposed = torch.permute(x, (0, 2, 1))

    # All ones means no position is masked out.
    mask = torch.ones(batch_size, sequence_length)

    file_name = 'la_subcell.onnx' if is_subcell_model else 'la.onnx'
    specific_onnx_file_path = f'{onnx_file_path}/{file_name}'

    torch.onnx.export(
        la_model,                            # model being run
        (x_transposed, mask),                # model inputs, passed positionally
        specific_onnx_file_path,             # where to save the model
        export_params=True,                  # store the trained parameter weights inside the model file
        opset_version=12,                    # the ONNX opset version to export the model to
        do_constant_folding=True,            # whether to execute constant folding for optimization
        input_names=['input', 'mask'],       # the model's input names
        output_names=['output'],             # the model's output names
        # Axis names follow the layout of the tensors actually passed in:
        # 'input' is (batch, embedding_dim, sequence_length), so the sequence
        # axis is axis 2 and shares its symbolic name with axis 1 of 'mask'.
        dynamic_axes={
            'input': {0: 'batch_size', 1: 'embedding_dim', 2: 'sequence_length'},
            'mask': {0: 'batch_size', 1: 'sequence_length'},
            'output': {0: 'batch_size'},
        },
    )
    print(f"Model has been successfully exported to {specific_onnx_file_path}")

In [3]:
# Export both checkpoint variants: the general model goes to la.onnx,
# the subcell model to la_subcell.onnx.
export_LA_to_onnx(la_model, is_subcell_model=False)
export_LA_to_onnx(la_subcell_model, is_subcell_model=True)

  attention = attention.masked_fill(mask[:, None, :] == 0, torch.tensor(-1e+4))


Model has been successfully exported to /Users/pschloetermann/IdeaProjects/Biocentral_ohne_original/pgp/checkpoints/light_attention_onnx/la.onnx
Model has been successfully exported to /Users/pschloetermann/IdeaProjects/Biocentral_ohne_original/pgp/checkpoints/light_attention_onnx/la_subcell.onnx


# Compare results

In [3]:
from pathlib import Path


# Directories holding the outputs of the original model and of the ONNX model.
root_dir = Path.cwd().parent
output_dir_org = f'{root_dir}/output_la_org'
output_dir_onnx = f'{root_dir}/output_la_onnx'


def _read_output(output_dir, file_name):
    """Return the complete text content of `file_name` inside `output_dir`."""
    return Path(output_dir, file_name).read_text()


# Predictions and IDs are compared as raw file contents, so the outputs must
# match character for character.
la_mem_pred_onnx = _read_output(output_dir_onnx, 'la_mem_pred.txt')
la_subcell_pred_onnx = _read_output(output_dir_onnx, 'la_subcell_pred.txt')

la_mem_pred_org = _read_output(output_dir_org, 'la_mem_pred.txt')
la_subcell_pred_org = _read_output(output_dir_org, 'la_subcell_pred.txt')

ids_org = _read_output(output_dir_org, 'ids.txt')
ids_onnx = _read_output(output_dir_onnx, 'ids.txt')

assert ids_onnx == ids_org, "IDs of ONNX LA model and original LA model output are NOT identical!"
assert la_mem_pred_onnx == la_mem_pred_org, "LA membrane predictions are NOT identical"
assert la_subcell_pred_onnx == la_subcell_pred_org, "LA subcell predictions are NOT identical"