# Export the SETH model to ONNX

In [7]:
import torch
from pathlib import Path


from prott5_batch_predictor import SETH


# Project layout is resolved relative to the parent of the current working directory.
root_dir = Path.cwd().parent
model_dir = root_dir.joinpath("checkpoints")

# Only the `.model` attribute of the SETH wrapper is kept; it is what gets exported below.
seth_model = SETH(model_dir=model_dir).model

import os

def export_seth_to_onnx(seth_model, onnx_file_path=f'{root_dir}/checkpoints/seth_onnx'):
    """Export the given model to ``<onnx_file_path>/seth.onnx``.

    Args:
        seth_model: Model object handed to ``torch.onnx.export``.
        onnx_file_path: Directory that receives ``seth.onnx``. It is created
            (including missing parent directories) if it does not exist yet.
            The default is evaluated once, when this function is defined, from
            the module-level ``root_dir``.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also creates
    # missing parent directories and tolerates an already existing directory.
    os.makedirs(onnx_file_path, exist_ok=True)

    # Dummy input used only to trace the model during export. Its concrete sizes
    # are arbitrary because the axes are declared dynamic below.
    batch_size = 2
    sequence_length = 505
    embedding_dim = 1024  # number of input channels/features

    dummy_input = torch.randn(batch_size, sequence_length, embedding_dim)

    specific_onnx_file_path = f'{onnx_file_path}/seth.onnx'
    # NOTE(review): only axis 0 of 'output' is declared dynamic. If the model output
    # also has a sequence-length axis, it should probably be named here as well —
    # confirm against the model's actual output shape.
    # NOTE(review): 'embedding_dimension' is declared dynamic although the dummy input
    # fixes it at 1024 — confirm the model really accepts other embedding sizes.
    torch.onnx.export(
        seth_model,                          # model being run
        dummy_input,                         # model input (or a tuple for multiple inputs)
        specific_onnx_file_path,             # where to save the model
        export_params=True,                  # store the trained parameter weights inside the model file
        opset_version=12,                    # the ONNX opset version to export the model to
        do_constant_folding=True,            # whether to execute constant folding for optimization
        input_names=['input'],               # the model's input names
        output_names=['output'],             # the model's output names
        dynamic_axes={'input': {0: 'batch_size', 1: 'sequence_length', 2: 'embedding_dimension'},
                      'output': {0: 'batch_size'}}
    )
    print(f"Model has been successfully exported to {specific_onnx_file_path}")

In [8]:
# Run the export with the default output directory (<root_dir>/checkpoints/seth_onnx).
export_seth_to_onnx(seth_model=seth_model)

Model has been successfully exported to /Users/pschloetermann/IdeaProjects/Biocentral_ohne_original/pgp/checkpoints/seth_onnx/seth.onnx


# Compare results: ONNX model <-> original model

In [18]:
import csv
import numpy as np

def read_predictions(file_path) -> list:
    """Read a CSV file of numeric predictions.

    Every CSV row is converted to one 1-D numpy array of floats, so rows may
    have different lengths.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one numpy array per CSV row, in file order.

    Raises:
        ValueError: If a field cannot be converted with ``float``.
    """
    # newline='' is what the csv module asks for when it is given a file object,
    # so that the reader itself handles line endings.
    with open(file_path, mode='r', newline='') as file:
        return [np.array([float(value) for value in row]) for row in csv.reader(file)]

In [34]:
from pathlib import Path

# Both runs are expected to have written their results next to the notebook's parent directory.
root_dir = Path.cwd().parent
output_dir_org = f'{root_dir}/output_seth_org'
output_dir_onnx = f'{root_dir}/output_seth_onnx'
with open(f'{output_dir_onnx}/ids.txt', 'r') as f:
    ids_onnx = f.read()
with open(f'{output_dir_org}/ids.txt', 'r') as f:
    ids_org = f.read()

# The row-by-row comparison below is only meaningful if both runs saw the same IDs.
assert ids_onnx == ids_org, "IDs of the ONNX SETH model and the original SETH model output are NOT identical!"

seth_pred_org = read_predictions(f"{output_dir_org}/seth_disorder_pred.csv")
seth_pred_onnx = read_predictions(f"{output_dir_onnx}/seth_disorder_pred.csv")

# Fail loudly on a differing number of rows instead of raising an IndexError
# (ONNX file shorter) or silently ignoring extra rows (ONNX file longer).
assert len(seth_pred_org) == len(seth_pred_onnx), (
    f"Number of predictions differs: {len(seth_pred_org)} (original) vs. {len(seth_pred_onnx)} (ONNX)"
)

for index, (pred_org, pred_onnx) in enumerate(zip(seth_pred_org, seth_pred_onnx)):
    # Exact comparison: any numerical deviation, however small, is reported.
    if np.array_equal(pred_org, pred_onnx):
        continue
    print(f"The predictions at index {index} are not equal")
    if pred_org.shape != pred_onnx.shape:
        # An element-wise difference is undefined here; report the mismatch instead of
        # letting np.subtract fail with a broadcasting error.
        print(f"Shapes differ: {pred_org.shape} (original) vs. {pred_onnx.shape} (ONNX)\n")
        continue
    differences = np.subtract(pred_org, pred_onnx)
    print(f"Maximal difference:{np.max(differences)}")
    print(f"Maximal negative difference:{np.min(differences)}\n")


The predictions at index 0 are not equal
Maximal difference:4.999999999810711e-06
Maximal negative difference:-8.999999998593466e-06

The predictions at index 1 are not equal
Maximal difference:7.999999999341867e-06
Maximal negative difference:-9.000000000369823e-06

The predictions at index 2 are not equal
Maximal difference:6.000000000838668e-06
Maximal negative difference:-7.999999999341867e-06

The predictions at index 3 are not equal
Maximal difference:7.000000000090267e-06
Maximal negative difference:-7.999999999341867e-06

