Let's load one of the pretrained models from the Hugging Face Hub using the Transformers library and use it to predict the top GO terms for a protein sequence.

In [None]:
import torch

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load a pretrained sequence-classification checkpoint from the Hub, score one
# protein sequence against every label, and print the `top_k` best-scoring
# GO terms.

# Hub repository id of the checkpoint to load.
model_name = "andrewdalpino/ESM2-35M-Protein-Molecular-Function"

# Amino-acid sequence to classify.
sequence = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKA"

# Number of highest-scoring terms to report.
top_k = 10

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name)

# NOTE(review): 1026 is presumably the model's maximum input length including
# special tokens -- confirm against the checkpoint's config.
out = tokenizer(
    sequence,
    max_length=1026,
    truncation=True,
    return_tensors="pt",
)

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    # Call the module itself rather than `.forward()` so that registered
    # hooks run, and pass every tensor the tokenizer produced instead of
    # only `input_ids`.
    outputs = model(**out)

    # Drop the batch dimension of size 1; sigmoid scores each label
    # independently of the others.
    probabilities = torch.sigmoid(outputs.logits.squeeze(0))

    # `torch.topk` raises if k exceeds the number of labels, so clamp it.
    k = min(top_k, probabilities.numel())

    probabilities, indices = torch.topk(probabilities, k)

probabilities = probabilities.tolist()

# Map label indices back to their human-readable names.
terms = [model.config.id2label[index] for index in indices.tolist()]

print(f"Top {k} GO Terms:")

for term, probability in zip(terms, probabilities):
    print(f"{probability:.4f}: {term}")