In [1]:
import torch
import torch.nn as nn
from esm.models.esm3 import ESM3
from esm.sdk.api import ESM3InferenceClient, ESMProtein, GenerationConfig, LogitsConfig
from huggingface_hub import login

In [2]:
# ---------------------------
# 1. Login and Load the Model
# ---------------------------
# Authenticate with the Hugging Face Hub (the credentials need "Read" permission).
login()

# Prefer the GPU, but fall back to the CPU so the notebook still loads the
# model on machines without CUDA instead of failing on a hard-coded "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"

# Download and instantiate the model on the selected device.
model: ESM3InferenceClient = ESM3.from_pretrained("esm3-open").to(device)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…



Fetching 22 files:   0%|          | 0/22 [00:00<?, ?it/s]

In [3]:
# ---------------------------
# 2. Define a Classifier Head
# ---------------------------
class ESM3Classifier(nn.Module):
    """Two-layer MLP head that maps an embedding vector to class logits.

    Args:
        embedding_dim: Size of each input embedding vector.
        num_classes: Number of logits produced per example.
        hidden_dim: Width of the hidden layer.
        dropout: Dropout probability applied after the ReLU.
    """

    def __init__(self, embedding_dim, num_classes, hidden_dim=256, dropout=0.1):
        super().__init__()
        # Listed in application order: project -> ReLU -> dropout -> logits.
        layers = [
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, embeddings):
        """Return unnormalised class scores for ``embeddings``.

        ``embeddings`` is expected to be ``(batch_size, embedding_dim)``; the
        result then has shape ``(batch_size, num_classes)``.
        """
        return self.classifier(embeddings)

In [4]:
# ---------------------------
# 3. Generate a Protein Representation
# ---------------------------
# Example input; swap in any protein sequence of your own.
prompt = "EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLEWVARIYPTNGYTRYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYCSSDGSYGFGAMDYWGQGTLVTVSSGGGGSGGGGSGGGGSDIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKLLIYSASFLYSGVPSRFSGSRSGTDFTLTISSLQPEDFATYYCQQYDQTPPTFGQGTKVEIK"

# Generation settings that request a "representation" track.
# NOTE(review): `model.generate` fails on this config with
# "'ESMProteinTensor' object has no attribute 'representation'", so this track
# name does not yield token-level embeddings — check the API docs for the
# supported tracks.
gen_config = GenerationConfig(
    track="representation",
    num_steps=8,
    temperature=0.7,
)

# Wrap the raw sequence in an ESMProtein instance.
protein = ESMProtein(sequence=prompt)

In [5]:
# Echo the config so the cell output lists every field, including the defaults.
gen_config

GenerationConfig(track='representation', invalid_ids=[], schedule='cosine', strategy='entropy', num_steps=8, temperature=0.7, temperature_annealing=False, top_p=1.0, condition_on_coordinates_only=True)

In [6]:
# ---------------------------
# 3b. Extract Token-Level Embeddings
# ---------------------------
# `model.generate` cannot return embeddings: "representation" is not a track on
# the tokenized protein, so that call fails with
# "'ESMProteinTensor' object has no attribute 'representation'".
# Instead, tokenize the protein and run a single forward pass that returns the
# model's embeddings alongside the logits.
protein_tensor = model.encode(protein)
logits_output = model.logits(protein_tensor, LogitsConfig(return_embeddings=True))
if logits_output.embeddings is None:
    raise RuntimeError(
        "model.logits() returned no embeddings; check LogitsConfig(return_embeddings=True)."
    )

# The embeddings are already a tensor on the model's device, so no
# torch.tensor(...) re-wrap is needed. Flatten any leading batch dimension to
# (num_tokens, embedding_dim) and cast to float32 to match the classifier
# head's default parameter dtype.
# NOTE(review): num_tokens presumably includes the BOS/EOS tokens added by
# `encode` — confirm, and slice them off before pooling if they should not count.
token_representations = logits_output.embeddings.detach().float()
token_representations = token_representations.reshape(-1, token_representations.size(-1))

# ---------------------------
# 4. Pool the Token Embeddings
# ---------------------------
# Mean-pool over tokens and keep a batch dimension:
# (num_tokens, embedding_dim) -> (1, embedding_dim)
pooled_embedding = token_representations.mean(dim=0, keepdim=True)

# ---------------------------
# 5. Attach and Run the Classifier Head
# ---------------------------
# Read the embedding size from the data rather than hard-coding it.
embedding_dim = pooled_embedding.size(1)
num_classes = 2  # Example: binary classification

# Put the head on the same device as the embeddings so this works on GPU or CPU.
classifier = ESM3Classifier(embedding_dim, num_classes).to(pooled_embedding.device)

# Forward pass through the classifier to obtain logits.
# The head is freshly initialised and untrained, so these values only confirm
# that the shapes line up.
logits = classifier(pooled_embedding)
print("Logits:", logits)