In [1]:
# Uncomment and run the lines below once to install the packages this notebook imports.
# (`transformers` is required by the ESM-2 examples further down.)
# !pip install fair-esm
# !pip install torch
# !pip install biopython
# !pip install tqdm
# !pip install transformers

### EXAMPLE 1: MSA Transformer model (~500MB in size)

In [1]:
#Import the necessary libraries and packages
from esm import pretrained
import torch
from Bio import SeqIO
import os
import pickle

# Model and Alphabet you want to download
model, alphabet = pretrained.esm_msa1b_t12_100M_UR50S()

# Using torch to save the model. This pickles the whole model object (not just
# a state dict), so the loading side must unpickle a full object as well.
torch.save(model, "pretrained_msa_transformer.pt") # change the file name accordingly 

# Using pickle to save the alphabet component. The `with` block guarantees the
# file handle is flushed and closed as soon as the dump finishes, instead of
# leaving an open handle behind in the kernel.
with open("pretrained_msa_alphabet.pickle", "wb") as alphabet_file: # change the file name accordingly
    pickle.dump(alphabet, alphabet_file)

#### Loading the Model and Alphabet

In [None]:
# The file was written with `torch.save(model, ...)`, i.e. it contains a fully
# pickled model object rather than a plain state dict. PyTorch >= 2.6 defaults
# to `weights_only=True`, which refuses to unpickle such objects, so opt out
# explicitly (the `weights_only` keyword needs PyTorch >= 1.13).
# Only do this for files you created yourself: unpickling can run arbitrary code.
model = torch.load("pretrained_msa_transformer.pt", weights_only=False)

In [None]:
# A bare expression on the last line of a cell is displayed by Jupyter;
# this shows the model object loaded in the previous cell.
model

In [None]:
# Read the pickled alphabet back. The `with` block closes the file handle once
# loading is done instead of leaving it open in the kernel.
# NOTE: only unpickle files you trust - pickle can execute arbitrary code on load.
with open("pretrained_msa_alphabet.pickle", "rb") as alphabet_file:
    alphabet = pickle.load(alphabet_file)

In [None]:
# Display the alphabet object restored from the pickle file.
alphabet

### EXAMPLE 2: ESM 2 Transformer model (~11.5GB in size)

In [1]:
#Import the necessary libraries and packages
from transformers import AutoTokenizer, AutoModel
import torch
import pickle


# Model and Tokenizer you want to download
model_name = "facebook/esm2_t36_3B_UR50D"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# `output_attentions=True` is forwarded to the model configuration at load time
model = AutoModel.from_pretrained(model_name, output_attentions=True)

# Using torch to save the model. This pickles the whole model object (not just
# a state dict), so the loading side must unpickle a full object as well.
torch.save(model, "facebook_esm2_transformer.pt") # change the file name accordingly 

# Using pickle to save the tokenizer. The `with` block guarantees the file
# handle is flushed and closed as soon as the dump finishes, instead of
# leaving an open handle behind in the kernel.
with open("facebook_esm2_transformer_tokenizer.pickle", "wb") as tokenizer_file: # change the file name accordingly
    pickle.dump(tokenizer, tokenizer_file)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of the model checkpoint at facebook/esm2_t36_3B_UR50D were not used when initializing EsmModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing EsmModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing EsmModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t36_3B_UR50D and are newly initialized: ['esm.pooler.dense.weight', 'esm.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


#### Loading the Model and Tokenizer

In [None]:
# The file was written with `torch.save(model, ...)`, i.e. it contains a fully
# pickled model object rather than a plain state dict. PyTorch >= 2.6 defaults
# to `weights_only=True`, which refuses to unpickle such objects, so opt out
# explicitly (the `weights_only` keyword needs PyTorch >= 1.13).
# Only do this for files you created yourself: unpickling can run arbitrary code.
model = torch.load("facebook_esm2_transformer.pt", weights_only=False)

In [None]:
# A bare expression on the last line of a cell is displayed by Jupyter;
# this shows the model object loaded in the previous cell.
model

In [None]:
# Read the pickled tokenizer back. The `with` block closes the file handle once
# loading is done instead of leaving it open in the kernel.
# NOTE: only unpickle files you trust - pickle can execute arbitrary code on load.
with open("facebook_esm2_transformer_tokenizer.pickle", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [None]:
# Display the tokenizer object restored from the pickle file.
tokenizer 

### EXAMPLE 2: ESM 2 Transformer model (~11.5GB in size) [another approach: `save_pretrained` / `from_pretrained`]

In [2]:
#Import the necessary libraries and packages
from transformers import AutoTokenizer, AutoModel

# Hugging Face checkpoint to fetch, and the folder that receives the local copy
model_name = "facebook/esm2_t36_3B_UR50D"
local_model_dir = '/ESM/Local_ESM2_Model'

# Fetch the tokenizer and write its files into the local folder
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained(local_model_dir)

# Fetch the model (with `output_attentions=True`) and store it in the same folder
model = AutoModel.from_pretrained(model_name, output_attentions=True)
model.save_pretrained(local_model_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of the model checkpoint at facebook/esm2_t36_3B_UR50D were not used when initializing EsmModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing EsmModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing EsmModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t36_3B_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


#### Loading the Model and Tokenizer

In [None]:
# Directory where the tokenizer and model are saved
# (must be the same path that was passed to `save_pretrained` in the saving cell)
saved_directory = '/ESM/Local_ESM2_Model'

# Loading the model from the saved directory
# (`AutoModel` comes from the `transformers` import in the earlier cell)
model = AutoModel.from_pretrained(saved_directory)

In [None]:
# Display the model object loaded from the saved directory.
model

In [None]:
# Loading the tokenizer from the saved directory
# (`saved_directory` is assigned in the model-loading cell, so run that cell first)
tokenizer = AutoTokenizer.from_pretrained(saved_directory)

In [None]:
# Display the tokenizer object loaded from the saved directory.
tokenizer 