In [None]:
from pathlib import Path
import torch
from utils.seq_evo_lm_data_loader import getSeqVecEmbeddings
from model.embedder_architecture import SeqEvoLM

# Settings handed to SeqEvoLmEmbedder() in this notebook.
# The loader reads 'hidden_size', 'projection_size', 'dropout_rate',
# 'model_name' and 'use_GPU'. The remaining keys are not read by any code
# in this notebook -- presumably training-time settings; TODO confirm.
config = dict(
    # Architecture hyperparameters forwarded to SeqEvoLM(...)
    hidden_size=2048,
    projection_size=256,
    dropout_rate=0.1,
    # Not consumed by the embedder loader below
    max_seq_len_for_TBPTT=290,
    loss_harmony_weight=0.75,
    clip_norm_value=1.0,
    # Checkpoint file stem; the loader appends '.pt'
    model_name='multitask_lm_weights',
    # When True the loader tries CUDA and falls back to CPU if unavailable
    use_GPU=True,
)

def SeqEvoLmEmbedder(options):
    """Build a SeqEvoLM model, load its saved weights and return it in eval mode.

    Args:
        options: Mapping that must provide 'use_GPU', 'hidden_size',
            'projection_size', 'dropout_rate' and 'model_name'.

    Returns:
        The SeqEvoLM instance, moved to the selected device and switched to
        evaluation mode via ``model.eval()``.

    Raises:
        KeyError: If one of the required keys is missing from ``options``.
        Errors raised by ``torch.load`` / ``load_state_dict`` (for example a
        missing checkpoint file or mismatching parameter names) propagate
        unchanged.
    """
    # Pick the device. CUDA is used only when requested AND available;
    # every other case runs on the CPU. (Previously `device` was left
    # undefined when 'use_GPU' was False, which crashed on the next line.)
    if options['use_GPU'] and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = SeqEvoLM(
        device_resource=device,
        hidden_size=options['hidden_size'],
        projection_size=options['projection_size'],
        dropout_rate=options['dropout_rate'],
    )

    # Load saved model weights from seq_evo_lm/<model_name>.pt
    thesis_model_dir = Path('seq_evo_lm')
    path = thesis_model_dir / (options['model_name'] + '.pt')
    # map_location remaps tensors stored from a CUDA device so the checkpoint
    # can also be loaded on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(path, map_location=device)
    model.load_state_dict(state_dict)

    model = model.to(device)
    model.eval()
    return model

def getSeqEvoLmEmbeddings(SeqEvoLmEmbedder, batch):
    """Embed one sequence or a list of sequences with a loaded embedder.

    Args:
        SeqEvoLmEmbedder: Callable model invoked with the three prepared
            inputs returned by ``getSeqVecEmbeddings``.
        batch: One string or a list of strings.

    Returns:
        A list holding one entry of the model output per index in the
        ``original_indices`` sequence returned by ``getSeqVecEmbeddings``.
    """
    prepared_inputs = getSeqVecEmbeddings(batch)
    uncon_padded, con_padded, seq_lengths, restore_order = prepared_inputs

    model_output = SeqEvoLmEmbedder(uncon_padded, con_padded, seq_lengths)

    # Re-index the model output with the indices handed back by the loader
    # (presumably restoring the caller's input order -- TODO confirm).
    reordered = []
    for position in restore_order:
        reordered.append(model_output[position])
    return reordered

In [None]:
## Instantiate the Sequence Evolution Language Model embedder from `config`
## (builds the model, loads its checkpoint, returns it in eval mode).
embedder = SeqEvoLmEmbedder(options=config)

In [None]:
## Input: a list of sequence strings.
## A single sequence string is accepted as well, e.g. batch = "PROTEIN".
batch = [
    "PROTEIN",
    "SEQWENCE",
]

## Run the embedder on the batch to obtain the embeddings
embeddings = getSeqEvoLmEmbeddings(embedder, batch)