In [1]:
import os
# Both environment variables are set before `torch` is imported below.
# NOTE(review): ":4096:8" matches the cuBLAS workspace value that the PyTorch
# reproducibility notes describe for deterministic cuBLAS behaviour; no call to
# torch.use_deterministic_algorithms is visible in this notebook — confirm
# whether this setting is still needed.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
# Restrict the CUDA devices visible to this process to index 0.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import torch
import random

# Seed both PyTorch's and Python's random number generators with the same value.
seed = 7
torch.manual_seed(seed)
random.seed(seed)

import ankh

In [2]:
def get_num_params(model):
    """Return the total element count over everything yielded by ``model.parameters()``.

    Each yielded parameter contributes ``numel()`` to the total; a model that
    yields no parameters gives 0.
    """
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total

In [3]:
def get_n_mask_tokens(n):
    """Return the list of strings ``<extra_id_0>`` ... ``<extra_id_(n-1)>``.

    The list is empty when ``range(n)`` is empty.
    """
    return list(map("<extra_id_{}>".format, range(n)))

def append_n_mask_tokens(input_, n):
    """Return ``input_`` followed by the ``n`` mask tokens from ``get_n_mask_tokens``.

    The tokens are concatenated directly onto ``input_`` with no separator.
    """
    mask_suffix = "".join(get_n_mask_tokens(n))
    return input_ + mask_suffix

### Select the available device.

In [4]:
# Use the first CUDA device when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Available device:', device)

Available device: cuda:0


### Load the Ankh large model.

In [5]:
# Load the large Ankh checkpoint together with its tokenizer (generation variant).
model, tokenizer = ankh.load_large_model(generation=True)
# Switch to evaluation mode and move the weights to `device`; the returned module
# is the cell's last expression, so its repr is displayed.
model.eval().to(device=device)

T5ForConditionalGeneration(
  (shared): Embedding(144, 1536)
  (encoder): T5Stack(
    (embed_tokens): Embedding(144, 1536)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1536, out_features=1024, bias=False)
              (k): Linear(in_features=1536, out_features=1024, bias=False)
              (v): Linear(in_features=1536, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1536, bias=False)
              (relative_attention_bias): Embedding(64, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1536, out_features=3840, bias=False)
              (wi_1): Linear(in_features=1536, out_features=3840, bias=False)
           

In [6]:
# Report the total parameter count of the loaded model.
print("Number of parameters:", get_num_params(model))

Number of parameters: 1878705152


### Test autoregressive generation on a sequence.

In [7]:
# Prompt sequence and generation settings used by the following cells.
test_seq = "QVQLVESGGGLVQPGGSL"
num_new_tokens = 5  # how many mask tokens are appended to the prompt
num_beams = 5
temperature = 1.0

# Prompt with `num_new_tokens` mask tokens appended.
masked_seq = append_n_mask_tokens(test_seq, num_new_tokens)
# NOTE(review): 2 * n + 1 presumably budgets one sentinel plus one generated
# token per mask, plus a start token — confirm against the model's output format.
maximum_length = 2 * num_new_tokens + 1

In [8]:
# Tokenize the masked prompt into PyTorch tensors (special tokens included),
# then move the token ids onto `device`.
encoded = tokenizer.encode_plus(
    masked_seq,
    return_tensors='pt',
    add_special_tokens=True,
)
input_ids = encoded['input_ids'].to(device)

In [9]:
# Display the encoded prompt ids.
# NOTE(review): in the recorded output the five ids 143..139 that follow the 18
# prompt ids are presumably the appended mask tokens and the trailing 1 an
# end-of-sequence id — confirm against the tokenizer vocabulary.
input_ids

tensor([[ 16,   6,  16,   4,   6,   9,   7,   5,   5,   5,   4,   6,  16,  13,
           5,   5,   7,   4, 143, 142, 141, 140, 139,   1]], device='cuda:0')

In [10]:
# Generate from the masked prompt using `num_beams` beams and at most
# `maximum_length` output tokens. Sampling is enabled only when the
# temperature is strictly positive.
generation = model.generate(
    input_ids=input_ids,
    temperature=temperature,
    max_length=maximum_length,
    num_beams=num_beams,
    do_sample=temperature > 0,
)

# Keep the first returned sequence; squeeze() drops any size-1 dimensions.
output_ids = generation[0].squeeze()

In [11]:
# Decode the generated ids with special tokens skipped and without tokenization
# clean-up, then turn the decoded result into a list of its items.
decoded_text = tokenizer.decode(
    output_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)
generated_tokens = list(decoded_text)

In [12]:
# Concatenate the original prompt with the generated tokens and print the result.
test_output = "".join([test_seq, *generated_tokens])
print(test_output)

QVQLVESGGGLVQPGGSLVQPGG


In [13]:
# Number of entries in `generated_tokens` (for comparison with `num_new_tokens`).
len(generated_tokens)

5