In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "Salesforce/codegen-350M-multi"

In [3]:
model = AutoModelForCausalLM.from_pretrained(model_name)

In [4]:
model.cuda()
_=model.eval()

In [5]:
input_texts = ["from",
               "from transformers import AutoTokenizer, AutoModelForCausalLM", 
               "tokenizer = AutoTokenizer.from_pretrained(model_name)", 
               "model = AutoModelForCausalLM.from_pretrained(model_name)"]

In [6]:
def set_cuda(inputs):
    for key in inputs:
        inputs[key] = inputs[key].cuda()
    return inputs

In [7]:
# params = { 'min_length':32, 
#            'max_length':128, 
#            'do_sample':True, 
#            'top_p':0.95, 
#            'num_return_sequences':5}

In [8]:
params = { "max_length": 128, "do_sample":False}

# batch mode

In [9]:
tokenizer_with_pad = AutoTokenizer.from_pretrained(model_name)
tokenizer_with_pad.pad_token_id = tokenizer_with_pad.eos_token_id

In [10]:
encoded_texts = tokenizer_with_pad(input_texts, return_tensors="pt", padding=True)
encoded_texts = set_cuda(inputs = encoded_texts)

In [11]:
%%time
with torch.no_grad():
    generated_ids = model.generate(**encoded_texts, 
                                   pad_token_id=tokenizer_with_pad.eos_token_id,
                                   **params)

  attn_weights = torch.where(causal_mask, attn_weights, mask_value)


CPU times: user 1.83 s, sys: 106 ms, total: 1.94 s
Wall time: 1.94 s


In [12]:
# show the generation for thr 1st sample
for i, generated_text in enumerate(tokenizer_with_pad.batch_decode(generated_ids, skip_special_tokens=True)):
    print(f"TEXT {i+1}:")
    print()
    print(generated_text)
    break

TEXT 1:

from<<<|cpp|>/*
 * Copyright (C) 2008-2013 TrinityCore <http://www.trinitycore.org/>
 * Copyright (C) 2006-2009 ScriptDev2 <https://scriptdev2.svn.sourceforge.net/>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your



In [13]:
# check attention_mask for the 1st sample (eos_token_id == pad_token_id == 50256)

In [14]:
i=0
print(*zip(encoded_texts['input_ids'][i].tolist(), encoded_texts['attention_mask'][i].tolist()))

(6738, 1) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0) (50256, 0)


In [15]:
# check attention_mask for the 2st sample (eos_token_id == pad_token_id == 50256)

In [16]:
i=1
print(*zip(encoded_texts['input_ids'][i].tolist(), encoded_texts['attention_mask'][i].tolist()))

(6738, 1) (6121, 1) (364, 1) (1330, 1) (11160, 1) (30642, 1) (7509, 1) (11, 1) (11160, 1) (17633, 1) (1890, 1) (24334, 1) (6775, 1) (31288, 1) (50256, 0) (50256, 0) (50256, 0) (50256, 0)


In [17]:
# experiment - add more padded tokens

In [18]:
extra_len = 20

input_ids0 = encoded_texts['input_ids'][0]
attention_mask0 = encoded_texts['attention_mask'][0]

print(input_ids0.shape, attention_mask0.shape)

extra_input_ids = tokenizer_with_pad.pad_token_id*torch.ones(size=(extra_len,), dtype=torch.int)
new_input_ids0 = torch.hstack([input_ids0, extra_input_ids.cuda()]).view((1,-1))

extra_attention_mask = torch.zeros(size=(extra_len,), dtype=torch.int)
new_attention_mask0 = torch.hstack([attention_mask0, extra_attention_mask.cuda()]).view((1,-1))

print(new_input_ids0.shape, new_attention_mask0.shape)

torch.Size([18]) torch.Size([18])
torch.Size([1, 38]) torch.Size([1, 38])


In [19]:
%%time
with torch.no_grad():
    generated_ids0 = model.generate(input_ids=new_input_ids0, attention_mask=new_attention_mask0,
                                   pad_token_id=tokenizer_with_pad.eos_token_id,
                                   **params)

CPU times: user 1.18 s, sys: 0 ns, total: 1.18 s
Wall time: 1.18 s


In [20]:
generated_ids0

tensor([[ 6738, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,    27,    27,
            91, 20322,    91,    29, 15211,   198,  1635, 15069,   357,    34,
             8,  3648,    12,  6390, 22844, 14055,  1279,  4023,  1378,  2503,
            13,  2213,  6269,  7295,    13,  2398, 15913,   198,  1635, 15069,
           357,    34,     8,  4793,    12, 10531, 12327, 13603,    17,  1279,
          5450,  1378, 12048,  7959,    17,    13, 21370,    77,    13, 10459,
         30293,    13,  3262, 15913,   198,  1635,   198,  1635,   770,  1430,
           318,  1479,  3788,    26,   345,   460, 17678,  4163,   340,   290,
            14,   273, 13096,   340,   198,  1635,   739,   262,  2846,   286,
           262, 22961,  3611,  5094, 13789,   355,  

In [21]:
generated_ids[0]

tensor([ 6738, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,    27,    27,
           27,    91, 20322,    91,    29, 15211,   198,  1635, 15069,   357,
           34,     8,  3648,    12,  6390, 22844, 14055,  1279,  4023,  1378,
         2503,    13,  2213,  6269,  7295,    13,  2398, 15913,   198,  1635,
        15069,   357,    34,     8,  4793,    12, 10531, 12327, 13603,    17,
         1279,  5450,  1378, 12048,  7959,    17,    13, 21370,    77,    13,
        10459, 30293,    13,  3262, 15913,   198,  1635,   198,  1635,   770,
         1430,   318,  1479,  3788,    26,   345,   460, 17678,  4163,   340,
          290,    14,   273, 13096,   340,   198,  1635,   739,   262,  2846,
          286,   262, 22961,  3611,  5094, 13789,   355,  3199,   416,   262,
          198,  1635,  3232, 10442,  5693,    26,  2035,  2196,   362,   286,
          262, 13789,    11,   393,   357,   265,   534,   198],

In [22]:
# we add more padded tokens and get different result

# single mode

In [23]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [24]:
%%time
for i, input_text in enumerate(input_texts):
    encoded_text = tokenizer(input_text, return_tensors="pt")
    encoded_text = set_cuda(inputs = encoded_text)
    with torch.no_grad():
        generated_single_ids = model.generate(**encoded_text,
                                              pad_token_id=tokenizer.eos_token_id,
                                               **params)
    for decoded_single_text in tokenizer.batch_decode(generated_single_ids, skip_special_tokens=True):
        print(f"=========={i+1} text ==============")
        print(decoded_single_text)
        print()
    
    break

from __future__ import unicode_literals

from django.db import models
from django.utils.encoding import python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _

from. import models


@python_2_unicode_compatible
class User(models.Model):
    username = models.CharField(_('username'), max_length=30)
    password = models.CharField(_('password'), max_length=30)
    email = models.EmailField(_('email address'),

CPU times: user 1.65 s, sys: 0 ns, total: 1.65 s
Wall time: 1.64 s


In [25]:
generated_single_ids

tensor([[ 6738, 11593, 37443,   834,  1330, 28000,  1098,    62, 17201,   874,
           198,   198,  6738, 42625, 14208,    13,  9945,  1330,  4981,   198,
          6738, 42625, 14208,    13, 26791,    13, 12685,  7656,  1330, 21015,
            62,    17,    62, 46903,  1098,    62, 38532,   198,  6738, 42625,
         14208,    13, 26791,    13, 41519,  1330,   334,  1136,  5239,    62,
            75, 12582,   355,  4808,   198,   198,  6738,   764,  1330,  4981,
           628,   198,    31, 29412,    62,    17,    62, 46903,  1098,    62,
         38532,   198,  4871, 11787,     7, 27530,    13, 17633,  2599,   198,
         50284, 29460,   796,  4981,    13, 12441, 15878, 28264, 10786, 29460,
         33809,  3509,    62, 13664,    28,  1270,     8,   198, 50284, 28712,
           796,  4981,    13, 12441, 15878, 28264, 10786, 28712, 33809,  3509,
            62, 13664,    28,  1270,     8,   198, 50284, 12888,   796,  4981,
            13, 15333, 15878, 28264, 10786, 12888,  

# question (why?)

In [26]:
# for single mode we have got:

# from __future__ import unicode_literals

# from django.db import models
# from django.utils.encoding import python_2_unicode_compatible
# from django.utils.translation import ugettext_lazy as _

# from. import models


# @python_2_unicode_compatible
# class User(models.Model):
#     username = models.CharField(_('username'), max_length=30)
#     password = models.CharField(_('password'), max_length=30)
#     email = models.EmailField(_('email address'),
                              
 #for batch mopde we have got:   
                              
# from<<<|cpp|>/*
#  * Copyright (C) 2008-2013 TrinityCore <http://www.trinitycore.org/>
#  * Copyright (C) 2006-2009 ScriptDev2 <https://scriptdev2.svn.sourceforge.net/>
#  *
#  * This program is free software; you can redistribute it and/or modify it
#  * under the terms of the GNU General Public License as published by the
#  * Free Software Foundation; either version 2 of the License, or (at your