In [2]:
import torch
from torch import nn
import numpy as np
from transformers import RobertaForTokenClassification, RobertaTokenizerFast
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
model = RobertaForTokenClassification.from_pretrained('roberta-base', return_dict=True)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

In [3]:
import torch
from torch import nn
import numpy as np
class prompt(nn.Module):

    def __init__(self,
                 wte : nn.Embedding,
                 prompt_length : int = 20,
                 rand_range : float = 0.5,
                 initialize_from_vocab : bool = True):

        super(prompt, self).__init__()
        self.wte = wte
        self.prompt_length = prompt_length
        self.learned_embedding = nn.Parameter(self.initialize_embedding(wte,
                                                                        prompt_length,
                                                                        rand_range,
                                                                        initialize_from_vocab))

    
    def initialize_embedding(self,
                             wte : nn.Embedding,
                             prompt_length : int =10,
                             random_range : float = 0.5,
                             initialize_from_vocab : bool = True):

        if initialize_from_vocab:
            return self.wte.weight[:prompt_length].clone().detach()

        return torch.FloatTensor(wte.weight.size(1),prompt_length).uniform_(-random_range,random_range)

    def forward(self,tokens):
        input_embedding = self.wte(tokens[:,self.prompt_length:])
        learned_embedding = self.learned_embedding.repeat(input_embedding.size(0),1,1)
        print(input_embedding)
        print(input_embedding.shape)
        print(learned_embedding)
        print(learned_embedding.shape)
        return torch.cat((learned_embedding,input_embedding),dim=1)
                    

In [4]:
model = RobertaForTokenClassification.from_pretrained(
    "roberta-base",
    num_labels = 1,
    output_attentions = False,
    output_hidden_states = False, 
)
prompt_emb = prompt(model.get_input_embeddings(), 
                      prompt_length=20, 
                      initialize_from_vocab=True)
model.set_input_embeddings(prompt_emb)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

In [5]:
inputs = tokenizer(
    "Hello, my dog is cute")

n_tokens = 20
inputs['input_ids']=torch.cat((torch.full((1,n_tokens), 500).resize(n_tokens),torch.tensor(inputs['input_ids'], dtype=torch.long)))
inputs['attention_mask']=torch.cat((torch.full((1,n_tokens), 1).resize(n_tokens),torch.tensor(inputs['attention_mask'], dtype=torch.long)))



In [6]:
batch_in = torch.LongTensor(inputs['input_ids'])
batch_mask = torch.LongTensor(inputs['attention_mask'])
print(batch_in.shape)
print(batch_mask.shape)
batch_in = batch_in.reshape((1,batch_in.shape[0]))
batch_mask = batch_mask.reshape((1,batch_mask.shape[0]))    
model(batch_in,batch_mask)

torch.Size([28])
torch.Size([28])
tensor([[[ 0.1476, -0.0365,  0.0753,  ..., -0.0023,  0.0172, -0.0016],
         [-0.1852,  0.0297, -0.1843,  ...,  0.0050, -0.1139,  0.0404],
         [ 0.0998, -0.0595, -0.0705,  ..., -0.2491, -0.0905, -0.0499],
         ...,
         [-0.0265,  0.0249,  0.0118,  ...,  0.0011, -0.0384, -0.1366],
         [-0.0861,  0.2069,  0.0929,  ...,  0.1931, -0.1449, -0.0831],
         [-0.0347, -0.0873, -0.0180,  ...,  0.1174, -0.0098, -0.0355]]],
       grad_fn=<EmbeddingBackward0>)
torch.Size([1, 8, 768])
tensor([[[ 0.1476, -0.0365,  0.0753,  ..., -0.0023,  0.0172, -0.0016],
         [ 0.0156,  0.0076, -0.0118,  ..., -0.0022,  0.0081, -0.0156],
         [-0.0347, -0.0873, -0.0180,  ...,  0.1174, -0.0098, -0.0355],
         ...,
         [-0.1332, -0.0391, -0.0661,  ..., -0.0450, -0.0546,  0.0156],
         [ 0.0358,  0.0647, -0.1526,  ..., -0.1164, -0.0242, -0.0792],
         [-0.0911, -0.1117, -0.0304,  ..., -0.0569, -0.1113, -0.1200]]],
       grad_fn=<Repea

TokenClassifierOutput(loss=None, logits=tensor([[[ 0.2737],
         [ 0.1345],
         [ 0.1662],
         [-0.0013],
         [ 0.0019],
         [-0.0683],
         [-0.0614],
         [-0.0914],
         [-0.1765],
         [-0.1113],
         [-0.0162],
         [-0.1161],
         [-0.0499],
         [-0.0083],
         [-0.1131],
         [ 0.1061],
         [ 0.1515],
         [ 0.1186],
         [ 0.1165],
         [-0.0790],
         [ 0.2720],
         [ 0.0201],
         [ 0.1880],
         [ 0.0847],
         [ 0.1159],
         [ 0.1400],
         [-0.0990],
         [ 0.2838]]], grad_fn=<ViewBackward0>), hidden_states=None, attentions=None)

In [7]:
inputs = tokenizer.encode_plus(
    "Creampie motherfucker I would like to fuck you")

n_tokens = 20
inputs['input_ids']=torch.cat((torch.full((1,n_tokens), 500).resize(n_tokens),torch.tensor(inputs['input_ids'], dtype=torch.long)))
inputs['attention_mask']=torch.cat((torch.full((1,n_tokens), 1).resize(n_tokens),torch.tensor(inputs['attention_mask'], dtype=torch.long)))

In [100]:
batch_in = torch.LongTensor(inputs['input_ids'])
batch_mask = torch.LongTensor(inputs['attention_mask'])
batch_in = batch_in.reshape((1,batch_in.shape[0]))
batch_mask = batch_mask.reshape((1,batch_mask.shape[0]))    
model(batch_in,batch_mask)

tensor([[[ 0.1476, -0.0365,  0.0753,  ..., -0.0023,  0.0172, -0.0016],
         [-0.3821, -0.0012,  0.0288,  ..., -0.0869, -0.1298, -0.0964],
         [ 0.0670,  0.1276, -0.1354,  ..., -0.0532,  0.1196,  0.1250],
         ...,
         [ 0.0407, -0.3547, -0.0796,  ..., -0.2576,  0.0377, -0.2172],
         [ 0.0184, -0.0451, -0.0091,  ...,  0.1189, -0.0917, -0.0632],
         [-0.0347, -0.0873, -0.0180,  ...,  0.1174, -0.0098, -0.0355]]],
       grad_fn=<EmbeddingBackward0>)
torch.Size([1, 14, 768])
tensor([[[ 0.1476, -0.0365,  0.0753,  ..., -0.0023,  0.0172, -0.0016],
         [ 0.0156,  0.0076, -0.0118,  ..., -0.0022,  0.0081, -0.0156],
         [-0.0347, -0.0873, -0.0180,  ...,  0.1174, -0.0098, -0.0355],
         ...,
         [-0.1332, -0.0391, -0.0661,  ..., -0.0450, -0.0546,  0.0156],
         [ 0.0358,  0.0647, -0.1526,  ..., -0.1164, -0.0242, -0.0792],
         [-0.0911, -0.1117, -0.0304,  ..., -0.0569, -0.1113, -0.1200]]],
       grad_fn=<RepeatBackward0>)
torch.Size([1, 20, 7

TokenClassifierOutput(loss=None, logits=tensor([[[-0.2895],
         [-0.2569],
         [-0.2413],
         [-0.2783],
         [-0.1996],
         [-0.1514],
         [-0.1752],
         [-0.0555],
         [-0.0038],
         [ 0.0520],
         [-0.1599],
         [-0.0871],
         [-0.1321],
         [-0.1538],
         [-0.1403],
         [-0.0408],
         [-0.0910],
         [-0.1631],
         [-0.1798],
         [-0.0919],
         [-0.2899],
         [-0.4656],
         [-0.0797],
         [-0.2300],
         [-0.4179],
         [-0.2770],
         [-0.1964],
         [-0.1745],
         [-0.2441],
         [-0.2134],
         [-0.3117],
         [-0.2743],
         [-0.4425],
         [-0.2813]]], grad_fn=<ViewBackward0>), hidden_states=None, attentions=None)

In [9]:
tok = tokenizer('Hellow sanya. Lets do it',return_tensors='pt')

In [10]:
tok['input_id']

{'input_ids': tensor([[    0,   725, 33796,   579, 12837,     4, 40702,   109,    24,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [11]:
tok

{'input_ids': tensor([[    0,   725, 33796,   579, 12837,     4, 40702,   109,    24,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [17]:
toks = tokenizer("hello guys i am not chatgpt",add_special_tokens =False,padding = 'max_length',max_length=50)
print(toks)
toks['input_ids'] = torch.cat((torch.full((1,n_tokens), 500).resize(n_tokens),torch.tensor(toks['input_ids'], dtype=torch.long)))
toks['input_ids'] = toks['input_ids'].reshape((1,toks['input_ids'].shape[0]))
            
toks['attention_mask'] = torch.cat((torch.full((1,n_tokens), 1).resize(n_tokens),torch.tensor(toks['attention_mask'], dtype=torch.long)))
toks['attention_mask'] = toks['attention_mask'].reshape((1,toks['attention_mask'].shape[0]))

{'input_ids': [42891, 1669, 939, 524, 45, 7359, 571, 3320, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}




In [18]:
toks

{'input_ids': tensor([[  500,   500,   500,   500,   500,   500,   500,   500,   500,   500,
           500,   500,   500,   500,   500,   500,   500,   500,   500,   500,
         42891,  1669,   939,   524,    45,  7359,   571,  3320,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [19]:
tokenizer("hello guys i am not chatgpt",add_special_tokens =False,padding = 'max_length',max_length=50,return_tensors='pt')

{'input_ids': tensor([[42891,  1669,   939,   524,    45,  7359,   571,  3320,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0]])}