# Prompting Tutorial
## This is a tutorial for prompting. We will use a few shot sentiment analysis task to show how to do prompting.

In [41]:
# load packages
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForMaskedLM,AutoModelForSequenceClassification

In [51]:
from datasets import Dataset

In [31]:
import tqdm

In [2]:
# load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

Downloading (…)enizer_config.json";:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)"config.json";:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)"vocab.txt";:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)"tokenizer.json";:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### An Example of Zero-shot 

In [3]:
Text='The review is [MASK]. Review: It is a good movie.'

In [4]:
encoding = tokenizer(Text, truncation=True, padding=True,return_tensors='pt')

In [5]:
outputs = model(encoding['input_ids'], attention_mask=encoding['attention_mask'])

The mask token 103 is the fifth token 

In [6]:
encoding

{'input_ids': tensor([[ 101, 1996, 3319, 2003,  103, 1012, 3319, 1024, 2009, 2003, 1037, 2204,
         3185, 1012,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

so we use logits[0,4,:] to get the logit of the mask token

In [7]:
logits=outputs.logits[0,4,:]

Check the first five result

In [8]:
top_5_tokens = np.argsort(-logits.detach().numpy())[:5].tolist()

In [9]:
for token in top_5_tokens:
    print(f">>> {Text.replace(tokenizer.mask_token, tokenizer.decode([token]))}")

>>> The review is positive. Review: It is a good movie.
>>> The review is mixed. Review: It is a good movie.
>>> The review is excellent. Review: It is a good movie.
>>> The review is good. Review: It is a good movie.
>>> The review is negative. Review: It is a good movie.


### In this tutorial, we choose the IMDB Dataset as our dataset. It has 50000 movie reviews, 25000 for postive and 25000 for negative.

In [10]:
data=pd.read_csv('IMDB Dataset.csv')

In [11]:
data

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [12]:
# Add Prompt
review_lst=data['review'].to_list()
#For each sample we will add the prompt and add it to the prompt list
prompt=[]
for sentence in review_lst:
    new_string='The review is [MASK]. Review: '+sentence
    prompt.append(new_string)
#create new colunmn in data as prompt
data['prompt']=prompt

In [13]:
data

Unnamed: 0,review,sentiment,prompt
0,One of the other reviewers has mentioned that ...,positive,The review is [MASK]. Review: One of the other...
1,A wonderful little production. <br /><br />The...,positive,The review is [MASK]. Review: A wonderful litt...
2,I thought this was a wonderful way to spend ti...,positive,The review is [MASK]. Review: I thought this w...
3,Basically there's a family where a little boy ...,negative,The review is [MASK]. Review: Basically there'...
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,The review is [MASK]. Review: Petter Mattei's ...
...,...,...,...
49995,I thought this movie did a down right good job...,positive,The review is [MASK]. Review: I thought this m...
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative,"The review is [MASK]. Review: Bad plot, bad di..."
49997,I am a Catholic taught in parochial elementary...,negative,The review is [MASK]. Review: I am a Catholic ...
49998,I'm going to have to disagree with the previou...,negative,The review is [MASK]. Review: I'm going to hav...


check the encoding of positive and negative in tokenizer

In [14]:
tokenizer.decode([3893])

'positive'

In [15]:
tokenizer.decode([4997])

'negative'

Add targets. We want to predict the mask token to be postive or negative

In [16]:
data['target'] = data['sentiment'].replace(['positive','negative'], [3893,4997])

In [17]:
data

Unnamed: 0,review,sentiment,prompt,target
0,One of the other reviewers has mentioned that ...,positive,The review is [MASK]. Review: One of the other...,3893
1,A wonderful little production. <br /><br />The...,positive,The review is [MASK]. Review: A wonderful litt...,3893
2,I thought this was a wonderful way to spend ti...,positive,The review is [MASK]. Review: I thought this w...,3893
3,Basically there's a family where a little boy ...,negative,The review is [MASK]. Review: Basically there'...,4997
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,The review is [MASK]. Review: Petter Mattei's ...,3893
...,...,...,...,...
49995,I thought this movie did a down right good job...,positive,The review is [MASK]. Review: I thought this m...,3893
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative,"The review is [MASK]. Review: Bad plot, bad di...",4997
49997,I am a Catholic taught in parochial elementary...,negative,The review is [MASK]. Review: I am a Catholic ...,4997
49998,I'm going to have to disagree with the previou...,negative,The review is [MASK]. Review: I'm going to hav...,4997


### Create a subset with 2 samples one for positive and one for negative

In [59]:
one_shot=data.groupby('sentiment').tail(1)

In [19]:
one_shot

Unnamed: 0,review,sentiment,prompt,target
49995,I thought this movie did a down right good job...,positive,The review is [MASK]. Review: I thought this m...,3893
49999,No one expects the Star Trek movies to be high...,negative,The review is [MASK]. Review: No one expects t...,4997


### Create a subset with 32 samples 16 for positive and 16 for negative

In [20]:
few_shot=data.groupby('sentiment').head(16)

Customized Dataset Class

In [21]:
class CustomDataset():

    def __init__(self, dataframe, tokenizer):
        self.tokenizer = tokenizer
        self.prompts = list(dataframe['prompt'])
        self.targets = list(dataframe['target'])

    def __len__(self):
        return len(self.prompts)

    def __getitem__(self, index):

    
        inputs = self.tokenizer.encode_plus(
            self.prompts[index],
            None,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )
        
        
        
        
        ids_content = inputs['input_ids']
        mask_content = inputs['attention_mask']
        token_type_ids_content = inputs["token_type_ids"]
        targets=self.targets[index]
        return torch.tensor(ids_content, dtype=torch.long),torch.tensor(mask_content, dtype=torch.long),torch.tensor(token_type_ids_content, dtype=torch.long),torch.tensor(targets, dtype=torch.long)

dataloders for training and testing

In [22]:
one_shot_set = CustomDataset(one_shot, tokenizer)

oneshotloader = DataLoader(one_shot_set, batch_size=2,shuffle=True)

In [23]:
few_shot_set = CustomDataset(few_shot, tokenizer)

fewshotloader = DataLoader(few_shot_set, batch_size=2,shuffle=True)

In [38]:
test_set = CustomDataset(data[:5000], tokenizer)

testloader = DataLoader(test_set, batch_size=2,shuffle=False)

# Training and Evaluating functions

In [25]:
def training(train_loader, model, criterion,optimizer):
    """one epoch training"""


    epoch_loss = 0

    epoch_acc = 0
   
    model.train()


    for idx, (input_ids,attention_mask,token_type_ids,targets) in enumerate(train_loader):


        
        if torch.cuda.is_available():
            input_ids = input_ids.cuda()
            attention_mask = attention_mask.cuda()
            targets= targets.cuda()


        # compute logits
        outputs = model(input_ids, attention_mask=attention_mask).logits
        #the mask token logits is in outputs[:,4,:]
        pred=outputs[:,4,:]
        
        #calculate the loss and accuracy
        loss = criterion(pred, targets)
        acc = calculate_accuracy(pred, targets)
        
        #accumulate the loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()


        # SGD
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    



    return epoch_loss / len(train_loader), epoch_acc / len(train_loader)

In [26]:
def evaluate(eval_loader, model,  criterion):
    epoch_loss = 0

    epoch_acc = 0

    model.eval()
    
    with torch.no_grad():
    
        for idx, (input_ids,attention_mask,token_type_ids,targets) in enumerate(eval_loader):

            if torch.cuda.is_available():
                input_ids = input_ids.cuda()
                attention_mask = attention_mask.cuda()
                targets= targets.cuda()
            optimizer.zero_grad()
            
            # compute logits
            outputs = model(input_ids, attention_mask=attention_mask).logits
            #the mask token logits is in outputs[:,4,:]
            pred=outputs[:,4,:]
            
            #calculate the loss and accuracy
            loss = criterion(pred, targets)
            acc = calculate_accuracy(pred, targets)
            
            #accumulate the loss and accuracy
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(eval_loader), epoch_acc / len(eval_loader)

In [27]:
#calculate accuracy
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim = True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [68]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=1e-6,
    )

In [39]:
#training and evaluate
for i in tqdm.tqdm(range(300)):


    print(f"epochs:{i+1}")
    
    #training
    tr_loss,tr_acc=training(oneshotloader, model, criterion,optimizer)
    print(f'training_loss:{tr_loss}, acc:{tr_acc}')

    #evaluating
    ts_loss,ts_acc=evaluate(testloader, model,  criterion)
    print(f'ts_loss:{ts_loss}, ts_acc:{ts_acc}')
    

  0%|                                                                                          | 0/300 [00:00<?, ?it/s]

epochs:1
training_loss:2.6284995079040527, acc:0.5


  0%|▎                                                                               | 1/300 [01:38<8:12:50, 98.90s/it]

ts_loss:1.8543546616435052, ts_acc:0.469
epochs:2
training_loss:2.494699239730835, acc:0.5


  0%|▎                                                                             | 1/300 [02:42<13:31:40, 162.88s/it]


KeyboardInterrupt: 

### After 35 epochs, the accracy reachs 0.78 for testing with only 2 samples in training
### After 30 epochs, the accracy reachs 0.82 for testing with only 36 samples in training

In [66]:
class MyModel(nn.Module):
    def __init__(self, num_classes):
        super(MyModel, self).__init__()
        
        self.model_bert = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
        self.model_bert.dropout = nn.Identity()
        self.model_bert.classifier = nn.Identity()
        
        self.head = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(768, num_classes)
        )
       
        
        

    def forward(self, x):

        x = self.model_bert(x).logits
        classes = self.head(x)
        
        return classes



model = MyModel(2)

#for param in model.model_roberta.parameters():
#    param.requires_grad = False

# In[33]:


def training(train_loader, model,criterion, optimizer):



    epoch_loss = 0
    epoch_loss1 = 0
    epoch_loss2 = 0
    epoch_acc = 0
   
    model.train()


    for idx, (text,label) in enumerate(train_loader):
        
        
        

        text=text.to(device)
        label=label.to(device)
        classes = model(text)
        loss=criterion(classes,label)

        
        optimizer.zero_grad()
        
        loss.backward()
             

        optimizer.step()

        acc = calculate_accuracy(classes, label)
        
        epoch_loss += loss.item()
        
        epoch_acc += acc.item()





    return epoch_loss / len(train_loader), epoch_acc / len(train_loader)
def evaluate(train_loader, model,criterion, optimizer):



    epoch_loss = 0
    epoch_loss1 = 0
    epoch_loss2 = 0
    epoch_acc = 0
   
    model.eval()

    with torch.no_grad():
        for idx, (text,label) in enumerate(train_loader):
        
        
        

            text=text.to(device)
            label=label.to(device)

            classes = model(text)
            loss=criterion(classes,label)
            
            optimizer.zero_grad()
        
        
             

        

            acc = calculate_accuracy(classes, label)
        
            epoch_loss += loss.item()
        
            epoch_acc += acc.item()


        
        



    return epoch_loss / len(train_loader), epoch_acc / len(train_loader)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [44]:
data['target_2'] = data['target'].replace([3893,4997], [0,1])

In [45]:
data

Unnamed: 0,review,sentiment,prompt,target,target_2
0,One of the other reviewers has mentioned that ...,positive,The review is [MASK]. Review: One of the other...,3893,0
1,A wonderful little production. <br /><br />The...,positive,The review is [MASK]. Review: A wonderful litt...,3893,0
2,I thought this was a wonderful way to spend ti...,positive,The review is [MASK]. Review: I thought this w...,3893,0
3,Basically there's a family where a little boy ...,negative,The review is [MASK]. Review: Basically there'...,4997,1
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,The review is [MASK]. Review: Petter Mattei's ...,3893,0
...,...,...,...,...,...
49995,I thought this movie did a down right good job...,positive,The review is [MASK]. Review: I thought this m...,3893,0
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative,"The review is [MASK]. Review: Bad plot, bad di...",4997,1
49997,I am a Catholic taught in parochial elementary...,negative,The review is [MASK]. Review: I am a Catholic ...,4997,1
49998,I'm going to have to disagree with the previou...,negative,The review is [MASK]. Review: I'm going to hav...,4997,1


In [46]:
one_shot_set = CustomDataset(one_shot, tokenizer)

oneshotloader = DataLoader(one_shot_set, batch_size=2,shuffle=True)

In [47]:
few_shot_set = CustomDataset(few_shot, tokenizer)

fewshotloader = DataLoader(few_shot_set, batch_size=2,shuffle=True)

In [48]:
test_set = CustomDataset(data[:5000], tokenizer)

testloader = DataLoader(test_set, batch_size=2,shuffle=False)

In [69]:
for i in tqdm.tqdm(range(300)):


    print(f"epochs:{i+1}")
    
    #training
    tr_loss,tr_acc=training(dataloader, model, criterion,optimizer)
    print(f'training_loss:{tr_loss}, acc:{tr_acc}')

    #evaluating
    ts_loss,ts_acc=evaluate(testloader, model,  criterion,optimizer)
    print(f'ts_loss:{ts_loss}, ts_acc:{ts_acc}')

  0%|                                                                                          | 0/300 [00:00<?, ?it/s]

epochs:1
training_loss:0.7085570096969604, acc:0.5


  0%|▎                                                                               | 1/300 [01:07<5:37:48, 67.79s/it]

ts_loss:0.7051917013049126, ts_acc:0.508
epochs:2
training_loss:0.6451234221458435, acc:1.0


  1%|▌                                                                               | 2/300 [02:11<5:24:06, 65.26s/it]

ts_loss:0.7070887587189675, ts_acc:0.5068
epochs:3
training_loss:0.635071873664856, acc:0.5


  1%|▊                                                                               | 3/300 [03:14<5:18:27, 64.33s/it]

ts_loss:0.708279135298729, ts_acc:0.5066
epochs:4
training_loss:0.5916577577590942, acc:1.0


  1%|▊                                                                               | 3/300 [03:20<5:31:12, 66.91s/it]


KeyboardInterrupt: 

In [58]:
one_shot

Unnamed: 0,review,sentiment,prompt,target
49995,I thought this movie did a down right good job...,positive,The review is [MASK]. Review: I thought this m...,3893
49999,No one expects the Star Trek movies to be high...,negative,The review is [MASK]. Review: No one expects t...,4997


In [60]:
def encode(batch):
    dic=tokenizer(batch["review"], add_special_tokens=True, truncation=True, padding=True, return_tensors='pt',  max_length=512)
    dic["label"]=batch["target_2"]
    return dic


dataset = Dataset.from_pandas(one_shot)
dataset.set_transform(encode)
# In[ ]:
testset = Dataset.from_pandas(data[:5000])
testset.set_transform(encode)

def pad_TextSequence(batch):
      return torch.nn.utils.rnn.pad_sequence(batch,batch_first=True, padding_value=0)

def collate_fn(batch):
  # A data tuple has the form:
  # waveform,  label
    texts, codes = [], []
  # Gather in lists, and encode labels as indices
    #print(batch)
    for i in batch:
        texts += [i['input_ids']]
        codes += [i['label']]
  # Group the list of tensors into a batched tensor
    #tensors = pad_AudioSequence(tensors)
    targets = pad_TextSequence(texts)
    codes=torch.tensor(codes)
    return  targets,codes

In [61]:
dataloader = DataLoader(dataset, batch_size=2,collate_fn=collate_fn,shuffle=True)
testloader = DataLoader(testset, batch_size=2,collate_fn=collate_fn,shuffle=True)