# In-context Learning with GPT models

This tutorial uses a pre-trained GPT model for in-context few-shot learning.

In [1]:
!pip install transformers
!pip install openai

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1
Looking in indexes: https://pypi.org/simple, http

In [2]:
from sklearn.metrics import f1_score
import glob
import pandas as pd
import sys, time, string, re
from transformers import pipeline
import torch
from transformers import AutoModel,AutoModelForSequenceClassification ,GPTNeoForCausalLM, GPT2LMHeadModel, AutoTokenizer


In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


We load the pre-trained GPT-2 model from Hugging Face.

In [4]:
# Load the tokenizer first so the end-of-sequence id can be read from it instead of hard-coded.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

model = GPT2LMHeadModel.from_pretrained("gpt2")
model.to(device)
# The previous hard-coded value 0 is an ordinary vocabulary entry in GPT-2 ("!"), not the
# end-of-text marker; take the id from the tokenizer so both objects agree.
model.config.eos_token_id = tokenizer.eos_token_id

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [5]:
# Reuse the end-of-sequence token as the padding token; `tokenizer.pad_token_id` is passed to
# `model.generate` later in this notebook.
# NOTE(review): presumably needed because the GPT-2 tokenizer has no padding token of its own — confirm.
tokenizer.pad_token = tokenizer.eos_token
print("Loding is done...")

Loding is done...


Now, we construct prompts and samples for few-shot learning.

In [26]:
def zero_one_fewshot_data_prompt(data_samples, n_shot):
  """Build in-context prompts and their gold labels from a labelled dataset.

  The rows are consumed in order, in groups of ``n_shot + 1``: the first
  ``n_shot`` rows of a group become labelled demonstrations and the next row
  becomes the unlabelled query that the model has to complete. With
  ``n_shot == 0`` every row is turned into its own zero-shot prompt.

  If the rows run out before a group is full, the group's last available row
  is used as the query and the rows before it as demonstrations, so the final
  prompt may contain fewer than ``n_shot`` demonstrations (possibly none).

  Args:
    data_samples: table with a ``'sentence'`` and a ``'label'`` column
      (e.g. a pandas DataFrame); ``len(data_samples)`` is the number of rows.
    n_shot: number of demonstrations per prompt; must be >= 0.

  Returns:
    A tuple ``(few_shot_samples, few_shot_labels)``: the prompt strings and,
    at the same positions, the gold label of each prompt's query row.

  Raises:
    ValueError: if ``n_shot`` is negative.
  """
  if n_shot < 0:
    raise ValueError("n_shot must be non-negative, got " + str(n_shot))

  # Materialise the columns as plain lists so rows are addressed by position,
  # independent of whatever index labels the table carries.
  sentences = list(data_samples['sentence'])
  labels = list(data_samples['label'])
  data_samples_length = len(sentences)
  print("datasize: ", data_samples_length)

  few_shot_samples = []
  few_shot_labels = []

  for start in range(0, data_samples_length, n_shot + 1):
    # Compute the query row explicitly for every group. Deriving it from the
    # demonstration loop variable breaks when a group has no demonstrations:
    # the value leaks from the previous group (duplicating its query) or is
    # undefined for a single-row dataset.
    query_idx = min(start + n_shot, data_samples_length - 1)

    demonstrations = "".join(
        " Sentence: " + str(sentences[k]) + ' Sentiment: ' + str(labels[k]) + " \n"
        for k in range(start, query_idx)
    )
    # The query ends with an open "Sentiment: " slot for the model to fill in.
    query = " Sentence: " + str(sentences[query_idx]) + ' Sentiment: '

    few_shot_samples.append(demonstrations + query)
    few_shot_labels.append(labels[query_idx])

  print('few_shot_samples: ', len(few_shot_samples))
  return (few_shot_samples, few_shot_labels)

We use a toy dataset for 5-shot learning. 

In [27]:
# CSV file that is loaded with `pd.read_csv` in the next cell.
# NOTE(review): absolute path under /content — presumably a Colab upload location; adjust when running elsewhere.
data_path = "/content/sst2_tiny.csv"
# Number of labelled demonstrations placed in front of each query when building prompts.
n_shots=5

In [28]:
# Read the dataset; the prompt builder accesses its 'sentence' and 'label' columns.
data_samples = pd.read_csv(data_path,  encoding = "utf-8")

In [29]:
# Build the prompt strings and, at matching positions, the gold label of each prompt's query sentence.
few_shot_samples,few_shot_labels= zero_one_fewshot_data_prompt(data_samples, n_shots)

datasize:  10
few_shot_samples:  2


In [30]:
# Display the generated prompts; each one ends with an open "Sentiment: " slot.
few_shot_samples

[" Sentence: hide new secretions from the parental units  Sentiment: negative \n Sentence: contains no wit , only labored gags  Sentiment: negative \n Sentence: that loves its characters and communicates something rather beautiful about human nature  Sentiment: positive \n Sentence: remains utterly satisfied to remain the same throughout  Sentiment: negative \n Sentence: on the worst revenge-of-the-nerds clichés the filmmakers could dredge up  Sentiment: negative \n Sentence: that 's far too tragic to merit such superficial treatment  Sentiment: ",
 " Sentence: demonstrates that the director of such hollywood blockbusters as patriot games can still turn out a small , personal film with an emotional wallop .  Sentiment: positive \n Sentence: of saucy  Sentiment: positive \n Sentence: a depressed fifteen-year-old 's suicidal poetry  Sentiment: negative \n Sentence: are more deeply thought through than in most ` right-thinking ' films  Sentiment: "]

We do not need to train the model. We send each prompt as input and read the prediction from the generative model's continuation.

In [31]:
predictions = []
for prompt in few_shot_samples:
  # Keep the whole tokenizer output (input ids and attention mask) and move it to the model's device.
  encoded = tokenizer(prompt, return_tensors="pt").to(device)
  prompt_length = encoded.input_ids.shape[1]

  # Deterministic beam search: sampling would make the scored predictions change from run to run.
  generated_ids = model.generate(**encoded,
                                 do_sample = False,
                                 num_beams = 5,
                                 max_new_tokens = 2,
                                 eos_token_id = tokenizer.eos_token_id,
                                 pad_token_id = tokenizer.pad_token_id)

  # Full decoded sequence (prompt + continuation); kept so it can be inspected after the loop.
  generated_text = tokenizer.decode(generated_ids[0])

  # Decode only the newly generated tokens so the prediction can never be a word of the prompt.
  continuation = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)
  words = continuation.split()
  # The label is expected right after "Sentiment: ", so take the first generated word
  # (empty string when nothing printable was generated).
  pred = words[0] if words else ""
  norm_pred = pred.translate(str.maketrans('', '', string.punctuation))
  predictions.append(norm_pred)

In [33]:
# `generated_text` still holds the decoded output of the last loop iteration above.
print(generated_text)

 Sentence: demonstrates that the director of such hollywood blockbusters as patriot games can still turn out a small, personal film with an emotional wallop.  Sentiment: positive 
 Sentence: of saucy  Sentiment: positive 
 Sentence: a depressed fifteen-year-old's suicidal poetry  Sentiment: negative 
 Sentence: are more deeply thought through than in most ` right-thinking'films  Sentiment:  negative


In [32]:
# Compare the predictions against the gold labels of the prompts that were actually built.
gold_labels = few_shot_labels[:len(few_shot_samples)]

print("Predictions", predictions)
print("True labels: ", gold_labels)

# Macro-averaged F1 over the labels seen in gold labels and predictions.
macro_f1 = f1_score(gold_labels, predictions, average='macro')
print(f"The results of {n_shots!s}-shot expriments of the task  {data_path!s} ==>  {macro_f1!s}")

Predictions ['negative', 'negative']
True labels:  ['negative', 'positive']
The results of 5-shot expriments of the task  /content/sst2_tiny.csv ==>  0.3333333333333333


## Use the [OpenAI API](https://openai.com/product) to access GPT-3 and ChatGPT models.
See the [API reference](https://platform.openai.com/docs/api-reference/completions) for the GPT-3 completions endpoint.

In [47]:
import os
import openai

# Load the API key from the OPENAI_API_KEY environment variable when it is set; otherwise fall
# back to the key file. The file is closed right after reading.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
  with open("/content/openai.key") as key_file:
    api_key = key_file.read()
# Strip surrounding whitespace: a trailing newline from the file would become part of the key.
openai.api_key = api_key.strip()

# NOTE(review): "text-davinci-003" has been retired by OpenAI; confirm the current replacement
# (e.g. "gpt-3.5-turbo-instruct") before re-running this cell.
response = openai.Completion.create(model="text-davinci-003", prompt=few_shot_samples[0], max_tokens=2)
print(response)
# The completion text carries leading whitespace, so strip it before display.
print(response.choices[0]["text"].strip())


{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": " positive"
    }
  ],
  "created": 1678097511,
  "id": "cmpl-6r26ptQ4KwcY5zNdQ680hcuL2ShGP",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 95,
    "total_tokens": 96
  }
}
positive


In [48]:
predictions = []
for prompt in few_shot_samples:
  # temperature=0 makes decoding (near-)deterministic so the reported score is reproducible.
  response = openai.Completion.create(model="text-davinci-003", prompt=prompt, max_tokens=2, temperature=0)

  # The completion contains only the newly generated text; strip whitespace and punctuation from it.
  pred = str(response.choices[0]["text"]).strip()
  norm_pred = pred.translate(str.maketrans('', '', string.punctuation))
  predictions.append(norm_pred)

print("Predictions", predictions)
print("True labels: ", few_shot_labels[:len(few_shot_samples)])
print("The results of "+str(n_shots)+"-shot expriments of the task  "+str(data_path)+" ==>  "+ str(f1_score(few_shot_labels[:len(few_shot_samples)], predictions, average='macro')))

Predictions ['negative', 'positive']
True labels:  ['negative', 'positive']
The results of 5-shot expriments of the task  /content/sst2_tiny.csv ==>  1.0


### ChatGPT

We can also use ChatGPT for this task. See the [chat guide](https://platform.openai.com/docs/guides/chat) for more details. A chat request has the following shape:


```
openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"}
    ]
)
```



In [41]:
# Send the first few-shot prompt as a single user message (no system message, no prior turns).
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "user", "content": few_shot_samples[0]},
    ]
)

# Print the raw response to show its structure.
print(response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "positive",
        "role": "assistant"
      }
    }
  ],
  "created": 1678097151,
  "id": "chatcmpl-6r211Y5ApEv9yVSNDqpG3VJq9N7CR",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 3,
    "prompt_tokens": 92,
    "total_tokens": 95
  }
}


In [45]:
# The assistant's reply text is nested under choices[0] -> "message" -> "content".
response.choices[0]["message"]["content"]

'positive'

In [46]:
predictions = []
for prompt in few_shot_samples:
  # temperature=0 makes decoding (near-)deterministic so the reported score is reproducible.
  response = openai.ChatCompletion.create(
                  model="gpt-3.5-turbo",
                  messages=[
                        {"role": "user", "content": prompt},
                    ],
                  temperature=0,
                )

  # Use the assistant's whole reply as the prediction, with whitespace and punctuation stripped.
  pred = str(response.choices[0]["message"]["content"]).strip()
  norm_pred = pred.translate(str.maketrans('', '', string.punctuation))
  predictions.append(norm_pred)

print("Predictions", predictions)
print("True labels: ", few_shot_labels[:len(few_shot_samples)])
print("The results of "+str(n_shots)+"-shot expriments of the task  "+str(data_path)+" ==>  "+ str(f1_score(few_shot_labels[:len(few_shot_samples)], predictions, average='macro')))

Predictions ['negative', 'positive']
True labels:  ['negative', 'positive']
The results of 5-shot expriments of the task  /content/sst2_tiny.csv ==>  1.0
