In [2]:
from fp_dataset_artifacts.utils import init_openai

init_openai()

In [3]:
from datasets import list_datasets, load_dataset, list_metrics, load_metric

data = load_dataset('snli')
data

Downloading:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/938 [00:00<?, ?B/s]

Downloading and preparing dataset snli/plain_text (download: 90.17 MiB, generated: 65.51 MiB, post-processed: Unknown size, total: 155.68 MiB) to /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b...


Downloading:   0%|          | 0.00/1.93k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Dataset snli downloaded and prepared to /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    test: Dataset({
        features: ['premise', 'hypothesis', 'label'],
        num_rows: 10000
    })
    train: Dataset({
        features: ['premise', 'hypothesis', 'label'],
        num_rows: 550152
    })
    validation: Dataset({
        features: ['premise', 'hypothesis', 'label'],
        num_rows: 10000
    })
})

In [4]:
import openai

In [29]:
# Create example

def to_example(data, include_label=True):
    sentences = []
    
    for feature in data.keys():                
        text = data[feature]
        
        if feature == 'label' and not include_label:
            text = ''
        
        sentence = f'{feature}: {text}'
        sentences.append(sentence)
        
    return '\n'.join(sentences)

example = to_example(data['train'][0])
print(example)

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: 1


In [22]:
train = data['train']

In [15]:
# Here the label is a number. Would the model perform better if we provided the string values instead?

In [31]:
exs = '\n\n'.join([to_example(train[i]) for i in range(3)])
print(exs)

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: 1

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is at a diner, ordering an omelette.
label: 2

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is outdoors, on a horse.
label: 0


In [35]:
# Let's try making some classifications
x = to_example(train[4], False)
prompt = f'{exs}\n\n{x}'
print(prompt)

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: 1

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is at a diner, ordering an omelette.
label: 2

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is outdoors, on a horse.
label: 0

premise: Children smiling and waving at camera
hypothesis: There are children present
label: 


In [40]:
# Using the most powerful model with max_tokens=1 since we only need a single digit as output
response = openai.Completion.create(
  engine='davinci',
  prompt=prompt,
  temperature=0,
  max_tokens=1,
  top_p=1,
  frequency_penalty=0.0,
  presence_penalty=0.0,
  stop=['\n']
)
response

<OpenAIObject text_completion id=cmpl-47FPqWHYsLC5bjuXOpNvA5Klgz1J2 at 0x7f76c87cd720> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": ""
    }
  ],
  "created": 1637632902,
  "id": "cmpl-47FPqWHYsLC5bjuXOpNvA5Klgz1J2",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}

In [41]:
# Seems like that didn't work. We can try it again without those parameters.
response = openai.Completion.create(
  engine='davinci',
  prompt=prompt,
)
response

<OpenAIObject text_completion id=cmpl-47FQhXNGUd7EaVfftmlB8f2GdDgx2 at 0x7f76c87cd090> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "\n\npremise: Children smiling and waving at camera\nhypothesis:"
    }
  ],
  "created": 1637632955,
  "id": "cmpl-47FQhXNGUd7EaVfftmlB8f2GdDgx2",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}

In [52]:
# The model tries to predict the next premise and hypothesis by copying,
# which is basically what's happening in the example.
# Since it might be ignoring the numbers. Let's try to use the actual label and add more examples for safe measure.
int2label = train.features['label'].int2str

def to_example(data, include_label=True):
    sentences = []
    
    for feature in data.keys():                
        text = data[feature]
        
        if feature == 'label':
            if not include_label:
                text = ''
            else:
                text = int2label(text)
            
        sentence = f'{feature}: {text}'
        sentences.append(sentence)
        
    return '\n'.join(sentences)

example = to_example(train[0])
print(example)

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: neutral


In [60]:
# More examples
exs = '\n\n'.join([
    to_example(train[0]),
    to_example(train[10]),
    to_example(train[100]),
    to_example(train[200]),
    to_example(train[300]),
    to_example(train[400])
])
x = to_example(train[4], False)
prompt = f'{exs}\n\n{x}'
print(prompt)

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: neutral

premise: An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background.
hypothesis: A boy flips a burger.
label: contradiction

premise: A woman is walking across the street eating a banana, while a man is following with his briefcase.
hypothesis: the woman is a seductress
label: neutral

premise: People on bicycles waiting at an intersection.
hypothesis: There is a bike race happening right now.
label: neutral

premise: A foreign family is walking along a dirt path next to the water.
hypothesis: A family of foreigners walks by the water.
label: entailment

premise: A guy performing a bicycle jump trick for an audience.
hypothesis: tony hawk is performing a skating trick
label: contradiction

premise: Children smiling and waving at camera
hypothesis: There are children p

In [61]:
# 10 max tokens for good measure
response = openai.Completion.create(
  engine='davinci',
  prompt=prompt,
  temperature=0,
  max_tokens=10,
  top_p=1,
  frequency_penalty=0.0,
  presence_penalty=0.0,
  stop=['\n']
)
response

<OpenAIObject text_completion id=cmpl-47FaZgENEwPTkI7z0jUUrX6ZbdcFi at 0x7f76c87b1a40> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": ""
    }
  ],
  "created": 1637633567,
  "id": "cmpl-47FaZgENEwPTkI7z0jUUrX6ZbdcFi",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}

In [64]:
# Perhaps it requires a task description to actually work
task = 'This is a textual entailment classifier.'
prompt = f'{task}\n\n{exs}\n\n{x}'
print(prompt)

This is a textual entailment classifier.

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: neutral

premise: An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background.
hypothesis: A boy flips a burger.
label: contradiction

premise: A woman is walking across the street eating a banana, while a man is following with his briefcase.
hypothesis: the woman is a seductress
label: neutral

premise: People on bicycles waiting at an intersection.
hypothesis: There is a bike race happening right now.
label: neutral

premise: A foreign family is walking along a dirt path next to the water.
hypothesis: A family of foreigners walks by the water.
label: entailment

premise: A guy performing a bicycle jump trick for an audience.
hypothesis: tony hawk is performing a skating trick
label: contradiction

premise: Children smiling and waving 

In [65]:
response = openai.Completion.create(
  engine='davinci',
  prompt=prompt,
  temperature=0,
  max_tokens=10,
  top_p=1,
  frequency_penalty=0.0,
  presence_penalty=0.0,
  stop=['\n']
)
response

<OpenAIObject text_completion id=cmpl-47FeEDk0Vp5i0ZPTw1hDAS4XOYfLK at 0x7f76c8765400> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": ""
    }
  ],
  "created": 1637633794,
  "id": "cmpl-47FeEDk0Vp5i0ZPTw1hDAS4XOYfLK",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}

In [66]:
# More details about the task
task = 'This is a textual entailment classifier that detemines the inference relation between premise and hypothesis with the labels: entailment, contradiction, or neutral.'
prompt = f'{task}\n\n{exs}\n\n{x}'
print(prompt)

This is a textual entailment classifier that detemines the inference relation between premise and hypothesis with the labels: entailment, contradiction, or neutral.

premise: A person on a horse jumps over a broken down airplane.
hypothesis: A person is training his horse for a competition.
label: neutral

premise: An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background.
hypothesis: A boy flips a burger.
label: contradiction

premise: A woman is walking across the street eating a banana, while a man is following with his briefcase.
hypothesis: the woman is a seductress
label: neutral

premise: People on bicycles waiting at an intersection.
hypothesis: There is a bike race happening right now.
label: neutral

premise: A foreign family is walking along a dirt path next to the water.
hypothesis: A family of foreigners walks by the water.
label: entailment

premise: A guy performing a bicycle jump trick for 

In [69]:
response = openai.Completion.create(
  engine='davinci',
  prompt=prompt,
  max_tokens=100,
  stop=['\n']
)
response

<OpenAIObject text_completion id=cmpl-47FgPVtpFHtcZ6rTdPzTCGM2xEral at 0x7f76c876fea0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": ""
    }
  ],
  "created": 1637633929,
  "id": "cmpl-47FgPVtpFHtcZ6rTdPzTCGM2xEral",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}

In [71]:
# Still fails to generate a valid text
# We can try to get rid of the stop

In [72]:
response = openai.Completion.create(
  engine='davinci',
  prompt=prompt,
  max_tokens=100,
)
response

<OpenAIObject text_completion id=cmpl-47FhJVxpHHZZC7AKlkcGRMgDaz4ta at 0x7f76c876c3b0> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "\n\npremise: Girls sitting at outdoor tea area talking excitedly, one pointing empty tea cup at each other, another reading tea card.\nlabel: \n\nThere are three strategies for evaluating w-ETT accuracy, knowledge transfer, logical validity, and RPSL compliance. The lack of ground truth in visual reasoning makes evaluation difficult, but since all proposed models derive their inference representation from the premises, they are using the same representations. The major challenge in evaluating w-ETT"
    }
  ],
  "created": 1637633985,
  "id": "cmpl-47FhJVxpHHZZC7AKlkcGRMgDaz4ta",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}

In [73]:
# Basically, it never wants to generate a label. It always continue to generate more examples for some reason.

In [74]:
print("\n\npremise: Girls sitting at outdoor tea area talking excitedly, one pointing empty tea cup at each other, another reading tea card.\nlabel: \n\nThere are three strategies for evaluating w-ETT accuracy, knowledge transfer, logical validity, and RPSL compliance. The lack of ground truth in visual reasoning makes evaluation difficult, but since all proposed models derive their inference representation from the premises, they are using the same representations. The major challenge in evaluating w-ETT")



premise: Girls sitting at outdoor tea area talking excitedly, one pointing empty tea cup at each other, another reading tea card.
label: 

There are three strategies for evaluating w-ETT accuracy, knowledge transfer, logical validity, and RPSL compliance. The lack of ground truth in visual reasoning makes evaluation difficult, but since all proposed models derive their inference representation from the premises, they are using the same representations. The major challenge in evaluating w-ETT


In [76]:
# The Tweet sentiment classifier from the example
prompt = """This is a tweet sentiment classifier


Tweet: "I loved the new Batman movie!"
Sentiment: Positive
###
Tweet: "I hate it when my phone battery dies."
Sentiment: Negative
###
Tweet: "My day has been 👍"
Sentiment: Positive
###
Tweet: "This is the link to the article"
Sentiment: Neutral
###
Tweet: "This new music video blew my mind"
Sentiment: """
print(prompt)

This is a tweet sentiment classifier


Tweet: "I loved the new Batman movie!"
Sentiment: Positive
###
Tweet: "I hate it when my phone battery dies."
Sentiment: Negative
###
Tweet: "My day has been 👍"
Sentiment: Positive
###
Tweet: "This is the link to the article"
Sentiment: Neutral
###
Tweet: "This new music video blew my mind"
Sentiment: 


In [88]:
# Let's try to mimick the example
def to_example(data, include_label=True):
    sentences = []
    
    for feature in data.keys():                
        text = data[feature]
        
        if feature == 'label':
            if not include_label:
                text = ''
            else:
                text = int2label(text).capitalize()
        else:
            text = f'"{text}"'
            
        sentence = f'{feature.capitalize()}: {text}'
        sentences.append(sentence)
        
    return '\n'.join(sentences)

exs = '\n###\n'.join([
    to_example(train[0]),
    to_example(train[10]),
    to_example(train[100]),
    to_example(train[200]),
    to_example(train[300]),
    to_example(train[400])
])
x = to_example(train[4], False)

task = 'This is a textual entailment classifier.'
prompt = f'{task}\n\n{exs}\n###\n{x}'
print(prompt)

This is a textual entailment classifier.

Premise: "A person on a horse jumps over a broken down airplane."
Hypothesis: "A person is training his horse for a competition."
Label: Neutral
###
Premise: "An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background."
Hypothesis: "A boy flips a burger."
Label: Contradiction
###
Premise: "A woman is walking across the street eating a banana, while a man is following with his briefcase."
Hypothesis: "the woman is a seductress"
Label: Neutral
###
Premise: "People on bicycles waiting at an intersection."
Hypothesis: "There is a bike race happening right now."
Label: Neutral
###
Premise: "A foreign family is walking along a dirt path next to the water."
Hypothesis: "A family of foreigners walks by the water."
Label: Entailment
###
Premise: "A guy performing a bicycle jump trick for an audience."
Hypothesis: "tony hawk is performing a skating trick"
Label: Contradiction

In [89]:
response = openai.Completion.create(
    engine='davinci',
    prompt=prompt,
    temperature=0.3,
    max_tokens=60,
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
    stop=["###"]
)
response

<OpenAIObject text_completion id=cmpl-47FufL86uOqEuAMe9lcIp5UXfDg9r at 0x7f76c934ae00> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\n"
    }
  ],
  "created": 1637634813,
  "id": "cmpl-47FufL86uOqEuAMe9lcIp5UXfDg9r",
  "model": "davinci:2020-05-03",
  "object": "text_completion"
}