In [1]:
import openai
import tiktoken
import time
import random
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import argparse
import os
import pickle
import numpy as np
from tqdm.auto import tqdm
import scipy.stats


# openai.api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

model = "microsoft/DialogRPT-updown"
tokenizer = AutoTokenizer.from_pretrained(model)
model = AutoModelForSequenceClassification.from_pretrained(model)
model.eval()

@torch.no_grad()
def score(ctx, hyp):
  model_input = tokenizer.encode(ctx + hyp, return_tensors="pt")
  result = model(model_input, return_dict=True)
  return torch.sigmoid(result.logits)

In [3]:
def query_gpt_with_retries(prompt_object, model = "gpt-4-0314", temperature = 1, max_tokens = 100, top_p = 1, frequency_penalty = 0, presence_penalty = 0, stop = None, logit_bias = {}, n = 1, retries = 20):
    # this function queries gpt-3 with retries, because sometimes the api is down
    # the function returns the response from gpt-4
    # prompt_object is a list of dictionaries with role and content
    # For example, prompt_object = [{"role": "system", "content": "Hello"}, {"role": "user", "content": "Hi"}, {"role":"assistant", "content": "How can I help you?"}]
    
    while retries > 0:
        # print ("Trying to query gpt-4 with retries = ", retries)
        if retries == 10:
            time.sleep(10)
        try:
            max_tokens = num_tokens_from_messages(prompt_object, model=model) + 150
            response = openai.ChatCompletion.create(
                model=model,
                messages=prompt_object,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty,
                logit_bias=logit_bias,
                stop=stop,
                n=n
            )
            return response
        except Exception as e:
            print(str(e))
            retries -= 1
    return None 


def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Returns the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    if model == "gpt-3.5-turbo":  # note: future models may deviate from this
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens += -1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens
    elif model == "gpt-4-0314" or model == "gpt-4":
        num_tokens = 0
        for message in messages:
            num_tokens += 4
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += -1
        num_tokens += 2
        return num_tokens
    else:
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
    See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")

In [4]:
# load jsonl 
import os 
import json
import pandas as pd

data = {
    "train": [],
    "dev": [],
}

dataset_names = {
    "train": "train_with-reference",
    "dev": "dev_without-reference"
}

## sample prompt object

# messages=[
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": "Who won the world series in 2020?"},
#         {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
#         {"role": "user", "content": "Where was it played?"}
#     ]


for split in dataset_names:
    with open(os.path.join("data", "raw", f"{dataset_names[split]}.jsonl"), "r") as f:
        for line in f:
            data_line = json.loads(line)
            sample = {"context": "", "response": "", "dialogRPTcontext": "", "dialogRPTresponse": ""}
            utterances = data_line["utterances"]
            # [{'text': 'A) pull through', 'speaker': 'student'}, {'text': 'OK great', 'speaker': 'teacher'}, {'text': 'Not sure about the meaning of the second one... Does that person mean that being the prime minister he had to survive??', 'speaker': 'student'}] {'text': 'Ah yes good question - this is a bit ambiguous....', 'speaker': 'teacher'}
            # sample["context"] = "\n".join([f"{x['speaker']}: {x['text']}" for x in utterances])
            # make sample context into a prompt object
            sample["context"] = []
            for i in range(len(utterances)):
                if utterances[i]['speaker'] == 'student':
                    # sample["context"].append({"role": "user", "content": "new utterance"})
                    sample["context"].append({"role": "user", "content": utterances[i]['speaker'] + ": " + utterances[i]["text"]})
                else:
                    sample["context"].append({"role": "assistant", "content": utterances[i]['speaker'] + ": " + utterances[i]["text"]})
                    
            dialogRPTcontext = ''
            for dialogue_line in utterances:
                dialogRPTcontext += "'" + dialogue_line['speaker'] + "': " + dialogue_line['text'] + ' <|endoftext|> '
            sample["dialogRPTcontext"] = dialogRPTcontext
                        
            
            if "response" in data_line.keys():
                response = data_line["response"]
                sample["response"] = response['speaker'] + ": " + response['text']
                
                dialogRPTresponse = "'" + response['speaker'] + "': " + response['text'] + ' <|endoftext|> ' 
                sample["dialogRPTresponse"] = dialogRPTresponse 

            # print (sample)
            data[split].append(sample)

In [7]:
# random_sample_idx = random.randint(0, len(data['train']))
# print (random_sample_idx)
# random_sample_idx = 1000
# prompt_object = data['train'][random_sample_idx]['context']
# num_few_shot_examples = 3
# # randomly sample from data['train'] and append before prompt_object
# for i in range(num_few_shot_examples):
#     sample = random.choice(data['train'])
#     print (sample)
#     prompt_object = sample['context'] + [{'role': 'assistant', 'content': sample['response']}] + [{'role': 'user', 'content': 'new conversation'},] + prompt_object
# prompt_object.insert(0, {"role": "system", "content": "You are acting as a teacher, and you are helping a student learn, be patient, helpful and kind. Don't be super imposing, give short responses to encourage learning, make the student feel comfortable and confident and help them learn."})
# response = data['train'][random_sample_idx]['response']

1646
{'context': [{'role': 'user', 'content': 'student: yes...maybe just is my internet problems, I can not stop it chang,sorry....'}, {'role': 'assistant', 'content': "teacher: OK so here your big idea just needs to say whether you agree or not with the main idea....so can you tell me in just normal words if you do agree (don't worry about the essay answer - just in normal speaking if you like!)"}], 'response': 'teacher: in other words: is exercise the best way?', 'dialogRPTcontext': "'student': yes...maybe just is my internet problems, I can not stop it chang,sorry.... <|endoftext|> 'teacher': OK so here your big idea just needs to say whether you agree or not with the main idea....so can you tell me in just normal words if you do agree (don't worry about the essay answer - just in normal speaking if you like!) <|endoftext|> ", 'dialogRPTresponse': "'teacher': in other words: is exercise the best way? <|endoftext|> "}
{'context': [{'role': 'assistant', 'content': 'teacher: In this on

In [18]:
gpt4_response = query_gpt_with_retries(prompt_object, model = "gpt-4")
gpt4_response_str = gpt4_response.choices[0]['message']['content']

dialogRPT_gpt4_response = gpt4_response_str.replace("teacher: ", "'teacher': ") + " |<endoftext|>"
print (data['train'][random_sample_idx]['dialogRPTcontext'])
print (dialogRPT_gpt4_response, score(data['train'][random_sample_idx]['dialogRPTcontext'], dialogRPT_gpt4_response).item())

Rate limit reached for default-gpt-4 in organization org-rocrupyvzgcl4yf25rqq6d1v on requests per min. Limit: 200 / min. Please try again in 300ms. Contact support@openai.com if you continue to have issues.
'teacher': Hi <STUDENT>, ready when you are.... <|endoftext|> 'student': Hi <TEACHER>, I'm ready <|endoftext|> 
'teacher': Great! Let's start with a warm-up question. What is your favorite subject and why? |<endoftext|> 0.7912063598632812


In [5]:
# randomly sample 100 examples from train
avg_score_gpt4 = 0.0
avg_score_train = 0.0
for i in range(100):
    if i%10 == 0:
        print (f"Processed {i} examples")
    random_sample_idx = random.randint(0, len(data['train']))
    random_sample_idx = 1000
    prompt_object = data['train'][random_sample_idx]['context']
    num_few_shot_examples = 3
    # randomly sample from data['train'] and append before prompt_object
    for i in range(num_few_shot_examples):
        sample = random.choice(data['train'])
        prompt_object = sample['context'] + [{'role': 'assistant', 'content': sample['response']}] + [{'role': 'user', 'content': 'new conversation'},] + prompt_object
    prompt_object.insert(0, {"role": "system", "content": "You are acting as a teacher, and you are helping a student learn, be patient, helpful and kind. Don't be super imposing, give short responses to encourage learning, make the student feel comfortable and confident and help them learn."})
    response = data['train'][random_sample_idx]['response']
    
    gpt4_response = query_gpt_with_retries(prompt_object, model = "gpt-4-0314")
    gpt4_response_str = gpt4_response.choices[0]['message']['content']

    dialogRPT_gpt4_response = gpt4_response_str.replace("teacher: ", "'teacher': ") + " |<endoftext|>"
    # print (data['train'][random_sample_idx]['dialogRPTcontext'])
    # print (dialogRPT_gpt4_response, score(data['train'][random_sample_idx]['dialogRPTcontext'], dialogRPT_gpt4_response).item())
    dialogRPTscore_gpt4response = score(data['train'][random_sample_idx]['dialogRPTcontext'], dialogRPT_gpt4_response).item()
    avg_score_gpt4 += dialogRPTscore_gpt4response
    
    dialogRPTscore_trainresponse = score(data['train'][random_sample_idx]['dialogRPTcontext'], data['train'][random_sample_idx]['dialogRPTresponse']).item()
    avg_score_train += dialogRPTscore_trainresponse
    
print ("The average dialogRPT score with gpt4 is: ", avg_score_gpt4/100)
print ("The average dialogRPT score with train is: ", avg_score_train/100)
    

Processed 0 examples
Processed 10 examples
Processed 20 examples
Processed 30 examples
Processed 40 examples
Processed 50 examples
