In [1]:
import pandas as pd
import json
from tqdm import tqdm

In [2]:
dataset_path = "../datasets/friends/"
batch_output_files = [dataset_path + "batch_output_friends_part1.jsonl",
                     dataset_path + "batch_output_friends_part2.jsonl"]

In [3]:
df = pd.DataFrame()
for file in batch_output_files:
    dff = pd.read_json(file, lines=True)
    df = pd.concat([df, dff], ignore_index=True, sort=False)
df.head(2)

Unnamed: 0,custom_id,error,id,response
0,request-0,,batch_req_67b5d395a6348190ab84388e1f4be824,"{'status_code': 200, 'request_id': '3c9a2177c3..."
1,request-1,,batch_req_67b5d395b7a481909747942b1cd53521,"{'status_code': 200, 'request_id': '6b4949a804..."


In [4]:
f"Number of Chandler dialogues: {len(df)}"

'Number of Chandler dialogues: 7488'

In [5]:
from json import JSONDecodeError

dfo = pd.DataFrame(columns=["context", "dialog"])
num_misformatted_outputs = 0

for idx in range(len(df)):
    response_str = df.iloc[idx]["response"]["body"]["choices"][0]["message"]["content"]
    request_id = int(df.iloc[idx]["custom_id"].split("request-")[1])
    try:
        response_json = json.loads(response_str)
        keys = [key for key in response_json]
        context = response_json[keys[0]]
        dialog = response_json[keys[1]]

    except JSONDecodeError:
        num_misformatted_outputs += 1
        context = None
        dialog = None
        print(idx)
    
    dfo.loc[request_id] = [context, dialog]
    

dfo.head(2)

617
1039
1227
2411
2611
3321
3477
3854
4510
4801
5085
5639
5945
6058
6477
6836
6871
6979
7073
7202


Unnamed: 0,context,dialog
0,Someone is downplaying their romantic involvem...,"Oh sure, because dating a guy from work is lik..."
1,There's a conversation among friends where one...,Could we be any more clear that it's not a dat...


### Removing entries with invalid LLM replies

In [6]:
print(f"Total number of Chandler dialogues: {len(dfo)}")
dfo.dropna(axis=0, inplace=True)
print(f"Number of Chandler dialogues after removing invalid LLM replies: {len(dfo)}")

Total number of Chandler dialogues: 7488
Number of Chandler dialogues after removing invalid LLM replies: 7468


### Use some of funniest dialogues as examples in few shot prompts

In [8]:
few_shot_prompts = [
    {
        "USER": "A character is grappling with the aftermath of a breakup and is hesitant to contact their ex due to unresolved feelings and the way things ended. Another character expresses concern about their evolving friendship with a mutual friend, fearing the relationship may worsen rather than improve.",
        "ASSISTANT": "I'm not great at the advice. Can I interest you in a sarcastic comment?"
    },
    {
        "USER": "A pair of friends is having an emotional moment about their lives and future. One expresses how little he had before meeting the other. However, the moment turns slightly awkward as an ex-boyfriend unexpectedly enters the scene, prompting confusion and light-hearted banter about past relationships and physical appearances.",
        "ASSISTANT": "Hi, I'm Chandler, I make jokes when I'm uncomfortable."
    },
    {
        "USER": "A friend says he had been to tanning salon and ended up getting badly tanned due to communication gap.",
        "ASSISTANT": "Was That Place The Sun?"
    },
    {
        "USER": "I am a gym member but I never find time to go to the gym",
        "ASSISTANT": "Oh yeah, gym member. I try to go four times a week, but I've missed the last 1200 times"
    },
    {
        "USER": "A couple is excitedly discussing their upcoming wedding in four weeks. One partner is enthusiastic, while the other uses humor to deflect some of the seriousness of the preparations. There's a conversation about wedding vows, with one partner joking about buying vows instead of writing them, while the other expresses a sense of urgency and seriousness about the wedding plans.",
        "ASSISTANT": "Look at her go! She must love me more than I love her! What's wrong with me? Ooh, don't open that door"
    },
    {
        "USER": "A friends asks me how am I after I ended up getting up very drunk and don't remember anything from last night.",
        "ASSISTANT": "My Apartment Isn’t There Anymore, Because I Drank It"
    },
    {
        "USER": "A friend expresses anxiety over being looking obese on TV and defends that camera adds weight",
        "ASSISTANT": "Ahh, so how many cameras are actually on you?"
    }
]

In [13]:
content_system_with_few_shots_prompts = """
    You are an expert and highly accomplished TV sitcom writer specialised in writing funny, sarcastic dialogues.
    You will be given a context summarizing a situation.
    Given this context, your task is to reply with a humorous sitcom like dialog in response to that context,
    most importantly, the dialog should be in the style of Chandler Bing, a funny lead character from the very popular 90s TV sitcom FRIENDS.
    Keep in mind that Chandler Bing’s humor is marked by a unique blend of sarcasm, self-deprecation, and quick wit.
    He tends to make jokes that deflect serious or emotional moments, often using his dry, sarcastic tone.
    His style is heavily reliant on irony, often delivering punchlines that are deliberately over-the-top or nonsensical.
    His most famous catch phrase is 'Could I be anymore...', do not use this excessively, use it sparingly.
""" 
content_system_with_few_shots_prompts = content_system_with_few_shots_prompts.replace("\n", "").replace("  ", "").replace(".", ". ").rstrip()
content_system_with_few_shots_prompts += "\n\nFollowing are few examples of pairs of context and Chandler Bing's dialog in reply to that.\n"

for i in range(len(few_shot_prompts)):
    content_system_with_few_shots_prompts += "\n"
    content_system_with_few_shots_prompts += f"USER: {few_shot_prompts[i]['USER']}"
    content_system_with_few_shots_prompts += "\n"
    content_system_with_few_shots_prompts += f"ASSISTANT: {few_shot_prompts[i]['ASSISTANT']}"
    content_system_with_few_shots_prompts += "\n"
    

In [14]:
print(content_system_with_few_shots_prompts)

You are an expert and highly accomplished TV sitcom writer specialised in writing funny, sarcastic dialogues. You will be given a context summarizing a situation. Given this context, your task is to reply with a humorous sitcom like dialog in response to that context,most importantly, the dialog should be in the style of Chandler Bing, a funny lead character from the very popular 90s TV sitcom FRIENDS. Keep in mind that Chandler Bing’s humor is marked by a unique blend of sarcasm, self-deprecation, and quick wit. He tends to make jokes that deflect serious or emotional moments, often using his dry, sarcastic tone. His style is heavily reliant on irony, often delivering punchlines that are deliberately over-the-top or nonsensical. His most famous catch phrase is 'Could I be anymore. . . ', do not use this excessively, use it sparingly.

Following are few examples of pairs of context and Chandler Bing's dialog in reply to that.

USER: A character is grappling with the aftermath of a brea

In [23]:
df_sft_dpo = pd.DataFrame(columns = ["messages", "prompt", "chosen", "rejected"])

content_system = """You are an expert and highly accomplished TV sitcom writer specialised in writing funny, sarcastic dialogues.
    You will be given a context summarizing a situation.
    Given this context, your task is to reply with a humorous sitcom like dialog in response to that context,
    most importantly, the dialog should be in the style of Chandler Bing, a funny lead character from the very popular 90s TV sitcom FRIENDS.
    Keep in mind that Chandler Bing’s humor is marked by a unique blend of sarcasm, self-deprecation, and quick wit.
    He tends to make jokes that deflect serious or emotional moments, often using his dry, sarcastic tone.
    His style is heavily reliant on irony, often delivering punchlines that are deliberately over-the-top or nonsensical."""
content_system = content_system.replace("\n", "").replace("  ", "").replace(".", ". ").rstrip()

    
for idx in tqdm(range(len(dfo))):
    
    message_system = {}
    #message_system["content"] = []
    #message_system["content"].append({"type": "text"})
    #message_system["content"].append({"text": content_system})
    message_system["content"] = content_system_with_few_shots_prompts
    message_system["role"] = "system"
    
    message_user = {}
    #message_user["content"] = []
    #message_user["content"].append({"type": "text"})
    content_user = dfo.iloc[idx]["context"]
    #message_user["content"].append({"text": content_user})
    message_user["content"] = content_user
    message_user["role"] = "user"
    
    message_assistant = {}
    #message_assistant["content"] = []
    #message_assistant["content"].append({"type": "text"})
    content_assistant = dfo.iloc[idx]["dialog"]
    #message_assistant["content"].append({"text": content_assistant})
    message_assistant["content"] = content_assistant
    message_assistant["role"] = "assistant"
    
    messages = []
    messages.extend([message_system, message_user, message_assistant])

    prompt = []
    prompt.extend([message_system, message_user])
    
    chosen = [message_assistant]
    rejected = None
    
    df_sft_dpo.loc[idx] = [messages, prompt, chosen, rejected]

100%|██████████| 7468/7468 [00:26<00:00, 285.49it/s]


In [24]:
df_sft_dpo.head(2)

Unnamed: 0,messages,prompt,chosen,rejected
0,[{'content': 'You are an expert and highly acc...,[{'content': 'You are an expert and highly acc...,"[{'content': 'Oh sure, because dating a guy fr...",
1,[{'content': 'You are an expert and highly acc...,[{'content': 'You are an expert and highly acc...,[{'content': 'Could we be any more clear that ...,


### Save file and push to HF Hub

In [29]:
!pip install pyarrow
df_sft_dpo.to_parquet("../datasets/friends/dataset_sft_dpo_without_rejected.parquet")



### Load SFT, DPO datasets with rejected messages

### Split into train and test sets

In [71]:
from sklearn.model_selection import train_test_split
df_sft_dpo_train, df_sft_dpo_test = train_test_split(df_sft_dpo, test_size=0.1,  shuffle=True)
print(f"Num of training samples: {len(df_sft_dpo_train)}")
print(f"Num of test samples: {len(df_sft_dpo_test)}")

Num of training samples: 6721
Num of test samples: 747


In [78]:
df_sft_dpo_train.reset_index(drop=True, inplace=True)
df_sft_dpo_test.reset_index(drop=True, inplace=True)