# Language dataset utils

In [None]:
#|default_exp lang.dataset

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export


def alpaca2sharegpt(example):
    """Turn an Alpaca-style record into a ShareGPT-style conversation.

    `example` must provide "input" and "output"; "instruction" is optional
    and, when the key is present, becomes a leading "system" turn. The
    result is a dict with one key, "messages", holding the ordered turns
    as `{"from": ..., "value": ...}` dicts.
    """
    # The system turn is emitted whenever the key exists, even if its value
    # is empty.
    system_turns = (
        [{"from": "system", "value": example["instruction"]}]
        if "instruction" in example
        else []
    )
    dialogue = [
        {"from": "human", "value": example["input"]},
        {"from": "gpt", "value": example["output"]},
    ]
    return {"messages": system_turns + dialogue}


def alpaca2openai(example):
    """Turn an Alpaca-style record into an OpenAI-style chat conversation.

    `example` must provide "input" and "output"; "instruction" is optional
    and, when the key is present, becomes a leading "system" message. The
    result is a dict with one key, "messages", holding the ordered messages
    as `{"role": ..., "content": ...}` dicts.
    """
    preamble = []
    if "instruction" in example:
        # Presence of the key alone decides whether a system message exists.
        preamble = [{"role": "system", "content": example["instruction"]}]
    return {
        "messages": [
            *preamble,
            {"role": "user", "content": example["input"]},
            {"role": "assistant", "content": example["output"]},
        ]
    }


def sharegpt2openai_message(message):
    """Convert one ShareGPT turn (`from`/`value`) to an OpenAI message (`role`/`content`).

    Speaker names map as system -> system, human -> user, gpt -> assistant.
    Any other speaker raises `KeyError`.
    """
    speaker_to_role = {
        "system": "system",
        "human": "user",
        "gpt": "assistant",
    }
    speaker = message["from"]
    return {"role": speaker_to_role[speaker], "content": message["value"]}


def openai2sharegpt_message(message):
    """Convert one OpenAI message (`role`/`content`) to a ShareGPT turn (`from`/`value`).

    Roles map as system -> system, user -> human, assistant -> gpt.
    Any other role raises `KeyError`.
    """
    role_to_speaker = {
        "system": "system",
        "user": "human",
        "assistant": "gpt",
    }
    role = message["role"]
    return {"from": role_to_speaker[role], "value": message["content"]}


def sharegpt2openai(example):
    """Convert a ShareGPT-style conversation to the OpenAI chat layout.

    Every entry of `example["messages"]` is passed through
    `sharegpt2openai_message`; order is preserved and the result is a new
    dict with the single key "messages".
    """
    converted = []
    for turn in example["messages"]:
        converted.append(sharegpt2openai_message(turn))
    return {"messages": converted}


def openai2sharegpt(example):
    """Convert an OpenAI-style chat conversation to the ShareGPT layout.

    Every entry of `example["messages"]` is passed through
    `openai2sharegpt_message`; order is preserved and the result is a new
    dict with the single key "messages".
    """
    chat = example["messages"]
    turns = [openai2sharegpt_message(entry) for entry in chat]
    return {"messages": turns}

In [None]:
# Sample Alpaca-style record (instruction / input / output) used by the
# conversion demos and round-trip checks in the following cells.
alpaca_example = {
    "instruction": "You are a helpful assistant.",
    "input": "What is love?",
    "output": "Baby, don't hurt me!"
}

In [None]:
# Convert the Alpaca sample to the ShareGPT layout and display the result.
sharegpt_example = alpaca2sharegpt(alpaca_example)
sharegpt_example

{'messages': [{'from': 'system', 'value': 'You are a helpful assistant.'},
  {'from': 'human', 'value': 'What is love?'},
  {'from': 'gpt', 'value': "Baby, don't hurt me!"}]}

In [None]:
# Convert the Alpaca sample to the OpenAI chat layout and display the result.
openai_example = alpaca2openai(alpaca_example)
openai_example

{'messages': [{'role': 'system', 'content': 'You are a helpful assistant.'},
  {'role': 'user', 'content': 'What is love?'},
  {'role': 'assistant', 'content': "Baby, don't hurt me!"}]}

In [None]:
# ShareGPT -> OpenAI conversion must reproduce the directly built OpenAI sample.
test_eq(openai_example, sharegpt2openai(sharegpt_example))

In [None]:
# OpenAI -> ShareGPT conversion must reproduce the directly built ShareGPT sample.
test_eq(sharegpt_example, openai2sharegpt(openai_example))

In [None]:
#|hide
# Run nbdev's export step. NOTE(review): presumably this writes the
# `#|export` cells to the `lang.dataset` module named by `#|default_exp`;
# confirm against the nbdev docs.
import nbdev; nbdev.nbdev_export()