### Install and Import required libraries

In [1]:
!pip install -q openai

import os
import json

os.environ['OPENAI_API_KEY'] = "OPENAI_API_KEY"

[0m

In [2]:
from openai import OpenAI
client = OpenAI()

### Prompt function that sends request to OpenAI to create synthetic data

In [3]:
def send_request(question):
    system = {"role": "system", "content": "I need you to generate 4 variations of the following question. You can be as creative or direct as you want when generating the variants. Dont add question numbers or any symbols before question"}
    user = {"role": "user", "content": "Here is the question: {}".format(question)}
    return [system, user]

### Reading original dataset in JSON Format

In [4]:
with open("./finance-alpaca.json", "r") as f:
    data = json.load(f)

len(data)

68912

In [5]:
# Augment a small sample of data

sample = data[:4]

### Generating synthetic data using OpenAI

In [6]:
augmented_data = []

In [7]:
for example in sample:
    question = example['instruction']
    messages = send_request(question)
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    pair = {"question": question, "answer": example['output']}
    augmented_data.append(pair)
    response = completion.choices[0].message.content.split('\n')
    for i in range(4):
        pair = {
            "question": response[i], 
            "answer": example['output']
        }
        augmented_data.append(pair)


print(augmented_data)

[{'question': 'For a car, what scams can be plotted with 0% financing vs rebate?', 'answer': "The car deal makes money 3 ways. If you pay in one lump payment. If the payment is greater than what they paid for the car, plus their expenses, they make a profit. They loan you the money. You make payments over months or years, if the total amount you pay is greater than what they paid for the car, plus their expenses, plus their finance expenses they make money. Of course the money takes years to come in, or they sell your loan to another business to get the money faster but in a smaller amount. You trade in a car and they sell it at a profit. Of course that new transaction could be a lump sum or a loan on the used car... They or course make money if you bring the car back for maintenance, or you buy lots of expensive dealer options. Some dealers wave two deals in front of you: get a 0% interest loan. These tend to be shorter 12 months vs 36,48,60 or even 72 months. The shorter length makes

In [8]:
len(augmented_data)

20

### Writing generated data to a json file

In [10]:
with open("augmented_data.json", "w") as file:
    json.dump(augmented_data, file, indent=2)