In [5]:
!pip install openai

[0m

## Getting the training set from OpenAI

In [6]:
import os
import openai
import pandas as pd

# Build a synthetic training set: for every (age, gender, power) combination,
# ask the completion model for a character description three times and store
# each answer, together with the prompts that produced it, in a CSV file.

# Read the API key from the environment instead of embedding it in the notebook.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Attribute values combined into every (age, gender, power) triple.
l_age = ['18', '20', '30', '40', '50', '60', '90']
l_gender = ['man', 'woman']
l_power = ['invisibility', 'read in the thoughts', 'turning lead into gold', 'immortality', 'telepathy', 'teleport', 'flight']

# f_prompt is the full instruction sent to the model; f_sub_prompt is the
# short "age, gender, power" form saved next to each response.
f_prompt = "Imagine a complete and detailed description of a {age}-year-old {gender} fictional character who has the superpower of {power}. Write out the entire description in a maximum of 100 words in great detail:"
f_sub_prompt = "{age}, {gender}, {power}"

# Collect plain dicts and build the DataFrame once at the end; concatenating
# a one-row frame on every iteration re-copies all previous rows each time.
records = []
for age in l_age:
    for gender in l_gender:
        for power in l_power:
            for i in range(3):  ## 3 times each
                prompt = f_prompt.format(age=age, gender=gender, power=power)
                sub_prompt = f_sub_prompt.format(age=age, gender=gender, power=power)
                print(sub_prompt)

                response = openai.Completion.create(
                    model="text-davinci-003",
                    prompt=prompt,
                    temperature=1,
                    max_tokens=500,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                )

                # Only the first choice is used.
                finish_reason = response['choices'][0]['finish_reason']
                response_txt = response['choices'][0]['text']

                records.append({
                    'age': age,
                    'gender': gender,
                    'power': power,
                    'prompt': prompt,
                    'sub_prompt': sub_prompt,
                    'response_txt': response_txt,
                    'finish_reason': finish_reason,
                })

df = pd.DataFrame(records)
df.to_csv("out_openai_completion.csv")


18, man, invisibility


RateLimitError: You exceeded your current quota, please check your plan and billing details.

## Fine tuning using the csv file we just generated

In [None]:
import pandas as pd
import openai
import subprocess

# Turn the generated CSV into the prompt/completion format expected by the
# OpenAI fine-tuning tools, then launch a fine-tuning job through the CLI.
df = pd.read_csv("out_openai_completion.csv")

# Keep only the short prompt and the generated text, renamed to the
# `prompt` / `completion` column names.
prepared_data = (
    df.loc[:, ['sub_prompt', 'response_txt']]
    .rename(columns={'sub_prompt': 'prompt', 'response_txt': 'completion'})
)
prepared_data.to_csv('prepared_data.csv', index=False)


## prepared_data.csv --> prepared_data_prepared.jsonl
# Arguments are passed as an explicit list: str.split() does not understand
# shell quoting. check=True stops the cell if the CLI exits with an error
# instead of silently continuing to the next step.
subprocess.run(
    ['openai', 'tools', 'fine_tunes.prepare_data', '--file', 'prepared_data.csv', '--quiet'],
    check=True,
)

## Start fine-tuning
# The suffix is passed without surrounding quote characters; with
# '... --suffix "SuperHero"'.split() the quotes became part of the value.
subprocess.run(
    [
        'openai', 'api', 'fine_tunes.create',
        '--training_file', 'prepared_data_prepared.jsonl',
        '--model', 'davinci',
        '--suffix', 'SuperHero',
    ],
    check=True,
)

Once the fine-tuned model has been created, we can go to the Playground and interact with it.

Go to https://platform.openai.com/playground.
Click on ‘Model’ and search for the one with the suffix “SuperHero” (it appears in lowercase, “superhero”, in the model name).
Add in ‘Stop sequences’ the token ‘END’.

In [None]:
# can be done in python
import os
import openai

# The API key is looked up in the process environment.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Request settings for the fine-tuned model, gathered in one mapping.
# Generation stops when the model emits the "END" stop sequence.
completion_kwargs = dict(
    model="davinci:ft-klhm:superhero-2023-02-01-14-56-48",
    prompt="18, Man, can eat a lot ->\n",
    temperature=0.7,
    max_tokens=256,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    stop=["END"],
)

response = openai.Completion.create(**completion_kwargs)