In [2]:
! pip install openai

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting openai
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.27.8


In [3]:
import json

In [4]:
with open('workflow_asset/catogory.json', 'r') as f:
    config = json.loads(f.read())

In [5]:
import openai

In [6]:
with open('api.key', 'r') as f:
    api_key = f.read().replace('\n', '')

In [7]:
import re
import json


class GPTGenerater:
    
    def __init__(self, api_key, model="gpt-3.5-turbo", temperature=0.7):
        self.api_key = api_key
        openai.api_key = self.api_key
        self.model = model
        self.temperature = temperature
        
    def predict(self, prompt):
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=[{"role": "assistant", "content": prompt}],
            temperature=self.temperature,
        )
        return response
    
    

JSON_RE = re.compile(r'(\{.*\})')

class JsonExtract:
    
    def reply2result(self, reply: str) -> dict:

        json_str = JSON_RE.findall(reply.replace('\n', ' '))[0]
        ret = json.loads(json_str)
        return ret

    def result2products(self, result: dict, target_key='product_name') -> list[str]:
        assert target_key in result
        assert isinstance(result[target_key], list)

        ret = result[target_key]

        return ret

    def reply2products(self, reply:str) -> list[str]:
        
        result = self.reply2result(reply)
        ret = self.result2products(result)


        return ret

In [8]:
sample_generator = GPTGenerater(api_key)

In [11]:
sample_generator.predict('hello')

<OpenAIObject chat.completion id=chatcmpl-7lfHYtYMDjjHirEoYWTvfQXLnNFwv at 0x7f42d8301b20> JSON: {
  "id": "chatcmpl-7lfHYtYMDjjHirEoYWTvfQXLnNFwv",
  "object": "chat.completion",
  "created": 1691594460,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello! How can I assist you today?"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 8,
    "completion_tokens": 9,
    "total_tokens": 17
  }
}

In [9]:
instruction = '''
Please generate {n_samples} product names for {category} category.
1. Return the results in JSON format with the following key: "product_name".
2. Example product_name: {samples} .
3. Do not repeat inputs in the output.
4. Replied answer should be as diverse as possible.
5. Do not repeat answers.
6. Reply in Taiwan Chinese.

The json result is: 
'''


class SampleAugumentation(JsonExtract):
    
    def __init__(self, sample_generator, instruction):
        self.sample_generator = sample_generator
        self.instruction = instruction
        
    def _sample_generate(self, category:str, samples:list, n_samples:int=10, debug=False):
        
        n_provided_samples = len(samples)
        
        samples = list(map(lambda x: '"{}",'.format(x.replace('"', '')), samples))
        samples = ' '.join(samples)
        
        prompt = self.instruction.format(category=category, samples=samples, n_samples=n_samples+n_provided_samples)
        response = self.sample_generator.predict(prompt)
        
        content = response.to_dict()['choices'][0]['message']['content']
        
        complete = prompt+content
        
        print('[debug]', 'prompt', prompt)
        print('[debug]', 'content', content)
        
        ret = self.reply2products(complete)[n_provided_samples:]
        
        return ret
    
    def sample_generate(self, category:str, samples:list, n_samples:int=10):
        debug = False
        while True:
            try:
                return self._sample_generate(category=category, samples=samples, n_samples=n_samples, debug=debug)
            except Exception as e:
                debug = True
                print(e)

In [10]:
sa = SampleAugumentation(sample_generator, instruction)

In [11]:
n_samples = 11
generated = dict()
for k, v in config.items():
    gen_samples = sa.sample_generate(category=k, samples=v, n_samples=n_samples)
    generated[k] = gen_samples
    
    print(k)
    print(v)
    print(gen_samples)
    
    print('----------')
    

[debug] prompt 
Please generate 11 product names for 現做咖啡飲品 category.
1. Return the results in JSON format with the following key: "product_name".
2. Example product_name:  .
3. Do not repeat inputs in the output.
4. Replied answer should be as diverse as possible.
5. Do not repeat answers.
6. Reply in Taiwan Chinese.

The json result is: 

[debug] content {
  "product_name": [
    "手工拿鐵",
    "濃縮美式咖啡",
    "特調卡布奇諾",
    "覆盆子摩卡",
    "香草拿鐵",
    "焦糖瑪奇朵",
    "冰滴咖啡",
    "抹茶拿鐵",
    "薰衣草拿鐵",
    "巧克力瑪奇朵",
    "檸檬莓果茶"
  ]
}
現做咖啡飲品
[]
['手工拿鐵', '濃縮美式咖啡', '特調卡布奇諾', '覆盆子摩卡', '香草拿鐵', '焦糖瑪奇朵', '冰滴咖啡', '抹茶拿鐵', '薰衣草拿鐵', '巧克力瑪奇朵', '檸檬莓果茶']
----------
[debug] prompt 
Please generate 11 product names for 罐裝/瓶裝咖啡 category.
1. Return the results in JSON format with the following key: "product_name".
2. Example product_name:  .
3. Do not repeat inputs in the output.
4. Replied answer should be as diverse as possible.
5. Do not repeat answers.
6. Reply in Taiwan Chinese.

The json result is: 

[debug] 

In [None]:
with open('workflow_asset/catogory_generate.json', 'w') as f:
    f.write((json.dumps(generated)))

In [None]:
1