In [5]:
# import sys
# sys.path.append('..')
# from consts import MY_OPENAI_API_KEY as API_KEY
# CACHE_DIR = '../cache/'
FIRST_PROMPT = "You are a helpful assistant designed to output JSON. The client provides you with text containing sales announcements, and you need to create JSON for these announcements to input into the database. The JSON should be a list of dictionaries, where each announcement is a separate dictionary. Each dictionary must include the fields: name, price, description (if available, otherwise null), place (information about the seller's location, the same for all products if provided, otherwise null), count (if multiple items are being sold at once, otherwise null), and others (a dictionary with other useful information about the product)."
LAST_PROMPT = """You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products.
Each product dict must contain next info:
"name" - short title for a customer,
"price" - cost of a product (one number in rubbles),
"count" - number of product if they are in a pack (weight or lenght if product is uncountable). Drop products without name or price. Answer only with JSON."""

In [6]:
API_KEY = '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'
!pip install openai



In [7]:
import openai
import pickle as pkl
import pandas as pd
import typing as tp
from IPython.display import display
import json

In [8]:
openai_client = openai.OpenAI(api_key=API_KEY)

In [10]:
with open('prompt_test_df.pkl', 'rb') as f:
  df = pkl.load(f)

In [11]:
class OpenAICache:
  def __init__(self, client):
    self.client = client
    self.storage = {}
    self.model = "gpt-3.5-turbo"

  def request(self, prompt, text):
    if prompt not in self.storage:
      self.storage[prompt] = {}
    if text not in self.storage[prompt]:
      response = self.client.chat.completions.create(
        model=self.model,
        messages=[
          {"role": "system", "content": prompt},
          {"role": "user", "content": text}
        ],
        **({'response_format':{"type": "json_object"}} if self.model == "gpt-3.5-turbo-1106" else {})
      )
      self.storage[prompt][text] = response
    return self.storage[prompt][text]

class Prompt:
  def __init__(self, prompt:str, openai_cache:OpenAICache, fake_mode=False):
    self.prompt = prompt
    self.results = {}
    self.responses = []
    self.openai = openai_cache
    self.fake_mode = fake_mode

  def make_requests(self, df:pd.DataFrame, rewrite=False):
    for id in df.index:
      if id not in self.results or rewrite:
        response = self.openai.request(self.prompt, df.loc[id]['text'])
        if not self.fake_mode:
          self.results[id] = json.loads(response.choices[0].message.content)
        else:
          self.results[id] = json.loads(response)

def make_flat_dict(obj):
  if isinstance(obj, list):
    obj = {str(i):obj[i] for i in range(len(obj))}
  if not isinstance(obj, dict):
    return obj
  new_obj = {}
  for key in obj.keys():
    val = make_flat_dict(obj[key])
    if isinstance(val, dict):
      for subkey in val.keys():
        new_obj[str(key) + '_' + subkey] = val[subkey]
    else:
      new_obj[str(key)] = val
  return new_obj


class PromptManager:
  def __init__(self, df, openai_cache):
    self.df = df
    self.openai = openai_cache
    self.prompts = {}
    self.baseline_name = None
    self.fake_mode = False

  def add(self, name, prompt:str):
    if name not in self.prompts:
      self.prompts[name] = Prompt(prompt, openai_cache=self.openai, fake_mode=self.fake_mode)

  def make_requests(self, name):
    self.prompts[name].make_requests(self.df)

  def compare_dicts(self, res1, res2, name1, name2):
    diff = []
    for key in set(res1.keys())|set(res2.keys()):
      val1 = res1[key] if key in res1 else '<no key>'
      val2 = res2[key] if key in res2 else '<no key>'
      if val1 != val2:
        col1 = name1 + (' (baseline)' if name1 == self.baseline_name else '')
        diff.append({'field':key, col1:val1, name2:val2})

    return diff
  def compare(self, name1, name2):
    res1 = make_flat_dict(self.prompts[name1].results)
    res2 = make_flat_dict(self.prompts[name2].results)

    diff = self.compare_dicts(res1, res2, name1, name2)
    display(pd.DataFrame(diff))
    return diff

  def compare_to_baseline(self, name):
    return self.compare(self.baseline_name, name)

  def make_all(self, name, prompt):
    self.add(name, prompt)
    self.make_requests(name)
    if self.baseline_name is not None:
      self.compare_to_baseline(name)
    else:
      self.baseline_name = name

  def set_baseline(self, name):
    self.baseline_name = name

  def print_prompt(self, name):
    print(self.prompts[name].prompt)

In [12]:
# tests
REQ = 'Всем доброго дня! \n\nПродаю туфли, метро Новокосино. \n\n1. Бежевые лакированные туфли, размер 35. Абсолютно новые. — 1000р. \n2. Синие туфли, размер 36. Абсолютно новые. — 1000р. \n3. Чёрные лакированные туфли из натуральной кожи марки Elmonte. Размер 36. Носились пару раз, в отличном состоянии. Стоят новые набойки и профилактика. — 1000р. \n4. Чёрные туфли под замшу. Размер 36. Носились недолго и аккуратно, в хорошем состоянии. — 500р.'
FAKE_PROMPT = 'lol'
class FakeCache(OpenAICache):
  def __init__(self):
    self.storage = {
      FIRST_PROMPT: {
        REQ: """[
          {
              "name": "Бежевые лакированные туфли",
              "price": 1000,
              "description": "Абсолютно новые",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 35}
          },
          {
              "name": "Синие туфли",
              "price": 1000,
              "description": "Абсолютно новые",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 36}
          },
          {
              "name": "Чёрные лакированные туфли из натуральной кожи марки Elmonte",
              "price": 1000,
              "description": "Носились пару раз, в отличном состоянии. Стоят новые набойки и профилактика.",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 36}
          },
          {
              "name": "Чёрные туфли под замшу",
              "price": 500,
              "description": "Носились недолго и аккуратно, в хорошем состоянии.",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 36}
          }
        ]"""
      },
      FAKE_PROMPT: {
        REQ: """[
          {
              "name": "Бежевые",
              "price": 1000,
              "description": "Абсолютно новые",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 35}
          },
          {
              "name": "Синие туфли",
              "price": 1000,
              "description": "Абсолютно новые",
              "place": "метро",
              "count": null,
              "others": {"Размер": 36}
          },
          {
              "name": "Чёрные лакированные туфли из натуральной кожи марки Elmonte",
              "price": 1000,
              "description": "Носились пару раз, в отличном состоянии. Стоят новые набойки и профилактика.",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 36}
          },
          {
              "name": "Чёрные туфли под замшу",
              "price": 400,
              "description": "Носились недолго и аккуратно, в хорошем состоянии.",
              "place": "метро Новокосино",
              "count": null,
              "others": {"Размер": 36}
          }
        ]"""
      }
    }

In [13]:
fc = FakeCache()
fpm = PromptManager(pd.DataFrame([{'text':REQ}]), fc)
fpm.fake_mode = True

In [15]:
fpm.make_all('req1', FIRST_PROMPT)

In [16]:
fpm.make_all('req2', FAKE_PROMPT)

Unnamed: 0,field,req1 (baseline),req2
0,0_0_name,Бежевые лакированные туфли,Бежевые
1,0_1_place,метро Новокосино,метро
2,0_3_price,500,400


# colab testing

In [17]:
cache = OpenAICache(client=openai_client)
pm = PromptManager(df, cache)

In [None]:
pm.make_all('first prompt', FIRST_PROMPT)

KeyboardInterrupt: ignored

In [None]:
print(FIRST_PROMPT)

You are a helpful assistant designed to output JSON. The client provides you with text containing sales announcements, and you need to create JSON for these announcements to input into the database. The JSON should be a list of dictionaries, where each announcement is a separate dictionary. Each dictionary must include the fields: name, price, description (if available, otherwise null), place (information about the seller's location, the same for all products if provided, otherwise null), count (if multiple items are being sold at once, otherwise null), and others (a dictionary with other useful information about the product).


In [None]:
pm.prompts['first prompt'].results

{184056: [{'name': 'Polypedates megacephalus (Белогубый веслоног)',
   'price': '1200',
   'description': 'подростки без определения пола',
   'place': None,
   'count': None,
   'others': {}},
  {'name': 'Pipa carvalhoi малыши',
   'price': '500',
   'description': '2,5-3 см',
   'place': 'Москва и МО',
   'count': None,
   'others': {}},
  {'name': 'Duttaphrynus sp. (Индия) годовалые пары',
   'price': '10000',
   'description': None,
   'place': None,
   'count': '3 пары',
   'others': {}},
  {'name': 'Duttaphrynus sp.',
   'price': None,
   'description': 'содержание полностью аналогично чернорубцовым жабам, самцы этого вида имеют насыщенный жёлтый цвет, самки более серо-красного цвета, размер взрослых особей не превышает 4-5 см у самцов и 6-7 см у самок',
   'place': 'Троицк (20 км от МКАДа по Калужскому шоссе)',
   'count': None,
   'others': {'вывоз': 'само',
    'езда': 'на автобусе 30 минут',
    'метро': 'м. Теплый Стан'}}],
 182319: [{'name': 'Морские коряги',
   'price': No

In [None]:
new_prompt = 'You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - product name, \n\"price\" - cost of a product, \n\"count\" - number of products, \n\"description\" - short product description (if provided), \n\"other_info\" - all other provided info. \nIf some field is not provided pass null. Note that fields name and price are necessary for a product, if they are not provided skip this product.'
print(new_prompt)

You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - product name, 
"price" - cost of a product, 
"count" - number of products, 
"description" - short product description (if provided), 
"other_info" - all other provided info. 
If some field is not provided pass null. Note that fields name and price are necessary for a product, if they are not provided skip this product.


In [None]:
pm.make_all('new_prompt', new_prompt)

In [None]:
pm.set_baseline('new_prompt')
list(pm.prompts['new_prompt'].results.values())[0]

{'place': 'Троицк, Россия',
 'products': [{'name': 'Polypedates megacephalus (Белогубый веслоног)',
   'price': 1200,
   'count': None,
   'description': 'подростки без определения пола',
   'other_info': None},
  {'name': 'Pipa carvalhoi',
   'price': 500,
   'count': 'малыши 2,5-3 см',
   'description': None,
   'other_info': 'без отправок, только самовывоз или доставка курьером по Москве и МО'},
  {'name': 'Duttaphrynus sp. (Индия)',
   'price': 10000,
   'count': 'годовалые пары',
   'description': None,
   'other_info': 'в наличии 3 пары, самки вроде с икрой, все 3 пары за 25000 руб.'},
  {'name': 'Duttaphrynus sp.',
   'price': None,
   'count': None,
   'description': 'содержание полностью аналогично чернорубцовым жабам, самцы этого вида имеют насыщенный жёлтый цвет, самки более серо-красного цвета, размер взрослых особей не превышает 4-5 см у самцов и 6-7 см у самок',
   'other_info': None}]}

In [None]:
with_tip = new_prompt + " I'm going to tip $200 for a perfect solution!"
pm.make_all('with_tip', with_tip)

JSONDecodeError: ignored

In [None]:
pm.prompts['with_tip'].responses

[]

In [None]:
list(cache.storage[with_tip].values())[3].choices[0].message.content

'Here is the JSON representation of the product offer provided:\n\n```json\n{\n  "place": null,\n  "products": [\n    {\n      "name": "шапка",\n      "price": "800р",\n      "count": null,\n      "description": null,\n      "other_info": "57 размер"\n    },\n    {\n      "name": "штанишки",\n      "price": null,\n      "count": null,\n      "description": null,\n      "other_info": "размеры 50 и 44-46"\n    },\n    {\n      "name": "китель x2",\n      "price": "1400р",\n      "count": null,\n      "description": null,\n      "other_info": null\n    },\n    {\n      "name": "берцы",\n      "price": "2к",\n      "count": null,\n      "description": null,\n      "other_info": "43 размер ноги"\n    },\n    {\n      "name": "куртка",\n      "price": "2500р",\n      "count": null,\n      "description": null,\n      "other_info": "44-46 размер"\n    },\n    {\n      "name": "все нашивки",\n      "price": "900р",\n      "count": null,\n      "description": null,\n      "other_info": "кроме фа

In [None]:
del cache.storage[with_tip]

In [None]:
with_tip += ' Provide only json, without other text.'

In [None]:
pm.make_all('with_tip', with_tip)

Unnamed: 0,field,new_prompt (baseline),with_tip
0,177295_products_8_other_info,,<no key>
1,182319_products_0_name,Морская коряга,Морские коряги
2,177295_products_10_description,,<no key>
3,177295_products_4_count,85g,
4,145796_products_2_name,китель,штанишки
...,...,...,...
111,10_products_0_description,Кухонный инструмент для выпечки,кухонный инструмент для выпечки
112,177295_products_7_count,227g,
113,184056_place,"Троицк, Россия","Москва, Троицк"
114,117067_products_0_other_info,"Размер: 36-37-38-39-40-41, Сезон: Зима, АРТ: 2...",Цена: 800р штучно\nРазмер: 36-37-38-39-40-41\n...


In [None]:
len(make_flat_dict(pm.prompts['with_tip'].results))

160

In [None]:
print(df.loc[145796].text)

Продам уставную форму для военки. Я ростом 190,вешу 70 кг
 Всё носилось 1 год:
1) шапка. 57 размер. 800р
2) штанишки x2, размеры 50 и 44-46
3) китель x2 за китель+штанцы= 1400р
4) берцы. 43 размер ноги 2к
5) куртка (44-46) 2500р
6) все нашивки кроме фамилии 900р
7) кепарик 700р
8) ремень x2 по 600р
9) футболка x2 по 300р 
10) сумка Balenciaga 500р
Всё суммарно 9700, новая стоила 14.5к в том году.
Фотки в коменты докину


In [None]:
print(df.loc[177295].text)

г. Москва
В продаже кальций и витамины для рептилий Zoo Med. Производство США, оригинальные добавки.
-Zoo Med reptivite with D3 (57g -1700 р.; 227g -3100 р.)
-Zoo Med reptivite without D3 (57g -1700 р.; 227g -3000 р.)
- Zoo Med repti calcium with D3 (85g -1700 р.; 227g -2400р.)
- Zoo Med repti calcium without D3 (85g -1500 р.(есть банки по 900); 227g -1900 р.)
- Repashy calcium plus (85g -2300 р.; 170g -3500 р.)
Другие Repashy под заказ, цены уточняйте.
Также есть другие товары для рептилий:
∆ Влажная камера для рептилий Exo Terra wet rock large 20×12×10 см. Цена-2000 р.
∆ JBL Proclean Terra 250 ml (средство для чистки стёкол террариума). Производство Германия. Цена-900 р.
∆ Растение для террариума Terra Della "Эхеверия" 14×12,6×9,2 см. Цена-400 р.

Есть много хороших отзывов на известной доске объявлений(мои отзывы на последнем фото). Профиль в ВК закрыт(прошу не смущаться по этому поводу), но написать в ЛС можно, на все вопросы отвечу)) Отправки в другие города возможны.


In [None]:
min_info = 'You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - product name, \n\"price\" - cost of a product, \n\"count\" - number of products. \nIf count is not provided pass null. Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.'
print(min_info)

You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - product name, 
"price" - cost of a product, 
"count" - number of products. 
If count is not provided pass null. Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.


In [None]:
diff = pm.make_all('min_info', min_info)

Unnamed: 0,field,new_prompt (baseline),min_info
0,145796_products_8_other_info,,<no key>
1,177295_products_8_other_info,,<no key>
2,182319_products_0_name,Морская коряга,Морские коряги
3,177295_products_10_description,,<no key>
4,184056_products_1_description,,<no key>
...,...,...,...
133,177295_products_7_count,227g,<no key>
134,177826_products_4_price,<no key>,4500
135,184056_place,"Троицк, Россия",Троицк
136,117067_products_0_other_info,"Размер: 36-37-38-39-40-41, Сезон: Зима, АРТ: 2...",<no key>


In [None]:
print(df.loc[177295].text)

г. Москва
В продаже кальций и витамины для рептилий Zoo Med. Производство США, оригинальные добавки.
-Zoo Med reptivite with D3 (57g -1700 р.; 227g -3100 р.)
-Zoo Med reptivite without D3 (57g -1700 р.; 227g -3000 р.)
- Zoo Med repti calcium with D3 (85g -1700 р.; 227g -2400р.)
- Zoo Med repti calcium without D3 (85g -1500 р.(есть банки по 900); 227g -1900 р.)
- Repashy calcium plus (85g -2300 р.; 170g -3500 р.)
Другие Repashy под заказ, цены уточняйте.
Также есть другие товары для рептилий:
∆ Влажная камера для рептилий Exo Terra wet rock large 20×12×10 см. Цена-2000 р.
∆ JBL Proclean Terra 250 ml (средство для чистки стёкол террариума). Производство Германия. Цена-900 р.
∆ Растение для террариума Terra Della "Эхеверия" 14×12,6×9,2 см. Цена-400 р.

Есть много хороших отзывов на известной доске объявлений(мои отзывы на последнем фото). Профиль в ВК закрыт(прошу не смущаться по этому поводу), но написать в ЛС можно, на все вопросы отвечу)) Отправки в другие города возможны.


In [None]:
diff = pm.compare_to_baseline('min_info')

Unnamed: 0,field,new_prompt (baseline),min_info
0,145796_products_8_other_info,,<no key>
1,177295_products_8_other_info,,<no key>
2,182319_products_0_name,Морская коряга,Морские коряги
3,177295_products_10_description,,<no key>
4,184056_products_1_description,,<no key>
...,...,...,...
133,177295_products_7_count,227g,<no key>
134,177826_products_4_price,<no key>,4500
135,184056_place,"Троицк, Россия",Троицк
136,117067_products_0_other_info,"Размер: 36-37-38-39-40-41, Сезон: Зима, АРТ: 2...",<no key>


In [30]:
def filter_diff(diff, exclude=['other_info', 'description', 'count']):
  return [obj for obj in diff if all(s not in obj['field'] for s in exclude)]
pd.DataFrame(filter_diff(diff))

NameError: ignored

In [None]:
clean_price = 'You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - product name, \n\"price\" - cost of a product (one number in rubbles), \n\"count\" - amount of product. Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.'
pm.make_all('clean_price', clean_price)

Unnamed: 0,field,new_prompt (baseline),clean_price
0,145796_products_8_other_info,,<no key>
1,177295_products_8_other_info,,<no key>
2,182319_products_0_name,Морская коряга,Морские коряги
3,177295_products_10_description,,<no key>
4,184056_products_1_description,,<no key>
...,...,...,...
145,177295_products_7_count,227g,227
146,184056_place,"Троицк, Россия","Троицк, Москва, Россия"
147,145796_products_2_count,2,
148,117067_products_0_other_info,"Размер: 36-37-38-39-40-41, Сезон: Зима, АРТ: 2...",<no key>


In [28]:
from IPython.display import clear_output
def smart_diff(name2, name1=pm.baseline_name):
  res = pd.DataFrame(filter_diff(pm.compare(name1, name2)))
  clear_output()
  return res

In [None]:
smart_diff('clean_price')

Unnamed: 0,field,new_prompt (baseline),clean_price
0,145796_products_8_other_info,,<no key>
1,177295_products_8_other_info,,<no key>
2,182319_products_0_name,Морская коряга,Морские коряги
3,177295_products_10_description,,<no key>
4,184056_products_1_description,,<no key>
...,...,...,...
145,177295_products_7_count,227g,227
146,184056_place,"Троицк, Россия","Троицк, Москва, Россия"
147,145796_products_2_count,2,
148,117067_products_0_other_info,"Размер: 36-37-38-39-40-41, Сезон: Зима, АРТ: 2...",<no key>


Unnamed: 0,field,new_prompt (baseline),clean_price
0,182319_products_0_name,Морская коряга,Морские коряги
1,177295_products_4_price,1700 р.,1700
2,177826_products_3_price,4500,<no key>
3,177295_products_7_price,1900 р.,1900
4,177295_products_10_price,2000 р.,2000
5,177295_products_0_price,1700 р.,1700
6,177295_products_2_price,1700 р.,1700
7,139989_products_0_name,Футболка с длинными рукавами,Классное Футболки с длинными рукавами
8,177295_products_3_price,3000 р.,3000
9,996_place,,Доставка


In [None]:
pm.set_baseline('clean_price')

In [None]:
shorter_name = 'You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - short product name for a buyer, \n\"price\" - cost of a product (one number in rubbles), \n\"count\" - amount of product (with units if not just count). Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.'
print(shorter_name)

You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - short product name for a buyer, 
"price" - cost of a product (one number in rubbles), 
"count" - amount of product (with units if not just count). Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.


In [None]:
pm.make_all('shorter_name', shorter_name)

Unnamed: 0,field,clean_price (baseline),shorter_name
0,182319_products_0_name,Морские коряги,Морская коряга
1,177295_products_4_count,85,85g
2,145796_products_4_count,1,44-46 размер
3,145796_products_8_count,,2
4,139989_products_0_count,1,"размеры: 48,50,52,54,56"
5,177295_products_10_count,1,20x12x10 см
6,145796_products_1_price,,Не указано
7,177295_products_0_count,57,57g
8,184056_products_1_count,0,"малыши 2,5-3 см"
9,145796_products_1_count,,2


In [None]:
better_count = 'You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - short product name for a buyer, \n\"price\" - cost of a product (one number in rubbles), \n\"count\" - amount of product (number of products, weight or lenght if provided, otherwise null). Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.'


You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - short product name for a buyer, 
"price" - cost of a product (one number in rubbles), 
"count" - amount of product (number of products, weight or lenght if provided, otherwise null). Note that fields name and price are necessary for a product, if they are not provided skip this product. Answer only with JSON.


In [None]:
pm.make_all('better_count', better_count)

Unnamed: 0,field,clean_price (baseline),better_count
0,145796_products_7_name,ремень,кепарик
1,182319_products_0_name,Морские коряги,Морская коряга
2,182319_products_0_count,,
3,177295_products_4_count,85,<no key>
4,145796_products_2_name,китель,штанишки
...,...,...,...
80,145796_products_12_price,<no key>,500
81,184056_place,"Троицк, Москва, Россия",Если само вывоз из Троицка (20 км от МКАДа по ...
82,145796_products_0_count,1,
83,145796_products_3_name,берцы,китель


In [None]:
0.08 / (len(cache.storage) * 10)

0.001142857142857143

In [None]:
print(df.loc[145796].text)

Продам уставную форму для военки. Я ростом 190,вешу 70 кг
 Всё носилось 1 год:
1) шапка. 57 размер. 800р
2) штанишки x2, размеры 50 и 44-46
3) китель x2 за китель+штанцы= 1400р
4) берцы. 43 размер ноги 2к
5) куртка (44-46) 2500р
6) все нашивки кроме фамилии 900р
7) кепарик 700р
8) ремень x2 по 600р
9) футболка x2 по 300р 
10) сумка Balenciaga 500р
Всё суммарно 9700, новая стоила 14.5к в том году.
Фотки в коменты докину


In [None]:
smart_diff('better_count')

Unnamed: 0,field,clean_price (baseline),better_count
0,145796_products_7_name,ремень,кепарик
1,182319_products_0_name,Морские коряги,Морская коряга
2,145796_products_2_name,китель,штанишки
3,177295_products_4_price,1700,1700 р.
4,177295_products_9_price,3500,3500 р.
5,177295_products_5_price,2400,2400 р.
6,177295_products_7_price,1900,1900 р.
7,145796_products_6_price,700,900
8,145796_products_11_price,<no key>,300
9,177295_products_10_price,2000,2000 р.


In [None]:
pm.prompts['better_count'].results[145796]

{'place': 'Договорная',
 'products': [{'name': 'шапка', 'price': 800, 'count': None},
  {'name': 'штанишки', 'price': None, 'count': None},
  {'name': 'штанишки', 'price': None, 'count': None},
  {'name': 'китель', 'price': 1400, 'count': None},
  {'name': 'берцы', 'price': 2000, 'count': None},
  {'name': 'куртка', 'price': 2500, 'count': None},
  {'name': 'все нашивки', 'price': 900, 'count': None},
  {'name': 'кепарик', 'price': 700, 'count': None},
  {'name': 'ремень', 'price': 600, 'count': None},
  {'name': 'ремень', 'price': 600, 'count': None},
  {'name': 'футболка', 'price': 300, 'count': None},
  {'name': 'футболка', 'price': 300, 'count': None},
  {'name': 'сумка', 'price': 500, 'count': None}]}

In [None]:
better_count2 = 'You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - short title for a customer, \n\"price\" - cost of a product (one number in rubbles), \n\"count\" - number of product if they are in a pack (weight or lenght if product is uncountable). Drop products without name or price. Answer only with JSON.'
print(better_count2)

You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - short title for a customer, 
"price" - cost of a product (one number in rubbles), 
"count" - number of product if they are in a pack (weight or lenght if product is uncountable). Drop products without name or price. Answer only with JSON.


In [None]:
pm.make_all('better_count2', better_count2)

In [None]:
diff = pd.DataFrame(pm.compare_to_baseline('better_count2'))

Unnamed: 0,field,clean_price (baseline),better_count2
0,182319_products_0_count,,<no key>
1,177295_products_4_count,85,85g
2,145796_products_4_count,1,
3,145796_products_8_count,,
4,139989_products_0_count,1,количество
...,...,...,...
65,145796_products_6_count,1,
66,182319_products_0_price,,
67,184056_place,"Троицк, Москва, Россия","Москва, Теплый Стан"
68,145796_products_2_count,,


In [None]:
print(df.loc[184056].text)

Жабы и лягушки 
 
Возможен обмен на лягушек: Древолазов, Theloderma (Телодерма), так же на мелкие виды гекконов, на другую террариумную живность (обсуждаемо), если обмен, то по ценам без скидки 
 
Возможны отправки, через рептокурьера по многим направлениям (подробности в ЛС) 
 
Polypedates megacephalus (Белогубый веслоног), подростки без определения пола – 1200 руб/шт 
 
Pipa carvalhoi малыши 2,5-3 см – 500 руб/шт (без отправок, только самовывоз или доставка курьером по Москве и МО) 
 
Duttaphrynus sp. (Индия) годовалые пары – 10000 руб за пару, в наличии 3 пары, самки вроде с икрой, все 3 пары за 25000 руб. 
 
Duttaphrynus sp. - содержание полностью аналогично чернорубцовым жабам, самцы этого вида имеют насыщенный жёлтый цвет, самки более серо-красного цвета, размер взрослых особей не превышает 4-5 см у самцов и 6-7 см у самок 
 
Если само вывоз из Троицка (20 км от МКАДа по Калужскому шоссе) на автобусе 30 минут езды от м. Теплый Стан


In [None]:
pm.set_baseline('better_count2')

In [None]:
assistant_role = 'You are an assistant in marketplace company. You need to create a JSON from a plain text of a product offer provided by new seller. This JSON must contain next fields:\n\"place\" - where buyer can recieve product (or delivery info)\n\"products\" - list of dictionaries of valid products. \nEach product dict must contain next info: \n\"name\" - short title for a customer, \n\"price\" - cost of a product (one number in rubbles), \n\"count\" - number of product if they are in a pack (weight or lenght if product is uncountable). Drop products without name or price. Answer only with JSON.'
print(assistant_role)

You are an assistant in marketplace company. You need to create a JSON from a plain text of a product offer provided by new seller. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - short title for a customer, 
"price" - cost of a product (one number in rubbles), 
"count" - number of product if they are in a pack (weight or lenght if product is uncountable). Drop products without name or price. Answer only with JSON.


In [None]:
pm.make_all('assistant_role', assistant_role)

Unnamed: 0,field,better_count2 (baseline),assistant_role
0,182319_products_0_name,Морские коряги,Морская коряга
1,33902_products_0_материал,<no key>,хлопок
2,145796_products_4_count,,44-46 размер
3,145796_products_8_count,,не указано
4,139989_products_0_count,количество,
...,...,...,...
61,145796_products_6_count,,не указан
62,182319_products_0_price,,0
63,184056_place,"Москва, Теплый Стан","Москва, Троицк"
64,145796_products_2_count,,размер не указан


In [None]:
print(df.loc[145796].text)

Продам уставную форму для военки. Я ростом 190,вешу 70 кг
 Всё носилось 1 год:
1) шапка. 57 размер. 800р
2) штанишки x2, размеры 50 и 44-46
3) китель x2 за китель+штанцы= 1400р
4) берцы. 43 размер ноги 2к
5) куртка (44-46) 2500р
6) все нашивки кроме фамилии 900р
7) кепарик 700р
8) ремень x2 по 600р
9) футболка x2 по 300р 
10) сумка Balenciaga 500р
Всё суммарно 9700, новая стоила 14.5к в том году.
Фотки в коменты докину


In [None]:
pm.prompts['assistant_role'].results[145796]

{'place': 'указать место получения',
 'products': [{'name': 'шапка', 'price': 800, 'count': '57 размер'},
  {'name': 'штанишки', 'price': 'указать цену', 'count': 'размеры 50 и 44-46'},
  {'name': 'китель', 'price': 'указать цену', 'count': 'размер не указан'},
  {'name': 'берцы', 'price': 'указать цену', 'count': '43 размер'},
  {'name': 'куртка', 'price': 2500, 'count': '44-46 размер'},
  {'name': 'все нашивки кроме фамилии', 'price': 900, 'count': 'не указано'},
  {'name': 'кепарик', 'price': 700, 'count': 'не указан'},
  {'name': 'ремень', 'price': 'указать цену', 'count': 'не указано'},
  {'name': 'футболка', 'price': 'указать цену', 'count': 'не указано'},
  {'name': 'сумка Balenciaga', 'price': 500, 'count': 'не указано'}]}

In [None]:
first_new_format = "You are a helpful assistant designed to output JSON. The client provides you with text containing sales announcements, and you need to create JSON for these announcements to input into the database. The JSON should have key place with a seller place or delivery info and a products key: a list of dictionaries, where each product is a separate dictionary. Each dictionary must include the fields: name, price, description (if available, otherwise null), count (if multiple items are being sold at once, otherwise null), and others (a dictionary with other useful information about the product)."
print(first_new_format)

You are a helpful assistant designed to output JSON. The client provides you with text containing sales announcements, and you need to create JSON for these announcements to input into the database. The JSON should have key place with a seller place or delivery info and a products key: a list of dictionaries, where each product is a separate dictionary. Each dictionary must include the fields: name, price, description (if available, otherwise null), count (if multiple items are being sold at once, otherwise null), and others (a dictionary with other useful information about the product).


In [None]:
pm.make_all('first_new_format2', first_new_format)

Unnamed: 0,field,better_count2 (baseline),first_new_format2
0,177826_products_1_others_Спецификация,<no key>,Самец
1,182319_products_0_name,Морские коряги,Морская коряга
2,10_products_0_name,Полуавтоматический венчик для взбивания яиц,"Полуавтоматический венчик для взбивания яиц, к..."
3,145796_products_3_others,<no key>,
4,184056_products_1_description,<no key>,"2,5-3 см"
...,...,...,...
165,145796_products_3_price,2000,0
166,177295_products_7_count,900g,<no key>
167,184056_place,"Москва, Теплый Стан",Доставка курьером по Москве и МО
168,145796_products_2_count,,


In [None]:
smart_diff('first_new_format2')

Unnamed: 0,field,clean_price,first_new_format2
0,177826_products_1_others_Спецификация,<no key>,Самец
1,182319_products_0_name,Морские коряги,Морская коряга
2,10_products_0_name,Полуавтоматический венчик для взбивания яиц,"Полуавтоматический венчик для взбивания яиц, к..."
3,145796_products_3_others,<no key>,
4,184056_products_2_others_in_stock,<no key>,3
...,...,...,...
92,145796_products_8_price,,600
93,10_place_seller,<no key>,Распродажа
94,145796_products_3_price,2000,0
95,184056_place,"Троицк, Москва, Россия",Доставка курьером по Москве и МО


In [None]:
pm.make_all('another_tip', pm.prompts[pm.baseline_name].prompt + ' I will tip you 2$ for a perfect and accurate solution.')

Unnamed: 0,field,better_count2 (baseline),another_tip
0,145796_products_2_name,китель,китель+штанцы
1,145796_products_4_count,,
2,145796_products_8_count,,
3,139989_products_0_count,количество,
4,177295_products_8_name,Zoo Med repti calcium without D3,Repashy calcium plus
...,...,...,...
61,177295_products_7_count,900g,227g
62,145796_products_6_count,,
63,184056_place,"Москва, Теплый Стан",Возможны отправки через рептокурьера по многим...
64,145796_products_2_count,,


In [None]:
id = 139989
print(df.loc[id].text)
pm.prompts['better_count2'].results[id]

Грандиозная распродажа
Классное Футболки с длинными рукавами 
Отличное качество трикотажа 👌👌👌
Размеры : 48,50,52,54,56
Цена за штук 150💥💥💥💥. 😎😎
Мы бронируем товар 💫💫💫
Место :корпус А - 2А-54 
Спешите  🏃‍♂️🏃‍♂️пока они есть у нас 😉😉


{'place': 'корпус А - 2А-54',
 'products': [{'name': 'Футболка с длинными рукавами',
   'price': 150,
   'count': 'количество'}]}

In [None]:
pm.prompts['another_tip'].results[id]

{'place': 'корпус А - 2А-54',
 'products': [{'name': 'Футболка с длинными рукавами',
   'price': 150,
   'count': None}]}

In [None]:
print(pm.prompts[pm.baseline_name].prompt)

You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products. 
Each product dict must contain next info: 
"name" - short title for a customer, 
"price" - cost of a product (one number in rubbles), 
"count" - number of product if they are in a pack (weight or lenght if product is uncountable). Drop products without name or price. Answer only with JSON.


In [None]:
cache1106 = OpenAICache(client=openai_client)
cache1106.model = "gpt-3.5-turbo-1106"
pm1106 = PromptManager(df=df, openai_cache=cache1106)

In [None]:
pm1106.make_all('better_count_1106', better_count2)

In [None]:
res1 = make_flat_dict(pm.prompts['better_count2'].results)
res2 = make_flat_dict(pm1106.prompts['better_count_1106'].results)
pd.DataFrame(pm.compare_dicts(res1, res2, 'better_count2', 'better_count_1106'))

Unnamed: 0,field,better_count2 (baseline),better_count_1106
0,145796_products_7_name,все нашивки,футболка
1,182319_products_0_name,Морская коряга,Морские коряги
2,10_products_0_name,Полуавтоматический венчик для взбивания яиц,"Полуавтоматический венчик для взбивания яиц, к..."
3,182319_products_0_count,,<no key>
4,145796_products_2_name,штанишки,берцы
...,...,...,...
68,184056_place,Самовывоз из Троицка (20 км от МКАДа по Калужс...,"Москва, возможен самовывоз или доставка курьер..."
69,145796_products_2_count,,43 размер
70,145796_products_0_count,,57 размер
71,145796_products_3_name,китель,куртка


In [None]:
id = 145796
print(df.loc[id].text)
pm.prompts['better_count2'].results[id]

Продам уставную форму для военки. Я ростом 190,вешу 70 кг
 Всё носилось 1 год:
1) шапка. 57 размер. 800р
2) штанишки x2, размеры 50 и 44-46
3) китель x2 за китель+штанцы= 1400р
4) берцы. 43 размер ноги 2к
5) куртка (44-46) 2500р
6) все нашивки кроме фамилии 900р
7) кепарик 700р
8) ремень x2 по 600р
9) футболка x2 по 300р 
10) сумка Balenciaga 500р
Всё суммарно 9700, новая стоила 14.5к в том году.
Фотки в коменты докину


{'place': 'уточните информацию о месте получения товара',
 'products': [{'name': 'шапка', 'price': 800, 'count': ''},
  {'name': 'штанишки', 'price': '', 'count': ''},
  {'name': 'штанишки', 'price': '', 'count': ''},
  {'name': 'китель', 'price': '', 'count': ''},
  {'name': 'китель', 'price': '', 'count': ''},
  {'name': 'берцы', 'price': 2000, 'count': ''},
  {'name': 'куртка', 'price': 2500, 'count': ''},
  {'name': 'все нашивки', 'price': 900, 'count': ''},
  {'name': 'кепарик', 'price': 700, 'count': ''},
  {'name': 'ремень', 'price': 600, 'count': ''},
  {'name': 'ремень', 'price': 600, 'count': ''},
  {'name': 'футболка', 'price': 300, 'count': ''},
  {'name': 'футболка', 'price': 300, 'count': ''},
  {'name': 'сумка Balenciaga', 'price': 500, 'count': ''}]}

In [None]:
pm1106.prompts['better_count_1106'].results[id]

{'place': 'Личная встреча, г.Москва',
 'products': [{'name': 'шапка', 'price': 800, 'count': '57 размер'},
  {'name': 'китель', 'price': 1400, 'count': 'x2'},
  {'name': 'берцы', 'price': 2000, 'count': '43 размер'},
  {'name': 'куртка', 'price': 2500, 'count': '44-46 размер'},
  {'name': 'все нашивки кроме фамилии', 'price': 900},
  {'name': 'кепарик', 'price': 700},
  {'name': 'ремень', 'price': 600, 'count': 'x2'},
  {'name': 'футболка', 'price': 300, 'count': 'x2'},
  {'name': 'сумка Balenciaga', 'price': 500}]}

# :/

In [22]:
pm.make_all('old_request', LAST_PROMPT)

In [25]:
NEW_PROMPT = """You are an ai trainer in marketplace company. You need to create a JSON from a plain text of a product offer provided by user. This JSON must contain next fields:
"place" - where buyer can recieve product (or delivery info)
"products" - list of dictionaries of valid products.
Each product dict must contain ONLY following information:
"name" - short title for a customer,
"price" - cost of one unit of a product (one number),
"currency" - in what currency is the price indicated (default is 'рубли'),
"size" - size or weight of a product (one string),
"count" - number of product units offered.
Represent different variations on each product as DIFFERENT products.
Do NOT add description and any other fields. Drop all fields with null value.
If NO information about the price provided, the product has to be excluded from the answer.
Answer ONLY with JSON string, no other text is needed.
"""
pm.make_all('new_request', NEW_PROMPT)

Unnamed: 0,field,old_request (baseline),new_request
0,177826_products_1_size,<no key>,огненный
1,139989_products_3_name,<no key>,Футболка с длинными рукавами
2,139989_products_2_price,<no key>,150
3,177295_products_5_name,Zoo Med repti calcium with D3,Zoo Med repti calcium with D3 (227g)
4,177295_products_11_currency,<no key>,рубли
...,...,...,...
203,177295_products_3_size,<no key>,227g
204,33902_products_4_currency,<no key>,рубли
205,145796_products_10_currency,<no key>,рубли
206,10_place,Магазин,


In [32]:
diff = smart_diff('new_request', 'old_request')

In [33]:
diff

Unnamed: 0,field,old_request (baseline),new_request
0,177826_products_1_size,<no key>,огненный
1,139989_products_3_name,<no key>,Футболка с длинными рукавами
2,139989_products_2_price,<no key>,150
3,177295_products_5_name,Zoo Med repti calcium with D3,Zoo Med repti calcium with D3 (227g)
4,177295_products_11_currency,<no key>,рубли
...,...,...,...
160,177295_products_3_size,<no key>,227g
161,33902_products_4_currency,<no key>,рубли
162,145796_products_10_currency,<no key>,рубли
163,10_place,Магазин,
