# Bert Models

https://huggingface.co/docs/transformers/model_doc/herbert

In [1]:
!pip install sacremoses # for Herbert

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/897.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/897.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m890.9/897.5 kB[0m [31m14.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sacremoses
Successfully installed sacremoses-0.1.1


In [2]:
from transformers import BertTokenizer, BertModel, DistilBertTokenizer, DistilBertModel, HerbertTokenizer, RobertaModel
import torch
class bert_similarity:
  """Cosine similarity between transformer embeddings of two texts.

  The tokenizer and the model are loaded with ``from_pretrained`` and the model
  is moved to the GPU when one is available, otherwise it stays on the CPU.

  Args:
    pretrained_name: checkpoint name passed to ``model_class.from_pretrained``.
    tokenizer_class: class exposing ``from_pretrained`` that builds the tokenizer.
    model_class: class exposing ``from_pretrained`` that builds the model.
    tokenizer_name: checkpoint name for the tokenizer; falls back to
      ``pretrained_name`` when ``None``.
    pooling: how the per-token hidden states are reduced to one vector:
      ``"last"`` (default, the original behaviour) takes the last token
      position, ``"cls"`` takes the first one, ``"mean"`` averages all positions.

  Raises:
    ValueError: if ``pooling`` is not one of the supported strategies.
  """

  _POOLINGS = ("last", "cls", "mean")

  def __init__(self, pretrained_name, tokenizer_class, model_class, tokenizer_name=None, pooling="last"):
    # Validate before the (potentially slow) checkpoint download starts.
    if pooling not in self._POOLINGS:
      raise ValueError("pooling must be one of %s, got %r" % (self._POOLINGS, pooling))
    self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
    self.pretrained_name = pretrained_name
    self.tokenizer_name = tokenizer_name
    if self.tokenizer_name is None:
      self.tokenizer_name = self.pretrained_name
    self.pooling = pooling
    self.tokenizer = tokenizer_class.from_pretrained(self.tokenizer_name)
    self.model = model_class.from_pretrained(self.pretrained_name).to(self.device)
    # dim=0 because embedding() returns 1-D vectors.
    self.__similarity = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

  def embedding(self, text):
    """Return a single 1-D embedding vector for ``text``.

    Gradients are tracked only while the model is in training mode, so plain
    inference does not build an autograd graph.
    """
    # truncation=True keeps over-long inputs within the tokenizer's configured
    # maximum length instead of passing them to the model unchanged.
    encoded_input = self.tokenizer(text, return_tensors='pt', truncation=True).to(self.device)
    with torch.set_grad_enabled(getattr(self.model, "training", False)):
      output = self.model(**encoded_input)
    # First (and only) sequence of the batch: one hidden-state row per token.
    token_states = output['last_hidden_state'][0]
    if self.pooling == "cls":
      return token_states[0]
    if self.pooling == "mean":
      return token_states.mean(dim=0)
    # NOTE(review): with BERT-style tokenizers the last position is presumably a
    # closing special token rather than a content token, which may contribute to
    # uniformly high similarity scores - confirm and consider "mean" pooling.
    return token_states[-1]

  def similarity(self, text1, text2):
    """Return the cosine similarity (0-dim tensor) of the two texts' embeddings."""
    embed1 = self.embedding(text1)
    embed2 = self.embedding(text2)
    sim = self.__similarity(embed1, embed2)
    return sim

In [3]:
sim = bert_similarity("bert-base-uncased", BertTokenizer, BertModel)
sim.similarity("Replace me by any text you'd like.", "Replace me by any text you'd hate.")

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tensor(0.9866, device='cuda:0', grad_fn=<SumBackward1>)

In [35]:
sim.similarity('dog', 'windmill')

tensor(0.9607, device='cuda:0', grad_fn=<SumBackward1>)

In [4]:
#%%timeit -n 1000
#sim.similarity("Replace me by any text you'd like.", "Replace me by any text you'd hate.")

0.01s = 10ms na parę, BERT ~35.1ms, 35.1 ms ± 4.33 ms per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [5]:
sim2 = bert_similarity("distilbert-base-uncased", DistilBertTokenizer, DistilBertModel)
sim2.similarity("Replace me by any text you'd like.", "Replace me by any text you'd hate.")

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tensor(0.9849, device='cuda:0', grad_fn=<SumBackward1>)

In [6]:
#%%timeit -n 1000
#sim2.similarity("Replace me by any text you'd like.", "Replace me by any text you'd hate.")

17.9 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [7]:
# HerBERT (KLEJ) checkpoint; the tokenizer is loaded from a separate checkpoint name, hence tokenizer_name.
# NOTE(review): the load log of this cell warns that the checkpoint's tokenizer class is
# 'XLMTokenizer' while HerbertTokenizer is used here ("may result in unexpected tokenization") -
# confirm against the model card whether XLMTokenizer should be passed instead.
sim3 = bert_similarity("allegro/herbert-klej-cased-v1", HerbertTokenizer, RobertaModel, tokenizer_name="allegro/herbert-klej-cased-tokenizer-v1")

tokenizer_config.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/591k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/300 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'XLMTokenizer'. 
The class this function is called from is 'HerbertTokenizer'.


config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/500M [00:00<?, ?B/s]

In [8]:
#%%timeit -n 100
#sim3.similarity("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", "Nie musisz, więc nie idź, do tej głupiej szkoły.")

257 ms ± 12.8 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)

# Hugchat

https://github.com/Soulter/hugging-chat-api/tree/master

4 modele dostępne:

    meta-llama/Llama-2-70b-chat-hf
    codellama/CodeLlama-34b-Instruct-hf
    tiiuae/falcon-180B-chat
    mistralai/Mistral-7B-Instruct-v0.1

In [9]:
!pip install hugchat

Collecting hugchat
  Downloading hugchat-0.3.8-py3-none-any.whl (30 kB)
Collecting requests-toolbelt (from hugchat)
  Downloading requests_toolbelt-1.0.0-py2.py3-none-any.whl (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: requests-toolbelt, hugchat
Successfully installed hugchat-0.3.8 requests-toolbelt-1.0.0


In [15]:
from hugchat import hugchat
from hugchat.login import Login

from getpass import getpass

# Credentials are requested at run time. The password is read with getpass()
# so it is not echoed into the cell output, which is saved with the notebook.
# The e-mail is still echoed by input(); clear this cell's output before sharing.
email = input("Input mail")
password = getpass("Input password")
# Log in to huggingface and grant authorization to huggingchat
sign = Login(email, password)
cookies = sign.login()
# Save cookies to the local directory
cookie_path_dir = "./cookies_snapshot"
sign.saveCookiesToDir(cookie_path_dir)
chatbot = hugchat.ChatBot(cookies=cookies.get_dict())  # or cookie_path="usercookies/<email>.json"

# non stream response
query_result = chatbot.query("Hi!")
print(query_result) # or query_result.text or query_result["text"]

Input mail<redacted>
Input password<redacted>
Hi!


In [39]:
import random

# Seeded, private generator: the 100 samples are identical on every run and
# the global `random` state is left untouched.
_rng = random.Random(0)
x = [_rng.random() for _ in range(100)]

In [46]:
# Send one probe query with a random suffix taken from `x`.
# The original used the loop variable `i` while the `for i in x:` loop was
# commented out, so the cell only ran when `i` had leaked from another cell.
probe = x[0]
query_result = chatbot.query("Hi!"+str(probe))
print(query_result)

 It looks like you're sending a series of numbers after your greeting. I'm not sure what you're trying to communicate. Can you please provide more context or clarify your message? I'll do my best to assist you once I understand what you need.

If you're looking for a way to start a conversation, maybe we could talk about something you're interested in or ask each other some questions to get to know each other better. Just let me know what you'd like to do!

In the meantime, is there anything else you'd like to chat about? The weather, perhaps? Or maybe you have a favorite hobby or interest that you'd like to share with me? I'm here to listen and help in any way I can.


37.7 µs ± 7.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each) - sus, v.low

In [31]:
a = {"id_left":2549,"cluster_id_left":"8001841376004","identifiers_left":[{"EAN":["8001841376004"]}],"title_left":"lenor p\u0142yn do p\u0142ukania tkanin sparkling bloom yellow poppy","description_left":"p\u0142yn do p\u0142ukania tkanin lenor zapewnia d\u0142ugotrwa\u0142\u0105 \u015bwie\u017co\u015b\u0107 \u015bwie\u017co\u015b\u0107 po\u015bcieli ka\u017cdej nocy przez ca\u0142y tydzie\u0144 zapach sparkling bloom yellow poppy stopniowo uwalnia energetyzuj\u0105ce aromaty o\u017cywiaj\u0105c twoje zmys\u0142y eksplozj\u0105 kwiatowej \u015bwie\u017co\u015bci i daj\u0105c ci poczucie komfortu aby w pe\u0142ni cieszy\u0107 si\u0119 dzia\u0142aniem swojego ulubionego p\u0142ynu do zmi\u0119kczania tkanin lenor u\u017cywaj go razem z pere\u0142kami zapachowymi lenor unstoppables ","brand_left":"lenor","price_left":"14 99","specTableContent_left":"amount 1 42 l capacity 1 42 l extras  image_url https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product ingredients  5 kationowe \u015brodki powierzchniowo czynne benzisothiazolinone kompozycje zapachowe alpha isomethyl ionone coumarin hexyl cinnamal origin  kraj pochodzenia czechy zapakowano w czechy  storage  url https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product weight waga brutto 1471","keyValuePairs_left":{"amount":"1 42 l","capacity":"1 42 l","extras":"","image_url":"https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product","ingredients":" 5 kationowe \u015brodki powierzchniowo czynne benzisothiazolinone kompozycje zapachowe alpha isomethyl ionone coumarin hexyl cinnamal","origin":" kraj pochodzenia czechy zapakowano w czechy ","storage":"","url":"https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product","weight":"waga brutto 
1471"},"id_right":2549,"cluster_id_right":"8001841376004","identifiers_right":[{"EAN":["8001841376004"]}],"title_right":"lenor p\u0142yn do p\u0142ukania tkanin sparkling bloom yellow poppy","description_right":"p\u0142yn do p\u0142ukania tkanin lenor zapewnia d\u0142ugotrwa\u0142\u0105 \u015bwie\u017co\u015b\u0107 \u015bwie\u017co\u015b\u0107 po\u015bcieli ka\u017cdej nocy przez ca\u0142y tydzie\u0144 zapach sparkling bloom yellow poppy stopniowo uwalnia energetyzuj\u0105ce aromaty o\u017cywiaj\u0105c twoje zmys\u0142y eksplozj\u0105 kwiatowej \u015bwie\u017co\u015bci i daj\u0105c ci poczucie komfortu aby w pe\u0142ni cieszy\u0107 si\u0119 dzia\u0142aniem swojego ulubionego p\u0142ynu do zmi\u0119kczania tkanin lenor u\u017cywaj go razem z pere\u0142kami zapachowymi lenor unstoppables ","brand_right":"lenor","price_right":"14 99","specTableContent_right":"amount 1 42 l capacity 1 42 l extras  image_url https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product ingredients  5 kationowe \u015brodki powierzchniowo czynne benzisothiazolinone kompozycje zapachowe alpha isomethyl ionone coumarin hexyl cinnamal origin  kraj pochodzenia czechy zapakowano w czechy  storage  url https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product weight waga brutto 1471","keyValuePairs_right":{"amount":"1 42 l","capacity":"1 42 l","extras":"","image_url":"https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product","ingredients":" 5 kationowe \u015brodki powierzchniowo czynne benzisothiazolinone kompozycje zapachowe alpha isomethyl ionone coumarin hexyl cinnamal","origin":" kraj pochodzenia czechy zapakowano w czechy ","storage":"","url":"https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product","weight":"waga brutto 1471"},"category_left":"chemia","category_right":"chemia","label":1}
b = {"id_left":629,"cluster_id_left":"5906395223572","identifiers_left":[{"EAN":["5906395223572"]}],"title_left":"sady wincenta sok wieloowocowy w kartonie t\u0142oczony","description_left":"sady wincenta sok wieloowocowy w kartonie t\u0142oczony sok wieloowocowy jab\u0142kowo gruszkowo malinowo aroniowy naturalnie m\u0119tny otrzymany w wyniku t\u0142oczenia miazgi jab\u0142kowej bez udzia\u0142u enzym\u00f3w filtrowany pasteryzowany bez dodatku jakichkolwiek substancji dodatkowych rozlewany na gor\u0105co w atmosferze azotu i pakowany w systemie bag in box ","brand_left":"sady wincenta","price_left":"16 99","specTableContent_left":"amount 3 l capacity 3 l extras  image_url https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product ingredients jab\u0142ko 65 gruszka 25 malina 5 aronia 5  origin  storage przechowywa\u0107 w suchym i ch\u0142odnym miejscu po otwarciu przechowywa\u0107 w lod\u00f3wce nie d\u0142u\u017cej ni\u017c 14 dni  url https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product weight ","keyValuePairs_left":{"amount":"3 l","capacity":"3 l","extras":"","image_url":"https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product","ingredients":"jab\u0142ko 65 gruszka 25 malina 5 aronia 5 ","origin":"","storage":"przechowywa\u0107 w suchym i ch\u0142odnym miejscu po otwarciu przechowywa\u0107 w lod\u00f3wce nie d\u0142u\u017cej ni\u017c 14 dni ","url":"https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product","weight":""},"id_right":629,"cluster_id_right":"5906395223572","identifiers_right":[{"EAN":["5906395223572"]}],"title_right":"sady wincenta sok wieloowocowy w kartonie t\u0142oczony","description_right":"sady wincenta sok wieloowocowy w kartonie t\u0142oczony sok wieloowocowy jab\u0142kowo gruszkowo malinowo aroniowy naturalnie m\u0119tny otrzymany w wyniku t\u0142oczenia miazgi jab\u0142kowej bez 
udzia\u0142u enzym\u00f3w filtrowany pasteryzowany bez dodatku jakichkolwiek substancji dodatkowych rozlewany na gor\u0105co w atmosferze azotu i pakowany w systemie bag in box ","brand_right":"sady wincenta","price_right":"16 99","specTableContent_right":"amount 3 l capacity 3 l extras  image_url https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product ingredients jab\u0142ko 65 gruszka 25 malina 5 aronia 5  origin  storage przechowywa\u0107 w suchym i ch\u0142odnym miejscu po otwarciu przechowywa\u0107 w lod\u00f3wce nie d\u0142u\u017cej ni\u017c 14 dni  url https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product weight ","keyValuePairs_right":{"amount":"3 l","capacity":"3 l","extras":"","image_url":"https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product","ingredients":"jab\u0142ko 65 gruszka 25 malina 5 aronia 5 ","origin":"","storage":"przechowywa\u0107 w suchym i ch\u0142odnym miejscu po otwarciu przechowywa\u0107 w lod\u00f3wce nie d\u0142u\u017cej ni\u017c 14 dni ","url":"https www frisco pl pid 119683 n sady wincenta sok wieloowocowy w kartonie tloczony stn product","weight":""},"category_left":"napoje","category_right":"napoje","label":1}


In [29]:
a['title_left'] == a['title_right'], a['title_left']

(True, 'lenor płyn do płukania tkanin sparkling bloom yellow poppy')

In [28]:
# Compare the left and right descriptions of the pair (the original compared
# description_left with itself, which is always True).
a['description_left'] == a['description_right'], a['description_left']

(True,
 'płyn do płukania tkanin lenor zapewnia długotrwałą świeżość świeżość pościeli każdej nocy przez cały tydzień zapach sparkling bloom yellow poppy stopniowo uwalnia energetyzujące aromaty ożywiając twoje zmysły eksplozją kwiatowej świeżości i dając ci poczucie komfortu aby w pełni cieszyć się działaniem swojego ulubionego płynu do zmiękczania tkanin lenor używaj go razem z perełkami zapachowymi lenor unstoppables ')

In [32]:
for i,j in a.items():
  print(i,j)

id_left 2549
cluster_id_left 8001841376004
identifiers_left [{'EAN': ['8001841376004']}]
title_left lenor płyn do płukania tkanin sparkling bloom yellow poppy
description_left płyn do płukania tkanin lenor zapewnia długotrwałą świeżość świeżość pościeli każdej nocy przez cały tydzień zapach sparkling bloom yellow poppy stopniowo uwalnia energetyzujące aromaty ożywiając twoje zmysły eksplozją kwiatowej świeżości i dając ci poczucie komfortu aby w pełni cieszyć się działaniem swojego ulubionego płynu do zmiękczania tkanin lenor używaj go razem z perełkami zapachowymi lenor unstoppables 
brand_left lenor
price_left 14 99
specTableContent_left amount 1 42 l capacity 1 42 l extras  image_url https www frisco pl pid 119361 n lenor plyn do plukania tkanin sparkling bloom yellow poppy stn product ingredients  5 kationowe środki powierzchniowo czynne benzisothiazolinone kompozycje zapachowe alpha isomethyl ionone coumarin hexyl cinnamal origin  kraj pochodzenia czechy zapakowano w czechy  stora

In [68]:
polish_query = chatbot.query("Please extract crucial tags out of this polish product description: "+a['description_left'])
print(polish_query)

 Sure, here are some crucial tags that can be extracted from the Polish product description:

1. Płyn do płukania tkanin (fabric softener)
2. Lenor (brand name)
3. Świeżość (freshness)
4. Pościela (bedding)
5. Każdej nocy (every night)
6. Tydzień (week)
7. Zapach (scent)
8. Sparkling Bloom Yellow (product name)
9. Energetyzujące aromaty (invigorating aromas)
10. Ożywiający (revitalizing)
11. Komfort (comfort)
12. Ulubiony płyn (favorite fabric softener)
13. Zmiękczanie tkanin (softening fabrics)
14. Perełki zapachowe (fragrance pearls)
15. Unstoppables (product line name)


In [69]:
# Instruction sent to the chat model ahead of each product description.
model_prompt = "Extract key information from the product description provided. Focus on identifying essential features, specifications, and unique aspects of each product. Provide a concise summary with relevant details. Ensure that the extracted information is clear and captures the essence of the product's functionality and appeal."
# Separate the instruction from the payload with a blank line; plain
# concatenation glued the description onto the prompt's final sentence.
Q = model_prompt + "\n\n" + a['description_left']
query = chatbot.query(Q)
print(query)

 Product: Lenor Fabric Softener

Key Features:

* Long-lasting freshness for bedding (kwaszony)
* Sparkling Bloom Yellow Poppy fragrance
* Gradually releases invigorating aromas
* Revitalizes senses with an explosion of floral freshness
* Comfortable feeling

Specifications:

* Form: Liquid
* Fragrance: Sparkling Bloom Yellow Poppy
* Size: Not specified
* Packaging: Not specified

Unique Aspects:

* The product promises to provide long-lasting freshness for bedding, ensuring that it smells fresh every night for an entire week.
* The fragrance, Sparkling Bloom Yellow Poppy, is unique and gradually releases invigorating aromas.
* The product claims to revitalize senses with an explosion of floral freshness, providing a comfortable feeling.

Overall, Lenor Fabric Softener appears to offer long-lasting freshness, a unique fragrance, and a revitalizing experience for users. Its ability to provide comfort and its gradual release of aromas make it stand out from other fabric softeners.


ChatGPT attempt:

User
Please extract crucial tags out of this polish product description: 'płyn do płukania tkanin lenor zapewnia długotrwałą świeżość świeżość pościeli każdej nocy przez cały tydzień zapach sparkling bloom yellow poppy stopniowo uwalnia energetyzujące aromaty ożywiając twoje zmysły eksplozją kwiatowej świeżości i dając ci poczucie komfortu aby w pełni cieszyć się działaniem swojego ulubionego płynu do zmiękczania tkanin lenor używaj go razem z perełkami zapachowymi lenor unstoppables '
ChatGPT

Certainly! Here are the crucial tags extracted from the Polish product description:

    Płyn do płukania tkanin
    Lenor
    Długotrwała świeżość
    Zapach Sparkling Bloom Yellow Poppy
    Świeżość pościeli
    Energetyzujące aromaty
    Eksplozja kwiatowej świeżości
    Poczucie komfortu
    Ulubiony płyn do zmiękczania tkanin
    Perełki zapachowe
    Lenor Unstoppables

In [44]:
a['description_left'], b['description_left']

('płyn do płukania tkanin lenor zapewnia długotrwałą świeżość świeżość pościeli każdej nocy przez cały tydzień zapach sparkling bloom yellow poppy stopniowo uwalnia energetyzujące aromaty ożywiając twoje zmysły eksplozją kwiatowej świeżości i dając ci poczucie komfortu aby w pełni cieszyć się działaniem swojego ulubionego płynu do zmiękczania tkanin lenor używaj go razem z perełkami zapachowymi lenor unstoppables ',
 'sady wincenta sok wieloowocowy w kartonie tłoczony sok wieloowocowy jabłkowo gruszkowo malinowo aroniowy naturalnie mętny otrzymany w wyniku tłoczenia miazgi jabłkowej bez udziału enzymów filtrowany pasteryzowany bez dodatku jakichkolwiek substancji dodatkowych rozlewany na gorąco w atmosferze azotu i pakowany w systemie bag in box ')

In [43]:
sim3.similarity(a['description_left'], b['description_left'])

tensor(0.8083, device='cuda:0', grad_fn=<SumBackward1>)

In [45]:
c = 'Płyn do płukania tkanin Lenor o zapachu bergamotki, aloesu i eukaliptusa zapewnia długotrwałą świeżość, dzięki czemu pościel jest świeża przez cały tydzień. Wraz z wiosennym słońcem dni stają się cieplejsze i przynoszą nowe odkrycia, które zachwycą wszystkie Twoje zmysły. Poczuj świeżość i energię dzięki mieszance eukaliptusa, bergamotki i aloesu.'
sim3.similarity(a['description_left'], c)

tensor(0.8907, device='cuda:0', grad_fn=<SumBackward1>)

In [65]:
negative = {"brand_left":"amd","brand_right":"gigabyte","category_left":"Computers_and_Accessories","category_right":"Computers_and_Accessories","cluster_id_left":355117,"cluster_id_right":152098,"description_left":"six core technology unlocked multiplier 3 50ghz clock speed 6mb l2 cache 6mb l3 cache hypertransport 3 0 technology 3 year warranty","description_right":"amd 990fx chipset x4 ddr3 x3 pci e x16 x1 pci e x1 x2 pci x6 sata 6gb s x1 gigabit lan x2 usb 3 0 x14 usb 2 0 realtek hd 7 1 audio sli xfire support","id_left":11185963,"id_right":14492431,"identifiers_left":[{"\/productID":"[amdfx6300]"},{"\/mpn":"[fd6300wmhkbox]"}],"identifiers_right":[{"\/mpn":"[ga990xaud3]"},{"\/gtin13":"[4719331818135]"}],"keyValuePairs_left":{"processor number":"6300 black edition","socket":"am3","architecture":"32 nm technology","clock speed":"3 50ghz","cores":"6","cache":"6 mb l2 6mb l3","memory controller":"dual channel ddr3 800 1066 1333 1600mhz","tdp":"95w","heatsink included":"yes","warranty":"3 years"},"keyValuePairs_right":None,"label":0,"pair_id":"11185963#14492431","price_left":None,"price_right":None,"specTableContent_left":"processor number 6300 black edition socket am3 architecture 32 nm technology clock speed 3 50ghz cores 6 cache 6 mb l2 6mb l3 memory controller dual channel ddr3 800 1066 1333 1600mhz tdp 95w heatsink included yes warranty 3 years","specTableContent_right":None,"title_left":"amd piledriver fx 6 six core 6300 black edition 3 50ghz socket am3 processor retail am3plus fd6300wmhkbox novatech","title_right":"gigabyte 990xa ud3 amd 990x socket am3 ddr3 motherboard ocuk"}
positive = {"brand_left":"intel","brand_right":None,"category_left":"Computers_and_Accessories","category_right":"Computers_and_Accessories","cluster_id_left":7209527,"cluster_id_right":7209527,"description_left":None,"description_right":"the intel ssd dc p3600 series is a pcie gen3 ssd architected with the high performance controller interface non volatile memory express nvme delivering leading performance low latency and quality of service matching the performance with world class reliability and endurance intel ssd dc p3600 series offers a range of capacity 1 2 tb in both add in card and 2 5 inch form factor with pcie gen3 support and nvme queuing interface the intel ssd dc p3600 series delivers excellent sequential read performance of up to 2 8 gb s and sequential write speeds of up to 1700 mb s intel ssd dc p3600 series delivers very high random read iops of 450 k and random write iops of 70 k for 4 kb operations taking advantage of the direct path from the storage to the cpu by means of nvme intel ssd dc p3600 series exhibits low latency of less than 20 s for sequential access to the ssd the 2 5 inch intel ssd dc p3600 series takes advantage of the 8639 connector and provides hot pluggable removal and insertion providing in service replacement options","id_left":6023438,"id_right":15856907,"identifiers_left":[{"\/mpn":"[ssdpedme012t401]"}],"identifiers_right":[{"\/gtin8":"[43201830]"},{"\/mpn":"[ssdpedme012t401]"}],"keyValuePairs_left":None,"keyValuePairs_right":None,"label":1,"pair_id":"6023438#15856907","price_left":None,"price_right":None,"specTableContent_left":None,"specTableContent_right":None,"title_left":"intel dc p3600 1 2tb pci e solid state drive ssdpedme012t401 pcpartpicker united kingdom","title_right":"intel solid state drive dc p3600 series 1 2 tb pci ssdpedme012t401 drives ssds cdwg com"}
idgaf = {"brand_left":"hp enterprise","brand_right":"hp enterprise","category_left":"Computers_and_Accessories","category_right":"Computers_and_Accessories","cluster_id_left":2224466,"cluster_id_right":2224466,"description_left":"description proliant bl20p g2 1p 3 06ghz 512mb fc manufacturer part 323146 b21","description_right":"description pl bl20p 3 06 xeon 1p m1 512k sa5i nc7781 ilo manufacturer part 323146 b21","id_left":2673261,"id_right":8786270,"identifiers_left":[{"\/sku":"[323146b21]"},{"\/mpn":"[323146b21]"}],"identifiers_right":[{"\/sku":"[323146b21]"},{"\/mpn":"[323146b21]"}],"keyValuePairs_left":{"category":"proliant server","sub category":"bl20","generation":"g2","part number":"323146 b21","products id":"15400","chassis form factor":"blade","model":"hp proliant bl20p","cache memory installed":"512 kb","ram installed":"512 mb","ram technology":"ddr sdram","ram maximum":"8 gb","networking protocol":"ethernet","controller raid level":"raid 0 raid 1 raid 10 raid 5","weight":"50 lbs","":""},"keyValuePairs_right":{"category":"proliant server","sub category":"bl20","generation":"","part number":"323146 b21","products id":"6184","chassis form factor":"blade","model":"hp proliant bl20p","cache memory installed":"512 kb","ram installed":"512 mb","ram technology":"ddr sdram","ram maximum":"8 gb","networking protocol":"ethernet fast ethernet gb ethernet","storage controller raid level":"raid 0 raid 1 raid 10 raid 5","actual weight":"50 lbs","":""},"label":1,"pair_id":"2673261#8786270","price_left":None,"price_right":None,"specTableContent_left":"specifications category proliant server sub category bl20 generation g2 part number 323146 b21 products id 15400 chassis form factor blade model hp proliant bl20p cache memory installed 512 kb ram installed 512 mb ram technology ddr sdram ram maximum 8 gb networking protocol ethernet controller raid level raid 0 raid 1 raid 10 raid 5 weight 50 lbs","specTableContent_right":"specifications category proliant server sub 
category bl20 generation part number 323146 b21 products id 6184 chassis form factor blade model hp proliant bl20p cache memory installed 512 kb ram installed 512 mb ram technology ddr sdram ram maximum 8 gb networking protocol ethernet fast ethernet gb ethernet storage controller raid level raid 0 raid 1 raid 10 raid 5 actual weight 50 lbs","title_left":"null , 323146 b21 bl20p g2 1p xeon 3 06ghz wholesale price","title_right":"323146 b21 bl20p xeon 3 06ghz , null wholesale price"}
killme = {"brand_left":"intel","brand_right":None,"category_left":"Computers_and_Accessories","category_right":"Computers_and_Accessories","cluster_id_left":74810,"cluster_id_right":74810,"description_left":"quad core with hyperthreading technology 3 60ghz clock speed 22nm process 8mb l3 cache dual channel ddr3 controller integrated hd 4600 graphics 3 year warranty","description_right":"micro intel i7 4790 lga 1150 quad core 3 6ghz 8mb","id_left":10446386,"id_right":17540927,"identifiers_left":[{"\/mpn":"[bx80646i74790]"},{"\/gtin13":"[5032037061551]"}],"identifiers_right":[{"\/sku":"[bx80646i74790]"}],"keyValuePairs_left":None,"keyValuePairs_right":None,"label":1,"pair_id":"10446386#17540927","price_left":None,"price_right":None,"specTableContent_left":None,"specTableContent_right":None,"title_left":"intel core i7 4790 3 60ghz haswell socket lga1150 processor retail processo ocuk","title_right":"micro intel i7 4790 lga 1150"}


In [56]:
for i,j in positive.items():
  print(i,j)

brand_left intel
brand_right None
category_left Computers_and_Accessories
category_right Computers_and_Accessories
cluster_id_left 7209527
cluster_id_right 7209527
description_left None
description_right the intel ssd dc p3600 series is a pcie gen3 ssd architected with the high performance controller interface non volatile memory express nvme delivering leading performance low latency and quality of service matching the performance with world class reliability and endurance intel ssd dc p3600 series offers a range of capacity 1 2 tb in both add in card and 2 5 inch form factor with pcie gen3 support and nvme queuing interface the intel ssd dc p3600 series delivers excellent sequential read performance of up to 2 8 gb s and sequential write speeds of up to 1700 mb s intel ssd dc p3600 series delivers very high random read iops of 450 k and random write iops of 70 k for 4 kb operations taking advantage of the direct path from the storage to the cpu by means of nvme intel ssd dc p3600 ser

In [54]:
negative['cluster_id_left'] == negative['cluster_id_right'], positive['cluster_id_left'] == positive['cluster_id_right']

(False, True)

In [58]:
sim.similarity(negative['description_left'], negative['description_right'])

tensor(0.9466, device='cuda:0', grad_fn=<SumBackward1>)

In [64]:
idgaf['description_left'], idgaf['description_right']

('description proliant bl20p g2 1p 3 06ghz 512mb fc manufacturer part 323146 b21',
 'description pl bl20p 3 06 xeon 1p m1 512k sa5i nc7781 ilo manufacturer part 323146 b21')

In [63]:
sim.similarity(idgaf['description_left'], idgaf['description_right'])

tensor(0.9788, device='cuda:0', grad_fn=<SumBackward1>)

In [71]:
killme['description_left'], killme['description_right']

('quad core with hyperthreading technology 3 60ghz clock speed 22nm process 8mb l3 cache dual channel ddr3 controller integrated hd 4600 graphics 3 year warranty',
 'micro intel i7 4790 lga 1150 quad core 3 6ghz 8mb')

In [66]:
sim.similarity(killme['description_left'], killme['description_right'])

tensor(0.9805, device='cuda:0', grad_fn=<SumBackward1>)

In [73]:
# Same extraction instruction as used for the Polish example earlier in the notebook.
model_prompt = "Extract key information from the product description provided. Focus on identifying essential features, specifications, and unique aspects of each product. Provide a concise summary with relevant details. Ensure that the extracted information is clear and captures the essence of the product's functionality and appeal."
# Separate the instruction from the payload with a blank line; plain
# concatenation glued the description onto the prompt's final sentence.
Q_kill = model_prompt + "\n\n" + killme['description_left']
query_kill = chatbot.query(Q_kill)
print(query_kill)

 Product: Quad Core Processor with Hyperthreading Technology

Essential Features:

* Quad core architecture
* Hyperthreading technology
* 3.6 GHz clock speed
* 22nm process technology
* 8MB L3 cache
* Dual channel DDR3 controller
* Integrated HD 4600 graphics

Specifications:

* Clock Speed: 3.6 GHz
* Number of Cores: 4
* Cache Memory: 8 MB L3 cache
* Process Technology: 22nm
* Memory Controller: Dual Channel DDR3
* Graphics: Integrated HD 4600

Unique Aspects:

* Hyperthreading technology allows for increased parallel processing capabilities, resulting in improved performance and efficiency.
* The 22nm process technology used in this processor results in lower power consumption and heat generation compared to older processes.
* The integrated HD 4600 graphics provide decent graphical performance for general use cases, eliminating the need for a separate graphics card in many situations.

Summary:
This quad core processor with hyperthreading technology offers excellent performance and 

In [74]:
# Right-hand description of the same pair; a blank line separates the
# instruction from the payload instead of gluing them together.
Q_kill2 = model_prompt + "\n\n" + killme['description_right']
query_kill2 = chatbot.query(Q_kill2)
print(query_kill2)

 Product: Micro Intel i7 4790 LGA 1150 Quad Core Processor

Essential Features:

* Quad core architecture
* 3.6 GHz clock speed
* 8 MB cache memory
* LGA 1150 socket compatibility

Specifications:

* Clock Speed: 3.6 GHz
* Number of Cores: 4
* Cache Memory: 8 MB
* Socket Compatibility: LGA 1150

Unique Aspects:

* The processor belongs to the high-performance Intel Core i7 family, offering fast and efficient computing.
* The quad core design enables simultaneous multi-tasking and smooth handling of demanding applications.
* The 8 MB cache memory ensures rapid access to frequently used data, further improving overall system performance.

Summary:
The Micro Intel i7 4790 LGA 1150 Quad Core Processor is a powerful and efficient CPU designed for seamless multitasking and handling demanding applications. With its 3.6 GHz clock speed, 8 MB cache memory, and LGA 1150 socket compatibility, this processor offers top-notch performance and upgradeability. Ideal for professionals and enthusiasts s

In [80]:
sim.similarity(str(query_kill), str(query_kill2))

tensor(0.8258, device='cuda:0', grad_fn=<SumBackward1>)

In [91]:
# New ChatBot instance built from the same login cookies
# (presumably to start from a clean conversation - confirm).
chatbot3 = hugchat.ChatBot(cookies=cookies.get_dict())
# A blank line separates the instruction from the payload instead of gluing them together.
Q_3 = model_prompt + "\n\n" + negative['description_right']
query_3 = chatbot3.query(Q_3)

In [95]:
print(query_3)

 Product: ASRock 990FX Extreme4 Motherboard

Essential Features:

* AMD 990FX Chipset
* Supports AM3+ CPUs
* 4 DDR3 slots, supporting up to 32GB RAM
* 3 PCI-Express 3.0 x16 slots (x16, x8, x4)
* 1 PCI-Express 2.0 x1 slot
* 2 PCI slots
* SATA 6Gb/s ports (6 ports)
* Gigabit LAN
* 2 USB 3.0 ports
* 14 USB 2.0 ports
* Realtek HD Audio 7.1 channel audio
* SLI and CrossFireX support

Unique Aspects:

* High-quality audio capacitors for improved sound quality
* Dual-Stack MOSFET design for better power delivery and lower temperatures
* 8-layer PCB for improved signal integrity and reduced noise
* Supports 3-Way SLI and Quad-GPU CrossFireX configurations
* Bundled with ASRock's XFast LAN software for improved network performance

Overall, the ASRock 990FX Extreme4 motherboard offers robust features and high-performance capabilities, making it an excellent choice for gaming enthusiasts and overclockers who demand fast processing speeds, ample storage options, and advanced connectivity. Its pre

In [92]:
# New ChatBot instance built from the same login cookies
# (presumably to start from a clean conversation - confirm).
chatbot2 = hugchat.ChatBot(cookies=cookies.get_dict())
# A blank line separates the instruction from the payload instead of gluing them together.
Q_4 = model_prompt + "\n\n" + negative['description_left']
query_4 = chatbot2.query(Q_4)

In [94]:
print(query_4)

 Product: AMD Ryzen 9 5900X

Essential Features:

* 16-core, 32-thread processor
* 50GHz clock speed
* 6MB L2 cache
* 6MB L3 cache
* HyperTransport 3.0 technology
* 3-year warranty

Unique Aspects:

* Unlocked multiplier for overclocking capabilities
* Highest clock speed in its class (50GHz)
* Large L2 and L3 caches for improved performance
* Advanced HyperTransport 3.0 technology for increased bandwidth and low latency

Summary: The AMD Ryzen 9 5900X is a high-performance desktop processor that offers 16 cores and 32 threads, with an impressive clock speed of 50GHz and large L2 and L3 caches. It also features advanced HyperTransport 3.0 technology and comes with a 3-year warranty. Its unlocked multiplier allows for overclocking, making it an attractive option for enthusiasts looking to push their system to the limit.


In [93]:
sim.similarity(str(query_3), str(query_4)) # high score even though the products differ, which is why we want to train

tensor(0.9800, device='cuda:0', grad_fn=<SumBackward1>)