## Instalação da Biblioteca sentence-transformers

In [1]:
pip install -U sentence-transformers




## Definição do Modelo

In [2]:
from sentence_transformers import SentenceTransformer

model_id = 'sentence-transformers/all-mpnet-base-v2'
model = SentenceTransformer(model_id)

In [3]:
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
embeddings = model.encode(sentences)
print(embeddings)

[[ 0.0225026  -0.07829181 -0.02303074 ... -0.00827927  0.02652692
  -0.00201897]
 [ 0.04170236  0.0010974  -0.01553418 ... -0.02181627 -0.0635936
  -0.00875284]]


## Importando o Pandas

In [4]:
import pandas as pd

## Importando a Base de Treinamento

In [5]:
df_train = pd.read_csv('df_train_wands.csv')

## Consulta do Cabeçalho

In [6]:
df_train.head()

Unnamed: 0,product_id,product_name,product_description,query,label
0,38855,abheer floor shelf coffee table,when it comes to rounding out your living ense...,smart coffee table,0.5
1,9929,addaly abstract coffee table,anchor your living room in mid-century style w...,smart coffee table,1.0
2,5235,adoni coffee table,,smart coffee table,1.0
3,26772,ahart frame coffee table,your coffee table is a style centerpiece for y...,smart coffee table,0.5
4,3510,ahern coffee table,the ahern 4 legs coffee table with storage is ...,smart coffee table,0.5


In [7]:
df_train["label"] = df_train["label"].replace({0.0: 0, 0.5: 1, 1.0: 2})
df_train

Unnamed: 0,product_id,product_name,product_description,query,label
0,38855,abheer floor shelf coffee table,when it comes to rounding out your living ense...,smart coffee table,1.0
1,9929,addaly abstract coffee table,anchor your living room in mid-century style w...,smart coffee table,2.0
2,5235,adoni coffee table,,smart coffee table,2.0
3,26772,ahart frame coffee table,your coffee table is a style centerpiece for y...,smart coffee table,1.0
4,3510,ahern coffee table,the ahern 4 legs coffee table with storage is ...,smart coffee table,1.0
...,...,...,...,...,...
142319,15439,fellsburg linen upholstered parsons chair,,worn leather office chair,1.0
142320,451,olin upholstered side chair,if you are looking for a simple yet sleek dini...,worn leather office chair,1.0
142321,30764,barbay lounge chair cushion,,worn leather office chair,0.0
142322,16796,haings upholstered parsons chair,this set of 2 armless side chairs boasts an up...,worn leather office chair,1.0


In [8]:
import numpy as np
df_train['label'] = df_train['label'].astype(np.int64)

In [9]:
df_train

Unnamed: 0,product_id,product_name,product_description,query,label
0,38855,abheer floor shelf coffee table,when it comes to rounding out your living ense...,smart coffee table,1
1,9929,addaly abstract coffee table,anchor your living room in mid-century style w...,smart coffee table,2
2,5235,adoni coffee table,,smart coffee table,2
3,26772,ahart frame coffee table,your coffee table is a style centerpiece for y...,smart coffee table,1
4,3510,ahern coffee table,the ahern 4 legs coffee table with storage is ...,smart coffee table,1
...,...,...,...,...,...
142319,15439,fellsburg linen upholstered parsons chair,,worn leather office chair,1
142320,451,olin upholstered side chair,if you are looking for a simple yet sleek dini...,worn leather office chair,1
142321,30764,barbay lounge chair cushion,,worn leather office chair,0
142322,16796,haings upholstered parsons chair,this set of 2 armless side chairs boasts an up...,worn leather office chair,1


## Substituição de float('NaN') por String Vazia

In [10]:
df_train['product_description'] = df_train['product_description'].replace(float("NaN"), '')
df_train

Unnamed: 0,product_id,product_name,product_description,query,label
0,38855,abheer floor shelf coffee table,when it comes to rounding out your living ense...,smart coffee table,1
1,9929,addaly abstract coffee table,anchor your living room in mid-century style w...,smart coffee table,2
2,5235,adoni coffee table,,smart coffee table,2
3,26772,ahart frame coffee table,your coffee table is a style centerpiece for y...,smart coffee table,1
4,3510,ahern coffee table,the ahern 4 legs coffee table with storage is ...,smart coffee table,1
...,...,...,...,...,...
142319,15439,fellsburg linen upholstered parsons chair,,worn leather office chair,1
142320,451,olin upholstered side chair,if you are looking for a simple yet sleek dini...,worn leather office chair,1
142321,30764,barbay lounge chair cushion,,worn leather office chair,0
142322,16796,haings upholstered parsons chair,this set of 2 armless side chairs boasts an up...,worn leather office chair,1


## Convertendo a Base de Treino para o Formato InputExample

In [11]:
df_train['product_name'][0]

'abheer floor shelf coffee table'

In [12]:
from sentence_transformers import InputExample
train_examples = []

for i in range(len(df_train)):
    label = df_train['label'][i]
    train_examples.append(InputExample(texts=[[df_train['product_name'][i], df_train['product_description'][i]], df_train['query'][i]], label=label))


train_examples = train_examples[:500]

## Convertendo os exemplos de treinamento em um arquivo Dataloader

In [13]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

## Definindo a função de perda

Softmax Loss <br>
Contrastive Loss

In [14]:
from sentence_transformers import losses

train_loss = losses.SoftmaxLoss(model=model, sentence_embedding_dimension=model.get_sentence_embedding_dimension(), num_labels=3)

In [15]:
pip install huggingface_hub

Note: you may need to restart the kernel to use updated packages.


In [16]:
from huggingface_hub import notebook_login
notebook_login()

Login successful
Your token has been saved to C:\Users\ton2/.huggingface/token


## Treinamento do Modelo

In [17]:
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=1) 

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/32 [00:00<?, ?it/s]

In [19]:
from sentence_transformers import SentenceTransformer, util

query_embedding = model.encode("salon chair")
passage_embedding = model.encode(["metal  chair",
                                  "the heavy duty barber chair is built to last"])

print("Similarity:", util.dot_score(query_embedding, passage_embedding))

Similarity: tensor([[0.6152, 0.7009]])


In [20]:
model.save_to_hub("wandss-bert")

Cloning https://huggingface.co/tubyneto/wandss-bert into local empty directory.


Upload file pytorch_model.bin:   0%|          | 32.0k/418M [00:00<?, ?B/s]

remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/tubyneto/wandss-bert
   be1132a..1aea0c6  main -> main



'https://huggingface.co/tubyneto/wandss-bert/commit/1aea0c6a33183fde9ef0bfaceefa4e888c3b877d'

In [21]:
model = SentenceTransformer('tubyneto/wandss-bert')

Downloading:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.26k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [23]:
from sentence_transformers import SentenceTransformer, util

query_embedding = model.encode("salon chair")
passage_embedding = model.encode(["metal salon frame",
                                  "the heavy duty barber chair is built to last"])

print("Similarity:", util.dot_score(query_embedding, passage_embedding))

Similarity: tensor([[0.5672, 0.7009]])
