## Instalação da Biblioteca sentence-transformers

In [1]:
pip install -U sentence-transformers

Note: you may need to restart the kernel to use updated packages.


## Definição do Modelo

In [2]:
from sentence_transformers import SentenceTransformer

# Identifier of the pretrained checkpoint that the rest of the notebook builds on.
model_id = 'sentence-transformers/all-mpnet-base-v2'
# Load that checkpoint as a SentenceTransformer; `model` is the object that is
# later fine-tuned with model.fit and pushed with model.save_to_hub.
model = SentenceTransformer(model_id)

In [3]:
from sentence_transformers import SentenceTransformer

# Smoke test: encode two sample sentences and print the resulting embeddings.
# The `model` loaded in the "Definição do Modelo" cell is reused here; it is
# the same 'sentence-transformers/all-mpnet-base-v2' checkpoint, so loading it
# a second time is unnecessary.
sentences = ["This is an example sentence", "Each sentence is converted"]

embeddings = model.encode(sentences)
print(embeddings)

[[ 0.0225026  -0.07829181 -0.02303074 ... -0.00827927  0.02652692
  -0.00201897]
 [ 0.04170236  0.0010974  -0.01553418 ... -0.02181627 -0.0635936
  -0.00875284]]


## Importando o Pandas

In [4]:
import pandas as pd

## Importando a Base de Treinamento

In [36]:
# Load the training set from a CSV file referenced by a relative path.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests
# the CSV was written together with its index; confirm before switching to
# index_col=0, because later cells look rows up by integer label.
df_train = pd.read_csv('df_train_crowdedflower.csv')

## Consulta do Cabeçalho

In [37]:
# Preview the first rows of the training set.
df_train.head()

Unnamed: 0,_unit_id,product_title,product_description,query,relevance
0,711168214,Wintergreen Lighting Christmas LED Light Bulb ...,,playstation 4,0.0
1,711168220,French Toast Boys Long Sleeve Classic Dress Sh...,A comfortable dress shirt made from our Wrinkl...,playstation 4,0.0
2,711168220,French Toast Boys Long Sleeve Classic Dress Sh...,A comfortable dress shirt made from our Wrinkl...,playstation 4,0.0
3,711168222,Perry Ellis Thin Stripe Classic Fit Dress Shirt,,playstation 4,0.0
4,711168215,Sea Gull Lighting 3W Clear Incandescent Wedge ...,12V 3w clear incandescent wedge lamp Part of T...,playstation 4,0.0


In [39]:
import numpy as np

# The relevance column is displayed as floats (0.0, ...) right after loading;
# cast it to 64-bit integers so it can be used as a class label.
relevance_as_int = df_train['relevance'].astype(np.int64)
df_train['relevance'] = relevance_as_int

In [40]:
# Display the frame after the cast; relevance now shows integer values.
df_train

Unnamed: 0,_unit_id,product_title,product_description,query,relevance
0,711168214,Wintergreen Lighting Christmas LED Light Bulb ...,,playstation 4,0
1,711168220,French Toast Boys Long Sleeve Classic Dress Sh...,A comfortable dress shirt made from our Wrinkl...,playstation 4,0
2,711168220,French Toast Boys Long Sleeve Classic Dress Sh...,A comfortable dress shirt made from our Wrinkl...,playstation 4,0
3,711168222,Perry Ellis Thin Stripe Classic Fit Dress Shirt,,playstation 4,0
4,711168215,Sea Gull Lighting 3W Clear Incandescent Wedge ...,12V 3w clear incandescent wedge lamp Part of T...,playstation 4,0
...,...,...,...,...,...
29307,711179068,NewMetro Design KA-6LR KitchenAid 6-Quart Bowl...,Features - An Enticing Elixir Eau De Toilette ...,kitchenaid mixer,3
29308,711179069,NewMetro Design KA-THPRO KitchenAid Tilt-Head ...,The ultimate hands-free mixing blade endures t...,kitchenaid mixer,1
29309,711179070,KitchenAid Flex Edge Beater,Upgrade your kitchen accessory collection with...,kitchenaid mixer,1
29310,711179071,KitchenAid Sausage Stuffer Kit,If you love to create mouth-watering food at h...,kitchenaid mixer,2


## Substituição de float('NaN') por String Vazia

In [41]:
# Missing product descriptions are NaN after loading; fill them with an empty
# string so every description is a str before it is used as input text.
# fillna is the idiomatic pandas call for replacing missing values.
df_train['product_description'] = df_train['product_description'].fillna('')
df_train

Unnamed: 0,_unit_id,product_title,product_description,query,relevance
0,711168214,Wintergreen Lighting Christmas LED Light Bulb ...,,playstation 4,0
1,711168220,French Toast Boys Long Sleeve Classic Dress Sh...,A comfortable dress shirt made from our Wrinkl...,playstation 4,0
2,711168220,French Toast Boys Long Sleeve Classic Dress Sh...,A comfortable dress shirt made from our Wrinkl...,playstation 4,0
3,711168222,Perry Ellis Thin Stripe Classic Fit Dress Shirt,,playstation 4,0
4,711168215,Sea Gull Lighting 3W Clear Incandescent Wedge ...,12V 3w clear incandescent wedge lamp Part of T...,playstation 4,0
...,...,...,...,...,...
29307,711179068,NewMetro Design KA-6LR KitchenAid 6-Quart Bowl...,Features - An Enticing Elixir Eau De Toilette ...,kitchenaid mixer,3
29308,711179069,NewMetro Design KA-THPRO KitchenAid Tilt-Head ...,The ultimate hands-free mixing blade endures t...,kitchenaid mixer,1
29309,711179070,KitchenAid Flex Edge Beater,Upgrade your kitchen accessory collection with...,kitchenaid mixer,1
29310,711179071,KitchenAid Sausage Stuffer Kit,If you love to create mouth-watering food at h...,kitchenaid mixer,2


## Convertendo a Base de Treino para o Formato InputExample

In [42]:
# Look at the title of the row labelled 0 as a sanity check of the text data.
df_train.loc[0, 'product_title']

'Wintergreen Lighting Christmas LED Light Bulb (Pack of 25)'

In [43]:
from sentence_transformers import InputExample

# Only the first MAX_TRAIN_EXAMPLES rows are used for training, so build
# examples for just those rows instead of converting the whole frame and
# discarding most of the result.
MAX_TRAIN_EXAMPLES = 500
train_subset = df_train.head(MAX_TRAIN_EXAMPLES)

# Each example pairs the product text (title and description, kept together
# as one two-element entry) with the search query; the relevance value is the
# label. Iterating the columns in lockstep avoids a chained
# df_train[col][i] lookup for every field of every row.
# NOTE(review): the first text is a nested [title, description] list rather
# than a single string — confirm this is the intended input format.
train_examples = [
    InputExample(texts=[[title, description], query], label=int(relevance))
    for title, description, query, relevance in zip(
        train_subset['product_title'],
        train_subset['product_description'],
        train_subset['query'],
        train_subset['relevance'],
    )
]

## Convertendo os exemplos de treinamento em um DataLoader

In [44]:
from torch.utils.data import DataLoader

# Wrap the list of training examples in a DataLoader that serves shuffled
# batches of 16 examples.
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

## Definindo a função de perda

Funções de perda consideradas: <br>
Softmax Loss (utilizada neste notebook) <br>
Contrastive Loss

In [45]:
from sentence_transformers import losses

# SoftmaxLoss is configured with a fixed number of classes, and every label
# must lie in [0, num_labels). The relevance column displayed above contains
# the value 3 in addition to 0, 1 and 2, so a hard-coded num_labels=3 is too
# small for the full label range. Derive the class count from the data.
num_labels = int(df_train['relevance'].max()) + 1

train_loss = losses.SoftmaxLoss(
    model=model,
    sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
    num_labels=num_labels,
)

In [19]:
pip install huggingface_hub

Note: you may need to restart the kernel to use updated packages.


In [16]:
from huggingface_hub import notebook_login
# Show the Hugging Face login widget inside the notebook.
# NOTE(review): presumably required so that model.save_to_hub can push to the
# account — confirm.
notebook_login()

VBox(children=(HTML(value="<center>\n<img src=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Treinamento do Modelo

In [46]:
# Fine-tune `model` for one epoch using the (dataloader, loss) pair built in the previous cells.
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=1) 

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/32 [00:00<?, ?it/s]

In [47]:
# Upload the fine-tuned model to the Hub repository named "crowdedflower-bert".
model.save_to_hub("crowdedflower-bert")

Cloning https://huggingface.co/tubyneto/crowdedflower-bert into local empty directory.


Upload file pytorch_model.bin:   0%|          | 32.0k/418M [00:00<?, ?B/s]

remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/tubyneto/crowdedflower-bert
   2e61b0e..5ea7c76  main -> main



'https://huggingface.co/tubyneto/crowdedflower-bert/commit/5ea7c76cfec3556ea99e72b3e801a02f38175ba4'

In [27]:
# Load the published model back from the Hub, replacing the in-memory `model`.
# NOTE(review): this cell's execution count (27) is lower than that of the
# training and upload cells (46, 47), so its recorded output may predate the
# last training run — confirm with Restart Kernel -> Run All.
model = SentenceTransformer('tubyneto/crowdedflower-bert')

Downloading:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/674 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/474 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [29]:
from sentence_transformers import SentenceTransformer, util

# Candidate product titles to score against the search query.
passages = [
    "aiyana striped sheer grommet single curtain panel",
    " sheer grommet top window curtain panel",
]

# Encode the query first, then the passages, with the current `model`.
query_embedding = model.encode("beaded curtains")
passage_embedding = model.encode(passages)

# Dot-product score between the query embedding and each passage embedding.
similarity = util.dot_score(query_embedding, passage_embedding)
print("Similarity:", similarity)

Similarity: tensor([[0.5960, 0.5604]])
