In [None]:
! pip install transformers datasets sentence_transformers

In [None]:
import pandas as pd
from scipy.spatial.distance import chebyshev, cityblock, euclidean
from sentence_transformers import SentenceTransformer, models, util

# Compare how each pooling strategy changes the similarity/distance between
# two fixed sentences that are embedded with the same transformer backbone.

# First row is the header; every later row holds the metrics for one pooling mode.
results = [['pooling_mode','cosine_similarity','euclidean_distance','manhattan_distance','dot_product','chebyshev_distance']]

# Load the pre-trained transformer model once; every pooling variant reuses it.
word_embedding_model = models.Transformer('sentence-transformers/msmarco-distilbert-base-tas-b')

# The sentence pair is the same for every pooling mode, so define it outside the loop.
sentence1 = "American pizza is one of the nation’s greatest cultural exports"
sentence2 = "Pizza is not authentic Italian pizza, as its inception is attributed to New York City"

for pooling_mode in ['cls', 'lasttoken', 'max', 'mean', 'mean_sqrt_len_tokens', 'weightedmean']:

    # Configure the pooling layer for the mode under test
    pooling_model = models.Pooling(
        word_embedding_dimension=word_embedding_model.get_word_embedding_dimension(),
        pooling_mode=pooling_mode,
    )

    # Build the SentenceTransformer model with custom pooling
    custom_model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

    # Encode sentences
    embedding1 = custom_model.encode(sentence1)
    embedding2 = custom_model.encode(sentence2)

    # Find distances
    cosine_similarity = util.cos_sim(embedding1, embedding2).item()
    # L2 distance between the two embedding vectors. The earlier version took
    # the norm of the 1x1 cosine-similarity matrix, which is only the absolute
    # value of the cosine similarity and not a Euclidean distance.
    euclidean_distance = euclidean(embedding1, embedding2)
    manhattan_distance = cityblock(embedding1, embedding2)
    dot_product = util.dot_score(embedding1, embedding2).item()
    chebyshev_distance = chebyshev(embedding1, embedding2)

    # Add to results (order must match the header row)
    results.append([
        pooling_mode,
        cosine_similarity,
        euclidean_distance,
        manhattan_distance,
        dot_product,
        chebyshev_distance,
    ])

# Skip the header row for the data and use it as the column labels.
pd.DataFrame(results[1:], columns=results[0])