Skip to content

Commit

Permalink
Merge pull request #31 from sourface94/main
Browse files Browse the repository at this point in the history
Change of sentence transformer model
  • Loading branch information
woodthom2 committed Apr 9, 2024
2 parents 9c4cdfc + 53d7eda commit b822014
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/harmony/matching/default_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
sentence_transformer_path = os.environ["HARMONY_SENTENCE_TRANSFORMER_PATH"]
else:
sentence_transformer_path = (
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)

model = SentenceTransformer(sentence_transformer_path)
Expand Down
6 changes: 5 additions & 1 deletion src/harmony/matching/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def match_instruments_with_function(

text_vectors, new_vectors_dict = create_full_text_vectors(all_questions_str, query, vectorisation_function,
texts_cached_vectors)
# Get vectors for all original texts and vectors for the negated texts
vectors_pos, vectors_neg = vectors_pos_neg(text_vectors)

# Get similarity between the query (only one query?) and the questions
Expand All @@ -168,14 +169,16 @@ def match_instruments_with_function(
query_similarity = np.array([])

# Get similarity with polarity
if vectors_pos.any():
if vectors_pos.any(): # NOTE: Should an error be thrown if vectors_pos is empty?
pairwise_similarity = cosine_similarity(vectors_pos, vectors_pos)
# NOTE: Similarity of (vectors_neg, vectors_pos) is the transpose of (vectors_pos, vectors_neg); averaging the two below yields a symmetric matrix
pairwise_similarity_neg1 = cosine_similarity(vectors_neg, vectors_pos)
pairwise_similarity_neg2 = cosine_similarity(vectors_pos, vectors_neg)
pairwise_similarity_neg_mean = np.mean(
[pairwise_similarity_neg1, pairwise_similarity_neg2], axis=0
)

# Polarity of 1 means the sentence shouldn't be negated, -1 means it should
similarity_difference = pairwise_similarity - pairwise_similarity_neg_mean
similarity_polarity = np.sign(similarity_difference)

Expand All @@ -186,6 +189,7 @@ def match_instruments_with_function(
similarity_max = np.max(
[pairwise_similarity, pairwise_similarity_neg_mean], axis=0
)
# NOTE: Values near -1 and +1 both mean the sentences are similar (the sign carries the polarity); 0 means not similar
similarity_with_polarity = similarity_max * similarity_polarity
else:
similarity_with_polarity = np.array([])
Expand Down

0 comments on commit b822014

Please sign in to comment.