In [1]:
import numpy as np
from scipy.optimize import linear_sum_assignment

# Toy example: optimal-transport distance between two sentences, each given as
# a matrix of word embeddings (one row per word, one column per dimension).
# Here each sentence has 4 words with 3-dimensional embeddings; in practice
# these could be 300-dimensional GloVe or Word2Vec embeddings.
target_sentence = np.array([[0.1, 0.2, 0.3],
                            [0.4, 0.3, 0.2],
                            [0.5, 0.6, 0.7],
                            [0.8, 0.9, 0.7]])

generated_sentence = np.array([[0.1, 0.2, 0.3],
                               [0.4, 0.3, 0.2],
                               [0.5, 0.6, 0.7],
                               [0.8, 0.9, 0.7]])

# Pairwise differences via broadcasting: entry [i, j, :] is
# target_sentence[i] - generated_sentence[j].
pairwise_diff = target_sentence[:, np.newaxis, :] - generated_sentence[np.newaxis, :, :]

# Cost matrix: Euclidean (L2) distance between each pair of word embeddings.
# The norm is taken over the embedding axis; summing the squared differences
# without the square root would give the *squared* distance instead.
cost_matrix = np.linalg.norm(pairwise_diff, axis=2)

# Use the Hungarian algorithm to find the minimum-cost one-to-one assignment
# of target words (rows) to generated words (columns).
row_ind, col_ind = linear_sum_assignment(cost_matrix)

# When both sentences have the same number of words and every word carries one
# unit of mass, the optimal transport plan is a one-to-one assignment, so the
# total transport cost is the sum of the matched costs. (Divide by the number
# of words to obtain the uniform-weight, normalised Earth Mover's Distance.)
emd = cost_matrix[row_ind, col_ind].sum()

print("Optimal Assignment Indices:", row_ind, col_ind)
print("Optimal Transport Distance:", emd)


Optimal Assignment Indices: [0 1 2 3] [0 1 2 3]
Optimal Transport Distance: 0.0
