In [None]:
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel

In [None]:
# Load the BERT-tiny tokenizer and encoder once; later cells use them as globals.
# NOTE(review): 'bert-tiny' carries no organisation prefix. `from_pretrained`
# accepts either a Hub model id or a local directory path -- confirm which one
# this notebook expects to be available.
MODEL_NAME = 'bert-tiny'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

In [None]:
# Encode a sentence into a matrix of BERT-tiny embeddings
def encode_sentence(sentence):
    """Return the contextual token embeddings of ``sentence`` as a 2-D array.

    The sentence is converted to ids with ``tokenizer.encode`` and passed
    through ``model`` as a batch of one, with gradient tracking disabled.

    Args:
        sentence: Text to encode.

    Returns:
        A NumPy array with one row per id produced by ``tokenizer.encode``.
        Rows are tokenizer tokens, not whitespace-separated words, and by
        default ``encode`` also emits the tokenizer's special tokens.
        Presumably the shape is ``(n_tokens, hidden_size)`` -- this relies on
        the first model output being the last hidden state; confirm for the
        loaded checkpoint.
    """
    # The model expects a batch, so wrap the single id sequence in a list.
    input_ids = torch.tensor([tokenizer.encode(sentence)])
    with torch.no_grad():
        hidden_states = model(input_ids)[0]
    # The model output is still batched; take the only batch entry so that
    # callers can iterate over the rows token by token.
    return hidden_states[0].numpy()

# Compute the cosine similarity between two embedding vectors
def cosine_similarity(x, y):
    """Return the cosine of the angle between vectors ``x`` and ``y``.

    This is a similarity (1 for identical directions, -1 for opposite ones),
    not a distance; callers that need a cost should use ``1 - similarity``.

    Args:
        x: First vector.
        y: Second vector, same length as ``x``.

    Returns:
        ``dot(x, y) / (||x|| * ||y||)``.
    """
    magnitude_product = np.linalg.norm(x) * np.linalg.norm(y)
    return np.dot(x, y) / magnitude_product

In [None]:
def compute_cost_matrix(sentence1, sentence2):
    """Build the pairwise cosine-distance matrix between two sentences.

    Each sentence is encoded with ``encode_sentence``. Entry ``[i, j]`` of the
    result is ``1 - cosine_similarity`` between embedding row ``i`` of the
    first sentence and embedding row ``j`` of the second.

    Args:
        sentence1: Text whose embedding rows index the matrix rows.
        sentence2: Text whose embedding rows index the matrix columns.

    Returns:
        A float64 array of shape ``(rows1, rows2)``.
    """
    def _token_matrix(sentence):
        # Flatten any leading batch axis so rows are always individual
        # embeddings, whether the encoder returns (tokens, dim) or
        # (1, tokens, dim).
        emb = np.asarray(encode_sentence(sentence), dtype=np.float64)
        return emb.reshape(-1, emb.shape[-1])

    emb1 = _token_matrix(sentence1)
    emb2 = _token_matrix(sentence2)

    # Normalise rows to unit length; the matrix product of the unit rows is
    # then the full table of cosine similarities in one vectorised step.
    unit1 = emb1 / np.linalg.norm(emb1, axis=1, keepdims=True)
    unit2 = emb2 / np.linalg.norm(emb2, axis=1, keepdims=True)
    return 1.0 - unit1 @ unit2.T

In [None]:
# Perform DTW with a non-crossing map that can handle degenerate solutions
def dtw(sentence1, sentence2):
    """Compute the accumulated-cost table of a DTW between two sentences.

    The local cost comes from ``compute_cost_matrix``. Each cell may be reached
    from the cell above, to the left, or diagonally above-left, so any path
    through the table is monotonic (non-crossing) and one element of a sentence
    may be matched to several elements of the other.

    Args:
        sentence1: Text indexing the table rows.
        sentence2: Text indexing the table columns.

    Returns:
        An ``(n + 1, m + 1)`` array for an ``n x m`` cost matrix. Row 0 and
        column 0 are ``inf`` padding except ``[0, 0] == 0``; entry ``[i, j]``
        is the minimum accumulated cost of a path ending at cost-matrix cell
        ``(i - 1, j - 1)``.
    """
    cost_matrix = compute_cost_matrix(sentence1, sentence2)
    n, m = cost_matrix.shape

    # Infinite borders force every path to start at the origin cell.
    DTW = np.full((n + 1, m + 1), np.inf)
    DTW[0, 0] = 0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            best_previous = min(DTW[i - 1, j], DTW[i, j - 1], DTW[i - 1, j - 1])
            # The diagonal predecessor is already one of the three candidates,
            # so no separate handling of the i == j case is needed.
            DTW[i, j] = cost_matrix[i - 1, j - 1] + best_previous
    return DTW

In [None]:
def align(s1, s2, DTW):
    """Backtrack through a DTW table and return the matched index pairs.

    Starting at cell ``(len(s1), len(s2))``, the walk moves up or left when
    that neighbour is strictly cheaper than both alternatives, and diagonally
    otherwise (so ties resolve to the diagonal). It stops when either index
    reaches 0.

    Args:
        s1: Sequence whose length selects the starting row.
        s2: Sequence whose length selects the starting column.
        DTW: Accumulated-cost table indexed as ``DTW[row, col]``, with one
            padding row and column at index 0.

    Returns:
        List of ``(index_in_s1, index_in_s2)`` pairs, ordered from the start
        of the sequences to the end.
    """
    row, col = len(s1), len(s2)
    path = []
    while row > 0 and col > 0:
        # Table indices are shifted by one relative to sequence positions.
        path.append((row - 1, col - 1))
        up = DTW[row - 1, col]
        left = DTW[row, col - 1]
        diagonal = DTW[row - 1, col - 1]
        if up < diagonal and up < left:
            row -= 1
        elif left < diagonal and left < up:
            col -= 1
        else:
            row -= 1
            col -= 1
    # The pairs were collected end-to-start; hand them back start-to-end.
    return path[::-1]

In [None]:
if __name__ == "__main__":
    s1 = "This is a test sentence."
    s2 = "This sentence is a test."

    # dtw() builds its table over the ids returned by tokenizer.encode, not
    # over whitespace-separated words. Recover the token strings for exactly
    # those ids so the backtracking starts at the table's last cell and every
    # printed label matches its row/column.
    tokens1 = tokenizer.convert_ids_to_tokens(tokenizer.encode(s1))
    tokens2 = tokenizer.convert_ids_to_tokens(tokenizer.encode(s2))

    # Compute the DTW matrix and the optimal alignment
    DTW = dtw(s1, s2)
    alignment = align(tokens1, tokens2, DTW)

    # Print the alignment
    for i, j in alignment:
        print(f"{tokens1[i]} <--> {tokens2[j]}")