# In [None]:
from transformers import BertTokenizer, BertModel
import torch
import torch.nn.functional as F

# Function to generate embeddings using BERT
def get_bert_embeddings(model, tokenizer, sentence):
    """Tokenize ``sentence`` and return the model's last hidden state.

    The forward pass runs under ``torch.no_grad()`` because this helper is
    used for inference only; no autograd graph is recorded and the returned
    embeddings do not require grad.

    Args:
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object exposing ``last_hidden_state``.
        tokenizer: Callable that encodes ``sentence`` into a mapping of
            tensors when called with ``return_tensors="pt"``.
        sentence: Text to embed.

    Returns:
        A ``(embeddings, inputs)`` tuple: the ``last_hidden_state`` produced
        by ``model`` and the tokenizer output that was fed to it.
    """
    # Tokenize the sentence and convert to tensors
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    # Inference only: skip gradient tracking to save memory and time
    with torch.no_grad():
        outputs = model(**inputs)
    # Extract the embeddings from the last hidden state
    return outputs.last_hidden_state, inputs

# Function to compute cosine similarity
def cosine_similarity(embedding1, embedding2, dim=0):
    """Return the cosine similarity between two tensors along ``dim``.

    Args:
        embedding1: First tensor.
        embedding2: Second tensor, compared against ``embedding1``.
        dim: Dimension along which the similarity is computed. The default
            of 0 is the feature axis for 1-D vectors; pass the feature axis
            explicitly (for example ``-1``) when the inputs are batched.

    Returns:
        The result of ``torch.nn.functional.cosine_similarity`` for the two
        inputs, reduced over ``dim``.
    """
    return F.cosine_similarity(embedding1, embedding2, dim=dim)

def _find_word_positions(input_ids, word_ids):
    """Return the indices of the first contiguous run of ``word_ids``.

    Args:
        input_ids: Plain list of token ids for one encoded sentence.
        word_ids: Plain list of word-piece ids that make up the target word.

    Returns:
        List of consecutive indices into ``input_ids`` covering the word.

    Raises:
        ValueError: If ``word_ids`` is empty or does not occur in
            ``input_ids`` as a contiguous run.
    """
    width = len(word_ids)
    if width == 0:
        raise ValueError("target word produced no tokens")
    for start in range(len(input_ids) - width + 1):
        if input_ids[start:start + width] == word_ids:
            return list(range(start, start + width))
    raise ValueError(f"token ids {word_ids} not found in input ids {input_ids}")


# Initialize BERT base model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Sentences with multiple meanings of the word 'flies'
sentence1 = "Time flies like an arrow."
sentence2 = "Fruit flies like a banana."

# Generate embeddings and tokenized inputs
embeddings1, inputs1 = get_bert_embeddings(model, tokenizer, sentence1)
embeddings2, inputs2 = get_bert_embeddings(model, tokenizer, sentence2)

# Find the position of the word 'flies' in both sentences.
# The ids are converted to plain Python lists so the comparison is between
# ints, and the whole word-piece sequence is matched contiguously so a word
# that splits into several pieces cannot match a piece of a different word.
word = 'flies'
token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(word))
positions1 = _find_word_positions(inputs1['input_ids'][0].tolist(), token_ids)
positions2 = _find_word_positions(inputs2['input_ids'][0].tolist(), token_ids)

# Extract embeddings for the word 'flies' from both sentences.
# Averaging over the word's pieces leaves a single-piece word unchanged and
# gives one vector per word when the tokenizer splits it.
embedding1_flies = embeddings1[0, positions1, :].mean(dim=0)
embedding2_flies = embeddings2[0, positions2, :].mean(dim=0)

# Compute cosine similarity
similarity = cosine_similarity(embedding1_flies, embedding2_flies)

print(similarity)