In [1]:
from sentence_transformers import SentenceTransformer, util




# Load multilingual embedding model (LaBSE)

In [2]:
model = SentenceTransformer("sentence-transformers/LaBSE")

# Prototype sentences for each language

In [3]:
language_examples = {
    "English": "This is a sample sentence.",
    "Spanish": "Esta es una frase de ejemplo.",
    "French": "Ceci est une phrase d'exemple.",
    "German": "Dies ist ein Beispielsatz.",
    "Italian": "Questa √® una frase di esempio.",
    "Portuguese": "Esta √© uma frase de exemplo.",
    "Hindi": "‡§Ø‡§π ‡§è‡§ï ‡§â‡§¶‡§æ‡§π‡§∞‡§£ ‡§µ‡§æ‡§ï‡•ç‡§Ø ‡§π‡•à‡•§",
    "Japanese": "„Åì„Çå„ÅØ‰æãÊñá„Åß„Åô„ÄÇ"
}

In [4]:
# Encode language prototypes

In [5]:
langs = list(language_examples.keys())
prototypes = list(language_examples.values())
prototype_embeddings = model.encode(prototypes, convert_to_tensor=True)

# Input text for detection

In [6]:
text_input = "Dove posso trovare una farmacia vicino a me?"

# Encode input sentence

In [7]:
input_embedding = model.encode(text_input, convert_to_tensor=True)

# Compute cosine similarity to each language

In [8]:
similarities = util.pytorch_cos_sim(input_embedding, prototype_embeddings)[0]
best_lang_index = int(similarities.argmax())
predicted_language = langs[best_lang_index]

# Display result

In [9]:
print("üåê Input Text:\n", text_input)
print("\nüî§ Detected Language:", predicted_language)
print("\nüî¢ Similarity Score: {:.2f}".format(similarities[best_lang_index].item()))

üåê Input Text:
 Dove posso trovare una farmacia vicino a me?

üî§ Detected Language: Portuguese

üî¢ Similarity Score: 0.32
