<a href="https://colab.research.google.com/github/frm1789/LanguageDetection/blob/main/Language_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install huggingface_hub --quiet
!pip install langdetect --quiet
!pip install langid --quiet
!pip install pycountry --quiet
!pip install transformers --quiet

In [2]:
!git config --global credential.helper store

In [3]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [14]:
from langdetect import detect as langdetect_detect
from langdetect import LangDetectException
import langid
from transformers import pipeline
import pycountry

In [15]:
def get_language_name(language_code):
    try:
        language = pycountry.languages.get(alpha_2=language_code)
        return language.name if language else "Unknown"
    except KeyError:
        return "Unknown"

In [16]:
language_detector = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")


In [17]:
sentence = "It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of light, it was the season of darkness, it was the spring of hope, it was the winter of despair."

In [18]:
# With langdetect
try:
    langdetect_code = langdetect_detect(sentence)
    langdetect_result = get_language_name(langdetect_code)
except LangDetectException:
    langdetect_result = "Detection failed"


In [19]:
# with langid
try:
    langid_code, langid_confidence = langid.classify(sentence)
    langid_result = {
        'language': get_language_name(langid_code),
        'confidence': langid_confidence
    }
except Exception as e:
    langid_result = "Detection failed"


In [20]:
# With Hugging Face's Transformers
try:
    hf_prediction = language_detector(sentence)[0]
    transformers_result = {
        'language': hf_prediction['label'],
        'confidence': hf_prediction['score']
    }
except Exception as e:
    transformers_result = "Detection failed"

In [22]:
# Print results
print("langdetect result:", langdetect_result)
print("langid result:", langid_result)
print("Transformers result:", transformers_result)

langdetect result: English
langid result: {'language': 'English', 'confidence': -1068.1853919029236}
Transformers result: {'language': 'en', 'confidence': 0.9366692900657654}
