In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
import pandas as pd

# Load the dataset; only the "Text" and "Language" columns are used below.
url = "https://raw.githubusercontent.com/YashiGarg016/Language-Detection/refs/heads/main/Language%20Detection.csv"
data = pd.read_csv(url)

# Drop rows with a missing text or label: the vectorizer cannot process NaN
# documents, and a NaN label would otherwise turn into a spurious class.
data = data.dropna(subset=["Text", "Language"])

# Prepare the data (cast to str so any non-string cell is still a valid document)
X = data["Text"].astype(str)
y = data["Language"]

# Split into training and test sets.
# stratify=y keeps each language's share the same in both splits, so small
# classes are still represented in the test set. (Requires at least two
# samples per language.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature extraction using CountVectorizer.
# Character n-grams (within word boundaries) are used instead of the default
# word analyzer: the default token pattern relies on the regex class \w, which
# does not match the combining vowel signs/marks used by scripts such as
# Devanagari, so words in those scripts get cut into fragments and mostly
# discarded. Character n-grams are script-agnostic and also cope with words
# never seen during training.
vectorizer = CountVectorizer(analyzer="char_wb", ngram_range=(1, 3))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train a Naïve Bayes classifier on the n-gram counts
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Make predictions on the held-out test set
y_pred = model.predict(X_test_vec)

# Print per-language precision / recall / F1
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Function to predict language for user input
def predict_language(text):
    """Return the language label predicted for a single text.

    The text is wrapped in a one-element list, turned into a feature vector
    by the module-level ``vectorizer`` and classified by the module-level
    ``model``; the first (and only) prediction is returned.
    """
    return model.predict(vectorizer.transform([text]))[0]

# Example usage: classify one hard-coded sample sentence with
# predict_language() and print the predicted label next to the input.
user_input = "आप कैसे हैं?"
predicted_lang = predict_language(user_input)
print(f"The predicted language for '{user_input}' is: {predicted_lang}")