In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
import gradio as gr
%matplotlib inline

In [12]:
# Load the dataset
df = pd.read_csv('lang_dataset.csv')
df = df.rename(columns={"language_label": "language"})
df.head()

# Convert the text into numerical features
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["Text"])

# Train the model
model = MultinomialNB()
# model = SVC()
# model = LogisticRegression()
model.fit(X, df["language"])

MultinomialNB()

In [13]:
# Evaluate the model
y_pred = model.predict(X)
accuracy = accuracy_score(df["language"], y_pred)
precision = precision_score(df["language"], y_pred, average='weighted')
recall = recall_score(df["language"], y_pred, average='weighted')
f1 = f1_score(df["language"], y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [14]:
# Test the model
text = "This is a test sentence."
text_features = vectorizer.transform([text])
prediction = model.predict(text_features)
print("Prediction:", prediction)

# modify the predict_language() function to accept a text input, and return the predicted language label as a string
def predict_language(text):
    text_vectorized = vectorizer.transform([text])
    language = model.predict(text_vectorized)[0]
    return str(language)


Prediction: ['Swahili']


In [15]:
# Define the Gradio interface to accept a text input and display the predicted language label
iface = gr.Interface(
    fn=predict_language,
    inputs=gr.inputs.Textbox(lines=5, label="Input Text"),
    outputs=gr.outputs.Textbox(label="Language Prediction"),
    title="Language Identifier",
    description="Predict the language of a given text."
)
iface.launch()



Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


