In [1]:
import pandas as pd
import numpy as np
import nltk
nltk.download('stopwords')
import re
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from textblob import TextBlob
import gradio as gr

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read swahili.csv into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where this exact location exists.
dataset_path = "/workspaces/Sentiment_Analyzer/Sentiment_Analyzer/dataset/swahili.csv"
df = pd.read_csv(dataset_path)

In [4]:
# Text preprocessing: strip every character that is not an ASCII letter, digit
# or whitespace, then drop stop words and re-join the remaining tokens.
# A set gives constant-time membership checks; the list returned by NLTK would be
# scanned linearly for every single token.
stop_words = set(stopwords.words("swahili"))
# The pattern is a raw string: '\s' inside a plain literal is an invalid escape
# sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).
df["maneno"] = df["maneno"].apply(
    lambda x: " ".join(
        word
        for word in re.sub(r'[^a-zA-Z0-9\s]', '', x).split()
        if word not in stop_words
    )
)


In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df["maneno"],
    df["lugha"],
    test_size=0.3,
    random_state=42,
)


In [6]:
# Vectorize the text data using TF-IDF.
# The vectorizer is fitted on the training split only; the test split is then
# transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [7]:
# Fit a linear-kernel support vector classifier on the TF-IDF training features.
svm = SVC(
    C=1.0,
    kernel='linear',
)
svm.fit(X_train_vec, y_train)

In [8]:
# Score the classifier on the held-out split.
y_pred = svm.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
# Weighted averages across classes; the fourth returned value is discarded.
precision, recall, f1_score, _ = precision_recall_fscore_support(
    y_test,
    y_pred,
    average='weighted',
)

print(f"Results for swahili.csv")
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1_score:.2f}")
# Polarity of the whole preprocessed corpus joined into a single string.
# NOTE(review): TextBlob's default analyzer is presumably built on an English
# lexicon, so this figure may not be meaningful for Swahili text - confirm.
corpus_polarity = TextBlob(' '.join(df['maneno'])).sentiment.polarity
print(f"Polarity: {corpus_polarity:.2f}")


Results for swahili.csv
Accuracy: 0.78
F1 Score: 0.78
Polarity: -0.09


In [9]:
def predict_sentiment(text):
    """Predict the label of a single piece of text with the trained SVM.

    The text gets the same cleaning as the training column: every character
    that is not an ASCII letter, digit or whitespace is removed, and tokens
    found in the module-level ``stop_words`` are dropped. The result is
    vectorized with the already-fitted module-level ``vectorizer`` and
    classified by the module-level ``svm``.

    Args:
        text: Raw input string.

    Returns:
        The first (and only) element of ``svm.predict`` for this input.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # (DeprecationWarning, SyntaxWarning on Python 3.12+).
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = " ".join(word for word in cleaned.split() if word not in stop_words)
    # transform() expects an iterable of documents, hence the one-element list.
    text_vec = vectorizer.transform([text])
    return svm.predict(text_vec)[0]


In [None]:
# Create the Gradio interface
#input_text = gr.inputs.Textbox(label="Input Text")
#output_sentiment = gr.outputs.Label(label="Sentiment Prediction")
#gr.Interface(fn=predict_sentiment, inputs=input_text, outputs=output_sentiment, 
  #           title="Swahili Sentiment Analyzer", description="Predict the sentiment of Swahili text using an SVM classifier trained on a dataset of Swahili text.").launch()

In [10]:
def load_csv(file_path, text_column="maneno"):
    """Load a CSV file and clean its text column.

    Cleaning removes every character that is not an ASCII letter, digit or
    whitespace, then drops tokens found in the module-level ``stop_words``.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.
        text_column: Name of the column holding the text to clean. Defaults to
            ``"maneno"``, the column this function always used.

    Returns:
        The loaded DataFrame with ``text_column`` replaced by its cleaned form.

    Raises:
        KeyError: If ``text_column`` is not present in the file.
    """
    df = pd.read_csv(file_path)

    if text_column not in df.columns:
        raise KeyError(f"Column {text_column!r} not found in {file_path!r}")

    def _clean(text):
        # Raw string: '\s' inside a plain literal is an invalid escape sequence
        # (DeprecationWarning, SyntaxWarning on Python 3.12+).
        stripped = re.sub(r'[^a-zA-Z0-9\s]', '', text)
        return " ".join(word for word in stripped.split() if word not in stop_words)

    df[text_column] = df[text_column].apply(_clean)

    return df

In [11]:
def get_sentiment_metrics(df):
    """Evaluate the trained SVM on a labelled DataFrame.

    The ``"maneno"`` column is vectorized with the already-fitted module-level
    ``vectorizer``, classified by the module-level ``svm`` and compared with
    the ``"lugha"`` column.

    Args:
        df: DataFrame with a ``"maneno"`` text column and a ``"lugha"`` label
            column.

    Returns:
        dict with keys ``"accuracy"``, ``"precision"``, ``"recall"`` and
        ``"f1_score"`` (the last three are weighted averages). Accuracy and F1
        are also printed, exactly as before; previously the computed values
        were discarded and the function returned ``None``.
    """
    X_vec = vectorizer.transform(df["maneno"])
    y_true = df["lugha"]

    y_pred = svm.predict(X_vec)
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1_score, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted'
    )

    print("Results for input CSV data")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1 Score: {f1_score:.2f}")

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
    }


In [12]:
import gradio as gr
import pandas as pd
#from swahili_sentiment import test_sentiment, predict_sentiment

# Define input and output interfaces.
# csv_input feeds the CSV interface; input_text and output_sentiment are the
# input and output of the single-text interface.
# NOTE(review): the gr.inputs / gr.outputs namespaces are, as far as I know,
# deprecated in newer Gradio releases - confirm against the installed version.
csv_input = gr.inputs.File(label="Upload CSV file")
input_text = gr.inputs.Textbox(label="Input Text")
output_sentiment = gr.outputs.Textbox(label="Sentiment")

# Define function to load CSV data and predict sentiment
def predict_csv_sentiment(data):
    """Predict a sentiment label for every row of an uploaded CSV file.

    Args:
        data: The value Gradio passes for the file input. Accepted forms are
            an uploaded-file object exposing the path as ``.name``, a plain
            path string, or (for backward compatibility) a dict with a
            ``"csv"`` entry.

    Returns:
        HTML table of the CSV contents with an added ``"Sentiment"`` column.

    Raises:
        KeyError: If the file has neither a ``"Text"`` nor a ``"maneno"``
            column.
    """
    # The original subscripted the upload as data["csv"], which raises TypeError
    # when the value is a file object rather than a dict.
    # NOTE(review): confirm the exact object type against the installed Gradio.
    if isinstance(data, dict):
        data = data["csv"]
    csv_source = getattr(data, "name", data)
    df = pd.read_csv(csv_source)

    # "Text" is the column this handler always expected; "maneno" is the text
    # column used by the rest of the notebook, so accept it as a fallback.
    text_column = next((col for col in ("Text", "maneno") if col in df.columns), None)
    if text_column is None:
        raise KeyError("CSV file must contain a 'Text' (or 'maneno') column")

    # Blank cells are read as NaN (a float), which would crash the regex
    # cleaning inside predict_sentiment; treat them as empty text instead.
    texts = df[text_column].fillna("").astype(str)
    df["Sentiment"] = texts.apply(predict_sentiment)
    return df.to_html()

# Define interfaces for CSV and Text input.
# The CSV description no longer promises a "polarity score": the handler only
# adds a per-row "Sentiment" column to the table it renders.
iface_csv = gr.Interface(
    fn=predict_csv_sentiment,
    inputs=csv_input,
    outputs="html",
    title="Swahili Sentiment Analyzer for CSV",
    description="Predict the sentiment of Swahili text from a CSV file using an SVM classifier trained on a dataset of Swahili text.",
)

iface_text = gr.Interface(
    fn=predict_sentiment,
    inputs=input_text,
    outputs=output_sentiment,
    title="Swahili Sentiment Analyzer for Text",
    description="Predict the sentiment of Swahili text using an SVM classifier trained on a dataset of Swahili text.",
)

# Launch the interfaces; the recorded output shows each one served on its own
# local port.
iface_text.launch()
iface_csv.launch()





Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


