In [4]:
import pandas as pd
import numpy as np
import re
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


In [5]:
# Shared NLP resources used by the cleaning and sentiment helpers below.
lemmatizer = WordNetLemmatizer()
sia = SentimentIntensityAnalyzer()

# Stop-word set = NLTK's English list + a few extra modal verbs, minus "not"
# ("not" is kept so that it survives cleaning for sentiment analysis).
custom_stopwords = ["would", "shall", "could", "might"]
stop_words = (set(stopwords.words('english')) | set(custom_stopwords)) - {"not"}

# Text cleaning functions
def remove_special_character(content):
    """Replace every run of non-word characters in `content` with one space.

    "Word characters" are whatever the regex class ``\\w`` matches, so
    underscores and digits are kept.
    """
    non_word_run = re.compile(r'\W+')
    return non_word_run.sub(' ', content)

def remove_url(content):
    """Delete every whitespace-delimited token fragment starting at 'http'.

    The match runs from the literal ``http`` up to the next whitespace, and
    the matched text is removed (surrounding spaces are left in place).
    """
    url_like = re.compile(r'http\S+')
    return url_like.sub('', content)

def lemmatize_and_remove_stopwords(content):
    """Tokenize `content`, filter tokens, and return the lemmas joined by spaces.

    A token is kept only if its lower-cased form is not in `stop_words` and
    the token is purely alphabetic; kept tokens are lower-cased and then
    lemmatized with the module-level `lemmatizer`.
    """
    lemmas = []
    for token in word_tokenize(content):
        lowered = token.lower()
        if lowered in stop_words or not token.isalpha():
            continue
        lemmas.append(lemmatizer.lemmatize(lowered))
    return " ".join(lemmas)

def data_cleaning(content):
    """Normalise a raw feedback string into a space-separated string of lemmas.

    Steps, in order:
      1. remove URLs;
      2. replace runs of non-word characters with a single space;
      3. tokenize, drop stop words and non-alphabetic tokens, and lemmatize.

    Args:
        content: Raw feedback text.

    Returns:
        The cleaned text as a single string.
    """
    # URLs must be stripped first: once '://', '.' and '/' have been turned
    # into spaces, the URL pattern can no longer match the whole link and its
    # pieces ("http", "example", "com", ...) would leak into the features.
    content = remove_url(content)
    content = remove_special_character(content)
    content = lemmatize_and_remove_stopwords(content)
    return content

# Sentiment classification based on rating
def assign_sentiment(rating):
    """Map a numeric rating to a sentiment label.

    Args:
        rating: Numeric rating (the notebook uses a 1-5 scale).

    Returns:
        'positive' for ratings >= 4, 'neutral' for ratings in [3, 4),
        'negative' for anything below 3.
    """
    if rating >= 4:
        return 'positive'
    # Use a range rather than `== 3` so fractional ratings such as 3.5 are
    # treated as neutral instead of falling through to 'negative'.
    if rating >= 3:
        return 'neutral'
    return 'negative'

# Sentiment classification using SIA
def classify_sentiment_using_sia(feedback):
    """Label `feedback` from the analyzer's compound polarity score.

    Returns 'positive' when the compound score is >= 0.05, 'negative' when it
    is <= -0.05, and 'neutral' otherwise.
    """
    compound = sia.polarity_scores(feedback)['compound']
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

# Sample data for training the model (replace with more comprehensive data in practice)


In [10]:
# Tiny hand-written training set: one feedback text per rating.
data = {
    'Feedback': [
        "Great product, loved it!",
        "Horrible experience, will not buy again.",
        "It was okay, nothing special.",
        "Absolutely fantastic service!",
        "Very bad quality, disappointed.",
    ],
    'Rating': [5, 1, 3, 5, 2],
}

# Build the frame and derive the cleaned text and the rating-based label.
df = pd.DataFrame(data).assign(
    Cleaned_Feedback=lambda frame: frame['Feedback'].apply(data_cleaning),
    Label=lambda frame: frame['Rating'].apply(assign_sentiment),
)

# TF-IDF features over unigrams to trigrams, densified for the classifier.
tfidfvect = TfidfVectorizer(ngram_range=(1, 3), min_df=1, max_features=1000)
x_train_tfidf = tfidfvect.fit_transform(df['Cleaned_Feedback']).toarray()
y_train = df['Label']

# Fit the random forest (fit() returns the estimator itself).
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
).fit(x_train_tfidf, y_train)

# Function to process user input and predict sentiment
def analyze_feedback():
    """Interactively score one piece of feedback and print the verdicts.

    Reads a feedback text and a 1-5 rating from stdin, then prints the cleaned
    feedback together with three sentiment labels: the one implied by the
    rating, the one from the lexicon-based analyzer, and the one predicted by
    the trained model.

    Returns:
        None. All results are printed.
    """
    feedback = input("Enter your feedback: ")

    # Keep asking until the rating is a whole number inside the advertised
    # range; previously out-of-range values were silently labelled and
    # non-numeric input crashed with a bare ValueError.
    while True:
        try:
            rating = int(input("Enter your rating (1-5): "))
        except ValueError:
            print("Please enter a whole number between 1 and 5.")
            continue
        if 1 <= rating <= 5:
            break
        print("Please enter a whole number between 1 and 5.")

    # Clean and process input
    cleaned_feedback = data_cleaning(feedback)
    sentiment_by_rating = assign_sentiment(rating)
    # Score the RAW text: the analyzer relies on negations, punctuation and
    # capitalisation, all of which the cleaning step strips (e.g. "doesn't"
    # disappears entirely), which flips the verdict for negated statements.
    sentiment_by_sia = classify_sentiment_using_sia(feedback)

    # Predict sentiment using the model (trained on cleaned text)
    feedback_tfidf = tfidfvect.transform([cleaned_feedback]).toarray()
    predicted_sentiment = model.predict(feedback_tfidf)[0]

    # Display results
    print("\nResults:")
    print(f"Cleaned Feedback: {cleaned_feedback}")
    print(f"Sentiment by Rating: {sentiment_by_rating}")
    print(f"Sentiment by SIA: {sentiment_by_sia}")
    print(f"Predicted Sentiment by Model: {predicted_sentiment}")

# Run the analysis once: prompts on stdin for a feedback text and a rating, then prints the results
analyze_feedback()

Enter your feedback:  the proffessor's cordial relation with student doesn't seem well and the teaching of the teacher is average
Enter your rating (1-5):  3



Results:
Cleaned Feedback: proffessor cordial relation student seem well teaching teacher average
Sentiment by Rating: neutral
Sentiment by SIA: positive
Predicted Sentiment by Model: positive
