In [3]:
import pandas as pd
import re
import nltk
import pickle

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Download NLTK resources
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")

# Load Dataset (Ensure your CSV file has 'Text' and 'Emotion' columns)
df = pd.read_csv("emotion_data.csv")

# Text Preprocessing Function
def preprocess_text(text):
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words("english"))
    
    text = text.lower()
    text = re.sub(r"\W+", " ", text)  # Remove special characters
    tokens = nltk.word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words and word.isalnum()]
    
    return " ".join(tokens)

# Apply Preprocessing
df["Clean_Text"] = df["Text"].astype(str).apply(preprocess_text)

# Split Data into Training and Testing
X_train, X_test, y_train, y_test = train_test_split(df["Clean_Text"], df["Emotion"], test_size=0.2, random_state=42)

# Convert Text to TF-IDF Features
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train Logistic Regression Model
log_reg = LogisticRegression(max_iter=500, solver="liblinear")
log_reg.fit(X_train_tfidf, y_train)

# Evaluate the Model
y_pred = log_reg.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.2f}")  # Expecting ~80% accuracy
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Save Model and Vectorizer for Future Use
with open("logistic_regression_model.pkl", "wb") as model_file:
    pickle.dump(log_reg, model_file)

with open("tfidf_vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

print("\nModel and vectorizer saved successfully!")

# ------------------------------ #
# 📌 User Input for Emotion Detection #
# ------------------------------ #

# Load Saved Model and Vectorizer
with open("logistic_regression_model.pkl", "rb") as model_file:
    log_reg = pickle.load(model_file)

with open("tfidf_vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

# Get User Input for Message
user_message = input("\nEnter your message: ")

# Process and Predict Emotion
processed_message = preprocess_text(user_message)
message_vector = vectorizer.transform([processed_message])
predicted_emotion = log_reg.predict(message_vector)[0]

# Display Result
print(f"\nPredicted Emotion: {predicted_emotion}")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Model Accuracy: 0.61

Classification Report:
               precision    recall  f1-score   support

       anger       0.64      0.54      0.59       836
     disgust       0.86      0.09      0.16       202
        fear       0.76      0.64      0.70      1104
         joy       0.56      0.82      0.67      2214
     neutral       0.69      0.33      0.45       481
     sadness       0.57      0.57      0.57      1327
       shame       0.75      0.39      0.51        23
    surprise       0.62      0.40      0.48       772

    accuracy                           0.61      6959
   macro avg       0.68      0.47      0.52      6959
weighted avg       0.63      0.61      0.59      6959


Model and vectorizer saved successfully!



Enter your message:  I am worried about what is going to happen tonight



Predicted Emotion: fear


In [18]:
user_message = input("\nEnter your message: ")

# Process and Predict Emotion
processed_message = preprocess_text(user_message)
message_vector = vectorizer.transform([processed_message])
predicted_emotion = log_reg.predict(message_vector)[0]

# Display Result
print(f"\nPredicted Emotion: {predicted_emotion}")


Enter your message:  do u mind if i ask you a question



Predicted Emotion: fear
