In [1]:
import pandas as pd
import numpy as np
import nltk
import string
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib


In [2]:
# Bind the stop-word corpus handle, then make sure the corpus data is
# available locally; `clean_text` reads the English list from this handle.
from nltk.corpus import stopwords
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Abhi\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


In [4]:
# Read the labelled dataset from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="emotions.csv")
print("âœ… Dataset loaded successfully!")

# Show the first five rows as a quick sanity check of the load.
df.head(n=5)


âœ… Dataset loaded successfully!


Unnamed: 0,text,emotion
0,I am feeling very happy today,happy
1,This is so frustrating and annoying,angry
2,I am scared of what might happen,fear
3,What a wonderful surprise!,surprise
4,I feel so sad and alone,sad


In [5]:
# Translation table that deletes every ASCII punctuation character; built once
# instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily-filled cache of the default English stop words (see _english_stopwords).
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loading them only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def clean_text(text, stop_words=None):
    """Normalise one piece of raw text for vectorisation.

    Steps, in order: lower-case; strip ``http...`` URLs, ``@mentions`` and
    ``#hashtags``; delete ASCII punctuation; delete digit runs; drop stop
    words; re-join the remaining words with single spaces.

    Parameters
    ----------
    text : str or None
        Raw input. ``None`` and float NaN (a missing cell in a DataFrame
        column) are treated as empty text; any other non-string value is
        converted with ``str()`` first.
    stop_words : collection of str, optional
        Words to remove. When omitted, NLTK's English stop-word list is used.

    Returns
    -------
    str
        The cleaned text; ``""`` when nothing is left or the input is missing.
    """
    # Missing values would otherwise fail on `.lower()`; `text != text` is the
    # NaN check that needs no extra import.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    if stop_words is None:
        # Previously the stop-word list was rebuilt inside the comprehension's
        # condition, i.e. once per word. Use a cached set instead.
        stop_words = _english_stopwords()

    text = str(text).lower()
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#\w+', '', text)
    text = text.translate(_PUNCTUATION_TABLE)
    text = re.sub(r'\d+', '', text)
    return " ".join(word for word in text.split() if word not in stop_words)


In [6]:
# Run the cleaner over every row of the raw text column and store the result
# next to it in a new `clean_text` column.
df['clean_text'] = df['text'].apply(clean_text)
print("âœ… Text cleaned successfully!")

# Raw vs. cleaned text for the first five rows, side by side.
df.loc[:, ['text', 'clean_text']].head(n=5)


âœ… Text cleaned successfully!


Unnamed: 0,text,clean_text
0,I am feeling very happy today,feeling happy today
1,This is so frustrating and annoying,frustrating annoying
2,I am scared of what might happen,scared might happen
3,What a wonderful surprise!,wonderful surprise
4,I feel so sad and alone,feel sad alone


In [9]:
# Target labels.
y = df['emotion']

# Split the cleaned *text* before any fitting. Previously the vectorizer was
# fitted on the full corpus and the split came afterwards, so the vocabulary
# and IDF weights were learned partly from test rows (data leakage).
# test_size and random_state are unchanged from the previous split.
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_text'], y, test_size=0.2, random_state=42
)

# Learn vocabulary / IDF weights from the training fold only, then apply that
# fitted transform to the held-out fold.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-corpus feature matrix, kept so any later cell that refers to `X` still
# works; it is produced with the train-fitted vectorizer.
X = vectorizer.transform(df['clean_text'])


In [10]:
# Fit a logistic-regression classifier on the training features
# (`fit` returns the estimator itself, so it can be chained).
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Overall accuracy first, then the per-class precision / recall / F1 table.
test_accuracy = accuracy_score(y_test, y_pred)
print("ðŸŽ¯ Model Accuracy:", test_accuracy)

per_class_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", per_class_report)


ðŸŽ¯ Model Accuracy: 0.0

Classification Report:
               precision    recall  f1-score   support

       angry       0.00      0.00      0.00       1.0
        fear       0.00      0.00      0.00       0.0
       happy       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [11]:
# Persist both fitted objects: the vectorizer is needed together with the
# model to turn new text into features at prediction time.
joblib.dump(value=model, filename="emotion_model.pkl")
joblib.dump(value=vectorizer, filename="vectorizer.pkl")

print("âœ… Model and vectorizer saved successfully!")


âœ… Model and vectorizer saved successfully!
