# Emotions Dataset

In [4]:
import zipfile
import pandas as pd

# Unpack the dataset archive into the relative folder "emotions_dataset".
# NOTE(review): the archive location is an absolute, machine-specific path;
# it has to be edited before this cell can run on another machine.
archive_path = r"C:\Users\jvina\Cynbit_training\archive.zip"
with zipfile.ZipFile(archive_path, mode='r') as zip_ref:
    zip_ref.extractall(path="emotions_dataset")

# Load datasets
def load_data(filename):
    """Read one dataset split into a two-column DataFrame.

    The file is parsed as header-less, semicolon-separated text, and the
    two fields of every row are labelled ``text`` and ``emotion``.
    """
    column_names = ['text', 'emotion']
    frame = pd.read_csv(filename, sep=';', header=None, names=column_names)
    return frame

# Read the three splits extracted from the archive (order: train, test, val)
train, test, val = map(
    load_data,
    (
        "emotions_dataset/train.txt",
        "emotions_dataset/test.txt",
        "emotions_dataset/val.txt",
    ),
)

# Show the first rows of the training split as the cell's output
train.head()


Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


# Preprocess text

In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

def preprocess_data(df):
    """Return a copy of ``df`` whose ``text`` column is lower-cased.

    The input frame is left untouched: the previous implementation wrote the
    lower-cased column back into the caller's object, which silently changed
    frames already displayed earlier in the notebook and could trigger
    ``SettingWithCopyWarning`` when a slice was passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string-valued ``text`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns (in the same order) and index;
        only the ``text`` values differ.
    """
    # assign() builds a new frame instead of writing into the caller's object
    return df.assign(text=df['text'].str.lower())

# Lower-case the text of every split (train, then test, then val)
train, test, val = (preprocess_data(split) for split in (train, test, val))

# Turn emotion names into integer labels; the encoder is fitted on the
# training split only and then reused for the other two splits
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train['emotion'])
test_labels, val_labels = (
    label_encoder.transform(split['emotion']) for split in (test, val)
)

# TF-IDF features: the vectorizer is fitted on the training text only and
# then applied unchanged to the test and validation text
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train['text'])
X_test, X_val = (vectorizer.transform(split['text']) for split in (test, val))


# Train Logistic Regression model

In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fit the classifier on the TF-IDF training features
model = LogisticRegression(max_iter=1000).fit(X_train, train_labels)

# Predict the test split and compute both metrics before printing them.
# NOTE(review): this cell scores on the test split; X_val / val_labels are
# not used here.
y_pred = model.predict(X_test)
lr_accuracy = accuracy_score(test_labels, y_pred)
lr_report = classification_report(
    test_labels,
    y_pred,
    target_names=label_encoder.classes_,
)

print("Accuracy:", lr_accuracy)
print("\nClassification Report:\n", lr_report)

Accuracy: 0.869

Classification Report:
               precision    recall  f1-score   support

       anger       0.90      0.82      0.86       275
        fear       0.89      0.80      0.84       224
         joy       0.84      0.96      0.89       695
        love       0.83      0.60      0.69       159
     sadness       0.90      0.93      0.92       581
    surprise       0.85      0.52      0.64        66

    accuracy                           0.87      2000
   macro avg       0.87      0.77      0.81      2000
weighted avg       0.87      0.87      0.86      2000



# Naive Bayes model

In [17]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Fit a multinomial Naive Bayes classifier on the same TF-IDF training features
nb_model = MultinomialNB().fit(X_train, train_labels)

# Predict the test split and compute both metrics before printing them
y_pred_nb = nb_model.predict(X_test)
nb_accuracy = accuracy_score(test_labels, y_pred_nb)
nb_report = classification_report(
    test_labels,
    y_pred_nb,
    target_names=label_encoder.classes_,
    zero_division=0,
)

print("Accuracy:", nb_accuracy)
print("\nClassification Report:\n", nb_report)


Accuracy: 0.7165

Classification Report:
               precision    recall  f1-score   support

       anger       0.94      0.40      0.56       275
        fear       0.90      0.38      0.54       224
         joy       0.68      0.98      0.80       695
        love       1.00      0.12      0.21       159
     sadness       0.70      0.92      0.80       581
    surprise       0.00      0.00      0.00        66

    accuracy                           0.72      2000
   macro avg       0.70      0.47      0.48      2000
weighted avg       0.75      0.72      0.66      2000



In [18]:
import joblib

# Persist the fitted objects needed to reuse the classifier later:
#   - `model`         : the LogisticRegression fitted earlier (`nb_model` is NOT saved)
#   - `vectorizer`    : the TF-IDF vectorizer fitted on the training text
#   - `label_encoder` : the encoder fitted on the training emotion labels
# The file names are relative, so the files land in the current working directory.
# The last call is the cell's final expression, so its return value is what the
# cell displays as output.
# NOTE(review): joblib files are pickle-based; only load them from trusted sources.
joblib.dump(model, "emotion_model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")


['label_encoder.pkl']