In [None]:
# Mount Google Drive inside the Colab runtime; the dataset paths used by this
# notebook live under the /content/drive mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split


In [None]:

# Locations of the three dataset splits on the mounted Google Drive.
train_path = "/content/drive/My Drive/train.txt"
test_path = "/content/drive/My Drive/test.txt"
val_path = "/content/drive/My Drive/val.txt"

In [None]:
# Load Data Function
def load_data(filepath):
    """Read a ``text;label`` dataset file into parallel lists.

    Each non-blank line is split on its LAST ``;`` so that semicolons inside
    the sentence itself are kept as part of the text.

    Args:
        filepath: Path of a UTF-8 text file with one ``text;label`` record
            per line.

    Returns:
        A ``(texts, labels)`` tuple of two equally long lists of strings.
        Both lists are empty when the file holds no records.

    Raises:
        ValueError: If a non-blank line contains no ``;`` separator.
    """
    texts, labels = [], []
    with open(filepath, "r", encoding="utf-8") as file:
        for line_no, raw_line in enumerate(file, start=1):
            record = raw_line.strip()
            if not record:
                # Blank lines (e.g. a trailing empty line) carry no sample.
                continue
            text, separator, label = record.rpartition(";")
            if not separator:
                raise ValueError(
                    f"{filepath}:{line_no}: expected 'text;label', got {record!r}"
                )
            texts.append(text)
            labels.append(label)
    return texts, labels

In [None]:
# Read every split with load_data, in the order train, test, validation.
(
    (train_texts, train_labels),
    (test_texts, test_labels),
    (val_texts, val_labels),
) = [load_data(split_path) for split_path in (train_path, test_path, val_path)]


In [None]:
# Pool the three splits into one corpus (train, then test, then validation);
# a fresh train/validation split is drawn from this pool later on.
all_texts = [*train_texts, *test_texts, *val_texts]
all_labels = [*train_labels, *test_labels, *val_labels]


In [None]:
# Learn the label -> integer id mapping from the pooled labels, then encode them.
label_encoder = LabelEncoder().fit(all_labels)
all_labels_enc = label_encoder.transform(all_labels)

In [None]:
# Stratified 80/20 split of the pooled corpus into training and validation sets.
# NOTE: this rebinds train_texts / val_texts, which previously held the
# contents of the original train/val files.
(
    train_texts,
    val_texts,
    train_labels_enc,
    val_labels_enc,
) = train_test_split(
    all_texts,
    all_labels_enc,
    test_size=0.2,
    random_state=42,
    stratify=all_labels_enc,
)


In [None]:
# TF-IDF over unigrams and bigrams with English stop words removed.
# Terms must appear in at least 2 documents and in at most 95% of them;
# the vocabulary is capped at 10,000 features.
tfidf = TfidfVectorizer(
    stop_words='english',
    ngram_range=(1, 2),
    min_df=2,
    max_df=0.95,
    max_features=10000,
)

# The SVM classifier and its hyperparameter search are configured in the next cell.

In [None]:
# SVM with Hyperparameter Tuning
# Class-weighted SVC, searched over C and kernel with a seeded,
# shuffled 5-fold stratified cross-validation scored by accuracy.
svm = SVC(class_weight='balanced')
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
}
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=kfold,
    verbose=2,
    n_jobs=-1,
)


In [None]:
# Create Pipeline
# Cross-validate the vectorizer and the classifier together. Putting the
# TF-IDF step inside the estimator that GridSearchCV clones means it is
# re-fitted on the training part of every fold, so held-out fold text never
# influences the vocabulary / IDF weights used to score a candidate.
# (With GridSearchCV as the last Pipeline step, TF-IDF was fitted once on all
# training text before the folds were cut.)
# The search settings are taken from `grid_search` so they stay defined in
# one place; Pipeline parameters are addressed as "<step>__<param>".
pipeline = GridSearchCV(
    estimator=Pipeline([
        ('tfidf', tfidf),
        ('svm', grid_search.estimator),
    ]),
    param_grid={
        f'svm__{name}': values
        for name, values in grid_search.param_grid.items()
    },
    scoring=grid_search.scoring,
    cv=grid_search.cv,
    verbose=grid_search.verbose,
    n_jobs=grid_search.n_jobs,
)

In [None]:
# Train Model
# Fits on the training split only; the validation split is kept aside for
# the evaluation cells that follow.
pipeline.fit(train_texts, train_labels_enc)


Fitting 5 folds for each of 6 candidates, totalling 30 fits


In [None]:
# Validate Model
# Predict encoded class ids for the held-out validation texts.
val_preds = pipeline.predict(val_texts)
val_preds_labels = label_encoder.inverse_transform(val_preds)  # Convert back to original labels


In [None]:
# Accuracy and Report
# Decode the ground-truth ids once so both metrics compare label strings.
val_true_labels = label_encoder.inverse_transform(val_labels_enc)
accuracy = accuracy_score(val_true_labels, val_preds_labels)
class_report = classification_report(val_true_labels, val_preds_labels)


In [None]:
# Show the overall accuracy followed by the per-class report.
print(f'Validation Accuracy: {accuracy}', class_report, sep='\n')


Validation Accuracy: 0.8965
              precision    recall  f1-score   support

       anger       0.88      0.91      0.89       542
        fear       0.87      0.86      0.86       475
         joy       0.94      0.89      0.92      1352
        love       0.73      0.96      0.83       328
     sadness       0.96      0.90      0.93      1159
    surprise       0.69      0.88      0.78       144

    accuracy                           0.90      4000
   macro avg       0.85      0.90      0.87      4000
weighted avg       0.90      0.90      0.90      4000



In [None]:
def predict_emotion():
    """Interactively classify sentences typed by the user.

    Prompts repeatedly, predicts with the module-level ``pipeline`` and
    decodes the result with the module-level ``label_encoder``, printing one
    predicted emotion per sentence.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored) or when input is no longer available (EOF or an
    interrupt). Blank input is skipped instead of being sent to the model.

    Returns:
        None.
    """
    while True:
        try:
            user_input = input("Enter a sentence (or type 'exit' to stop): ")
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the cell was interrupted: stop without a traceback.
            break
        sentence = user_input.strip()
        if sentence.lower() == 'exit':
            break
        if not sentence:
            # Nothing to classify; ask again.
            continue
        pred = pipeline.predict([sentence])
        emotion = label_encoder.inverse_transform(pred)[0]
        print(f'Predicted Emotion: {emotion}')

# Run user input testing
# Starts the interactive prompt loop; this cell blocks until the user types 'exit'.
predict_emotion()

Enter a sentence (or type 'exit' to stop): I can't stop crying, everything feels so hopeless.
Predicted Emotion: sadness
Enter a sentence (or type 'exit' to stop): I am so nervous about my exam results tomorrow.
Predicted Emotion: fear
Enter a sentence (or type 'exit' to stop): I just received the best news of my life, I am thrilled!
Predicted Emotion: joy
Enter a sentence (or type 'exit' to stop): I feel so loved and appreciated by my friends and family
Predicted Emotion: love
Enter a sentence (or type 'exit' to stop): How could they betray me like this? I am so mad!
Predicted Emotion: anger
Enter a sentence (or type 'exit' to stop):  I feel so irritated when people don't listen to me.
Predicted Emotion: anger
Enter a sentence (or type 'exit' to stop):  She planned a surprise party for my birthday, and I was shocked!
Predicted Emotion: surprise
Enter a sentence (or type 'exit' to stop): exit
