In [2]:
import pandas as pd

df = pd.read_csv("../data/emotion_data.csv")
df.head()


Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [3]:
import nltk
import re

nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    words = text.split()
    words = [word for word in words if word not in stop_words]
    return " ".join(words)

df['clean_comment'] = df['Comment'].apply(clean_text)
df[['Comment', 'clean_comment']].head()


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ashwinsathishkumar/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Unnamed: 0,Comment,clean_comment
0,i seriously hate one subject to death but now ...,seriously hate one subject death feel reluctan...
1,im so full of life i feel appalled,im full life feel appalled
2,i sit here to write i start to dig out my feel...,sit write start dig feelings think afraid acce...
3,ive been really angry with r and i feel like a...,ive really angry r feel like idiot trusting fi...
4,i feel suspicious if there is no one outside l...,feel suspicious one outside like rapture happe...


In [4]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['label'] = le.fit_transform(df['Emotion'])

df[['Emotion', 'label']].head()


Unnamed: 0,Emotion,label
0,fear,1
1,anger,0
2,fear,1
3,joy,2
4,fear,1


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(max_features=5000)

X = tfidf.fit_transform(df['clean_comment'])
y = df['label']

X.shape, y.shape


((5937, 5000), (5937,))

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train.shape, X_test.shape


((4749, 5000), (1188, 5000))

In [9]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [11]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [12]:
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)


0.9402356902356902

In [13]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred, target_names=le.classes_))


              precision    recall  f1-score   support

       anger       0.94      0.94      0.94       392
        fear       0.95      0.92      0.93       416
         joy       0.92      0.97      0.95       380

    accuracy                           0.94      1188
   macro avg       0.94      0.94      0.94      1188
weighted avg       0.94      0.94      0.94      1188



In [14]:
def predict_emotion(text):
    cleaned = clean_text(text)
    vector = tfidf.transform([cleaned])
    prediction = model.predict(vector)
    return le.inverse_transform(prediction)[0]

# Try it!
predict_emotion("I feel very lonely and tired today")


'anger'

In [15]:
mood_to_music = {
    "joy": ["Pop", "Dance", "Happy Hits"],
    "sad": ["Acoustic", "Sad Songs", "Piano"],
    "anger": ["Rock", "Metal"],
    "fear": ["Calm", "Lo-fi", "Ambient"],
    "love": ["Romantic", "Chill Love"]
}

def recommend_music(emotion):
    return mood_to_music.get(emotion, ["Chill"])

emotion = predict_emotion("I feel anxious and scared about my future")
recommend_music(emotion)


['Calm', 'Lo-fi', 'Ambient']