## Text Mining II. Music Recommendation Based on Moods

#### Data Preparation

In [None]:
import nltk
import spacy
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from nltk.tree import Tree
from nltk import pos_tag, ne_chunk
from nltk.tokenize import word_tokenize

from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import ComplementNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# NLTK resources requested by this notebook; downloaded in one call.
NLTK_RESOURCES = [
    "words",
    "punkt",
    "wordnet",
    "punkt_tab",
    "stopwords",
    "maxent_ne_chunker",
    "vader_lexicon",
]
nltk.download(NLTK_RESOURCES)

In [3]:
# Relative locations of the two input CSV files (emotion-labelled sentences
# and mood-labelled songs), both kept in the same dataset folder.
DATASET_DIR = './dataset'
FILE_PATH_EMOTION = f'{DATASET_DIR}/combined_emotion.csv'
FILE_PATH_SONG = f'{DATASET_DIR}/data_moods.csv'

In [None]:
# Load the emotion-labelled sentences and print a quick overview:
# per-column non-null counts, the distinct emotion labels, and the first rows.
df_emotion = pd.read_csv(FILE_PATH_EMOTION)
emotion_overview = (
    df_emotion.count(),
    df_emotion["emotion"].unique(),
    df_emotion.head(),
)
for summary in emotion_overview:
    print(summary, '\n')

# Load the song table and preview its first rows.
df_song = pd.read_csv(FILE_PATH_SONG)
song_preview = df_song.head()
print(song_preview)

In [None]:
# Plot the label distribution of each dataset, one figure per column:
# song moods first, then sentence emotions.
for label_column in (df_song["mood"], df_emotion["emotion"]):
    sns.histplot(label_column)
    plt.show()

### Data Processing

In [None]:
# Collapse the "fear" and "sad" labels into the single "Sad" mood,
# then re-plot the label distribution to show the effect.
sad_mapping = dict.fromkeys(('fear', 'sad'), 'Sad')
df_emotion['emotion'] = df_emotion['emotion'].replace(sad_mapping)
sns.histplot(df_emotion["emotion"])
plt.show()

In [None]:
# Collapse the "love" and "joy" labels into the single "Happy" mood,
# then re-plot the label distribution to show the effect.
happy_mapping = dict.fromkeys(('love', 'joy'), 'Happy')
df_emotion['emotion'] = df_emotion['emotion'].replace(happy_mapping)
sns.histplot(df_emotion["emotion"])
plt.show()

In [None]:
# Collapse the surprise and anger labels into the single "Energetic" mood,
# then re-plot the label distribution to show the effect.
# NOTE(review): the misspelled key 'suprise' presumably mirrors the label as it
# appears in the CSV -- confirm against the unique() output printed when the
# data is loaded. The correctly spelled 'surprise' is mapped as well so the
# class is not silently left unmapped if the data uses that spelling.
energetic_mapping = dict.fromkeys(('suprise', 'surprise', 'anger'), 'Energetic')
df_emotion['emotion'] = df_emotion['emotion'].replace(energetic_mapping)
sns.histplot(df_emotion["emotion"])
plt.show()

In [None]:
# Remove every "Calm" track: the sentence emotions are only mapped to
# Sad / Happy / Energetic, so a Calm song can never be recommended.
# The rows are filtered out of the frame itself; dropping them from the
# 'mood' column alone and assigning the result back would realign on the
# index and merely turn those moods into NaN while keeping the rows.
# .copy() gives an independent frame so later column assignments do not
# trigger pandas' chained-assignment warning.
df_song = df_song[df_song['mood'] != 'Calm'].copy()
sns.histplot(df_song["mood"])
plt.show()

### Model for Mood Analysis

##### Load & Process Dataset

In [10]:
# Model input (raw sentence text) and target (mood label) columns.
sentence, emotion = df_emotion['sentence'], df_emotion['emotion']

In [11]:
# Hold out 30% of the labelled sentences for evaluation; the fixed seed
# keeps the split reproducible between runs.
split_options = {"test_size": 0.3, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(sentence, emotion, **split_options)

##### Compute TF-IDF Features

In [12]:
# Build TF-IDF features over unigrams and bigrams. The vocabulary and IDF
# weights are learned from the training sentences only; the test sentences
# are merely projected onto that fitted vocabulary.
NGRAM_RANGE = (1, 2)
vectorizer = TfidfVectorizer(ngram_range=NGRAM_RANGE)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

##### Training Model

In [None]:
import pickle

# Train a Complement Naive Bayes classifier on the TF-IDF training features.
model = ComplementNB()
model.fit(X_train_tfidf, y_train)

# Save the model
filename = 'musicmood.pkl'
with open(filename, 'wb') as file:
    pickle.dump(model, file)

print(f"Model saved to {filename}")

# The classifier only accepts features produced by the fitted vectorizer, so
# persist the vectorizer next to it; without it the pickled model cannot be
# used on new text outside this notebook session.
vectorizer_filename = 'musicmood_vectorizer.pkl'
with open(vectorizer_filename, 'wb') as file:
    pickle.dump(vectorizer, file)

print(f"Vectorizer saved to {vectorizer_filename}")

In [None]:
# Predict moods for the held-out TF-IDF features and report the share of
# correct predictions as a percentage with two decimals.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_percent = accuracy * 100
print(f'Accuracy: {accuracy_percent:.2f}%')

##### Sample Predictions on the Test Set

In [None]:
# Show the first ten held-out sentences next to their true and predicted moods.
# The loop variable is deliberately not named `sentence`: that name holds the
# full sentence column used to build the train/test split, and rebinding it
# to a single string here would break a re-run of the split cell.
# .iloc makes the "first ten rows" slice explicitly positional, since the
# split leaves X_test / y_test with shuffled, non-contiguous index labels.
SAMPLE_SIZE = 10
samples = zip(
    X_test.iloc[:SAMPLE_SIZE],
    y_test.iloc[:SAMPLE_SIZE],
    y_pred[:SAMPLE_SIZE],
)
for sample_text, true_label, predicted_label in samples:
    print(f"User Input: {sample_text}")
    print(f"True Mood: {true_label} | Predicted Mood: {predicted_label}")
    print("-" * 50)

### Music Recommendation

In [16]:
def recommend_song(emotion, df_song, top_n=10):
    """Return the most popular songs whose mood equals *emotion*.

    Args:
        emotion: Mood label compared for equality against the ``mood`` column.
        df_song: Song table; must provide the ``mood``, ``popularity``,
            ``name``, ``album`` and ``artist`` columns.
        top_n: Maximum number of songs to return.

    Returns:
        A DataFrame with the ``name``, ``album``, ``artist`` and ``mood``
        columns of at most ``top_n`` matching songs, ordered by descending
        ``popularity``. Empty when no song has the requested mood.
    """
    display_columns = ['name', 'album', 'artist', 'mood']
    matching = df_song.loc[df_song['mood'] == emotion]
    ranked = matching.sort_values(by='popularity', ascending=False)
    return ranked.head(top_n)[display_columns]

In [17]:
def predict_mood(user_input):
    """Predict the mood label for a single piece of text.

    The text is vectorized with the module-level fitted ``vectorizer`` and
    classified with the module-level trained ``model``.

    Args:
        user_input: The raw text to classify.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    features = vectorizer.transform([user_input])
    predictions = model.predict(features)
    return predictions[0]

In [18]:
# spaCy English pipeline, loaded at cell execution and reused by extract_entities.
nlp = spacy.load("en_core_web_sm")


def extract_entities(text):
    """Render and return the named entities spaCy finds in *text*.

    The parsed document is displayed with displaCy's entity visualizer in
    Jupyter mode as a side effect.

    Args:
        text: The raw text to analyse.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entity in
        the parsed document.
    """
    parsed = nlp(text)
    spacy.displacy.render(parsed, style="ent", jupyter=True)
    return [(span.text, span.label_) for span in parsed.ents]

### Menu Section

In [None]:
# Placeholder kept in `request` (and shown in the header) until the user has
# entered some text. Every "is there a request yet?" check compares against
# this one constant so the checks cannot drift apart.
NO_REQUEST = "NO REQUEST"

request = NO_REQUEST
while True:
    has_request = request != NO_REQUEST
    # .item() turns the predicted label into a plain Python value for display.
    current_mood = predict_mood(request).item() if has_request else "UNKNOWN"

    print("\nMUSIC RECOMMENDATION APPLICATION BASED ON MOODS")
    print(f"YOUR REQUEST: {request}")
    print(f"YOUR MOOD: {current_mood}")
    print("1. WRITE YOUR REQUEST")
    print("2. VIEW MUSIC RECOMMENDATION")
    print("3. VIEW NAMED ENTITIES RECOGNITION")
    print("4. EXIT")

    # Tolerate stray whitespace around the menu choice.
    choice = input(">> ").strip()

    if choice == "1":
        new_request = input("Enter your request: ").strip()
        if new_request:
            request = new_request
        else:
            # An empty text carries no words to classify; keep the previous request.
            print("Request cannot be empty.")
    elif choice == "2":
        if not has_request:
            print("No request found. Please write a request first.")
        else:
            mood = predict_mood(request)
            print(f"Predicted Mood: {mood}")
            print("Recommended Songs:")
            print(recommend_song(mood, df_song))
    elif choice == "3":
        if not has_request:
            print("No request found. Please write a request first.")
            input("Press Enter to continue...")
        else:
            print("Named Entities:")
            extract_entities(request)
    elif choice == "4":
        print("Exiting...")
        break
    else:
        print("Invalid choice. Please select a valid option.")
