## Song Recommendation Based on Song Topic

This notebook recommends songs based on the user's mood and preferences, utilizing Gradio for interactive user input. The system uses the Fine-Tuned Feedforward Neural Network (FFNN) model, which achieved the best accuracy, to generate suggestions. Users can select a mood or topic, and the system provides up to ten random songs matching that topic. The system processes the song lyrics, applies a confidence threshold to select appropriate songs, and falls back on the topic if there aren’t enough confident songs available.

In [10]:
import gradio as gr
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import TensorDataset, DataLoader
import re
import string
import inflect
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk

# Load and preprocess data
df = pd.read_csv('music_data.csv')
topic_df = df[['artist_name', 'track_name', 'lyrics', 'topic']]

# Preprocessing function
def preprocess(text):
    p = inflect.engine()

    # Convert text to lowercase
    text = text.lower()
    
    # Remove punctuation
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)

    # Replace numeric tokens with words
    text = re.sub(r'\b\d+\b', lambda match: p.number_to_words(match.group()), text)
    
    # Tokenize words
    tokens = word_tokenize(text)
    
    # Load stopwords
    stop_words = set(stopwords.words("english"))
    
    # Initialize lemmatizer
    lemmatizer = WordNetLemmatizer()
    
    # Lemmatize and filter tokens
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word.isalpha() and word not in stop_words]
    
    return tokens

# Lyrics pre-processing
topic_df['lyric_tokens'] = topic_df['lyrics'].apply(preprocess)
topic_df['lyric_tokens'] = topic_df['lyric_tokens'].apply(lambda tokens: ' '.join(tokens))

# Label encoding
label_encoder = LabelEncoder()
topic_df['topic_labels'] = label_encoder.fit_transform(topic_df['topic'])

# Create n-gram embeddings
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),  # Unigrams + bigrams
    max_features=5000,   # Limit feature size
)
X = vectorizer.fit_transform(topic_df['lyric_tokens'])
y = topic_df['topic_labels']

# Define NN model
class LyricClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(LyricClassifier, self).__init__()
        
        # Define the layers
        self.fc1 = nn.Linear(input_dim, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.dropout1(self.relu1(self.fc1(x)))
        x = self.dropout2(self.relu2(self.fc2(x)))
        x = self.fc3(x)
        return x

# Initialize model
model = LyricClassifier(input_dim=X.shape[1], num_classes=len(torch.unique(torch.from_numpy(y.values))))

unique_topics = sorted(topic_df['topic'].unique())

def recommend_songs(topic_input, num_songs=5):
    # Convert topic to label
    topic_label = label_encoder.transform([topic_input])[0]
    
    # Predict probabilities
    X_all = vectorizer.transform(topic_df['lyric_tokens'])
    X_all_tensor = torch.from_numpy(X_all.toarray()).float()
    
    with torch.no_grad():
        probabilities = torch.softmax(model(X_all_tensor), dim=1)
    
    # Add confidence scores to dataframe
    topic_df['topic_confidence'] = probabilities[:, topic_label].numpy()
    
    # Get all songs that have at least moderate confidence for this topic
    confident_songs = topic_df[topic_df['topic_confidence'] > 0.5]  
    songs_for_topic = confident_songs[confident_songs['topic'] == topic_input]
    
    # If we don't have enough confident songs, fall back to regular matching
    if len(songs_for_topic) < num_songs:
        songs_for_topic = topic_df[topic_df['topic'] == topic_input]
    
    # Randomly select fresh recommendations each time
    recommendations = songs_for_topic.sample(n=min(num_songs, len(songs_for_topic)), 
                                          replace=False)
    
    # Format output
    return "\n".join([f"{row['track_name']} by {row['artist_name']}" 
                     for _, row in recommendations.iterrows()])

# Create a single Gradio interface (removed all the duplicate definitions)
iface = gr.Interface(
    fn=recommend_songs,
    inputs=[
        gr.Dropdown(choices=unique_topics, label="🎤 Select a Song Topic"),
        gr.Slider(1, 10, value=5, step=1, label="🔢 Number of Songs")
    ],
    outputs=gr.Textbox(label="Recommended Songs"),
    title="🎵 Neural Network-Powered Song Recommendation System Based on Song Topic",
    description="Pick a topic that matches your mood or preference, and get some song suggestions!",
    allow_flagging="never"   
)

iface.launch(share=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  topic_df['lyric_tokens'] = topic_df['lyrics'].apply(preprocess)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  topic_df['lyric_tokens'] = topic_df['lyric_tokens'].apply(lambda tokens: ' '.join(tokens))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  topic_df['topic_labels'] = label_encoder.fit_trans

* Running on local URL:  http://127.0.0.1:7873
* Running on public URL: https://34adb08df779172564.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [17]:
X.shape

(28372, 5000)