In [2]:
import pandas as pd

df = pd.read_csv('ultimate.csv')

print(df.head())


  Artist_x                        Title  Danceability  Energy  Acousticness  \
0     Joji                          Run         0.427   0.715       0.04690   
1   Eminem  Godzilla (feat. Juice WRLD)         0.808   0.745       0.14500   
2   Eminem                Lose Yourself         0.692   0.744       0.00868   
3   Eminem                  Mockingbird         0.637   0.678       0.20900   
4    Logic      Homicide (feat. Eminem)         0.694   0.759       0.13700   

   Instrumentalness    Tempo Artist_y        genre  Artist  \
0          0.000231  157.905     Joji  Alternative    Joji   
1          0.000000  165.995   Eminem      Hip-Hop  Eminem   
2          0.000423  171.374   Eminem  Alternative  Eminem   
3          0.000000   84.039   Eminem      Hip-Hop  Eminem   
4          0.000000  140.055    Logic      Hip-Hop   Logic   

                                              Lyrics  Sentiment_Positivity  \
0  Paroles de la chanson Run par Joji\nI fell for...                 0.262

In [6]:
import librosa
import numpy as np

def extract_audio_features(row):
    file_path = row['/Users/yash/Documents/GitHub/musik-ml/1.mp3']
    y, sr = librosa.load(file_path, duration=30)  
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
    features = np.hstack([mfccs, chroma, tempo, rms])
    return features

audio_features = df.apply(extract_audio_features, axis=1)

audio_features_df = pd.DataFrame(audio_features.tolist())
audio_features_df.columns = [f'feature_{i}' for i in range(audio_features_df.shape[1])]

df = pd.concat([df, audio_features_df], axis=1)


KeyError: '/Users/yash/Documents/GitHub/musik-ml/1.mp3'

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def preprocess_lyrics(lyrics):
    tokens = nltk.word_tokenize(lyrics.lower())
    words = [word for word in tokens if word.isalpha()]
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]

    lemmatizer = WordNetLemmatizer()
    lemmatized = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(lemmatized)

# Apply preprocessing
df['cleaned_lyrics'] = df['Lyrics'].apply(preprocess_lyrics)


In [None]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf

model_name = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

def sentiment_analysis(text):
    inputs = tokenizer.encode_plus(
        text,
        return_tensors='tf',
        truncation=True,
        padding='max_length',
        max_length=512
    )
    outputs = model(inputs)
    probs = tf.nn.softmax(outputs.logits, axis=-1)

    sentiment_score = tf.reduce_sum(probs * tf.constant([1, 2, 3, 4, 5], dtype=tf.float32), axis=-1)
    return sentiment_score.numpy()[0]

# Apply sentiment analysis
df['sentiment_score'] = df['cleaned_lyrics'].apply(sentiment_analysis)


In [None]:
audio_feature_cols = [col for col in df.columns if 'feature_' in col]

df['combined_features'] = df[audio_feature_cols].values.tolist() + df['sentiment_score'].apply(lambda x: [x])

X = np.array(df['combined_features'].tolist())


In [None]:
from tensorflow.keras import layers, models

def build_audio_cnn(input_shape):
    model = models.Sequential()
    model.add(layers.Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(layers.Conv1D(128, kernel_size=3, activation='relu'))
    model.add(layers.GlobalMaxPooling1D())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))  # Assuming 10 emotional categories
    return model

input_shape = (X.shape[1], 1)
audio_model = build_audio_cnn(input_shape)
audio_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df['cleaned_lyrics'])
sequences = tokenizer.texts_to_sequences(df['cleaned_lyrics'])
max_sequence_length = max(len(seq) for seq in sequences)
word_index = tokenizer.word_index

X_lyrics = pad_sequences(sequences, maxlen=max_sequence_length)

def build_lyrics_lstm(vocab_size, embedding_dim, input_length):
    model = models.Sequential()
    model.add(layers.Embedding(vocab_size, embedding_dim, input_length=input_length))
    model.add(layers.LSTM(128, return_sequences=True))
    model.add(layers.LSTM(64))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))  
    return model

vocab_size = min(len(word_index) + 1, 5000)
embedding_dim = 100
input_length = max_sequence_length
lyrics_model = build_lyrics_lstm(vocab_size, embedding_dim, input_length)
lyrics_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
from tensorflow.keras import Input, Model

audio_input = Input(shape=(X.shape[1], 1), name='audio_input')
audio_branch = layers.Conv1D(64, kernel_size=3, activation='relu')(audio_input)
audio_branch = layers.MaxPooling1D(pool_size=2)(audio_branch)
audio_branch = layers.Conv1D(128, kernel_size=3, activation='relu')(audio_branch)
audio_branch = layers.GlobalMaxPooling1D()(audio_branch)

lyrics_input = Input(shape=(input_length,), name='lyrics_input')
lyrics_branch = layers.Embedding(vocab_size, embedding_dim, input_length=input_length)(lyrics_input)
lyrics_branch = layers.LSTM(128, return_sequences=True)(lyrics_branch)
lyrics_branch = layers.LSTM(64)(lyrics_branch)

combined = layers.concatenate([audio_branch, lyrics_branch])
combined = layers.Dense(128, activation='relu')(combined)
output = layers.Dense(10, activation='softmax')(combined)

combined_model = Model(inputs=[audio_input, lyrics_input], outputs=output)
combined_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def generate_moodboard(predictions, song_title):
    emotions = ['Happy', 'Sad', 'Angry', 'Relaxed', 'Energetic', 'Calm', 'Hopeful', 'Anxious', 'Content', 'Depressed']
    plt.figure(figsize=(10, 6))
    sns.barplot(x=emotions, y=predictions)
    plt.title(f'Moodboard for "{song_title}"')
    plt.xticks(rotation=45)
    plt.ylabel('Probability')
    plt.tight_layout()
    plt.show()


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_songs(song_index, df, num_recommendations=5):
    features = df[audio_feature_cols + ['sentiment_score']].values
    similarities = cosine_similarity([features[song_index]], features)[0]
    similar_indices = similarities.argsort()[::-1][1:num_recommendations+1]
    return df.iloc[similar_indices][['Artist', 'Title']]

song_index = 0  
recommendations = recommend_songs(song_index, df)
print(recommendations)


In [None]:
def suggest_therapy(emotion_label):
    therapy_dict = {
        'Sad': 'Mindfulness meditation',
        'Anxious': 'Deep breathing exercises',
        'Depressed': 'Consult a mental health professional',
        'Angry': 'Physical activity like jogging or boxing',
        'Relaxed': 'Maintain your routine',
        'Happy': 'Share your happiness with friends',
    }
    return therapy_dict.get(emotion_label, 'No suggestion available')

emotion_label = 'Sad' 
therapy = suggest_therapy(emotion_label)
print(f'Therapy suggestion: {therapy}')


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

labels = df['Emotion']  # Replace with your emotion labels
le = LabelEncoder()
y = le.fit_transform(labels)
y_categorical = to_categorical(y)

X_train_audio, X_test_audio, X_train_lyrics, X_test_lyrics, y_train, y_test = train_test_split(
    X.reshape(-1, X.shape[1], 1),
    X_lyrics,
    y_categorical,
    test_size=0.2,
    random_state=42
)

history = combined_model.fit(
    {'audio_input': X_train_audio, 'lyrics_input': X_train_lyrics},
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(
        {'audio_input': X_test_audio, 'lyrics_input': X_test_lyrics},
        y_test
    )
)

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Acc')
plt.plot(history.history['val_accuracy'], label='Val Acc')
plt.title('Model Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Model Loss')
plt.legend()

plt.show()
