#### Music Match: Система рекомендации музыки по настроению

In [64]:
import pandas as pd
from transformers import pipeline
from sklearn.cluster import KMeans
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [65]:
data = pd.read_csv("C:/Users/admin/OneDrive/Рабочий стол/Song Dataset.csv")
data = data.drop('link', axis=1)

data.head(10)

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante","Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
5,ABBA,Burning My Bridges,"Well, you hoot and you holler and you make me ..."
6,ABBA,Cassandra,Down in the street they're all singing and sho...
7,ABBA,Chiquitita,"Chiquitita, tell me what's wrong \nYou're enc..."
8,ABBA,Crazy World,I was out with the morning sun \nCouldn't sle...
9,ABBA,Crying Over You,I'm waitin' for you baby \nI'm sitting all al...


In [66]:
new_data = data.sample(n=1000, random_state=42)
new_data

Unnamed: 0,artist,song,text
56679,Wishbone Ash,Right Or Wrong,Like to have you 'round \nWith all the lies t...
224,Aerosmith,This Little Light Of Mine,"This Little Light of Mine (Light of Mine), \n..."
32457,Fall Out Boy,"Dance, Dance",She says she's no good with words but I'm wors...
9167,Janis Joplin,Easy Rider,"Hey mama, mama, come a look at sister, \nShe'..."
13270,Moody Blues,Peak Hour,I see it all through my window it seems. \nNe...
...,...,...,...
20276,Used,Generation Throwaway,Whoa \nGeneration throwaway \nWhoa \nGenera...
21464,Who,Bucket T,"Bucket bucket T T, bucket T bucket T, \nBucke..."
51282,Santana,America,This is America \nPOD and Santana live in La ...
56824,X,Burning House Of Love,Drive by my house late at night \nYou can see...


**"bhadresh-savani/distilbert-base-uncased-emotion"** - Это модель на базе DistilBERT (облегчённая версия BERT — одной из самых популярных моделей для обработки текста), которую обучили распознавать эмоции в английских текстах. Она была создана на основе открытого датасета GoEmotions от Google, который содержит высказывания из интернета с пометками эмоций.


In [67]:
emotion_model = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=3, truncation=True)
HF_HUB_DISABLE_SYMLINKS_WARNING=1

Device set to use cpu


Она принимает на вход текст и выдаёт, какие эмоции в нём выражены, например:

In [68]:
emotion_model("I'm feeling really happy today!")

[[{'label': 'joy', 'score': 0.9990272521972656},
  {'label': 'love', 'score': 0.00028907699743285775},
  {'label': 'sadness', 'score': 0.0002408125001238659}]]

In [69]:
# Функция принимает текст, запускает модель определения эмоций, и возвращает словарь эмоций с их "оценками" от 0 до 1.

def get_emotions(text):
    try:
        result = emotion_model(text) 
        emotions = result[0] 
        return {item['label']: item['score'] for item in emotions}
    except:
        return {}

In [70]:
# преобразуем словарь эмоций в числовой вектор
emotion_list = ['joy','love', 'sadness', 'surprise', 'fear','anger', 'no_emotion' ]

def to_emotion_vector(emotion_dict):
    return [emotion_dict.get(e, 0.0) for e in emotion_list]

Мы используем две функции для анализа эмоций в текстах песен:

**get_emotions(text)** — извлекает эмоции из текста с помощью обученной модели.

**to_emotion_vector(emotion_dict)** — преобразует результат в числовой вектор фиксированной длины.

Так мы получаем эмоциональный профиль каждой песни:

In [None]:
new_data['emotion_dict'] = new_data['text'].apply(get_emotions)
new_data['emotion_vector'] = new_data['emotion_dict'].apply(to_emotion_vector)
new_data[['artist', 'song', 'emotion_dict', 'emotion_vector']]

Unnamed: 0,artist,song,emotion_dict,emotion_vector
56679,Wishbone Ash,Right Or Wrong,"{'anger': 0.8817856907844543, 'sadness': 0.109...","[0.0047196513041853905, 0.0, 0.109413847327232..."
224,Aerosmith,This Little Light Of Mine,"{'joy': 0.8830875754356384, 'anger': 0.0850654...","[0.8830875754356384, 0.0, 0.015944965183734894..."
32457,Fall Out Boy,"Dance, Dance","{'love': 0.8936543464660645, 'joy': 0.04676710...","[0.04676710069179535, 0.8936543464660645, 0.04..."
9167,Janis Joplin,Easy Rider,"{'anger': 0.9330032467842102, 'joy': 0.0258607...","[0.025860704481601715, 0.0, 0.0233910940587520..."
13270,Moody Blues,Peak Hour,"{'joy': 0.8706589341163635, 'anger': 0.1037658...","[0.8706589341163635, 0.0, 0.016657516360282898..."
...,...,...,...,...
20276,Used,Generation Throwaway,"{'joy': 0.9516360759735107, 'fear': 0.02675172...","[0.9516360759735107, 0.0, 0.0, 0.0, 0.02675172..."
21464,Who,Bucket T,"{'joy': 0.913567304611206, 'anger': 0.07027292...","[0.913567304611206, 0.0, 0.009503941982984543,..."
51282,Santana,America,"{'joy': 0.9988148212432861, 'love': 0.00043445...","[0.9988148212432861, 0.0004344580229371786, 0...."
56824,X,Burning House Of Love,"{'sadness': 0.6608747243881226, 'anger': 0.233...","[0.0, 0.0, 0.6608747243881226, 0.0, 0.09542193..."


### Кластеризация песен по их эмоциональным вектором с помощью алгоритма K-Means

K-Means разбивает все данные на K кластеров (в нашем случае на 6) так, чтобы объекты внутри каждой группы были максимально похожи друг на друга и отличались от объектов из других групп.

In [72]:
vectors = np.array(new_data['emotion_vector'].tolist())

kmeans = KMeans(n_clusters=6, random_state=42)
new_data['cluster'] = kmeans.fit_predict(vectors)
new_data

Unnamed: 0,artist,song,text,emotion_dict,emotion_vector,cluster
56679,Wishbone Ash,Right Or Wrong,Like to have you 'round \nWith all the lies t...,"{'anger': 0.8817856907844543, 'sadness': 0.109...","[0.0047196513041853905, 0.0, 0.109413847327232...",5
224,Aerosmith,This Little Light Of Mine,"This Little Light of Mine (Light of Mine), \n...","{'joy': 0.8830875754356384, 'anger': 0.0850654...","[0.8830875754356384, 0.0, 0.015944965183734894...",0
32457,Fall Out Boy,"Dance, Dance",She says she's no good with words but I'm wors...,"{'love': 0.8936543464660645, 'joy': 0.04676710...","[0.04676710069179535, 0.8936543464660645, 0.04...",3
9167,Janis Joplin,Easy Rider,"Hey mama, mama, come a look at sister, \nShe'...","{'anger': 0.9330032467842102, 'joy': 0.0258607...","[0.025860704481601715, 0.0, 0.0233910940587520...",5
13270,Moody Blues,Peak Hour,I see it all through my window it seems. \nNe...,"{'joy': 0.8706589341163635, 'anger': 0.1037658...","[0.8706589341163635, 0.0, 0.016657516360282898...",0
...,...,...,...,...,...,...
20276,Used,Generation Throwaway,Whoa \nGeneration throwaway \nWhoa \nGenera...,"{'joy': 0.9516360759735107, 'fear': 0.02675172...","[0.9516360759735107, 0.0, 0.0, 0.0, 0.02675172...",0
21464,Who,Bucket T,"Bucket bucket T T, bucket T bucket T, \nBucke...","{'joy': 0.913567304611206, 'anger': 0.07027292...","[0.913567304611206, 0.0, 0.009503941982984543,...",0
51282,Santana,America,This is America \nPOD and Santana live in La ...,"{'joy': 0.9988148212432861, 'love': 0.00043445...","[0.9988148212432861, 0.0004344580229371786, 0....",0
56824,X,Burning House Of Love,Drive by my house late at night \nYou can see...,"{'sadness': 0.6608747243881226, 'anger': 0.233...","[0.0, 0.0, 0.6608747243881226, 0.0, 0.09542193...",2


In [73]:
class MusicRecommender:
    def __init__(self, df, kmeans):
        self.df = df
        self.kmeans = kmeans
        self.vectors = np.array(df['emotion_vector'].tolist())
    
    def recommend(self, query_vector, top_n=5):
        # Определяем кластер запроса
        query_cluster = self.kmeans.predict([query_vector])[0]
        
        # Получаем песни из этого кластера
        cluster_songs = self.df[self.df['cluster'] == query_cluster]
        cluster_vectors = np.array(cluster_songs['emotion_vector'].tolist())
        
        # Вычисляем косинусную схожесть
        similarities = cosine_similarity([query_vector], cluster_vectors)[0]
        cluster_songs['similarity'] = similarities
        
        # Сортируем по схожести
        return cluster_songs.sort_values('similarity', ascending=False).head(top_n)

**model="cointegrated/rubert-tiny2-cedr-emotion-detection"**  — модель для анализа эмоций, в данном случае это модель, обученная для распознавания эмоций в русском языке.

In [74]:
classifier = pipeline("text-classification", 
                        model="cointegrated/rubert-tiny2-cedr-emotion-detection",
                        return_all_scores=True)

Device set to use cpu


In [75]:
def get_multiple_emotions(query, top_k=6):
    emotions = classifier(query)[0]
    return {e['label']: e['score'] for e in sorted(emotions, key=lambda x: -x['score'])[:top_k]}


mood = get_multiple_emotions("Афигенное настроение!")
mood

{'joy': 0.9306477904319763,
 'surprise': 0.09164230525493622,
 'anger': 0.026012258604168892,
 'fear': 0.015287277288734913,
 'no_emotion': 0.007913720794022083,
 'sadness': 0.006766302045434713}

In [76]:
to_emotion_vector(mood)

[0.9306477904319763,
 0.0,
 0.006766302045434713,
 0.09164230525493622,
 0.015287277288734913,
 0.026012258604168892,
 0.007913720794022083]

In [77]:
recommender = MusicRecommender(new_data, kmeans)

joy_query = to_emotion_vector(mood)
recommendations = recommender.recommend(joy_query)

# Вывод результатов
print(recommendations[['artist', 'song', 'similarity']])


              artist              song  similarity
39416      Kate Bush              Once    0.998612
38378     John Waite           Godhead    0.997051
20562   Van Morrison      Daring Night    0.996273
25580  Black Sabbath  Feels Good To Me    0.995814
17663        Rihanna  Music Of The Sun    0.995635


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cluster_songs['similarity'] = similarities
