# Music Recommendation System
## Dechathon Niamsa-ard

1. Collecting Data from Spotify
2. Data Preprocessing 

In [16]:
# Import all necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.manifold import TSNE

import warnings
warnings.filterwarnings('ignore')

In [17]:
# Load the playlist export into a DataFrame and preview its first rows
tracks = pd.read_csv(filepath_or_buffer='info_playlist.csv')
tracks.head(n=5)

Unnamed: 0.1,Unnamed: 0,name,album,artist,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,0,ช่วงเวลา,I'm 20,Zweed n' Roll,2018-05-28,317841,56,0.387,0.441,0.489,0.000483,0.101,-7.108,0.0274,169.965,3
1,1,ไม่รู้ทำไม,PAR-K,Whal & Dolph,2019-02-14,289750,50,0.0504,0.622,0.679,0.000134,0.273,-6.537,0.0273,110.03,4
2,2,หัวใจเจ้าเอย​ (Not Again),หัวใจเจ้าเอย​ (Not Again),Alyn Wee,2021-07-02,238116,60,0.468,0.542,0.434,6.1e-05,0.0885,-9.396,0.0403,139.87,3
3,3,ลาก่อน,พบพา...ลาก่อน,YourMOOD,2023-11-03,188500,48,0.0175,0.563,0.852,0.00135,0.108,-7.114,0.0319,135.012,4
4,4,ได้ไหม,Daydream Blue,fluffypak,2019-12-25,312812,47,0.188,0.501,0.546,5e-06,0.141,-7.969,0.0306,85.892,4


3. Data Exploration (Exploratory Data Analysis)

In [18]:
# Count missing values in each column
tracks.isna().sum()

Unnamed: 0          0
name                0
album               0
artist              0
release_date        0
length              0
popularity          0
acousticness        0
danceability        0
energy              0
instrumentalness    0
liveness            0
loudness            0
speechiness         0
tempo               0
time_signature      0
dtype: int64

In [19]:
# NOTE(review): `plotly_express` is the legacy standalone package; current
# plotly releases expose the same API as `plotly.express` -- consider switching
# the import (and moving it to the import cell at the top of the notebook).
import plotly_express as px

# Tempo vs. danceability, with marker colour and size both driven by popularity.
# Columns are referenced by name because the frame is already passed as the
# first argument; plotly then labels the axes, legend and hover text from the
# column names instead of receiving detached Series objects.
# A track with popularity 0 gets a zero-size marker.
sctplot = px.scatter(
    tracks,
    x='tempo',
    y='danceability',
    color='popularity',
    size='popularity',
    title='Scatter Plot of Popular Songs using Tempo against Danceability',
)

sctplot.show()

4. Building the model

In [20]:
%%capture
song_vectorizer = CountVectorizer()
song_vectorizer.fit(tracks['name'])

In [21]:
# Reorder the catalogue so the most popular tracks come first, then show it
tracks = tracks.sort_values('popularity', ascending=False)
tracks

Unnamed: 0.1,Unnamed: 0,name,album,artist,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
48,48,ปล่อยดาว,ปล่อยดาว,YEW,2023-08-31,253412,65,0.035600,0.315,0.747,0.000169,0.1730,-6.022,0.0434,188.025,3
9,9,จากตรงนี้ที่(เคย)สวยงาม [La La Bye],จากตรงนี้ที่(เคย)สวยงาม [La La Bye],AYLA's,2023-01-20,288250,64,0.470000,0.605,0.452,0.000003,0.1240,-8.951,0.0264,140.027,4
93,93,อาจเป็นเพราะฉันเอง (me.),อาจเป็นเพราะฉันเอง (me.),PURPEECH,2023-04-05,283079,64,0.299000,0.571,0.562,0.000745,0.0668,-8.752,0.0312,78.059,4
45,45,รสหวาน,ท้องฟ้าจำลอง,FREEHAND,2022-01-12,293784,63,0.000924,0.374,0.569,0.000025,0.0967,-7.456,0.0266,167.957,4
13,13,เพลงรักในวันลา,เพลงรักในวันลา,Lower Mansion,2022-11-08,268941,63,0.522000,0.453,0.434,0.000003,0.5310,-6.854,0.0260,170.022,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,ปลอบ,Moving and Cut,Moving and Cut,2016-09-02,259083,26,0.171000,0.633,0.783,0.000015,0.1090,-5.576,0.0272,89.015,4
47,47,ย้อน,WILLISHMARA (วิลิศมาหรา),Whal & Dolph,2021-03-31,368250,22,0.324000,0.455,0.351,0.704000,0.3300,-8.523,0.0278,64.985,4
32,32,กลับไปที่เก่า (Revoke),Spirit,Singular,2012-06-20,201946,20,0.613000,0.492,0.468,0.000029,0.1690,-10.122,0.0310,156.053,4
60,60,Unfriend,Social Notwork,Helmetheads,2017-11-25,266528,0,0.054200,0.331,0.532,0.000004,0.1150,-6.507,0.0328,160.003,4


In [22]:
# Get similarities
def get_similarities(song_name, data, feature_columns=None):
    """Score every row of ``data`` against the song called ``song_name``.

    The score of a row is the sum of two cosine similarities: one between the
    bag-of-words encodings of the ``artist`` column (produced by the
    module-level ``song_vectorizer``) and one between the numeric feature
    vectors of the row and of the reference song.

    Parameters
    ----------
    song_name : str
        Value looked up in ``data['name']``. If several rows share this name,
        the first one is used as the reference song.
    data : pandas.DataFrame
        Frame with ``name`` and ``artist`` columns plus numeric feature columns.
    feature_columns : list of str, optional
        Numeric columns to compare. By default every numeric column is used
        except ``similarity_factor`` (a score written by an earlier
        recommendation run) and leftover CSV index columns whose name starts
        with ``Unnamed``, since neither describes the song itself.

    Returns
    -------
    list of float
        One score per row of ``data``, in the frame's current row order.

    Raises
    ------
    ValueError
        If no row of ``data`` is named ``song_name``.
    """
    matches = data[data['name'] == song_name]
    if matches.empty:
        # Fail with a readable message instead of an empty-array error from sklearn.
        raise ValueError(f"No song named {song_name!r} in data")
    reference = matches.iloc[[0]]  # double brackets keep a one-row DataFrame

    if feature_columns is None:
        feature_columns = [
            col for col in data.select_dtypes(include=np.number).columns
            if col != 'similarity_factor' and not str(col).startswith('Unnamed')
        ]

    # Encode and compare the whole frame in one call per similarity. Each row is
    # scored from its own values, so rows that share a title are no longer all
    # scored as copies of the first row with that title.
    text_sim = cosine_similarity(
        song_vectorizer.transform(data['artist']),
        song_vectorizer.transform(reference['artist']),
    )[:, 0]

    # NOTE(review): features are compared on their raw scales, so a column with
    # very large values (e.g. `length`, in the hundreds of thousands) dominates
    # this term -- consider scaling the features before comparing.
    num_sim = cosine_similarity(
        data[feature_columns].to_numpy(),
        reference[feature_columns].to_numpy(),
    )[:, 0]

    return (text_sim + num_sim).tolist()

def recommend_songs(song_name, data=None, n_recommendations=7):
    """Display the songs in ``data`` most similar to ``song_name``.

    Rows are ranked by the score from ``get_similarities`` (ties broken by
    ``popularity``, both descending) and the ``name`` / ``artist`` of the top
    ``n_recommendations`` rows are displayed. The reference song itself is
    left out of the ranking. ``data`` is not modified.

    Parameters
    ----------
    song_name : str
        Title to look up in ``data['name']``.
    data : pandas.DataFrame, optional
        Catalogue to search. Defaults to the notebook-level ``tracks`` frame,
        resolved when the function is called.
    n_recommendations : int, optional
        Number of songs to show (default 7).

    Returns
    -------
    None
        Results are displayed / printed. If the title is unknown, a message
        and up to ``n_recommendations`` random titles are printed instead.
    """
    if data is None:
        # Looked up at call time so a re-loaded `tracks` is picked up.
        data = tracks

    # Check the frame that was actually passed in, not the global one.
    is_query = (data['name'] == song_name).to_numpy()
    if not is_query.any():
        print("No such song found")

        # Cap the sample size so small frames do not make sample() raise.
        for song in data.sample(n=min(n_recommendations, len(data)))['name'].values:
            print(song)

        return

    # assign() returns a new frame, so the caller's frame keeps its columns and
    # row order and repeated calls give the same result.
    scored = data.assign(similarity_factor=get_similarities(song_name, data))

    # Drop the reference row by position (the first row with this title, which
    # is the one get_similarities compares against) rather than assuming it
    # ends up ranked first.
    keep = np.ones(len(scored), dtype=bool)
    keep[np.argmax(is_query)] = False

    ranked = scored[keep].sort_values(
        by=['similarity_factor', 'popularity'], ascending=[False, False]
    )

    display(ranked[['name', 'artist']].head(n_recommendations))



In [24]:
# Example: show recommendations for one song from the playlist
recommend_songs(song_name='ปล่อยดาว')

Unnamed: 0,name,artist
54,กอดความเจ็บช้ำ ( Carry ),Safeplanet
44,ที่ผ่านมา,YERM
42,วันที่เธอไม่อยู่ (Farewell),tinn
53,บ่า,Moving and Cut
29,ถ้าต้องเสียเธอไป,lostbeans
40,ดอกไม้ไฟ,Mirrr
49,ความน่าจะเป็นเท่ากับสูญ (0%),AYLA's
