## MUSIC RECOMMENDATION SYSTEM
(by:- Vineet Singh Negi)

Data features are gathered from the Spotify API (here)[https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features].

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.manifold import TSNE

import warnings
warnings.filterwarnings('ignore')

In [5]:
##==================================================================
## Reading the file that was extrated by 'playlist_analysis.ipynb'
##==================================================================

tracks = pd.read_csv("playlist_features.csv")
tracks.head(3)

Unnamed: 0.1,Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
0,0,"What Jhumka ? (From ""Rocky Aur Rani Kii Prem K...","What Jhumka ? (From ""Rocky Aur Rani Kii Prem K...",Various Artists,2023-07-12,213611,64,0.235,0.821,0.823,0.0,0.207,-5.455,0.1,107.953,4
1,1,"Zinda Banda (From ""Jawan"")","Zinda Banda (From ""Jawan"")",Anirudh Ravichander,2023-07-31,264000,72,0.00785,0.613,0.881,3e-06,0.155,-5.974,0.249,149.978,4
2,2,Mahiye Jinna Sohna,Mahiye Jinna Sohna,Darshan Raval,2023-06-22,181250,86,0.736,0.567,0.54,3.2e-05,0.164,-5.754,0.0406,92.027,4


In [6]:
##==========================================
## Checking for null values in the dataset
##==========================================

tracks.isnull().sum()

Unnamed: 0        0
Name              0
Album             0
Artist            0
Release_Date      0
Length            0
Popularity        0
Acousticness      0
Danceability      0
Energy            0
Instrumentness    0
Liveness          0
Loudness          0
Speechness        0
Tempo             0
Time_Signature    0
dtype: int64

In [7]:
##=========================================
## Using CountVectorizer to build a model
##=========================================

song_vectorizer = CountVectorizer()
song_vectorizer.fit(tracks["Name"])

# Sorting by Popularity
tracks = tracks.sort_values(by = ["Popularity"], ascending = False)
tracks.head()

Unnamed: 0.1,Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
2,2,Mahiye Jinna Sohna,Mahiye Jinna Sohna,Darshan Raval,2023-06-22,181250,86,0.736,0.567,0.54,3.2e-05,0.164,-5.754,0.0406,92.027,4
19,19,Maan Meri Jaan,Champagne Talk,King,2022-10-12,194653,85,0.354,0.698,0.505,0.0,0.0995,-8.242,0.0356,95.868,4
6,6,Malang Sajna,Malang Sajna,Sachet Tandon,2022-12-19,161041,84,0.232,0.739,0.716,0.0,0.33,-4.964,0.037,113.943,4
15,15,"O Bedardeya (From ""Tu Jhoothi Main Makkaar"")","O Bedardeya (From ""Tu Jhoothi Main Makkaar"")",Pritam,2023-03-04,313051,83,0.794,0.582,0.418,0.0,0.109,-8.701,0.0288,112.909,4
27,27,Kahani Suno 2.0,Kahani Suno 2.0,Kaifi Khalil,2022-05-31,173637,82,0.909,0.577,0.379,0.000586,0.0986,-13.235,0.0374,139.585,3


In [8]:
##=======================================================
## Creating similarity function using Cosine Similarity
##=======================================================

def get_similarities(song_name, data):

    text_array1 = song_vectorizer.transform(data[data["Name"] == song_name]["Artist"]).toarray()
    num_array1 = data[data["Name"] == song_name].select_dtypes(include = np.number).to_numpy()

    sim = []
    for idx, row in data.iterrows():
        name = row["Name"]

        text_array2 = song_vectorizer.transform(data[data["Name"] == name]["Artist"]).toarray()
        num_array2 = data[data["Name"] == name].select_dtypes(include = np.number).to_numpy()

        text_sim = cosine_similarity(text_array1, text_array2)[0][0]
        num_sim = cosine_similarity(num_array1,num_array2)[0][0]
        sim.append(text_sim + num_sim)
    
    return sim

In [9]:
def recommend_songs(song_name, data = tracks):

    if tracks[tracks["Name"] == song_name].shape[0] == 0:
        print ("This song is either not so poplular have entered an invalid name not contained in this playlist")
        
        for song in data.sample(n=7)["Name"].values:
            print(song)

        return
    
    data["similarity_factor"] = get_similarities(song_name, data)
    data.sort_values(by = ["similarity_factor", "Popularity"],
                     ascending = [False,False], inplace = True)
    
    display(data[["Name", "Artist"]][1:7])

In [12]:
recommend_songs ('Kesariya (From "Brahmastra")')

Unnamed: 0,Name,Artist
36,Ranjha,Various Artists
40,"Raataan Lambiyan (From ""Shershaah"")",Tanishk Bagchi
22,Desperado,Raghav
47,Tu Aake Dekhle,King
15,"O Bedardeya (From ""Tu Jhoothi Main Makkaar"")",Pritam
21,Tere Pyaar Mein,Pritam
