In [50]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

In [51]:
data = pd.read_csv("dataset.csv")

In [52]:
data.head()

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,...,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,...,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,...,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,...,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,...,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [53]:
nonNumeric = ['song_name', 'artist_name', 'track_id']
dataClean = data.drop(columns=nonNumeric, errors='ignore')
dataClean = dataClean.select_dtypes(include=[np.number])

In [54]:
scaler = StandardScaler()
dataStandardized = scaler.fit_transform(dataClean)

In [55]:
# knn
def knnRecommend(userSong, data, k=5):
    similarities = cosine_similarity(data[userSong].reshape(1,-1), data).flatten()
    recommended = np.argsort(-similarities)[1:k+1]
    return recommended

In [56]:
# Naive Bayes
class NaiveBayes:
    def __init__(self):
        self.classes = None
        self.means = {}
        self.variances = {}
        self.priors = {}

    def fit(self, X, y):
        self.classes = np.unique(y)
        for cls in self.classes:
            X_c = X[y == cls]
            self.means[cls] = np.mean(X_c, axis=0)
            self.variances[cls] = np.var(X_c, axis=0)
            self.priors[cls] = X_c.shape[0] / X.shape[0]

    def calcLikelihood(self, cls, x):
        mean = self.means[cls]
        variance = self.variances[cls]
        numerator = np.exp(-((x - mean) ** 2) / (2 * variance))
        denominator = np.sqrt(2 * np.pi * variance)
        return numerator / denominator

    def calcPosterior(self, x):
        posteriors = {}
        for cls in self.classes:
            prior = self.priors[cls]
            likelihood = np.prod(self.calcLikelihood(cls, x))
            posteriors[cls] = prior * likelihood
        return posteriors

    def predict(self, X):
        predictions = []
        for x in X:
            posteriors = self.calcPosterior(x)
            predictions.append(max(posteriors, key=posteriors.get))
        return np.array(predictions)

In [78]:
# Train NB
kmeans = KMeans(n_clusters=5, random_state=42)
labels = kmeans.fit_predict(dataStandardized)
nb = NaiveBayes()
nb.fit(dataStandardized, labels)

In [61]:
# Recommend with NB
def recommendNB(userSongID, X, labels, model, k=5):
    userLabel = model.predict(X[userSongID:userSongID+1])[0]
    recommendations = np.where(labels == userLabel)[0]
    return recommendations[:k]

recommendations = recommendNB(userSongID, dataStandardized, labels, nb, k=5)
print("Recommended Songs", recommendations)

Recommended Songs [ 5  8 14 18 32]


In [84]:
# Main program
def main():
    print("Welcome to the Song Recommendation System!")
    print("Please select 5 songs by their names:")
    print(data[['track_name', 'artists']].head(10))  # Show a few songs for reference

    data['track_name'] = data['track_name'].fillna('')
    data['artists'] = data['artists'].fillna('')
    userSongs = []
    for i in range(5):
        songName = input(f"Enter the name of song {i+1}: ").strip()
        songRow = data[data['track_name'].str.contains(songName, case=False)]
        if songRow.empty:
            print("Song not found. Please try again.")
            continue
        userSongs.append(songRow.index[0])

    print("\nRecommendations:")
    for songIndex in userSongs:
        print(f"\nFor '{data.loc[songIndex, 'track_name']}' by {data.loc[songIndex, 'artists']}:")
        print("Using KNN:")
        knnRecs = knnRecommend(songIndex, dataStandardized)
        for rec in knnRecs:
            print(f"- {data.loc[rec, 'track_name']} by {data.loc[rec, 'artists']}")

        print("\nUsing Naive Bayes:")
        nbRecs = recommendNB(songIndex, dataStandardized, labels, nb)
        for rec in nbRecs:
            print(f"- {data.loc[rec, 'track_name']} by {data.loc[rec, 'artists']}")

if __name__ == "__main__":
    main()

Welcome to the Song Recommendation System!
Please select 5 songs by their names:
                   track_name                               artists
0                      Comedy                           Gen Hoshino
1            Ghost - Acoustic                          Ben Woodward
2              To Begin Again                Ingrid Michaelson;ZAYN
3  Can't Help Falling In Love                          Kina Grannis
4                     Hold On                      Chord Overstreet
5        Days I Will Remember                          Tyrone Wells
6               Say Something  A Great Big World;Christina Aguilera
7                   I'm Yours                            Jason Mraz
8                       Lucky             Jason Mraz;Colbie Caillat
9                      Hunger                        Ross Copperman


Enter the name of song 1:  comedy
Enter the name of song 2:  to begin again
Enter the name of song 3:  hold on
Enter the name of song 4:  say something
Enter the name of song 5:  lucky



Recommendations:

For 'Comedy' by Gen Hoshino:
Using KNN:
- Go Crazy by Chris Brown;Young Thug
- Octopus's Garden - Remastered 2009 by The Beatles
- Blessed (feat. Damian Marley) by Wizkid;Damian Marley
- Meleğim by Soolking;Dadju
- Used to Me by Luke Chiang

Using Naive Bayes:
- Comedy by Gen Hoshino
- Unlonely by Jason Mraz
- If It Kills Me by Jason Mraz
- Pieces by Andrew Belle
- Sky's Still Blue by Andrew Belle

For 'To Begin Again' by Ingrid Michaelson;ZAYN:
Using KNN:
- Reckless by Lund
- i loved you first by joan
- Superman (It's Not Easy) by Five For Fighting
- One of Us - From "The Lion King II: Simba's Pride"/Soundtrack Version by Chorus - The Lion King 2: Simba's Pride
- Reckless by Lund

Using Naive Bayes:
- Days I Will Remember by Tyrone Wells
- Lucky by Jason Mraz;Colbie Caillat
- Hold On - Remix by Chord Overstreet;Deepend
- Lucky by Jason Mraz;Colbie Caillat
- All I Want For Christmas Is A Real Good Tan by Chord Overstreet

For 'Hold On' by Chord Overstreet:
Using KNN:

In [57]:
userSongID = random.randint(0, dataStandardized.shape[0]-1)

In [58]:
# knn trial
k = 5
knnRecommendations = knnRecommend(userSongID, dataStandardized, k)
print("KNN Recommendations", knnRecommendations)

KNN Recommendations [98546 52645 93250 69475 69862]


In [59]:
# Train NB
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=5, random_state=42)
labels = kmeans.fit_predict(dataStandardized)
nb = NaiveBayes()
nb.fit(dataStandardized, labels)

In [60]:
# Predict NB
predictions = nb.predict(dataStandardized)
print("Predicted Labels:", predictions)

Predicted Labels: [4 1 1 ... 2 1 2]
