In [1]:
# import requried dependencies
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# read the data
df = pd.read_csv("song-dataset.csv", low_memory=True)[:6000]
# df = pd.read_csv("song-dataset.csv", low_memory=True)
df

Unnamed: 0,Position,Artist Name,Song Name,Days,Top 10 (xTimes),Peak Position,Peak Position (xTimes),Peak Streams,Total Streams
0,1,Post Malone,Sunflower SpiderMan: Into the SpiderVerse,1506,302.0,1,(x29),2118242,883369738
1,2,Juice WRLD,Lucid Dreams,1673,178.0,1,(x20),2127668,864832399
2,3,Lil Uzi Vert,XO TOUR Llif3,1853,212.0,1,(x4),1660502,781153024
3,4,J. Cole,No Role Modelz,2547,6.0,7,0,659366,734857487
4,5,Post Malone,rockstar,1223,186.0,1,(x124),2905678,718865961
...,...,...,...,...,...,...,...,...,...
5995,5996,XXXTENTACION,Gassed Up!,4,0.0,47,0,519100,1357202
5996,5997,Ty Dolla $ign,Don't Judge Me,6,0.0,78,0,330709,1357193
5997,5998,BTS,Go Go,7,0.0,146,0,222453,1356962
5998,5999,Nina Nesbitt,"Oh Holy Night Recorded at Metropolis Studios,...",4,0.0,133,0,402606,1355522


In [2]:

# remove duplicates
df = df.drop_duplicates(subset="Song Name")
df.shape

(5582, 9)

In [3]:
# drop Null values
df = df.dropna(axis=0)

# Drop the non-required columns
df = df.drop(df.columns[3:], axis=1)
df.head(3)

Unnamed: 0,Position,Artist Name,Song Name
0,1,Post Malone,Sunflower SpiderMan: Into the SpiderVerse
1,2,Juice WRLD,Lucid Dreams
2,3,Lil Uzi Vert,XO TOUR Llif3


In [4]:
# Removing space from "Artist Name" column
df["Artist Name"] = df["Artist Name"].str.replace(" ", "")

# Combine all columns and assgin as new column
df["data"] = df.apply(lambda value: " ".join(value.astype("str")), axis=1)
df.head(3)

Unnamed: 0,Position,Artist Name,Song Name,data
0,1,PostMalone,Sunflower SpiderMan: Into the SpiderVerse,1 PostMalone Sunflower SpiderMan: Into the Sp...
1,2,JuiceWRLD,Lucid Dreams,2 JuiceWRLD Lucid Dreams
2,3,LilUziVert,XO TOUR Llif3,3 LilUziVert XO TOUR Llif3


In [5]:
# models
vectorizer = CountVectorizer()
vectorized = vectorizer.fit_transform(df["data"])
similarities = cosine_similarity(vectorized)


In [6]:
# Assgin the new dataframe with `similarities` values
df_tmp = pd.DataFrame(similarities, columns=df["Song Name"], index=df["Song Name"]).reset_index()
df_tmp
# df_tmp.nlargest(11, 'rockstar')


Song Name,Song Name.1,Sunflower SpiderMan: Into the SpiderVerse,Lucid Dreams,XO TOUR Llif3,No Role Modelz,rockstar,goosebumps,Blinding Lights,Jocelyn Flores,SAD!,...,PUNTO 40,SI SUPIERAS,Burn the Witch,Once In A Lifetime,Know You Know (ft. 2 Chainz),Gassed Up!,Don't Judge Me,Go Go,"Oh Holy Night Recorded at Metropolis Studios, London",Like I Loved You
0,Sunflower SpiderMan: Into the SpiderVerse,1.000000,0.0,0.0,0.0,0.288675,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.182574,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
1,Lucid Dreams,0.000000,1.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,XO TOUR Llif3,0.000000,0.0,1.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,No Role Modelz,0.000000,0.0,0.0,1.0,0.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
4,rockstar,0.288675,0.0,0.0,0.0,1.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5576,Gassed Up!,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.288675,0.353553,...,0.0,0.0,0.000000,0.0,0.000000,1.0,0.0,0.0,0.0,0.0
5577,Don't Judge Me,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,1.0,0.0,0.0,0.0
5578,Go Go,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,1.0,0.0,0.0
5579,"Oh Holy Night Recorded at Metropolis Studios,...",0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,...,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,1.0,0.0


In [8]:
df_tmp.to_csv('clean_songs_similarities_score.csv',index=False)

In [7]:

true = True
while true:
    print("The Top 10 Song Recommendation System")
    print("-------------------------------------")
    print("This will generate the 10 songs from the database thoese are similar to the song you entered.")

    # Asking the user for a song, it will loop until the song name is in our database.
    while True:
        input_song = input("Please enter the name of song: ")

        if input_song in df_tmp.columns:
            recommendation = df_tmp.nlargest(11, input_song)["Song Name"]
            break
        
        else:
            print("Sorry, there is no song name in our database. Please try another one.")
    
    print("You should check out these songs: \n")
    for song in recommendation.values[1:]:
        print(song)

    print("\n")
    # Asking the user for the next command, it will loop until the right command.
    while True:
        next_command = input("Do you want to generate again for the next song? [yes, no] ")

        if next_command == "yes":
            break

        elif next_command == "no":
            # `true` will be false. It will stop the whole script
            true = False
            break

        else:
            print("Please type 'yes' or 'no'")


The Top 10 Song Recommendation System
-------------------------------------
This will generate the 10 songs from the database thoese are similar to the song you entered.


In [27]:
print("The Top 10 Song Recommendation System")
print("-------------------------------------")
print("This will generate 10 songs from the database that are similar to the song you entered.")

# Continue until the user decides to stop
while True:
    # Asking the user for a song, it will loop until the song name is in our database.
    while True:
        input_song = input("Please enter the name of a song: ")

        if input_song in df_tmp.columns:
            recommendation = df_tmp.nlargest(11, input_song)["Song Name"]
            break
        else:
            print("Sorry, there is no song name in our database. Please try another one.")

    print("You should check out these songs:\n")
    for song in recommendation.values[1:]:
        print(song)

    next_command = input("\nDo you want to generate again for the next song? [yes, no] ")
    
    if next_command != "yes":
        break


The Top 10 Song Recommendation System
-------------------------------------
This will generate 10 songs from the database that are similar to the song you entered.
You should check out these songs:

Nonstop
Money In The Grave (Drake ft. Rick Ross)
Passionfruit
Jumpman
Controlla
Portland
I'm Upset
Energy
Gyalchester
Sneakin’
