In [39]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


In [40]:
# Load Dataset
df = pd.read_csv("MusicDataset.csv")
print("Dataset Loaded")
print(df.head())

Dataset Loaded
          Song_Title       Artist       Genre  Release_Year  Duration_Minutes  \
0              Faded  Alan Walker  Electronic          2024              4.02   
1    Blinding Lights   The Weeknd   Synth-pop          2018              4.45   
2              Faded  Alan Walker  Electronic          2024              4.86   
3              Faded  Alan Walker  Electronic          2012              4.92   
4  Bohemian Rhapsody        Queen        Rock          2023              3.90   

  Listened_Date     Platform  
0    2024-02-10  Apple Music  
1    2024-05-30     Zing MP3  
2    2024-05-07     Zing MP3  
3    2024-03-19      YouTube  
4    2024-01-16  Apple Music  


In [41]:
# Remove Duplicates
df_unique = df[['Song_Title','Artist','Genre','Release_Year','Duration_Minutes']].drop_duplicates()


In [42]:
#Feature engineering
listen_count = df.groupby('Song_Title').size().reset_index(name='Listen_Count')
df = df.merge(listen_count, on='Song_Title')

In [43]:
# Feature Encoding (Genre + Artist)
df_encoded = pd.get_dummies(df_unique, columns=['Genre','Artist'])

In [44]:
#Features scaling

features = df_encoded.drop('Song_Title', axis=1)

scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

In [45]:
#Model Training
#Compute Cosine Similarity
similarity_matrix = cosine_similarity(scaled_features)


In [46]:
#Recommendation Function

def recommend_song(song_name, top_k=3):

    if song_name not in df_unique['Song_Title'].values:
        print("Song not found!")
        return

    idx = df_unique[df_unique['Song_Title'] == song_name].index[0]

    similarity_scores = list(enumerate(similarity_matrix[idx]))

    similarity_scores = sorted(
        similarity_scores,
        key=lambda x: x[1],
        reverse=True
    )

    similarity_scores = similarity_scores[1:top_k+1]

    print(f"\nTop {top_k} Recommendations Based On '{song_name}':\n")

    for i in similarity_scores:
        print(df_unique.iloc[i[0]]['Song_Title'],
              "-",
              df_unique.iloc[i[0]]['Artist'])



In [47]:
#Test Recommendation
recommend_song("Faded", top_k=3)


Top 3 Recommendations Based On 'Faded':

Faded - Alan Walker
Faded - Alan Walker
Faded - Alan Walker
