<a href="https://colab.research.google.com/github/ayuri1512/Data-Science-Projects/blob/main/Music_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
import requests
import base64

In [36]:
from google.colab import userdata

# Read the Spotify app credentials from Colab's secret store and bind them to
# names; the values returned by userdata.get() must be kept, otherwise the
# f-string below fails with NameError on a fresh kernel.
CLIENT_ID = userdata.get('CLIENT_ID')
CLIENT_SECRET = userdata.get('CLIENT_SECRET')

# The token endpoint expects "client_id:client_secret" base64-encoded in a
# Basic Authorization header.
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

token_url = 'https://accounts.spotify.com/api/token'
toker_url = token_url  # original (misspelled) name kept so any later reference still resolves
headers = {
    'Authorization' : f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type' : 'client_credentials'
}

# A timeout keeps the cell from hanging indefinitely if the endpoint is unreachable.
response = requests.post(token_url, data = data, headers = headers, timeout = 30)

if response.status_code == 200:
  access_token = response.json()['access_token']
  print("Access token obtained successfully")
else:
  # Raise instead of calling exit(): the failure is then shown as a normal cell
  # error, and later cells cannot run with a missing access_token.
  raise RuntimeError(
      f"Error obtaining access token (HTTP {response.status_code}: {response.reason})"
  )

Access token obtained successfully


In [37]:
!pip install spotipy



In [38]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
  """Collect track metadata and audio features for a playlist into a DataFrame.

  Args:
    playlist_id: Spotify ID of the playlist to read.
    access_token: Spotify Web API access token passed to ``spotipy.Spotify``.

  Returns:
    pandas.DataFrame with one row per playlist track and the columns
    'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
    'Popularity', 'Release Date', 'Duration (ms)', 'Explicit',
    'External URLs' and the audio-feature columns 'Danceability' ... 'Tempo'.
    A value is None when the corresponding lookup failed or returned nothing.
    Playlist entries without a track object are skipped.

  Raises:
    Errors from the playlist request and from the audio-features request are
    not caught. Album and track lookups are best-effort: their errors are
    swallowed and the affected fields are set to None.
  """
  #set up Spotify with the access token
  sp = spotipy.Spotify(auth=access_token)

  #Get the tracks from the playlist. 'next' is requested as well so that
  #playlists longer than one page are read completely.
  page = sp.playlist_tracks(
      playlist_id,
      fields='items(track(id, name, artists, album(id, name))),next')

  playlist_items = []
  while page:
    playlist_items.extend(page.get('items') or [])
    page = sp.next(page) if page.get('next') else None

  #Extract relevant information and store in a list of dictionaries
  music_data = []
  for item in playlist_items:
    #Entries whose track is no longer available come back without a track object
    track = (item or {}).get('track')
    if not track:
      continue

    album = track.get('album') or {}
    track_name = track.get('name')
    artists = ', '.join([artist['name'] for artist in (track.get('artists') or [])])
    album_name = album.get('name')
    album_id = album.get('id')
    track_id = track.get('id')

    has_track_id = bool(track_id) and track_id != 'Not available'
    has_album_id = bool(album_id) and album_id != 'Not available'

    #Get audio features for track (the API may answer with an empty list or [None])
    audio_features = {}
    if has_track_id:
      features = sp.audio_features(track_id)
      audio_features = (features[0] if features else None) or {}

    #Get release date of the album
    release_date = None
    if has_album_id:
      try:
        album_info = sp.album(album_id)
        release_date = album_info['release_date'] if album_info else None
      except Exception:
        release_date = None

    #Get popularity of the track. The result is kept under its own name so the
    #'Explicit' / 'External URLs' lookups below never run against None or
    #against the playlist entry.
    track_details = {}
    if has_track_id:
      try:
        track_details = sp.track(track_id) or {}
      except Exception:
        track_details = {}
    popularity = track_details.get('popularity')

    #Add additional track information to the track data
    track_data = {
        'Track Name' : track_name,
        'Artists': artists,
        'Album Name': album_name,
        'Album ID': album_id,
        'Track ID': track_id,
        'Popularity': popularity,
        'Release Date': release_date,
        'Duration (ms)': audio_features.get('duration_ms'),
        'Explicit': track_details.get('explicit', None),
        'External URLs' : (track_details.get('external_urls') or {}).get('spotify', None),
        'Danceability' : audio_features.get('danceability'),
        'Energy' : audio_features.get('energy'),
        'Key' : audio_features.get('key'),
        'Loudness' : audio_features.get('loudness'),
        'Mode' : audio_features.get('mode'),
        'Speechiness' : audio_features.get('speechiness'),
        'Acousticness' : audio_features.get('acousticness'),
        'Instrumentalness' : audio_features.get('instrumentalness'),
        'Liveness' : audio_features.get('liveness'),
        'Valence' : audio_features.get('valence'),
        'Tempo' : audio_features.get('tempo'),
    }
    music_data.append(track_data)

  #Create a pandas DataFrame from the list of dictionaries
  df = pd.DataFrame(music_data)

  return df

In [39]:
# Spotify ID of the playlist whose tracks are loaded into the working frame
playlist_ID = '46vlQJoLGqAn8rzIe7mccl'

# One row per playlist track, built with the token obtained in the auth cell
df = get_trending_playlist_data(playlist_id=playlist_ID, access_token=access_token)

print(df)

                                           Track Name  \
0                  Lover (Remix) [feat. Shawn Mendes]   
1                                         Blank Space   
2                                              august   
3                                               Lover   
4                                            Believer   
5                                              Demons   
6                                             Thunder   
7                                               Bones   
8                                    Watermelon Sugar   
9                                             Perfect   
10                                 Paint The Town Red   
11                                            7 rings   
12                                           Memories   
13                                            bad guy   
14                                          Attention   
15            Dusk Till Dawn (feat. Sia) - Radio Edit   
16                           So

In [40]:
# Count of missing values per column
print(df.isna().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

In [42]:
# Rebinds `data` to the same DataFrame object as `df` (an alias, not a copy).
# NOTE(review): `data` previously held the token request payload, and no later
# cell visible here reads it - confirm this alias is still needed.
data = df

In [49]:
#Function to calculate a recency weight from a release date
def calculate_weighted_popularity(release_date):
  """Return a recency weight in (0, 1] for a release date string.

  The weight is 1 / (days since release + 1), so a release dated today gets
  1.0 and older releases get progressively smaller weights.

  Args:
    release_date: Date string in 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY' form.
      Spotify reports release dates with day, month or year precision; a
      missing month or day is read as the first of the period.

  Returns:
    float weight. Dates in the future are treated as released today (1.0).

  Raises:
    ValueError: if the string matches none of the supported formats.
    TypeError: if release_date is not a string (e.g. None).
  """
  parsed_date = None
  for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
    try:
      parsed_date = datetime.strptime(release_date, date_format)
      break
    except ValueError:
      continue
  if parsed_date is None:
    raise ValueError(f"Unrecognized release date format: {release_date!r}")

  time_span = datetime.now() - parsed_date

  #Clamp at zero: a future date gives negative days, and exactly -1 would
  #otherwise divide by zero
  days_since_release = max(time_span.days, 0)

  #more recent releases have a higher weight
  weight = 1/(days_since_release+1)
  return weight

In [50]:
scaler = MinMaxScaler()

#Pull the audio-feature columns out of the playlist frame as a 2-D array
music_features = df.loc[:, [
    'Danceability', 'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness',
    'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
]].to_numpy()

#Min-max scale each feature column; row order matches df
music_features_scales = scaler.fit_transform(music_features)

In [51]:
#a function to get content-based recommendations based on music features

def content_based_recommendations(input_song_name, num_recommendations=5):
  """Recommend the tracks whose scaled audio features are closest to a song.

  Similarity is the cosine similarity between the input song's row of
  ``music_features_scales`` and every row of that array.

  Args:
    input_song_name: Exact 'Track Name' value to look up in ``df``. When the
      name occurs more than once, the first occurrence is used.
    num_recommendations: Maximum number of rows to return.

  Returns:
    DataFrame with the columns 'Track Name', 'Artists', 'Album Name',
    'Release Date' and 'Popularity', ordered from most to least similar, or
    None (after printing a message) when the song is not in ``df``.
  """
  #Positional row numbers of the matches: music_features_scales and df.iloc
  #are both addressed by position, not by index label
  matching_positions = np.flatnonzero((df['Track Name'] == input_song_name).to_numpy())
  if matching_positions.size == 0:
    print(f"'{input_song_name}' not found in the dataset. Please enter valid song name.")
    return

  input_song_position = matching_positions[0]

  #calculate the similarity scores based on music features(cosine similarity)
  similarity_scores = cosine_similarity(
      [music_features_scales[input_song_position]], music_features_scales)[0]

  #Rank all songs from most to least similar, then drop the input song by
  #position; it is not guaranteed to rank first when another row ties with it
  ranked_positions = np.argsort(similarity_scores)[::-1]
  similar_song_indices = ranked_positions[ranked_positions != input_song_position][:num_recommendations]

  #Get the details of the most similar songs based on content-based filtering
  recommended_songs = df.iloc[similar_song_indices][['Track Name','Artists','Album Name','Release Date','Popularity']]

  return recommended_songs

In [52]:
#Music recommendation: content-based matches ranked by popularity

def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
  """Return content-based recommendations ordered by popularity.

  Args:
    input_song_name: Exact 'Track Name' value to look up in ``df``.
    num_recommendations: Number of content-based matches to request.
    alpha: Accepted for backward compatibility; it is not used.
      NOTE(review): presumably meant to blend similarity and popularity -
      confirm the intended formula before wiring it in.

  Returns:
    DataFrame with the columns 'Track Name', 'Artists', 'Album Name',
    'Release Date' and 'Popularity' (as float), sorted by 'Popularity' in
    descending order, with a fresh 0-based index assigned before sorting.
    Rows whose 'Track Name' equals the input song are excluded. Returns None
    (after printing a message) when the song is not in ``df``.
  """
  if input_song_name not in df['Track Name'].values:
    print(f"'{input_song_name}' not found in the dataset. Please enter valid song name.")
    return

  content_based_rec = content_based_recommendations(input_song_name, num_recommendations)
  if content_based_rec is None:
    return

  #The earlier version appended a row for the input song carrying a
  #recency-weighted popularity and then filtered that same row out again, so
  #it never reached the result and could only fail on a missing or partial
  #release date. It is omitted; the index reset and float 'Popularity' column
  #that the append produced are kept so the output looks the same.
  ranked = content_based_rec.reset_index(drop=True)
  ranked = ranked.astype({'Popularity': float})
  ranked = ranked.sort_values(by = 'Popularity', ascending = False)
  ranked = ranked[ranked['Track Name'] != input_song_name]

  return ranked

In [53]:
# Song to look up; must match a 'Track Name' value exactly
input_song_name = "Demons"

recommendations = hybrid_recommendations(
    input_song_name=input_song_name,
    num_recommendations=5,
)

print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'Demons':
                                          Track Name                Artists  \
0                                   Watermelon Sugar           Harry Styles   
4                                         The Nights                 Avicii   
1                                           Payphone  Maroon 5, Wiz Khalifa   
3                                             No Lie    Sean Paul, Dua Lipa   
2  Back To You - From 13 Reasons Why – Season 2 S...           Selena Gomez   

                                          Album Name Release Date  Popularity  
0                                          Fine Line   2019-12-13        86.0  
4                                  The Days / Nights   2014-01-01        85.0  
1                         Overexposed Track By Track   2012-01-01        83.0  
3                               Mad Love The Prequel   2018-06-29        81.0  
2  Back To You (From 13 Reasons Why – Season 2 So...   2018-05-10        71.0  
