In [8]:
import requests
import os
import base64
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity



In [3]:
CLIENT_ID = os.getenv('SPOTIFY_API_KEY')
CLIENT_SECRET = os.getenv('SPOTIFY_API_SECRET')

client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

In [4]:
# Requesting access token
token_url = "https://accounts.spotify.com/api/token"

headers = {
    'Authorization':f'Basic {client_credentials_base64.decode()}'
}

data = {
    'grant_type':'client_credentials'
}

response = requests.post(token_url, data=data, headers=headers)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully")
else:
    print('Error obtaining access token')
    exit()

Access token obtained successfully


In [5]:
def get_trending_playlist_data(playlist_id, access_token):
    sp = spotipy.Spotify(auth=access_token)

    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    music_data = []
    for track_info in playlist_tracks['items']:
        track = track_info['track']
        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track['artists']])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']

        # Get audio features for the track
        audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

        # Get release date of the album
        try:
            album_info = sp.album(album_id) if album_id != 'Not available' else None
            release_date = album_info['release_date'] if album_info else None
        except:
            release_date = None

        # Get popularity of the track
        try:
            track_info = sp.track(track_id) if track_id != 'Not available' else None
            popularity = track_info['popularity'] if track_info else None
        except:
            popularity = None

        # Add additional track information to the track data
        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': popularity,
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_info.get('explicit', None),
            'External URLs': track_info.get('external_urls', {}).get('spotify', None),
            'Danceability': audio_features['danceability'] if audio_features else None,
            'Energy': audio_features['energy'] if audio_features else None,
            'Key': audio_features['key'] if audio_features else None,
            'Loudness': audio_features['loudness'] if audio_features else None,
            'Mode': audio_features['mode'] if audio_features else None,
            'Speechiness': audio_features['speechiness'] if audio_features else None,
            'Acousticness': audio_features['acousticness'] if audio_features else None,
            'Instrumentalness': audio_features['instrumentalness'] if audio_features else None,
            'Liveness': audio_features['liveness'] if audio_features else None,
            'Valence': audio_features['valence'] if audio_features else None,
            'Tempo': audio_features['tempo'] if audio_features else None
        }

        music_data.append(track_data)

    # Create a pandas DataFrame from the list of dictionaries
    df = pd.DataFrame(music_data)

    return df

In [6]:
playlist_id = '6qSYIKJihVKpWr2HDeHjxS'

music_df = get_trending_playlist_data(playlist_id=playlist_id, access_token=access_token)

print(music_df)

                                           Track Name  \
0              Miss Americana & The Heartbreak Prince   
1                                        Cruel Summer   
2                                             The Man   
3                               You Need To Calm Down   
4                                               Lover   
5                         Fearless (Taylor’s Version)   
6               You Belong With Me (Taylor’s Version)   
7                       Love Story (Taylor’s Version)   
8                               22 (Taylor's Version)   
9   We Are Never Ever Getting Back Together (Taylo...   
10         I Knew You Were Trouble (Taylor's Version)   
11  All Too Well (10 Minute Version) (Taylor's Ver...   
12                       Enchanted (Taylor's Version)   
13                                   ...Ready For It?   
14                                           Delicate   
15                                     Don’t Blame Me   
16                           Lo

In [7]:
music_df.isnull().sum()

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64

In [9]:
def calculate_popularity(release_date):
    release_date = datetime.strptime(release_date, '%Y-%m-%d')
    time_span = datetime.now() - release_date
    weight = 1 / (time_span.days + 1)
    
    return weight

First, we will create recommendation system based on audio features

In [10]:
data = music_df

scaler = MinMaxScaler()
music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features)

In [33]:
def content_based_recommendation(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return
    
    input_song_index = music_df[music_df['Track Name'] == input_song_name].index[0]
    
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)
    
    similarity_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations+1]
    
    content_based_recommendations = music_df.iloc[similarity_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations

Now, we will generated weighted popularity and combine it with content based recommendation to introduce hybrid approach

In [34]:
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    content_based_rec = content_based_recommendation(input_song_name, num_recommendations)

    popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]
    weighted_popularity_score = popularity_score * calculate_popularity(music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0])
    
    hybrid_recommendations = pd.DataFrame([{
        'Track Name': input_song_name,
        'Artists': music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0],
        'Album Name': music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0],
        'Release Date': music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0],
        'Popularity': weighted_popularity_score
    }])

    result = pd.concat([content_based_rec, hybrid_recommendations], ignore_index=True)
    result = result.sort_values(by='Popularity', ascending=False)
    result = result[result['Track Name'] != input_song_name]

    return result

In [36]:
input_song_name = "The Man"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'The Man':
                        Track Name       Artists  \
4  I Can Do It With a Broken Heart  Taylor Swift   
2                        Anti-Hero  Taylor Swift   
0            You Need To Calm Down  Taylor Swift   
3   Blank Space (Taylor's Version)  Taylor Swift   
1    Love Story (Taylor’s Version)  Taylor Swift   

                                     Album Name Release Date  Popularity  
4  THE TORTURED POETS DEPARTMENT: THE ANTHOLOGY   2024-04-19        82.0  
2                                     Midnights   2022-10-21        81.0  
0                                         Lover   2019-08-23        77.0  
3                       1989 (Taylor's Version)   2023-10-26        76.0  
1                 Love Story (Taylor’s Version)   2021-02-12        75.0  
