In [1]:
import base64
import os

import pandas as pd
import requests
import spotipy
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Read the Spotify API credentials from the environment so that real secrets
# are never stored in (or committed with) the notebook. The placeholder values
# are used only when the variables are not set.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'specific_client_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'specific_client_secret')

In [3]:
# Base64 encode "<client_id>:<client_secret>" for the HTTP Basic Authorization header
client_credentials = f"{client_id}:{client_secret}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

# Request an access token using the client-credentials grant
token_url = 'https://accounts.spotify.com/api/token'
headers = {'Authorization': f'Basic {client_credentials_base64.decode()}'}
data = {'grant_type': 'client_credentials'}
# A timeout keeps the cell from hanging indefinitely if the server does not answer
response = requests.post(token_url, data=data, headers=headers, timeout=10)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    # Raise instead of calling exit(): the failure stays visible in the cell
    # output and "Run All" stops here rather than continuing without a token.
    raise RuntimeError(
        f"Spotify token request failed with HTTP {response.status_code}: {response.text}"
    )

Access token obtained successfully.


In [4]:
# Initialize the Spotipy client with the access token obtained above.
# `sp` is the module-level client that the helper functions below call.
sp = spotipy.Spotify(auth=access_token)

### Try recommending songs using the 'recommendations' endpoint from spotipy

In [5]:
def search_track(track_name, artist_name):
    """Look up a track's Spotify ID from its title and artist.

    Runs a single-result track search through the module-level `sp` client.

    Args:
        track_name: Title of the track to search for.
        artist_name: Name of the artist to search for.

    Returns:
        The 'id' of the first search hit, or None when the search has no hits.
    """
    response = sp.search(
        q=f"track:{track_name} artist:{artist_name}",
        limit=1,
        type='track',
    )
    hits = response['tracks']['items']
    if not hits:
        return None
    return hits[0]['id']

# Example: resolve a known track to its Spotify ID and report the outcome
track_name = 'Power'
artist_name = 'Kanye West'
track_id = search_track(track_name, artist_name)
if not track_id:
    print(f"No track found with the name '{track_name}' by {artist_name}.")
else:
    print(f"The track ID for '{track_name}' by {artist_name} is: {track_id}")


The track ID for 'Power' by Kanye West is: 2gZUPNdnz5Y45eiGxpHGSc


In [6]:
def get_recommendations(seed_track_id, limit=10):
    """Return recommended tracks for one seed track as display strings.

    Queries the 'recommendations' endpoint through the module-level `sp`
    client, using `seed_track_id` as the only seed.

    Args:
        seed_track_id: Spotify ID of the seed track.
        limit: Number of recommendations to request (default 10).

    Returns:
        A list of strings formatted as "<track name> - <artist>, <artist>, ...".
    """
    response = sp.recommendations(seed_tracks=[seed_track_id], limit=limit)
    return [
        item['name'] + ' - ' + ', '.join(performer['name'] for performer in item['artists'])
        for item in response['tracks']
    ]

# Example: list recommendations seeded by the track found above
recommendations = get_recommendations(track_id)
print(f"Recommendations based on the track: {track_name} by {artist_name} with ID {track_id}")
for position, entry in enumerate(recommendations, start=1):
    print(f"{position}. {entry}")

Recommendations based on the track: Power by Kanye West with ID 2gZUPNdnz5Y45eiGxpHGSc
1. Domo23 - Tyler, The Creator
2. Ms. Jackson - Outkast
3. Lucid Dreams - Juice WRLD
4. Superhero (Heroes & Villains) [with Future & Chris Brown] - Metro Boomin, Future, Chris Brown
5. FRANCHISE (feat. Young Thug & M.I.A.) - Travis Scott, Young Thug, M.I.A.
6. Turn My Swag On - Soulja Boy
7. No Vaseline - Ice Cube
8. God's Plan - Drake
9. No Sleep - Wiz Khalifa
10. All Falls Down - Kanye West, Syleena Johnson


# Song Recommendation System

### Load extracted tracks data

In [7]:
# Path to the CSV of extracted track data (relative to the working directory)
file_path = 'tracks.csv'

# Load the CSV file into a DataFrame; this is the pool of candidate tracks
# that the recommendation sections below search through
tracks_df = pd.read_csv(file_path)

In [8]:
# Summarise the columns, non-null counts and dtypes of the loaded tracks
tracks_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1802 entries, 0 to 1801
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_name        1802 non-null   object 
 1   track_id          1802 non-null   object 
 2   artist_name       1802 non-null   object 
 3   album_name        1802 non-null   object 
 4   release_date      1802 non-null   object 
 5   artist_genre      1802 non-null   object 
 6   popularity        1802 non-null   int64  
 7   Duration (ms)     1802 non-null   int64  
 8   Acousticness      1802 non-null   float64
 9   Danceability      1802 non-null   float64
 10  Energy            1802 non-null   float64
 11  Instrumentalness  1802 non-null   float64
 12  Key               1802 non-null   int64  
 13  Liveness          1802 non-null   float64
 14  Loudness          1802 non-null   float64
 15  Mode              1802 non-null   int64  
 16  Speechiness       1802 non-null   float64


## Extract info for new song

In [9]:
def extract_track_features(track_id):
    """Fetch metadata and audio features for one track as a single-row DataFrame.

    Uses the module-level `sp` client to query the track, its audio features
    and the first listed artist (whose first genre becomes `artist_genre`).

    Args:
        track_id: Spotify ID of the track to look up.

    Returns:
        A pandas DataFrame with exactly one row and the columns track_name,
        track_id, artist_name, album_name, release_date, artist_genre,
        popularity, 'Duration (ms)' and the audio-feature columns
        Acousticness ... Valence. Audio-feature columns are None when no audio
        features are available; artist_genre is None when the first artist
        has no genres.

    Raises:
        ValueError: If `track_id` is None or empty (e.g. a failed search).
    """
    if not track_id:
        raise ValueError("track_id is required, got None or an empty value")

    track = sp.track(track_id)

    # The audio-features response is a list; it may be empty or hold None for a
    # track without analysis data, so fall back to an empty dict in both cases.
    audio_features = sp.audio_features(track_id)
    features = (audio_features[0] if audio_features else None) or {}

    # Genres live on the artist object; only the first listed artist is consulted.
    primary_artist = sp.artist(track['artists'][0]['id'])
    genres = primary_artist['genres']

    track_info = {
        'track_name': track['name'],
        'track_id': track_id,
        'artist_name': ', '.join(performer['name'] for performer in track['artists']),
        'album_name': track['album']['name'],
        'release_date': track['album']['release_date'],
        # The genre does not depend on whether audio features were returned
        'artist_genre': genres[0] if genres else None,
        'popularity': track['popularity'],
        'Duration (ms)': track['duration_ms'],
    }

    # (output column, audio-features key) pairs, in the column order of tracks.csv
    audio_columns = (
        ('Acousticness', 'acousticness'),
        ('Danceability', 'danceability'),
        ('Energy', 'energy'),
        ('Instrumentalness', 'instrumentalness'),
        ('Key', 'key'),
        ('Liveness', 'liveness'),
        ('Loudness', 'loudness'),
        ('Mode', 'mode'),
        ('Speechiness', 'speechiness'),
        ('Tempo', 'tempo'),
        ('Time Signature', 'time_signature'),
        ('Valence', 'valence'),
    )
    for column, key in audio_columns:
        track_info[column] = features.get(key)

    return pd.DataFrame([track_info])

In [10]:
# The "new" song for which similar tracks will be recommended
track_name = 'Enough is Enough'  # Track name
artist_name = 'Post Malone'  # Artist name

track_id = search_track(track_name, artist_name)
# search_track returns None when nothing matches; fail here with a clear
# message instead of passing None on to the Spotify API calls.
if track_id is None:
    raise ValueError(f"No track found with the name '{track_name}' by {artist_name}.")
new_track_df = extract_track_features(track_id)

In [11]:
# Display the single-row DataFrame built for the new track
new_track_df

Unnamed: 0,track_name,track_id,artist_name,album_name,release_date,artist_genre,popularity,Duration (ms),Acousticness,Danceability,Energy,Instrumentalness,Key,Liveness,Loudness,Mode,Speechiness,Tempo,Time Signature,Valence
0,Enough Is Enough,3BHFResGQiUvbYToUdaDQz,Post Malone,AUSTIN,2023-07-28,dfw rap,77,165175,0.014,0.483,0.768,0,0,0.109,-4.911,1,0.0344,166.061,4,0.332


### Encode artist_genre as integer category codes so it can be used as a numerical feature

In [12]:
# Use just the first genre.
# The regex expects values shaped like "['genre one', 'genre two']" and
# captures the text between the first pair of single quotes.
raw_genres = tracks_df['artist_genre']
first_genre = raw_genres.str.extract(r'\[\'(.*?)\'')[0]

# Only overwrite rows that still contain a list-style value. Without this
# guard, re-running the cell would turn every already-extracted genre into NaN
# because the regex no longer matches.
is_list_repr = raw_genres.str.contains('[', regex=False, na=False)
tracks_df['artist_genre'] = first_genre.where(is_list_repr, raw_genres)
tracks_df.head()

Unnamed: 0,track_name,track_id,artist_name,album_name,release_date,artist_genre,popularity,Duration (ms),Acousticness,Danceability,Energy,Instrumentalness,Key,Liveness,Loudness,Mode,Speechiness,Tempo,Time Signature,Valence
0,Training Season,6Qb7YsAqH4wWFUMbGsCpap,Dua Lipa,Training Season,2024-02-15,dance pop,86,209487,0.0365,0.815,0.582,1e-06,5,0.138,-4.5,0,0.0532,123.03,4,0.671
1,Capricorn,2tKqhcDNVVKXJqllKzrecM,Vampire Weekend,Capricorn / Gen-X Cops,2024-02-16,baroque pop,66,249560,0.159,0.563,0.644,0.000311,7,0.118,-3.889,1,0.0318,140.022,4,0.525
2,Bittersweet,7yfRb4seXT7w8zVMW0dXNa,Gunna,Bittersweet,2024-02-16,atl hip hop,70,191493,0.564,0.44,0.636,0.0,1,0.253,-5.73,0,0.0705,158.475,4,0.687
3,CONTIGO (with Tiësto),4UkUxO2WlKLc0Q1iEutGGh,"KAROL G, Tiësto",CONTIGO (with Tiësto),2024-02-15,reggaeton,83,192786,0.178,0.846,0.545,6e-06,5,0.0989,-7.826,1,0.0736,122.029,4,0.268
4,TEXAS HOLD 'EM,0Z7nGFVCLfixWctgePsRk9,Beyoncé,TEXAS HOLD 'EM,2024-02-11,pop,90,235636,0.588,0.725,0.709,0.0,2,0.135,-6.514,1,0.072,110.024,4,0.353


In [18]:
# Collect every distinct genre that occurs in either DataFrame
combined_genres = pd.concat([tracks_df['artist_genre'], new_track_df['artist_genre']])
all_genres = combined_genres.unique()

# Give each genre an integer code: its position in all_genres
genre_mapping = dict(zip(all_genres, range(len(all_genres))))

# Replace the genre labels with their codes, using the same mapping for the
# candidate tracks and the new track so the codes are comparable
tracks_df['artist_genre'] = tracks_df['artist_genre'].map(genre_mapping)
new_track_df['artist_genre'] = new_track_df['artist_genre'].map(genre_mapping)

## Use cosine similarity to find similar tracks

In [19]:
def get_similar_songs(new_track_features, training_features, num_recommendations=5):
    """Rank training songs by cosine similarity to the new song.

    Args:
        new_track_features: Feature rows for the new song; only the first row
            of the resulting similarity matrix is used.
        training_features: Feature rows of the candidate songs.
        num_recommendations: How many indices to return (default 5).

    Returns:
        Positional indices into `training_features`, ordered from most to
        least similar, truncated to `num_recommendations` entries.
    """
    # Similarity of the (first) new song against every training song
    scores = cosine_similarity(new_track_features, training_features)[0]

    # argsort is ascending, so reverse it to put the best matches first
    ranked_indices = scores.argsort()[::-1]

    return ranked_indices[:num_recommendations]

In [20]:
# Columns used as the feature vector of each song
# NOTE(review): these columns are on very different scales (compare
# 'Duration (ms)' with 'Danceability' in the table above), so the largest
# columns probably dominate the similarity -- consider scaling; confirm.
numerical_features = [
    'artist_genre',
    'popularity',
    'Duration (ms)',
    'Acousticness',
    'Danceability',
    'Energy',
    'Instrumentalness',
    'Key',
    'Liveness',
    'Loudness',
    'Mode',
    'Speechiness',
    'Tempo',
    'Time Signature',
    'Valence',
]

# Drop the new track from the candidates in case it is already in the dataset
tracks_df_filtered = tracks_df.loc[tracks_df['track_id'].ne(new_track_df['track_id'].iloc[0])]

# Feature matrices for the candidates and for the new track
training_features = tracks_df_filtered.loc[:, numerical_features]
new_track_features = new_track_df.loc[:, numerical_features]

In [21]:
# Calculate similar songs indices.
# These are positions within training_features (built from tracks_df_filtered),
# not index labels of tracks_df.
similar_song_indices = get_similar_songs(new_track_features, training_features)

In [22]:
# Get details of similar songs.
# The indices are positions within training_features, which was built from
# tracks_df_filtered, so they must be looked up there. Indexing the unfiltered
# tracks_df would return the wrong rows whenever the new track had been
# filtered out of the dataset.
similar_songs = tracks_df_filtered.iloc[similar_song_indices].reset_index(drop=True)

similar_songs[['track_name', 'artist_name']]

Unnamed: 0,track_name,artist_name
0,Eyes on my baby,BETWEEN FRIENDS
1,Vibrate,"James Hype, Tita Lau"
2,Waves,Kanye West
3,Erase Me - Main,"Kid Cudi, Kanye West"
4,The Next Episode,"Dr. Dre, Snoop Dogg"


## Use kNN to find similar songs

In [23]:
# Feature columns for kNN -- the same selection as in the cosine-similarity
# section, repeated so this section can be run on its own
numerical_features = [
    'artist_genre', 'popularity', 'Duration (ms)',
    'Acousticness', 'Danceability', 'Energy',
    'Instrumentalness', 'Key', 'Liveness',
    'Loudness', 'Mode', 'Speechiness',
    'Tempo', 'Time Signature', 'Valence',
]

# Keep every candidate except the new track itself (if it is in the dataset)
is_other_track = tracks_df['track_id'] != new_track_df['track_id'].iloc[0]
tracks_df_filtered = tracks_df[is_other_track]

# Feature matrices for the candidates and for the new track
training_features = tracks_df_filtered[numerical_features]
new_track_features = new_track_df[numerical_features]

In [24]:
# Number of neighbours (songs) to retrieve
k = 5

# Build the nearest-neighbour index over the candidate songs
knn_model = NearestNeighbors(n_neighbors=k).fit(training_features)

# Query the k candidates closest to the new track; the returned indices are
# positions within training_features
distances, similar_song_indices = knn_model.kneighbors(new_track_features)

In [25]:
# Get details of similar songs.
# kneighbors() returns positions within training_features, which was built
# from tracks_df_filtered, so the lookup must use that frame. Indexing the
# unfiltered tracks_df would return the wrong rows whenever the new track had
# been filtered out of the dataset.
similar_songs = tracks_df_filtered.iloc[similar_song_indices[0]].reset_index(drop=True)

similar_songs[['track_name', 'artist_name']]

Unnamed: 0,track_name,artist_name
0,Turnt,"Money Man, Moneybagg Yo"
1,Satellite,Harry Styles
2,Tainted Love,Milky Chance
3,Darlin',Chase Matthew
4,Aphid (feat. Dijon),Matt Champion
