<a href="https://colab.research.google.com/github/jarodchristiansen/Machine-Learning-Deep-Learning/blob/master/Spotify_Recommendation_Algos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup Spotify API

In [1]:
!pip install spotipy

Collecting spotipy
  Downloading spotipy-2.24.0-py3-none-any.whl.metadata (4.9 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.0.8-py3-none-any.whl.metadata (9.2 kB)
Downloading spotipy-2.24.0-py3-none-any.whl (30 kB)
Downloading redis-5.0.8-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.6/255.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis, spotipy
Successfully installed redis-5.0.8 spotipy-2.24.0


In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from google.colab import userdata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Set up Spotify API credentials
client_id = userdata.get('spotify_id')
client_secret = userdata.get('spotify_secret')

# Authenticate using Client Credentials Flow
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)


## Methods to gather initial tracks for seed dataset

### External methods to enhance specifity

In [14]:
def search_tracks_by_genre(genre, limit=50):
    """
    Fetches track IDs by searching for a specific genre.

    Args:
    - genre (str): Genre keyword to search for.
    - limit (int): Maximum number of tracks to fetch.

    Returns:
    - track_ids (list): List of track IDs.
    """
    track_ids = []

    # Search for tracks by genre
    results = sp.search(q=f'genre:{genre}', type='track', limit=limit)
    tracks = results['tracks']['items']

    # Collect track IDs
    for track in tracks:
        track_ids.append(track['id'])

    return track_ids

# Example usage
genre = 'pop'  # You can replace this with any genre you prefer
pop_tracks = search_tracks_by_genre(genre, limit=50)
print(pop_tracks)


['0WbMK4wrZ1wFSty9F7FCgu', '6dOtVTDdiauQNBQEDOtlAB', '2plbrEY59IikOBgBGLjaoe', '5G2f63n7IPVPPjfNIGih7Q', '5N3hjp1WNayUPZrA8kJmJP', '2qSkIjg1o9h3YT9RAgYN75', '4xdBrk0nFZaP54vvZj0yx7', '1UHS8Rf6h5Ar3CDWRd3wjF', '1k2pQc5i348DCHwbn5KTdc', '7221xIgOnuakPdLqT0F3nP', '7FOgcfdz9Nx5V9lCNXdBYv', '102YUQbYmwdBXS7jwamI90', '0mflMxspEfB0VbI1kyLiAv', '3WOhcATHxK2SLNeP5W3v1v', '2FQrifJ1N335Ljm3TjTVVf', '7tI8dRuH2Yc6RuoTjxo4dU', '21B4gaTWnTkuSh77iWEXdS', '19RybK6XDbAVpcdxSbZL1o', '0UYnhUfnUj5adChuAXvLUB', '3WSOUb3U7tqURbBSgZTrZX', '3QaPy1KgI7nu9FJEQUgn6h', '629DixmZGHc7ILtEntuiWE', '2QjOHCTQ1Jl3zawyYOpxh6', '5fZJQrFKWQLb7FpJXZ1g7K', '51eSHglvG1RJXtL3qI5trr', '3iPIDAFybaoyqX7hvAfWkl', '5oIVNm56t6OIf9ZjdEG3ud', '3Vr3zh0r7ALn8VLqCiRR10', '3xkHsmpQCBMytMJNiDf3Ii', '1BxfuPKGuaTgP7aM0Bbdwr', '5IZXB5IKAD2qlvTPJYDCFB', '4w2GLmK2wnioVnb5CPQeex', '53IRnAWx13PYmoVYtemUBS', '3qhlB30KknSejmIvZZLjOD', '0XkZmBCCcdMY0EPY8ij6Gb', '1bjeWoagtHmUKputLVyDxQ', '0AjmK0Eai4zGrLaJwPvrDp', '7BRD7x5pt8Lqa1eGYC4dzj', '7iQMm50NNw

In [8]:
def get_playlist_tracks(playlist_id, limit=100):
    """
    Fetches track IDs from a specific playlist.

    Args:
    - playlist_id (str): The Spotify playlist ID.
    - limit (int): Number of tracks to fetch (max 100 per request).

    Returns:
    - track_ids (list): List of track IDs from the playlist.
    """
    track_ids = []
    results = sp.playlist_tracks(playlist_id, limit=limit)

    # Collect track IDs from the playlist
    for item in results['items']:
        track = item['track']
        track_ids.append(track['id'])

    return track_ids

# Example usage
playlist_id = '37i9dQZEVXbMDoHDwVN2tF'  # Spotify Top 50 Global playlist
top_50_tracks = get_playlist_tracks(playlist_id, limit=50)
print(top_50_tracks)


['2plbrEY59IikOBgBGLjaoe', '6dOtVTDdiauQNBQEDOtlAB', '5G2f63n7IPVPPjfNIGih7Q', '7tI8dRuH2Yc6RuoTjxo4dU', '2qSkIjg1o9h3YT9RAgYN75', '0WbMK4wrZ1wFSty9F7FCgu', '6WatFBLVB0x077xWeoVc2k', '5N3hjp1WNayUPZrA8kJmJP', '2PnlsTsOTLE5jnBnNe2K0A', '3xkHsmpQCBMytMJNiDf3Ii', '1UHS8Rf6h5Ar3CDWRd3wjF', '5fZJQrFKWQLb7FpJXZ1g7K', '17phhZDn6oGtzMe56NuWvj', '2cZOYofOX4d6g0OXxkaIjA', '3hRV0jL3vUpRrcy398teAU', '5Z0UnEtpLDQyYlWwgi8m9C', '7CyPwkp0oE8Ro9Dd5CUDjW', '2esZG2XFtuoWWA9AfDvSxy', '7z7kvUQGwlC6iOl7vMuAr9', '3WOhcATHxK2SLNeP5W3v1v', '0OA00aPt3BV10qeMIs3meW', '2QjOHCTQ1Jl3zawyYOpxh6', '5XeFesFbtLpXzIVDNQP22n', '6AI3ezQ4o3HUoP6Dhudph3', '4xdBrk0nFZaP54vvZj0yx7', '5AJ9hqTS2wcFQCELCFRO7A', '5IZXB5IKAD2qlvTPJYDCFB', '51ZQ1vr10ffzbwIjDCwqm4', '2nLtzopw4rPReszdYBJU6h', '42VsgItocQwOQC3XWZ8JNA', '62bOmKYxYg7dhrC6gH9vFn', '51rfRCiUSvxXlCSCfIztBy', '7ov3TDp5D00Rnu5R1viX4w', '0UYnhUfnUj5adChuAXvLUB', '3QaPy1KgI7nu9FJEQUgn6h', '3qhlB30KknSejmIvZZLjOD', '3AJwUDP919kvQ9QcozQPxg', '2aYZaN5SmkRDLsrrV8GkBQ', '1BxfuPKGua

In [9]:
def get_user_saved_tracks(limit=50):
    """
    Fetches the current user's saved track IDs.

    Args:
    - limit (int): Number of saved tracks to fetch (max 50 per request).

    Returns:
    - track_ids (list): List of track IDs from the user's saved tracks.
    """
    track_ids = []

    # Get current user's saved tracks
    results = sp.current_user_saved_tracks(limit=limit)

    # Collect track IDs
    for item in results['items']:
        track = item['track']
        track_ids.append(track['id'])

    return track_ids

# Example usage
user_saved_tracks = get_user_saved_tracks(limit=50)
print(user_saved_tracks)


ERROR:spotipy.client:HTTP Error for GET to https://api.spotify.com/v1/me/tracks with Params: {'limit': 50, 'offset': 0, 'market': None} returned 403 due to Forbidden.


SpotifyException: http status: 403, code:-1 - https://api.spotify.com/v1/me/tracks?limit=50&offset=0:
 Forbidden., reason: None

In [10]:
def get_tracks_from_artist(artist_name, limit=50):
    """
    Fetches track IDs from albums of a specific artist.

    Args:
    - artist_name (str): The name of the artist.
    - limit (int): Number of tracks to fetch.

    Returns:
    - track_ids (list): List of track IDs.
    """
    track_ids = []

    # Search for the artist by name
    results = sp.search(q=f'artist:{artist_name}', type='artist', limit=1)
    artist = results['artists']['items'][0]
    artist_id = artist['id']

    # Get the artist's albums
    albums = sp.artist_albums(artist_id, limit=limit)

    # Collect track IDs from each album
    for album in albums['items']:
        album_tracks = sp.album_tracks(album['id'], limit=50)
        for track in album_tracks['items']:
            track_ids.append(track['id'])

    return track_ids

# Example usage
artist_tracks = get_tracks_from_artist('Taylor Swift', limit=50)
print(artist_tracks)


['6dODwocEuGzHAavXqTbwHv', '4PdLaGZubp4lghChqp8erB', '7uGYWMwRy24dm7RUDDhUlD', '1kbEbBdEgQdQeLXCJh28pJ', '7wAkQFShJ27V8362MqevQr', '4QMgEffJQuKtjCNvqfRZ0m', '7IWcDWOfiooH5hRs9XOVYz', '5ExOm0dh4NyRyAdSAO9hyM', '799KrpEbhZp0MHeiA8YK9P', '2d8UxVNhJinc8uat9PoM9y', '5chnRTB9qMK3W1M41SnU9s', '3YkNIrAvbKNrrwwEd7NVLl', '2fPvQfGQEZOKtJ9qXeL4x8', '1xtw1krCR6Dw2KwkXw5z63', '1tuNqJOtRQVHvONR8Lg3MZ', '4d9PtIEVij9jW5OaLinH66', '62E2nR0od0M5HYxuYLaDz7', '1kcwpPDQnqEqmezzXdJTCP', '4EF6IyONolQy0bIQXm2EmX', '1rmEsOezwf2lmIZTMAO5Ag', '5Bedn0svl0ZD7RGmJkmKKw', '7Mts0OfPorF4iwOomvfqn1', '3hlGuz3loYoLfI3bpwieWq', '7ogK4lJDVDMU6A6vYR5rvD', '1Zai5UJ2di3qEuR2HeT2s8', '18WFFUIsewmA8g31KAeo3e', '0g4fMVo4JjwnIpTfFfLdxS', '3zMDGj4D8ogaYgAIZPeU7S', '2913xXOVAIDAqxzV2g4VcU', '2CnjDMdpRjlWv04Xk3s6MW', '1DTRUYVd8rYpla9hhVVwjo', '2OzhQlSqBEmt7hmkYxfT6m', '3NMrVbIVWT3fPXBj0rNDKG', '2XXwLdtuAcE0HSCu61ijAb', '2F3N9tdombb64aW6VtZOdo', '3Vevii7qKqrmW8CcyzBHDl', '5og4Qzt92jJzVDkOtSEilb', '3fO566xJgwxIa3qGCGBvIC', '3ZVFcD8Wlw

### Bulk dataset gathering before getting recommendations/features

In [9]:
def get_available_genres():
    """
    Fetches a list of available genre seeds from Spotify API.

    Returns:
    - genres (list): List of available genres.
    """
    genres = sp.recommendation_genre_seeds()['genres']
    return genres

# Example usage
available_genres = get_available_genres()
print(available_genres)


['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient', 'anime', 'black-metal', 'bluegrass', 'blues', 'bossanova', 'brazil', 'breakbeat', 'british', 'cantopop', 'chicago-house', 'children', 'chill', 'classical', 'club', 'comedy', 'country', 'dance', 'dancehall', 'death-metal', 'deep-house', 'detroit-techno', 'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep', 'edm', 'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk', 'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove', 'grunge', 'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle', 'heavy-metal', 'hip-hop', 'holidays', 'honky-tonk', 'house', 'idm', 'indian', 'indie', 'indie-pop', 'industrial', 'iranian', 'j-dance', 'j-idol', 'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino', 'malay', 'mandopop', 'metal', 'metal-misc', 'metalcore', 'minimal-techno', 'movies', 'mpb', 'new-age', 'new-release', 'opera', 'pagode', 'party', 'philippines-opm', 'piano', 'pop', 'pop-film', 'post-dubstep', 'power-po

In [24]:
def build_large_track_dataset(genres, playlists, num_tracks_per_source=50):
    """
    Builds a dataset of tracks by combining tracks from multiple genres and playlists.

    Args:
    - genres (list): List of genres to search.
    - playlists (list): List of playlist IDs to pull tracks from.
    - num_tracks_per_source (int): Number of tracks to fetch per genre/playlist.

    Returns:
    - tracks_df (pd.DataFrame): DataFrame containing track information.
    """
    track_data = []

    # Fetch tracks by genre
    for genre in genres:
        results = sp.search(q=f'genre:{genre}', type='track', limit=num_tracks_per_source)
        tracks = results['tracks']['items']

        # Collect relevant track information
        for track in tracks:
            track_info = {
                'track_id': track['id'],
                'track_name': track['name'],
                'artist_name': track['artists'][0]['name'],  # Take the first artist listed
                'album_name': track['album']['name'],
                'release_date': track['album']['release_date'],
                'popularity': track['popularity'],
                'genre_source': genre  # Save which genre the track came from
            }
            track_data.append(track_info)

    # Fetch tracks from playlists
    for playlist_id in playlists:
        results = sp.playlist_tracks(playlist_id, limit=num_tracks_per_source)
        tracks = results['items']

        for item in tracks:
            track = item['track']
            track_info = {
                'track_id': track['id'],
                'track_name': track['name'],
                'artist_name': track['artists'][0]['name'],
                'album_name': track['album']['name'],
                'release_date': track['album']['release_date'],
                'popularity': track['popularity'],
                'playlist_source': playlist_id  # Save which playlist the track came from
            }
            track_data.append(track_info)

    # Convert list of track data to a DataFrame
    tracks_df = pd.DataFrame(track_data)

    return tracks_df

# Example usage
# genres = ['pop', 'rock', 'hip-hop']
playlists = ['37i9dQZEVXbMDoHDwVN2tF', '37i9dQZF1DWXRqgorJj26U']  # Top 50 Global and USA

# Fetch the dataset
tracks_df = build_large_track_dataset(available_genres, playlists, num_tracks_per_source=50)

# Display the first few rows of the dataframe
tracks_df


Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77,acoustic,
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73,acoustic,
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72,acoustic,
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74,acoustic,
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69,acoustic,
...,...,...,...,...,...,...,...,...
5745,7GonnnalI2s19OCQO1J7Tf,Kickstart My Heart,Mötley Crüe,Dr. Feelgood,1989,2,,37i9dQZF1DWXRqgorJj26U
5746,5LNiqEqpDc8TuqPy79kDBu,Edge of Seventeen - 2016 Remaster,Stevie Nicks,Bella Donna (Deluxe Edition),2016-11-04,56,,37i9dQZF1DWXRqgorJj26U
5747,6NxsCnLeLd8Ai1TrgGxzIx,Bad Moon Rising,Creedence Clearwater Revival,Green River (40th Anniversary Edition),1969-08-03,0,,37i9dQZF1DWXRqgorJj26U
5748,5eYwDBLucWfWI5KsV7oYX2,Mary Jane's Last Dance,Tom Petty and the Heartbreakers,Anthology: Through The Years,2000-01-01,0,,37i9dQZF1DWXRqgorJj26U


In [25]:
tracks_df.to_csv('tracks_df_og.csv', index=False)

In [16]:
import pandas as pd

def load_existing_dataset(filepath):
    """
    Load an existing dataset from a CSV file.

    Args:
    - filepath (str): Path to the CSV file.

    Returns:
    - df (pd.DataFrame): Loaded DataFrame.
    """
    try:
        return pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"File {filepath} not found. Returning an empty DataFrame.")
        return pd.DataFrame()

def save_dataset(df, filepath):
    """
    Save the dataset to a CSV file.

    Args:
    - df (pd.DataFrame): DataFrame to save.
    - filepath (str): Path to save the CSV file.
    """
    df.to_csv(filepath, index=False)
    print(f"Dataset saved to {filepath}")

def fetch_tracks_by_artist(artist_name, limit=50):
    """
    Fetches tracks by a specific artist.

    Args:
    - artist_name (str): The artist's name.
    - limit (int): Number of tracks to fetch.

    Returns:
    - track_data (list): List of track information dictionaries.
    """
    track_data = []
    results = sp.search(q=f'artist:{artist_name}', type='track', limit=limit)
    tracks = results['tracks']['items']

    for track in tracks:
        track_info = {
            'track_id': track['id'],
            'track_name': track['name'],
            'artist_name': track['artists'][0]['name'],  # First artist listed
            'album_name': track['album']['name'],
            'release_date': track['album']['release_date'],
            'popularity': track['popularity']
        }
        track_data.append(track_info)

    return track_data

def update_dataset_with_artist_tracks(df, artist_names, limit_per_artist=50):
    """
    Updates the dataset by fetching additional tracks for each unique artist.

    Args:
    - df (pd.DataFrame): Existing DataFrame with track information.
    - artist_names (list): List of artist names to fetch more tracks for.
    - limit_per_artist (int): Number of tracks to fetch per artist.

    Returns:
    - updated_df (pd.DataFrame): DataFrame with additional tracks.
    """
    existing_track_ids = set(df['track_id'].tolist())  # Track existing track IDs to avoid duplicates
    new_track_data = []

    for artist_name in artist_names:
        print(f"Fetching tracks for artist: {artist_name}")
        artist_tracks = fetch_tracks_by_artist(artist_name, limit=limit_per_artist)

        for track_info in artist_tracks:
            if track_info['track_id'] not in existing_track_ids:  # Avoid duplicates
                new_track_data.append(track_info)

    # Convert the new tracks to a DataFrame and append to the existing one
    if new_track_data:
        new_tracks_df = pd.DataFrame(new_track_data)
        updated_df = pd.concat([df, new_tracks_df], ignore_index=True)
    else:
        updated_df = df  # No new data, return original

    return updated_df

# Example usage:

# Load existing dataset
filepath = "existing_tracks_dataset.csv"
tracks_df = load_existing_dataset('tracks_df_og.csv')

# Get unique artist names
unique_artists = tracks_df['artist_name'].unique()

# Fetch additional tracks by these artists
updated_tracks_df = update_dataset_with_artist_tracks(tracks_df, unique_artists)

# Save the updated dataset
save_dataset(updated_tracks_df, 'tracks_df_og-w-artists.csv')

updated_tracks_df

Fetching tracks for artist: The Paper Kites
Fetching tracks for artist: Violent Femmes
Fetching tracks for artist: Sara Bareilles
Fetching tracks for artist: Ray LaMontagne
Fetching tracks for artist: Jason Mraz
Fetching tracks for artist: Chord Overstreet
Fetching tracks for artist: Iron & Wine
Fetching tracks for artist: Matt Nathanson
Fetching tracks for artist: Drew Holcomb & The Neighbors
Fetching tracks for artist: Ben Rector
Fetching tracks for artist: Disney Peaceful Guitar
Fetching tracks for artist: Ingrid Michaelson
Fetching tracks for artist: Anna Nalick
Fetching tracks for artist: Ichiko Aoba
Fetching tracks for artist: Kurt Cobain
Fetching tracks for artist: Howie Day
Fetching tracks for artist: Brandi Carlile
Fetching tracks for artist: Amos Lee
Fetching tracks for artist: Eddie Vedder
Fetching tracks for artist: Kina Grannis
Fetching tracks for artist: Augustana
Fetching tracks for artist: Dr. John
Fetching tracks for artist: Eric Hutchinson
Fetching tracks for artist: 

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77,acoustic,
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73,acoustic,
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72,acoustic,
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74,acoustic,
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69,acoustic,
...,...,...,...,...,...,...,...,...
91268,1DqYqVLxUsR6NLUH1ys3AU,If Anyone Falls - 2016 Remaster,Stevie Nicks,The Wild Heart (2016 Remastered),1983-06-10,36,,
91269,0Adyxuv3X9l2CtMt0OeY5M,Stop Draggin' My Heart Around (with Tom Petty ...,Stevie Nicks,Bella Donna (Deluxe Edition),2016-11-04,36,,
91270,48RJAYTcIXuBPg55EzbaEq,Stand Back - 2016 Remaster,Stevie Nicks,The Wild Heart (2016 Remastered),1983-06-10,37,,
91271,35I5lX6yE00YSu7PEgES54,Sleeping Angel (From Fast Times at Ridgemont H...,Stevie Nicks,Bella Donna (Deluxe Edition),2016-11-04,34,,


In [9]:
tracks_df = pd.read_csv('tracks_df_og-w-artists.csv')
tracks_df

  tracks_df = pd.read_csv('tracks_df_og-w-artists.csv')


Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77,acoustic,
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73,acoustic,
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72,acoustic,
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74,acoustic,
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69,acoustic,
...,...,...,...,...,...,...,...,...
91268,1DqYqVLxUsR6NLUH1ys3AU,If Anyone Falls - 2016 Remaster,Stevie Nicks,The Wild Heart (2016 Remastered),1983-06-10,36,,
91269,0Adyxuv3X9l2CtMt0OeY5M,Stop Draggin' My Heart Around (with Tom Petty ...,Stevie Nicks,Bella Donna (Deluxe Edition),2016-11-04,36,,
91270,48RJAYTcIXuBPg55EzbaEq,Stand Back - 2016 Remaster,Stevie Nicks,The Wild Heart (2016 Remastered),1983-06-10,37,,
91271,35I5lX6yE00YSu7PEgES54,Sleeping Angel (From Fast Times at Ridgemont H...,Stevie Nicks,Bella Donna (Deluxe Edition),2016-11-04,34,,


In [12]:
tracks_with_features_df = pd.read_csv('audio_features_backup.csv')
tracks_with_features_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77,acoustic,,0.591,0.416,...,0.1120,0.415,96.003,audio_features,1HMQmOWrkieKYWlFsjUP3D,spotify:track:1HMQmOWrkieKYWlFsjUP3D,https://api.spotify.com/v1/tracks/1HMQmOWrkieK...,https://api.spotify.com/v1/audio-analysis/1HMQ...,210080,4
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73,acoustic,,0.364,0.308,...,0.1230,0.387,73.328,audio_features,6uHvbKL0Yi37AuvNRmUfMw,spotify:track:6uHvbKL0Yi37AuvNRmUfMw,https://api.spotify.com/v1/tracks/6uHvbKL0Yi37...,https://api.spotify.com/v1/audio-analysis/6uHv...,191773,4
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72,acoustic,,0.726,0.537,...,0.0707,0.882,96.889,audio_features,7jIAttgQTpLDoNtykIQXjH,spotify:track:7jIAttgQTpLDoNtykIQXjH,https://api.spotify.com/v1/tracks/7jIAttgQTpLD...,https://api.spotify.com/v1/audio-analysis/7jIA...,145707,4
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74,acoustic,,0.583,0.786,...,0.1880,0.573,123.055,audio_features,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf,https://api.spotify.com/v1/tracks/4E6cwWJWZw2z...,https://api.spotify.com/v1/audio-analysis/4E6c...,258827,4
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69,acoustic,,0.578,0.727,...,0.1720,0.885,170.593,audio_features,1jyddn36UN4tVsJGtaJfem,spotify:track:1jyddn36UN4tVsJGtaJfem,https://api.spotify.com/v1/tracks/1jyddn36UN4t...,https://api.spotify.com/v1/audio-analysis/1jyd...,231840,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1748,4HrIZyZIqd7kqShDXEyN1n,Laranjinha,Wesley Safadão,Arrocha Safadão,2024-09-06,59,forro,,0.722,0.687,...,0.0656,0.923,137.992,audio_features,4HrIZyZIqd7kqShDXEyN1n,spotify:track:4HrIZyZIqd7kqShDXEyN1n,https://api.spotify.com/v1/tracks/4HrIZyZIqd7k...,https://api.spotify.com/v1/audio-analysis/4HrI...,175978,4
1749,4dGx53NJZJyuuLP5ownH1p,Mentira Estampada,Wesley Safadão,Arrocha Safadão,2024-09-06,60,forro,,0.704,0.773,...,0.0978,0.864,137.881,audio_features,4dGx53NJZJyuuLP5ownH1p,spotify:track:4dGx53NJZJyuuLP5ownH1p,https://api.spotify.com/v1/tracks/4dGx53NJZJyu...,https://api.spotify.com/v1/audio-analysis/4dGx...,173207,4
1750,1eyzqe2QqGZUmfcPZtrIyt,Midnight City,M83,"Hurry Up, We're Dreaming",2011,70,french,,0.526,0.712,...,0.1790,0.320,105.009,audio_features,1eyzqe2QqGZUmfcPZtrIyt,spotify:track:1eyzqe2QqGZUmfcPZtrIyt,https://api.spotify.com/v1/tracks/1eyzqe2QqGZU...,https://api.spotify.com/v1/audio-analysis/1eyz...,241440,4
1751,5ZduaRci3iNUiDfJbBfAaf,Give It To Me - Full Vocal Mix,Matt Sassari,Give It To Me (Full Vocal Mix),2021-10-22,83,french,,0.874,0.869,...,0.1640,0.726,126.027,audio_features,5ZduaRci3iNUiDfJbBfAaf,spotify:track:5ZduaRci3iNUiDfJbBfAaf,https://api.spotify.com/v1/tracks/5ZduaRci3iNU...,https://api.spotify.com/v1/audio-analysis/5Zdu...,102861,4


## Extract track features/get spotify recommendations to seed

In [10]:
# import time
# import logging
# import pandas as pd

# # Set up basic logging
# logging.basicConfig(level=logging.WARNING)

# def add_audio_features_to_df(df, max_retries=3, wait_time=2, save_filepath='audio_features_backup.csv'):
#     """
#     Adds audio features to a DataFrame containing track information with error handling.
#     Saves the progress and returns what has been gathered so far if the rate limit is hit multiple times.

#     Args:
#     - df (pd.DataFrame): DataFrame with track information including 'track_id'.
#     - max_retries (int): Maximum number of retries before skipping a track.
#     - wait_time (int): Time to wait in seconds before retrying a request (in case of rate limit).
#     - save_filepath (str): Filepath to save partial results in case of multiple 429 rate limit errors.

#     Returns:
#     - df_with_features (pd.DataFrame): DataFrame with additional columns for audio features.
#     """
#     audio_features_list = []

#     for index, track_id in enumerate(df['track_id']):
#         retries = 0
#         while retries < max_retries:
#             try:
#                 # Fetch audio features for the track
#                 print('track id:', track_id)
#                 features = sp.audio_features(track_id)[0]
#                 if features:
#                     audio_features_list.append(features)
#                 else:
#                     audio_features_list.append({})
#                 break  # Exit the retry loop if successful

#             except spotipy.exceptions.SpotifyException as e:
#                 # Handle rate limit (429 error)
#                 if e.http_status == 429:
#                     retries += 1
#                     # Get retry-after header to know how long to wait
#                     retry_after = int(e.headers.get('Retry-After', wait_time))
#                     logging.warning(f"Rate limited on track {track_id}, retrying in {retry_after} seconds...")
#                     time.sleep(retry_after)  # Sleep for the time specified by Spotify

#                 else:
#                     logging.error(f"Error fetching audio features for track {track_id}: {str(e)}")
#                     audio_features_list.append({})
#                     break  # Exit the retry loop if it's not a rate limit error

#         # If retries exhausted due to rate limiting, save progress and return
#         if retries == max_retries:
#             logging.warning(f"Max retries reached for track {track_id}, saving progress and stopping...")

#             # Convert the list of audio features to a DataFrame
#             audio_features_df = pd.DataFrame(audio_features_list)

#             # Concatenate the original DataFrame with the audio features DataFrame
#             df_with_features = pd.concat([df.iloc[:len(audio_features_df)].reset_index(drop=True), audio_features_df.reset_index(drop=True)], axis=1)

#             # Save the progress to a CSV file
#             df_with_features.to_csv(save_filepath, index=False)
#             logging.info(f"Partial results saved to {save_filepath}")
#             return df_with_features

#     # Convert the list of audio features to a DataFrame
#     audio_features_df = pd.DataFrame(audio_features_list)

#     # Concatenate the original DataFrame with the audio features DataFrame
#     df_with_features = pd.concat([df.reset_index(drop=True), audio_features_df.reset_index(drop=True)], axis=1)

#     return df_with_features

# # Example usage
# tracks_with_features_df = add_audio_features_to_df(tracks_df, max_retries=3, wait_time=2, save_filepath='audio_features_backup.csv')
# tracks_with_features_df


import time
import logging
import pandas as pd

# Set up basic logging
logging.basicConfig(level=logging.WARNING)

def add_audio_features_in_batches(df, max_retries=3, wait_time=2, save_filepath='audio_features_backup.csv'):
    """
    Adds audio features to a DataFrame containing track information in batches.
    Loads existing progress from a CSV file, resumes fetching from the next track, and appends new features.

    Args:
    - df (pd.DataFrame): DataFrame with track information including 'track_id'.
    - max_retries (int): Maximum number of retries before skipping a track.
    - wait_time (int): Time to wait in seconds before retrying a request (in case of rate limit).
    - save_filepath (str): Filepath to save partial results in case of rate limit errors or script failure.

    Returns:
    - df_with_features (pd.DataFrame): DataFrame with added columns for audio features.
    """
    # Load the previous progress if it exists
    try:
        existing_data = pd.read_csv(save_filepath)
        fetched_track_ids = set(existing_data['track_id'].dropna())  # Tracks we already have features for
        logging.info(f"Loaded {len(fetched_track_ids)} tracks from {save_filepath}.")
    except FileNotFoundError:
        logging.warning(f"No previous file found. Starting from scratch.")
        existing_data = pd.DataFrame()  # Start fresh if no previous data
        fetched_track_ids = set()  # Empty set to track fetched IDs

    # Prepare to collect new audio features
    audio_features_list = []

    # Start fetching features for tracks that are not yet in the backup CSV
    for index, row in df.iterrows():
        track_id = row['track_id']

        # Skip tracks we already have audio features for
        if track_id in fetched_track_ids:
            continue

        retries = 0
        while retries < max_retries:
            try:
                # Fetch audio features for the track
                print(f'Fetching audio features for track ID: {track_id}')
                features = sp.audio_features(track_id)[0]
                if features:
                    audio_features_list.append(features)
                else:
                    audio_features_list.append({})  # Append an empty dict if no features found
                break  # Exit retry loop if successful

            except spotipy.exceptions.SpotifyException as e:
                # Handle rate limit (429 error)
                if e.http_status == 429:
                    retries += 1
                    retry_after = int(e.headers.get('Retry-After', wait_time))
                    logging.warning(f"Rate limited on track {track_id}, retrying in {retry_after} seconds...")
                    time.sleep(retry_after)
                else:
                    logging.error(f"Error fetching audio features for track {track_id}: {str(e)}")
                    audio_features_list.append({})  # Append an empty dict in case of an error
                    break  # Exit retry loop if it's not a rate limit error

        # If max retries are reached, save progress and stop
        if retries == max_retries:
            logging.warning(f"Max retries reached for track {track_id}. Saving progress and stopping...")
            break

    # Convert the list of newly fetched audio features to a DataFrame
    if audio_features_list:
        new_audio_features_df = pd.DataFrame(audio_features_list)

        # Combine the newly fetched data with the previous data
        if not existing_data.empty:
            combined_data = pd.concat([existing_data, new_audio_features_df], ignore_index=True)
        else:
            combined_data = new_audio_features_df

        # Save the combined data to the CSV file
        combined_data.to_csv(save_filepath, index=False)
        logging.info(f"Progress saved to {save_filepath}.")
    else:
        logging.warning("No new audio features fetched.")

    return combined_data if audio_features_list else existing_data

# Example usage: Resume fetching from where we left off and save progress
tracks_with_features_df = add_audio_features_in_batches(tracks_df, max_retries=3, wait_time=2, save_filepath='audio_features_backup.csv')
tracks_with_features_df


Fetching audio features for track ID: 65uoaqX5qcjXZRheAj1qQT
Fetching audio features for track ID: 5fnA9mkIfScSqHIpeDyvck
Fetching audio features for track ID: 2QVmiA93GVhWNTWQctyY1K
Fetching audio features for track ID: 3rTnGUeDrnZV22DvRuUuXr
Fetching audio features for track ID: 1Fwj0wThn3kTg8D7KgWdsU
Fetching audio features for track ID: 4AfGJPK64DlMAy86TtTVUa
Fetching audio features for track ID: 3HeZam86SuxGp1wZ3XMIjE
Fetching audio features for track ID: 6nGeLlakfzlBcFdZXteDq7
Fetching audio features for track ID: 4TNFLwe6DhtR3Wn1JKMqMJ
Fetching audio features for track ID: 269XSeQhDrWEotMALJaACW
Fetching audio features for track ID: 3WRQUvzRvBDr4AxMWhXc5E
Fetching audio features for track ID: 5SOA0gaKMMp9cgwn3nuwk7
Fetching audio features for track ID: 67eYAnkdTu8BMcIx29z26L
Fetching audio features for track ID: 5fIZ683j2xPeLAXfHeWKEG
Fetching audio features for track ID: 2RuOx8TOWRGbMoNw2lQvCa
Fetching audio features for track ID: 7nZ9CzhiFRPhOQCn7eDSnn
Fetching audio features 

ERROR:spotipy.client:Max Retries reached


Fetching audio features for track ID: 1eZefeDb8uOsjvcbl1fJrG


ERROR:spotipy.client:Max Retries reached


Fetching audio features for track ID: 1eZefeDb8uOsjvcbl1fJrG


ERROR:spotipy.client:Max Retries reached


Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77.0,acoustic,,0.591,0.416,...,0.1120,0.415,96.003,audio_features,1HMQmOWrkieKYWlFsjUP3D,spotify:track:1HMQmOWrkieKYWlFsjUP3D,https://api.spotify.com/v1/tracks/1HMQmOWrkieK...,https://api.spotify.com/v1/audio-analysis/1HMQ...,210080,4
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73.0,acoustic,,0.364,0.308,...,0.1230,0.387,73.328,audio_features,6uHvbKL0Yi37AuvNRmUfMw,spotify:track:6uHvbKL0Yi37AuvNRmUfMw,https://api.spotify.com/v1/tracks/6uHvbKL0Yi37...,https://api.spotify.com/v1/audio-analysis/6uHv...,191773,4
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72.0,acoustic,,0.726,0.537,...,0.0707,0.882,96.889,audio_features,7jIAttgQTpLDoNtykIQXjH,spotify:track:7jIAttgQTpLDoNtykIQXjH,https://api.spotify.com/v1/tracks/7jIAttgQTpLD...,https://api.spotify.com/v1/audio-analysis/7jIA...,145707,4
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74.0,acoustic,,0.583,0.786,...,0.1880,0.573,123.055,audio_features,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf,https://api.spotify.com/v1/tracks/4E6cwWJWZw2z...,https://api.spotify.com/v1/audio-analysis/4E6c...,258827,4
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69.0,acoustic,,0.578,0.727,...,0.1720,0.885,170.593,audio_features,1jyddn36UN4tVsJGtaJfem,spotify:track:1jyddn36UN4tVsJGtaJfem,https://api.spotify.com/v1/tracks/1jyddn36UN4t...,https://api.spotify.com/v1/audio-analysis/1jyd...,231840,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3750,,,,,,,,,0.741,0.620,...,0.0398,0.934,117.038,audio_features,4IadxL6BUymXlh8RCJJu7T,spotify:track:4IadxL6BUymXlh8RCJJu7T,https://api.spotify.com/v1/tracks/4IadxL6BUymX...,https://api.spotify.com/v1/audio-analysis/4Iad...,251424,4
3751,,,,,,,,,0.504,0.386,...,0.0961,0.281,192.004,audio_features,2OzhQlSqBEmt7hmkYxfT6m,spotify:track:2OzhQlSqBEmt7hmkYxfT6m,https://api.spotify.com/v1/tracks/2OzhQlSqBEmt...,https://api.spotify.com/v1/audio-analysis/2Ozh...,228965,4
3752,,,,,,,,,0.606,0.714,...,0.1060,0.448,141.512,audio_features,2mWfVxEo4xZYDaz0v7hYrN,spotify:track:2mWfVxEo4xZYDaz0v7hYrN,https://api.spotify.com/v1/tracks/2mWfVxEo4xZY...,https://api.spotify.com/v1/audio-analysis/2mWf...,195110,4
3753,,,,,,,,,0.829,0.617,...,0.0618,0.810,125.605,audio_features,4S4QJfBGGrC8jRIjJHf1Ka,spotify:track:4S4QJfBGGrC8jRIjJHf1Ka,https://api.spotify.com/v1/tracks/4S4QJfBGGrC8...,https://api.spotify.com/v1/audio-analysis/4S4Q...,98544,4


In [13]:
tracks_with_features_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77,acoustic,,0.591,0.416,...,0.1120,0.415,96.003,audio_features,1HMQmOWrkieKYWlFsjUP3D,spotify:track:1HMQmOWrkieKYWlFsjUP3D,https://api.spotify.com/v1/tracks/1HMQmOWrkieK...,https://api.spotify.com/v1/audio-analysis/1HMQ...,210080,4
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73,acoustic,,0.364,0.308,...,0.1230,0.387,73.328,audio_features,6uHvbKL0Yi37AuvNRmUfMw,spotify:track:6uHvbKL0Yi37AuvNRmUfMw,https://api.spotify.com/v1/tracks/6uHvbKL0Yi37...,https://api.spotify.com/v1/audio-analysis/6uHv...,191773,4
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72,acoustic,,0.726,0.537,...,0.0707,0.882,96.889,audio_features,7jIAttgQTpLDoNtykIQXjH,spotify:track:7jIAttgQTpLDoNtykIQXjH,https://api.spotify.com/v1/tracks/7jIAttgQTpLD...,https://api.spotify.com/v1/audio-analysis/7jIA...,145707,4
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74,acoustic,,0.583,0.786,...,0.1880,0.573,123.055,audio_features,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf,https://api.spotify.com/v1/tracks/4E6cwWJWZw2z...,https://api.spotify.com/v1/audio-analysis/4E6c...,258827,4
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69,acoustic,,0.578,0.727,...,0.1720,0.885,170.593,audio_features,1jyddn36UN4tVsJGtaJfem,spotify:track:1jyddn36UN4tVsJGtaJfem,https://api.spotify.com/v1/tracks/1jyddn36UN4t...,https://api.spotify.com/v1/audio-analysis/1jyd...,231840,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1748,4HrIZyZIqd7kqShDXEyN1n,Laranjinha,Wesley Safadão,Arrocha Safadão,2024-09-06,59,forro,,0.722,0.687,...,0.0656,0.923,137.992,audio_features,4HrIZyZIqd7kqShDXEyN1n,spotify:track:4HrIZyZIqd7kqShDXEyN1n,https://api.spotify.com/v1/tracks/4HrIZyZIqd7k...,https://api.spotify.com/v1/audio-analysis/4HrI...,175978,4
1749,4dGx53NJZJyuuLP5ownH1p,Mentira Estampada,Wesley Safadão,Arrocha Safadão,2024-09-06,60,forro,,0.704,0.773,...,0.0978,0.864,137.881,audio_features,4dGx53NJZJyuuLP5ownH1p,spotify:track:4dGx53NJZJyuuLP5ownH1p,https://api.spotify.com/v1/tracks/4dGx53NJZJyu...,https://api.spotify.com/v1/audio-analysis/4dGx...,173207,4
1750,1eyzqe2QqGZUmfcPZtrIyt,Midnight City,M83,"Hurry Up, We're Dreaming",2011,70,french,,0.526,0.712,...,0.1790,0.320,105.009,audio_features,1eyzqe2QqGZUmfcPZtrIyt,spotify:track:1eyzqe2QqGZUmfcPZtrIyt,https://api.spotify.com/v1/tracks/1eyzqe2QqGZU...,https://api.spotify.com/v1/audio-analysis/1eyz...,241440,4
1751,5ZduaRci3iNUiDfJbBfAaf,Give It To Me - Full Vocal Mix,Matt Sassari,Give It To Me (Full Vocal Mix),2021-10-22,83,french,,0.874,0.869,...,0.1640,0.726,126.027,audio_features,5ZduaRci3iNUiDfJbBfAaf,spotify:track:5ZduaRci3iNUiDfJbBfAaf,https://api.spotify.com/v1/tracks/5ZduaRci3iNU...,https://api.spotify.com/v1/audio-analysis/5Zdu...,102861,4


In [18]:
# def get_track_features(track_ids):
#     """
#     Fetches audio features for a list of track IDs.

#     Args:
#     - track_ids (list): List of Spotify track IDs.

#     Returns:
#     - features (list): A list of dictionaries containing audio features for each track.
#     """
#     features = []

#     # Fetch audio features in batches
#     for i in range(0, len(track_ids), 100):  # 100 is the maximum batch size per request
#         audio_features = sp.audio_features(track_ids[i:i+100])
#         features.extend(audio_features)

#     return features

# # Example usage
# # track_ids = ['track_id1', 'track_id2', 'track_id3']  # Replace with actual track IDs
# track_features = get_track_features(large_track_id_array)
# track_features

[{'danceability': 0.7,
  'energy': 0.582,
  'key': 11,
  'loudness': -5.96,
  'mode': 0,
  'speechiness': 0.0356,
  'acousticness': 0.0502,
  'instrumentalness': 0,
  'liveness': 0.0881,
  'valence': 0.785,
  'tempo': 116.712,
  'type': 'audio_features',
  'id': '0WbMK4wrZ1wFSty9F7FCgu',
  'uri': 'spotify:track:0WbMK4wrZ1wFSty9F7FCgu',
  'track_href': 'https://api.spotify.com/v1/tracks/0WbMK4wrZ1wFSty9F7FCgu',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0WbMK4wrZ1wFSty9F7FCgu',
  'duration_ms': 218424,
  'time_signature': 4},
 {'danceability': 0.747,
  'energy': 0.507,
  'key': 2,
  'loudness': -10.171,
  'mode': 1,
  'speechiness': 0.0358,
  'acousticness': 0.2,
  'instrumentalness': 0.0608,
  'liveness': 0.117,
  'valence': 0.438,
  'tempo': 104.978,
  'type': 'audio_features',
  'id': '6dOtVTDdiauQNBQEDOtlAB',
  'uri': 'spotify:track:6dOtVTDdiauQNBQEDOtlAB',
  'track_href': 'https://api.spotify.com/v1/tracks/6dOtVTDdiauQNBQEDOtlAB',
  'analysis_url': 'https://api.sp

In [20]:
import pandas as pd

def get_recommendations(seed_tracks, limit=10, features_df=None):
    """
    Fetches track recommendations based on seed tracks and formats the output.

    Args:
    - seed_tracks (list): List of seed track IDs.
    - limit (int): Number of recommendations to fetch.
    - features_df (pd.DataFrame): The DataFrame containing the original track features. Used to map seed track info.

    Returns:
    - recommendations_df (pd.DataFrame): DataFrame of recommended tracks with added columns for seed tracks.
    """
    all_recommendations = []

    # Loop through each seed track to get its recommendations
    for seed_track in seed_tracks:
        # Fetch recommendations for the current seed track
        recommendations = sp.recommendations(seed_tracks=[seed_track], limit=limit)

        # Extract track data
        rec_tracks = recommendations['tracks']

        # Format and extract relevant fields (album, artists, etc.)
        for track in rec_tracks:
            # Flatten artist and album fields
            track_data = {
                'track_id': track['id'],
                'track_name': track['name'],
                'album_name': track['album']['name'],
                'album_id': track['album']['id'],
                'artist_names': ', '.join([artist['name'] for artist in track['artists']]),  # Join artist names
                'popularity': track['popularity'],
                'duration_ms': track['duration_ms'],
                'seed_track_id': seed_track  # Indicate the seed track
            }

            # Add seed track name from features_df if provided
            if features_df is not None:
                seed_track_name = features_df[features_df['track_id'] == seed_track]['track_name'].values[0]
                seed_artist_name = features_df[features_df['track_id'] == seed_track]['artist_name'].values[0]

                track_data['seed_track_name'] = seed_track_name
                track_data['seed_artist_name'] = seed_artist_name

            all_recommendations.append(track_data)

    # Convert the list of track data into a DataFrame
    recommendations_df = pd.DataFrame(all_recommendations)

    return recommendations_df

# Example usage
seed_tracks_list = tracks_with_features_df['track_id'].tolist()[0:5]  # Use first 2 tracks as seed tracks
spotify_recommendations_df = get_recommendations(seed_tracks_list, limit=10, features_df=tracks_with_features_df)

# Display the result
spotify_recommendations_df


Unnamed: 0,track_id,track_name,album_name,album_id,artist_names,popularity,duration_ms,seed_track_id,seed_track_name,seed_artist_name
0,3RFj986WYsjgYBgzrtWARi,Dearest (feat. Lydia Cole),Roses,6hrJk6bOTnQ6dbD7WKcVoq,"The Paper Kites, Lydia Cole",28,194922,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
1,3GLMvFjzOgJ3XGphEEVEPv,Our Remains,Ancient Transition,1ybYkyaTa4UKK38JnMXAJl,Beta Radio,52,244120,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
2,47DKI4RxI8ZW0sBqDenMhO,Woman,Delta,0Wmnkh4lzGy5rgkUPOjYbg,Mumford & Sons,57,274552,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
3,1zFUk2P19r0j3ohAhTU9kX,The Days That Are to Come,High Ridge & Stones,5katg4N8jjdgrxGLfHnPyS,John Vincent III,41,352632,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
4,42UaitnwvuKqNcD5Oa2HlD,Thinking 'Bout Love,Songs to Break Up To,5OVxLrOoXXD2HSKkQNqhQW,Wild Rivers,72,199391,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites


In [21]:
spotify_recommendations_df

Unnamed: 0,track_id,track_name,album_name,album_id,artist_names,popularity,duration_ms,seed_track_id,seed_track_name,seed_artist_name
0,3RFj986WYsjgYBgzrtWARi,Dearest (feat. Lydia Cole),Roses,6hrJk6bOTnQ6dbD7WKcVoq,"The Paper Kites, Lydia Cole",28,194922,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
1,3GLMvFjzOgJ3XGphEEVEPv,Our Remains,Ancient Transition,1ybYkyaTa4UKK38JnMXAJl,Beta Radio,52,244120,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
2,47DKI4RxI8ZW0sBqDenMhO,Woman,Delta,0Wmnkh4lzGy5rgkUPOjYbg,Mumford & Sons,57,274552,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
3,1zFUk2P19r0j3ohAhTU9kX,The Days That Are to Come,High Ridge & Stones,5katg4N8jjdgrxGLfHnPyS,John Vincent III,41,352632,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
4,42UaitnwvuKqNcD5Oa2HlD,Thinking 'Bout Love,Songs to Break Up To,5OVxLrOoXXD2HSKkQNqhQW,Wild Rivers,72,199391,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
5,6QXolVfFVjUHPp5TVzsjIF,Only Time,Only Time,7GsStYh3eGaa8PYiZPnkMD,Aaron Espe,47,178546,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
6,0uQxV7ysuBPhbfUkW1WOCp,First Class,Seven + Mary,5krQbJqzCCOxCGOQ3W8Tr6,Rainbow Kitten Surprise,64,343485,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
7,5zT5cMnMKoyruPj13TQXGx,I Found,5AM (Expanded Edition),05WTBdwkHAqk5Mg68ugD7L,Amber Run,73,273000,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
8,6LKvzEJQTIm2RrWAMPwQH6,If We Were Vampires (feat. Wesley Schultz) - S...,Spotify Singles,6WdZ6j0Mm6fOzpyHXKx8PP,"Noah Kahan, Wesley Schultz",66,217873,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites
9,7f0PCIJFVw7U5tbuFH4rb0,Need You,Olly Olly,4ruy6zMB9t2sKrNyK2B8zF,Penny and Sparrow,48,189282,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites


In [23]:
# import pandas as pd
# from sklearn.preprocessing import MinMaxScaler

# # Step 1: Normalize audio features
# scaler = MinMaxScaler()
# tracks_with_normalized_features = tracks_with_features_df.copy()
# numerical_columns = ['danceability', 'energy', 'tempo', 'acousticness', 'valence']
# tracks_with_normalized_features[numerical_columns] = scaler.fit_transform(
#     tracks_with_normalized_features[numerical_columns])



# def get_spotify_track_recommendations(seed_tracks, limit=10):
#     """
#     Fetches track recommendations based on seed tracks.

#     Args:
#     - seed_tracks (list): List of seed track IDs.
#     - limit (int): Number of recommendations to fetch.

#     Returns:
#     - recommendations (list): List of recommended track objects.
#     """
#     recommendations = sp.recommendations(seed_tracks=seed_tracks, limit=limit)

#     recommendations_df = pd.DataFrame(recommendations['tracks'])

#     return recommendations_df

# # Example usage
# # seed_tracks_list = tracks_with_features_df['track_id'].tolist()[0:1]  # Convert to list
# # spotify_recommendations = get_recommendations(seed_tracks_list[:2])  # Fetch recommendations using the first 5 track IDs as seeds
# # spotify_recommendations



# # Step 2: Fetch recommendations and build the target dataframe
# recommendations = []
# for track_id in tracks_with_normalized_features['track_id']:
#     recs = get_spotify_track_recommendations([track_id], limit=5)  # Get 5 recommendations
#     rec_track_ids = [track['id'] for track in recs]
#     recommendations.append({'track_id': track_id, 'recommended_tracks': rec_track_ids})

# recommendations_df = pd.DataFrame(recommendations)
# recommendations_df

TypeError: string indices must be integers

## Build seed dataset

In [None]:
import pandas as pd

def build_dataset(seed_tracks, num_recommendations=20):
    """
    Builds a dataset of tracks and their features, including recommendations.

    Args:
    - seed_tracks (list): List of seed track IDs.
    - num_recommendations (int): Number of recommendations to fetch.

    Returns:
    - df (pd.DataFrame): A DataFrame containing track features and recommendations.
    """
    # Get initial track features for seed tracks
    seed_track_features = get_track_features(seed_tracks)

    # Get recommended tracks
    recommended_tracks = get_recommendations(seed_tracks, limit=num_recommendations)

    # Extract the IDs of the recommended tracks
    recommended_track_ids = [track['id'] for track in recommended_tracks]

    # Get audio features for recommended tracks
    recommended_track_features = get_track_features(recommended_track_ids)

    # Combine seed track features and recommended track features
    all_features = seed_track_features + recommended_track_features

    # Convert to DataFrame
    df = pd.DataFrame(all_features)

    return df

# Example usage
seed_tracks = ['track_id1', 'track_id2', 'track_id3']  # Replace with actual seed track IDs
df = build_dataset(seed_tracks)
print(df.head())


## Content-Based Filtering
Content-based filtering recommends songs based on features of the song itself (e.g., audio features). This method can be useful when starting with a general dataset and allows you to build a recommendation engine that compares items directly.

Steps:

Collect features such as tempo, energy, danceability, acousticness, valence, etc., using Spotify’s API.
Use these features to compute song similarity (e.g., cosine similarity).
Recommend songs that are closest to the song features using similarity metrics.

In [24]:
tracks_with_features_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,77,acoustic,,0.591,0.416,...,0.1120,0.415,96.003,audio_features,1HMQmOWrkieKYWlFsjUP3D,spotify:track:1HMQmOWrkieKYWlFsjUP3D,https://api.spotify.com/v1/tracks/1HMQmOWrkieK...,https://api.spotify.com/v1/audio-analysis/1HMQ...,210080,4
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,73,acoustic,,0.364,0.308,...,0.1230,0.387,73.328,audio_features,6uHvbKL0Yi37AuvNRmUfMw,spotify:track:6uHvbKL0Yi37AuvNRmUfMw,https://api.spotify.com/v1/tracks/6uHvbKL0Yi37...,https://api.spotify.com/v1/audio-analysis/6uHv...,191773,4
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,72,acoustic,,0.726,0.537,...,0.0707,0.882,96.889,audio_features,7jIAttgQTpLDoNtykIQXjH,spotify:track:7jIAttgQTpLDoNtykIQXjH,https://api.spotify.com/v1/tracks/7jIAttgQTpLD...,https://api.spotify.com/v1/audio-analysis/7jIA...,145707,4
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,74,acoustic,,0.583,0.786,...,0.1880,0.573,123.055,audio_features,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf,https://api.spotify.com/v1/tracks/4E6cwWJWZw2z...,https://api.spotify.com/v1/audio-analysis/4E6c...,258827,4
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,69,acoustic,,0.578,0.727,...,0.1720,0.885,170.593,audio_features,1jyddn36UN4tVsJGtaJfem,spotify:track:1jyddn36UN4tVsJGtaJfem,https://api.spotify.com/v1/tracks/1jyddn36UN4t...,https://api.spotify.com/v1/audio-analysis/1jyd...,231840,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1748,4HrIZyZIqd7kqShDXEyN1n,Laranjinha,Wesley Safadão,Arrocha Safadão,2024-09-06,59,forro,,0.722,0.687,...,0.0656,0.923,137.992,audio_features,4HrIZyZIqd7kqShDXEyN1n,spotify:track:4HrIZyZIqd7kqShDXEyN1n,https://api.spotify.com/v1/tracks/4HrIZyZIqd7k...,https://api.spotify.com/v1/audio-analysis/4HrI...,175978,4
1749,4dGx53NJZJyuuLP5ownH1p,Mentira Estampada,Wesley Safadão,Arrocha Safadão,2024-09-06,60,forro,,0.704,0.773,...,0.0978,0.864,137.881,audio_features,4dGx53NJZJyuuLP5ownH1p,spotify:track:4dGx53NJZJyuuLP5ownH1p,https://api.spotify.com/v1/tracks/4dGx53NJZJyu...,https://api.spotify.com/v1/audio-analysis/4dGx...,173207,4
1750,1eyzqe2QqGZUmfcPZtrIyt,Midnight City,M83,"Hurry Up, We're Dreaming",2011,70,french,,0.526,0.712,...,0.1790,0.320,105.009,audio_features,1eyzqe2QqGZUmfcPZtrIyt,spotify:track:1eyzqe2QqGZUmfcPZtrIyt,https://api.spotify.com/v1/tracks/1eyzqe2QqGZU...,https://api.spotify.com/v1/audio-analysis/1eyz...,241440,4
1751,5ZduaRci3iNUiDfJbBfAaf,Give It To Me - Full Vocal Mix,Matt Sassari,Give It To Me (Full Vocal Mix),2021-10-22,83,french,,0.874,0.869,...,0.1640,0.726,126.027,audio_features,5ZduaRci3iNUiDfJbBfAaf,spotify:track:5ZduaRci3iNUiDfJbBfAaf,https://api.spotify.com/v1/tracks/5ZduaRci3iNU...,https://api.spotify.com/v1/audio-analysis/5Zdu...,102861,4


In [25]:
list(tracks_with_features_df)

['track_id',
 'track_name',
 'artist_name',
 'album_name',
 'release_date',
 'popularity',
 'genre_source',
 'playlist_source',
 'danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'type',
 'id',
 'uri',
 'track_href',
 'analysis_url',
 'duration_ms',
 'time_signature']

### Using cosine-similarity to recommend with content-based filtering

In [26]:
from sklearn.metrics.pairwise import cosine_similarity

# Example of fetching and comparing Spotify song features
song_features = tracks_with_features_df[['danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'time_signature',
#  'duration_ms',
 ]]
similarity_matrix = cosine_similarity(song_features)

# Recommend similar songs
def recommend_songs(song_id, similarity_matrix, df, top_n=10):
    song_index = df[df['track_id'] == song_id].index[0]
    similar_indices = similarity_matrix[song_index].argsort()[::-1][1:top_n+1]
    return df.iloc[similar_indices]

cosine_rec_df = recommend_songs('7BlvPctRnjjJUjBnrySJ7b', similarity_matrix, tracks_with_features_df)
cosine_rec_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
1126,1dr9fQuYZ6NtGuVqdMOTw1,Romance,Varials,In Darkness,2019-10-11,55,death-metal,,0.661,0.692,...,0.162,0.0595,126.845,audio_features,1dr9fQuYZ6NtGuVqdMOTw1,spotify:track:1dr9fQuYZ6NtGuVqdMOTw1,https://api.spotify.com/v1/tracks/1dr9fQuYZ6Nt...,https://api.spotify.com/v1/audio-analysis/1dr9...,142520,4
606,3aawXLnJpcDVXCl1UUksAW,淘汰,Eason Chan,認了吧,2007-01-01,58,cantopop,,0.512,0.551,...,0.302,0.206,117.978,audio_features,3aawXLnJpcDVXCl1UUksAW,spotify:track:3aawXLnJpcDVXCl1UUksAW,https://api.spotify.com/v1/tracks/3aawXLnJpcDV...,https://api.spotify.com/v1/audio-analysis/3aaw...,285000,4
1161,6txvQu0zUbiqG24A8XMLnK,Miracle Maker,Dom Dolla,Miracle Maker,2022-07-14,66,deep-house,,0.609,0.94,...,0.15,0.238,128.001,audio_features,6txvQu0zUbiqG24A8XMLnK,spotify:track:6txvQu0zUbiqG24A8XMLnK,https://api.spotify.com/v1/tracks/6txvQu0zUbiq...,https://api.spotify.com/v1/audio-analysis/6txv...,188592,4
556,1c8gk2PeTE04A1pIDH9YMk,Rolling in the Deep,Adele,21,2011-01-24,79,british,,0.73,0.769,...,0.0473,0.507,104.948,audio_features,1c8gk2PeTE04A1pIDH9YMk,spotify:track:1c8gk2PeTE04A1pIDH9YMk,https://api.spotify.com/v1/tracks/1c8gk2PeTE04...,https://api.spotify.com/v1/audio-analysis/1c8g...,228093,4
964,5W4kiM2cUYBJXKRudNyxjW,You Proof,Morgan Wallen,One Thing At A Time,2023-03-03,82,country,,0.728,0.818,...,0.582,0.681,119.706,audio_features,5W4kiM2cUYBJXKRudNyxjW,spotify:track:5W4kiM2cUYBJXKRudNyxjW,https://api.spotify.com/v1/tracks/5W4kiM2cUYBJ...,https://api.spotify.com/v1/audio-analysis/5W4k...,157478,4
500,4ZtqsOdBbS6GoedzzRGSo9,Breathe,The Prodigy,The Fat of the Land,1997-06-30,53,breakbeat,,0.673,0.808,...,0.037,0.303,130.041,audio_features,4ZtqsOdBbS6GoedzzRGSo9,spotify:track:4ZtqsOdBbS6GoedzzRGSo9,https://api.spotify.com/v1/tracks/4ZtqsOdBbS6G...,https://api.spotify.com/v1/audio-analysis/4Ztq...,336280,4
1232,1BW0sbpZavICte8D22HKNW,Something Here for the Club - Instrumental,Terrence Parker,Something Here for the Club (Instrumental),2018-09-14,37,detroit-techno,,0.825,0.858,...,0.0321,0.906,130.03,audio_features,1BW0sbpZavICte8D22HKNW,spotify:track:1BW0sbpZavICte8D22HKNW,https://api.spotify.com/v1/tracks/1BW0sbpZavIC...,https://api.spotify.com/v1/audio-analysis/1BW0...,403191,4
311,3iubkenxO8JUJNp7phyVlb,My Meds Aren't Working,Dystopia,Dystopia,2008-04-19,52,black-metal,,0.288,0.792,...,0.209,0.16,137.077,audio_features,3iubkenxO8JUJNp7phyVlb,spotify:track:3iubkenxO8JUJNp7phyVlb,https://api.spotify.com/v1/tracks/3iubkenxO8JU...,https://api.spotify.com/v1/audio-analysis/3iub...,241625,4
1078,6YB6CK4Tsb0BgtxCEL9KlI,Give It Up to Me,Sean Paul,The Trinity,2005-09-27,64,dancehall,,0.855,0.674,...,0.0548,0.578,95.991,audio_features,6YB6CK4Tsb0BgtxCEL9KlI,spotify:track:6YB6CK4Tsb0BgtxCEL9KlI,https://api.spotify.com/v1/tracks/6YB6CK4Tsb0B...,https://api.spotify.com/v1/audio-analysis/6YB6...,242253,4
1565,4YIoQqE50AdyG4BQafCi3u,High You Are (Branchez Remix),What So Not,High You Are (Remixes),2013-10-25,61,electronic,,0.691,0.856,...,0.034,0.414,136.98,audio_features,4YIoQqE50AdyG4BQafCi3u,spotify:track:4YIoQqE50AdyG4BQafCi3u,https://api.spotify.com/v1/tracks/4YIoQqE50Ady...,https://api.spotify.com/v1/audio-analysis/4YIo...,213340,4


### Cosine Similarity, Weighted Cosine Similarity, K-Nearest Neighbors


In [28]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder


def prepare_data(df):
    """
    Prepares the dataset for content-based filtering by one-hot encoding genre
    and normalizing the features.

    Args:
    - df (pd.DataFrame): The dataframe containing track data.

    Returns:
    - df (pd.DataFrame): The prepared dataframe with one-hot encoded genres
      and normalized features.
    """
    # One-hot encode the genre column
    # if 'genre_source' in df.columns:
    #     df = pd.get_dummies(df, columns=['genre_source'], prefix='genre')

    genre_encoder = OneHotEncoder()
    genre_encoded = genre_encoder.fit_transform(tracks_with_features_df[['genre_source']]).toarray() # doesn't seem to work rn


    # Normalize the audio features and popularity
    features_to_normalize = ['popularity', 'danceability', 'energy', 'key', 'loudness',
                             'mode', 'speechiness', 'acousticness', 'instrumentalness',
                             'liveness', 'valence', 'tempo', 'duration_ms']

    df[features_to_normalize] = df[features_to_normalize].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

    return df

# Example usage:
tracks_prepared_df = prepare_data(tracks_with_features_df)
tracks_prepared_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,0.763441,acoustic,,0.606776,0.415263,...,0.096296,0.423038,0.469641,audio_features,1HMQmOWrkieKYWlFsjUP3D,spotify:track:1HMQmOWrkieKYWlFsjUP3D,https://api.spotify.com/v1/tracks/1HMQmOWrkieK...,https://api.spotify.com/v1/audio-analysis/1HMQ...,0.255254,4
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,0.720430,acoustic,,0.373717,0.306756,...,0.107937,0.394495,0.358716,audio_features,6uHvbKL0Yi37AuvNRmUfMw,spotify:track:6uHvbKL0Yi37AuvNRmUfMw,https://api.spotify.com/v1/tracks/6uHvbKL0Yi37...,https://api.spotify.com/v1/audio-analysis/6uHv...,0.227876,4
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,0.709677,acoustic,,0.745380,0.536832,...,0.052593,0.899083,0.473975,audio_features,7jIAttgQTpLDoNtykIQXjH,spotify:track:7jIAttgQTpLDoNtykIQXjH,https://api.spotify.com/v1/tracks/7jIAttgQTpLD...,https://api.spotify.com/v1/audio-analysis/7jIA...,0.158983,4
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,0.731183,acoustic,,0.598563,0.787003,...,0.176720,0.584098,0.601977,audio_features,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf,https://api.spotify.com/v1/tracks/4E6cwWJWZw2z...,https://api.spotify.com/v1/audio-analysis/4E6c...,0.328156,4
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,0.677419,acoustic,,0.593429,0.727726,...,0.159788,0.902141,0.834530,audio_features,1jyddn36UN4tVsJGtaJfem,spotify:track:1jyddn36UN4tVsJGtaJfem,https://api.spotify.com/v1/tracks/1jyddn36UN4t...,https://api.spotify.com/v1/audio-analysis/1jyd...,0.287796,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1748,4HrIZyZIqd7kqShDXEyN1n,Laranjinha,Wesley Safadão,Arrocha Safadão,2024-09-06,0.569892,forro,,0.741273,0.687538,...,0.047196,0.940877,0.675048,audio_features,4HrIZyZIqd7kqShDXEyN1n,spotify:track:4HrIZyZIqd7kqShDXEyN1n,https://api.spotify.com/v1/tracks/4HrIZyZIqd7k...,https://api.spotify.com/v1/audio-analysis/4HrI...,0.204254,4
1749,4dGx53NJZJyuuLP5ownH1p,Mentira Estampada,Wesley Safadão,Arrocha Safadão,2024-09-06,0.580645,forro,,0.722793,0.773942,...,0.081270,0.880734,0.674505,audio_features,4dGx53NJZJyuuLP5ownH1p,spotify:track:4dGx53NJZJyuuLP5ownH1p,https://api.spotify.com/v1/tracks/4dGx53NJZJyu...,https://api.spotify.com/v1/audio-analysis/4dGx...,0.200110,4
1750,1eyzqe2QqGZUmfcPZtrIyt,Midnight City,M83,"Hurry Up, We're Dreaming",2011,0.688172,french,,0.540041,0.712655,...,0.167196,0.326198,0.513697,audio_features,1eyzqe2QqGZUmfcPZtrIyt,spotify:track:1eyzqe2QqGZUmfcPZtrIyt,https://api.spotify.com/v1/tracks/1eyzqe2QqGZU...,https://api.spotify.com/v1/audio-analysis/1eyz...,0.302153,4
1751,5ZduaRci3iNUiDfJbBfAaf,Give It To Me - Full Vocal Mix,Matt Sassari,Give It To Me (Full Vocal Mix),2021-10-22,0.827957,french,,0.897331,0.870393,...,0.151323,0.740061,0.616516,audio_features,5ZduaRci3iNUiDfJbBfAaf,spotify:track:5ZduaRci3iNUiDfJbBfAaf,https://api.spotify.com/v1/tracks/5ZduaRci3iNU...,https://api.spotify.com/v1/audio-analysis/5Zdu...,0.094907,4




#### Weighted Cosine Similarity

By adding additional weighting to certain features we can adjust the recommendations we get, by including key and speechinees, and adding weighting to popularity, genre seems to align more closely

In [30]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def weighted_cosine_similarity(df, weight_dict):
    """
    Computes a weighted cosine similarity matrix based on the given weights.

    Args:
    - df (pd.DataFrame): The dataframe containing track data.
    - weight_dict (dict): A dictionary of weights for each feature.

    Returns:
    - similarity_matrix (np.array): A matrix of weighted cosine similarities.
    """
    feature_columns = list(weight_dict.keys())

    # Apply weights to the features
    weighted_features = df[feature_columns] * np.array([weight_dict[feat] for feat in feature_columns])

    # Compute cosine similarity on the weighted features
    similarity_matrix = cosine_similarity(weighted_features)

    return similarity_matrix

# Example weights
feature_weights = {
    'popularity': 0.2,
    'danceability': 0.2,
    'energy': 0.2,
    'valence': 0.1,
    'loudness': 0.1,
    'key': 0.1,
    'speechiness': 0.1
    # Add other features if needed
    # 'popularity', 'danceability', 'energy', 'key', 'loudness',
    # 'mode', 'speechiness', 'acousticness', 'instrumentalness',
    # 'liveness', 'valence', 'tempo', 'duration_ms'
}

# Compute weighted similarity matrix
weighted_similarity_matrix = weighted_cosine_similarity(tracks_prepared_df, feature_weights)
weighted_similarity_matrix

array([[1.        , 0.97420876, 0.97572887, ..., 0.90640296, 0.96868938,
        0.95535937],
       [0.97420876, 1.        , 0.92914891, ..., 0.92758802, 0.91654641,
        0.96435619],
       [0.97572887, 0.92914891, 1.        , ..., 0.88078839, 0.98280735,
        0.93599592],
       ...,
       [0.90640296, 0.92758802, 0.88078839, ..., 1.        , 0.91957489,
        0.98092578],
       [0.96868938, 0.91654641, 0.98280735, ..., 0.91957489, 1.        ,
        0.94519759],
       [0.95535937, 0.96435619, 0.93599592, ..., 0.98092578, 0.94519759,
        1.        ]])

In [31]:
weighted_sim_rec_df = recommend_songs('7BlvPctRnjjJUjBnrySJ7b', weighted_similarity_matrix, tracks_prepared_df)
weighted_sim_rec_df


Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
491,1erRDf98WX0VllciGPYO1H,Arise,Sepultura,Arise,1991-03-20,0.526882,brazil,,0.38193,0.962826,...,0.074709,0.16208,0.564216,audio_features,1erRDf98WX0VllciGPYO1H,spotify:track:1erRDf98WX0VllciGPYO1H,https://api.spotify.com/v1/tracks/1erRDf98WX0V...,https://api.spotify.com/v1/audio-analysis/1erR...,0.237528,4
1317,6tC2iHfUlzB2W4ntXXL2BH,Propane Nightmares,Pendulum,In Silico,2008-05-09,0.548387,drum-and-bass,,0.365503,0.96785,...,0.180952,0.219164,0.851158,audio_features,6tC2iHfUlzB2W4ntXXL2BH,spotify:track:6tC2iHfUlzB2W4ntXXL2BH,https://api.spotify.com/v1/tracks/6tC2iHfUlzB2...,https://api.spotify.com/v1/audio-analysis/6tC2...,0.409691,4
324,0bKs1y9PTFBddM9qj0JGvb,Puritania,Dimmu Borgir,Puritanical Euphoric Misanthropia,2001-03-12,0.494624,black-metal,,0.311088,0.970864,...,0.419048,0.166157,0.5351,audio_features,0bKs1y9PTFBddM9qj0JGvb,spotify:track:0bKs1y9PTFBddM9qj0JGvb,https://api.spotify.com/v1/tracks/0bKs1y9PTFBd...,https://api.spotify.com/v1/audio-analysis/0bKs...,0.219972,5
1110,5OjCsHeByDYEGxMrb1z8KQ,Flying Whales,Gojira,From Mars to Sirius,2005-09-07,0.602151,death-metal,,0.273101,0.926657,...,0.178836,0.203874,0.466877,audio_features,5OjCsHeByDYEGxMrb1z8KQ,spotify:track:5OjCsHeByDYEGxMrb1z8KQ,https://api.spotify.com/v1/tracks/5OjCsHeByDYE...,https://api.spotify.com/v1/audio-analysis/5OjC...,0.635672,4
1108,6W2KBpMMXW17r7dPyqi8Iu,Repentless,Slayer,Repentless,2015-09-11,0.623656,death-metal,,0.410678,0.995981,...,0.131217,0.2579,0.509417,audio_features,6W2KBpMMXW17r7dPyqi8Iu,spotify:track:6W2KBpMMXW17r7dPyqi8Iu,https://api.spotify.com/v1/tracks/6W2KBpMMXW17...,https://api.spotify.com/v1/audio-analysis/6W2K...,0.23998,4
1139,6yerffT19n4aHyY25Rnkfq,Bratva,Slaughter to Prevail,Kostolom,2019,0.548387,death-metal,,0.427105,0.993972,...,0.032593,0.135576,0.880563,audio_features,6yerffT19n4aHyY25Rnkfq,spotify:track:6yerffT19n4aHyY25Rnkfq,https://api.spotify.com/v1/tracks/6yerffT19n4a...,https://api.spotify.com/v1/audio-analysis/6yer...,0.374775,4
1326,2lN6G35gsXkA3xzPYqmis5,Self vs Self (feat. In Flames),Pendulum,Immersion,2010-05-21,0.526882,drum-and-bass,,0.449692,0.990958,...,0.973545,0.33843,0.851256,audio_features,2lN6G35gsXkA3xzPYqmis5,spotify:track:2lN6G35gsXkA3xzPYqmis5,https://api.spotify.com/v1/tracks/2lN6G35gsXkA...,https://api.spotify.com/v1/audio-analysis/2lN6...,0.368355,4
309,1PZ1po0vZzESv0AJCURC72,Sun//Eater,Lorna Shore,Pain Remains,2022-10-14,0.537634,black-metal,,0.25154,0.890487,...,0.230688,0.053007,0.683771,audio_features,1PZ1po0vZzESv0AJCURC72,spotify:track:1PZ1po0vZzESv0AJCURC72,https://api.spotify.com/v1/tracks/1PZ1po0vZzES...,https://api.spotify.com/v1/audio-analysis/1PZ1...,0.495154,4
1111,3zwdN4h7DNlGlm3w4KylOM,Remember Me,Currents,The Death We Seek,2023-05-05,0.569892,death-metal,,0.290554,0.922638,...,0.12381,0.374108,0.831502,audio_features,3zwdN4h7DNlGlm3w4KylOM,spotify:track:3zwdN4h7DNlGlm3w4KylOM,https://api.spotify.com/v1/tracks/3zwdN4h7DNlG...,https://api.spotify.com/v1/audio-analysis/3zwd...,0.306362,4
311,3iubkenxO8JUJNp7phyVlb,My Meds Aren't Working,Dystopia,Dystopia,2008-04-19,0.494624,black-metal,,0.295688,0.793031,...,0.198942,0.163099,0.670572,audio_features,3iubkenxO8JUJNp7phyVlb,spotify:track:3iubkenxO8JUJNp7phyVlb,https://api.spotify.com/v1/tracks/3iubkenxO8JU...,https://api.spotify.com/v1/audio-analysis/3iub...,0.30243,4


#### K nearest neighbors

In [32]:
from sklearn.neighbors import NearestNeighbors

def knn_recommendation(df, weight_dict, n_neighbors=10):
    """
    Recommend songs using K-Nearest Neighbors based on weighted features.

    Args:
    - df (pd.DataFrame): The dataframe containing track data.
    - weight_dict (dict): A dictionary of weights for each feature.
    - n_neighbors (int): Number of neighbors (recommendations) to return.

    Returns:
    - knn_model: Trained KNN model.
    - distances: Distances of nearest neighbors.
    - indices: Indices of nearest neighbors in the dataframe.
    """
    feature_columns = list(feature_weights.keys())

    # Apply weights to the features
    weighted_features = df[feature_columns] * np.array([feature_weights[feat] for feat in feature_columns])

    # Initialize KNN model
    knn_model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine')

    # Fit the KNN model on weighted features
    knn_model.fit(weighted_features)

    return knn_model

def get_knn_recommendations(song_id, df, knn_model, n_neighbors=10):
    """
    Fetch recommendations for a specific song using a pre-trained KNN model.

    Args:
    - song_id (str): The ID of the song for which recommendations are to be fetched.
    - df (pd.DataFrame): The dataframe containing track data.
    - knn_model: Trained KNN model.
    - n_neighbors (int): Number of recommendations to return.

    Returns:
    - recommendations (pd.DataFrame): DataFrame of recommended songs.
    """
    song_index = df[df['track_id'] == song_id].index[0]
    distances, indices = knn_model.kneighbors([df.iloc[song_index][list(feature_weights.keys())].values], n_neighbors=n_neighbors+1)

    # Get indices of the most similar songs (excluding the first one, which is the seed song itself)
    similar_indices = indices.flatten()[1:]

    return df.iloc[similar_indices]

# Example usage:
knn_model = knn_recommendation(tracks_prepared_df, feature_weights)
knn_recommendations = get_knn_recommendations('7BlvPctRnjjJUjBnrySJ7b', tracks_prepared_df, knn_model)
knn_recommendations




Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
378,0H7ROR8cZTsh6YUgnlqjM2,Ghosts Of Mississippi,The Steeldrivers,Reckless,2010-01-01,0.516129,bluegrass,,0.328542,0.723707,...,0.116402,0.591233,0.857708,audio_features,0H7ROR8cZTsh6YUgnlqjM2,spotify:track:0H7ROR8cZTsh6YUgnlqjM2,https://api.spotify.com/v1/tracks/0H7ROR8cZTsh...,https://api.spotify.com/v1/audio-analysis/0H7R...,0.501057,4
311,3iubkenxO8JUJNp7phyVlb,My Meds Aren't Working,Dystopia,Dystopia,2008-04-19,0.494624,black-metal,,0.295688,0.793031,...,0.198942,0.163099,0.670572,audio_features,3iubkenxO8JUJNp7phyVlb,spotify:track:3iubkenxO8JUJNp7phyVlb,https://api.spotify.com/v1/tracks/3iubkenxO8JU...,https://api.spotify.com/v1/audio-analysis/3iub...,0.30243,4
1429,1E0CZWim9mfwrCkXvieES8,Ecstasy Of Soul,Zeds Dead,Ecstasy Of Soul,2022-12-14,0.505376,dubstep,,0.389117,0.89953,...,0.373545,0.131498,0.734881,audio_features,1E0CZWim9mfwrCkXvieES8,spotify:track:1E0CZWim9mfwrCkXvieES8,https://api.spotify.com/v1/tracks/1E0CZWim9mfw...,https://api.spotify.com/v1/audio-analysis/1E0C...,0.297607,4
1383,1E0CZWim9mfwrCkXvieES8,Ecstasy Of Soul,Zeds Dead,Ecstasy Of Soul,2022-12-14,0.505376,dub,,0.389117,0.89953,...,0.373545,0.131498,0.734881,audio_features,1E0CZWim9mfwrCkXvieES8,spotify:track:1E0CZWim9mfwrCkXvieES8,https://api.spotify.com/v1/tracks/1E0CZWim9mfw...,https://api.spotify.com/v1/audio-analysis/1E0C...,0.297607,4
332,5fLHBxfx4JvgU65pkO74br,Step One,Make Them Suffer,How To Survive A Funeral,2020-06-19,0.419355,black-metal,,0.389117,0.501668,...,0.212698,0.038022,0.553963,audio_features,5fLHBxfx4JvgU65pkO74br,spotify:track:5fLHBxfx4JvgU65pkO74br,https://api.spotify.com/v1/tracks/5fLHBxfx4Jvg...,https://api.spotify.com/v1/audio-analysis/5fLH...,0.116886,5
1408,27YD36FUikKxbp3bKSiKGZ,First Time (feat. Dylan Matthew),Seven Lions,First Time (feat. Dylan Matthew),2018-10-12,0.537634,dubstep,,0.433265,0.677491,...,0.14709,0.148828,0.734431,audio_features,27YD36FUikKxbp3bKSiKGZ,spotify:track:27YD36FUikKxbp3bKSiKGZ,https://api.spotify.com/v1/tracks/27YD36FUikKx...,https://api.spotify.com/v1/audio-analysis/27YD...,0.386414,4
1359,27YD36FUikKxbp3bKSiKGZ,First Time (feat. Dylan Matthew),Seven Lions,First Time (feat. Dylan Matthew),2018-10-12,0.537634,dub,,0.433265,0.677491,...,0.14709,0.148828,0.734431,audio_features,27YD36FUikKxbp3bKSiKGZ,spotify:track:27YD36FUikKxbp3bKSiKGZ,https://api.spotify.com/v1/tracks/27YD36FUikKx...,https://api.spotify.com/v1/audio-analysis/27YD...,0.386414,4
317,3ffljpjKfPISFjCt4zpYUk,Hollowed Heart,Make Them Suffer,Hollowed Heart,2019-06-07,0.473118,black-metal,,0.310062,0.986939,...,0.140741,0.37105,0.444256,audio_features,3ffljpjKfPISFjCt4zpYUk,spotify:track:3ffljpjKfPISFjCt4zpYUk,https://api.spotify.com/v1/tracks/3ffljpjKfPIS...,https://api.spotify.com/v1/audio-analysis/3ffl...,0.287885,4
1332,2qxiyScLvjCMb9gg1hTl5F,Here For You,Wilkinson,Cognition,2022-02-11,0.591398,drum-and-bass,,0.367556,0.780975,...,0.258201,0.040979,0.850791,audio_features,2qxiyScLvjCMb9gg1hTl5F,spotify:track:2qxiyScLvjCMb9gg1hTl5F,https://api.spotify.com/v1/tracks/2qxiyScLvjCM...,https://api.spotify.com/v1/audio-analysis/2qxi...,0.279372,4
1439,6BYzwbWg1Z2EB6VUXTYnhm,Worlds Away,Dabin,Worlds Away,2023-06-16,0.505376,dubstep,,0.455852,0.827191,...,0.382011,0.333333,0.733644,audio_features,6BYzwbWg1Z2EB6VUXTYnhm,spotify:track:6BYzwbWg1Z2EB6VUXTYnhm,https://api.spotify.com/v1/tracks/6BYzwbWg1Z2E...,https://api.spotify.com/v1/audio-analysis/6BYz...,0.315553,4


### Euclidean Distance

Description: Unlike cosine similarity, which measures the angle between two vectors, Euclidean distance measures the "straight-line" distance between points in a multi-dimensional space. It works best when features are normalized.


When to Use: Useful when you want to measure the actual distance between two feature sets, but it tends to be sensitive to scale, so normalization is important.

In [33]:
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler

weighted_similarity_features = list(feature_weights.keys())

def euclidean_similarity(df, feature_columns):
    # Normalize the features to a range of [0, 1]
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(df[feature_columns])

    # Compute the Euclidean distance matrix
    distance_matrix = euclidean_distances(scaled_features)

    # Convert distances to similarity (lower distance = higher similarity)
    similarity_matrix = 1 / (1 + distance_matrix)

    return similarity_matrix

# Example usage:
euclidean_similarity_matrix = euclidean_similarity(tracks_with_features_df, weighted_similarity_features)
euclidean_similarity_matrix

euclidean_distance_rec_df = recommend_songs('7BlvPctRnjjJUjBnrySJ7b', euclidean_similarity_matrix, tracks_prepared_df)
euclidean_distance_rec_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
1317,6tC2iHfUlzB2W4ntXXL2BH,Propane Nightmares,Pendulum,In Silico,2008-05-09,0.548387,drum-and-bass,,0.365503,0.96785,...,0.180952,0.219164,0.851158,audio_features,6tC2iHfUlzB2W4ntXXL2BH,spotify:track:6tC2iHfUlzB2W4ntXXL2BH,https://api.spotify.com/v1/tracks/6tC2iHfUlzB2...,https://api.spotify.com/v1/audio-analysis/6tC2...,0.409691,4
1110,5OjCsHeByDYEGxMrb1z8KQ,Flying Whales,Gojira,From Mars to Sirius,2005-09-07,0.602151,death-metal,,0.273101,0.926657,...,0.178836,0.203874,0.466877,audio_features,5OjCsHeByDYEGxMrb1z8KQ,spotify:track:5OjCsHeByDYEGxMrb1z8KQ,https://api.spotify.com/v1/tracks/5OjCsHeByDYE...,https://api.spotify.com/v1/audio-analysis/5OjC...,0.635672,4
324,0bKs1y9PTFBddM9qj0JGvb,Puritania,Dimmu Borgir,Puritanical Euphoric Misanthropia,2001-03-12,0.494624,black-metal,,0.311088,0.970864,...,0.419048,0.166157,0.5351,audio_features,0bKs1y9PTFBddM9qj0JGvb,spotify:track:0bKs1y9PTFBddM9qj0JGvb,https://api.spotify.com/v1/tracks/0bKs1y9PTFBd...,https://api.spotify.com/v1/audio-analysis/0bKs...,0.219972,5
491,1erRDf98WX0VllciGPYO1H,Arise,Sepultura,Arise,1991-03-20,0.526882,brazil,,0.38193,0.962826,...,0.074709,0.16208,0.564216,audio_features,1erRDf98WX0VllciGPYO1H,spotify:track:1erRDf98WX0VllciGPYO1H,https://api.spotify.com/v1/tracks/1erRDf98WX0V...,https://api.spotify.com/v1/audio-analysis/1erR...,0.237528,4
331,7dQrAIlUHD9DpA3wUxpaDW,Apotheosis,Lorna Shore,Pain Remains,2022-10-14,0.451613,black-metal,,0.217659,0.832215,...,0.150265,0.20999,0.681481,audio_features,7dQrAIlUHD9DpA3wUxpaDW,spotify:track:7dQrAIlUHD9DpA3wUxpaDW,https://api.spotify.com/v1/tracks/7dQrAIlUHD9D...,https://api.spotify.com/v1/audio-analysis/7dQr...,0.381875,4
345,5wWRdIjndDOh1j4OXAPpdD,Deathcrush,Mayhem,Deathcrush,1993,0.462366,black-metal,,0.224846,0.909577,...,0.315344,0.212029,0.594253,audio_features,5wWRdIjndDOh1j4OXAPpdD,spotify:track:5wWRdIjndDOh1j4OXAPpdD,https://api.spotify.com/v1/tracks/5wWRdIjndDOh...,https://api.spotify.com/v1/audio-analysis/5wWR...,0.259602,3
309,1PZ1po0vZzESv0AJCURC72,Sun//Eater,Lorna Shore,Pain Remains,2022-10-14,0.537634,black-metal,,0.25154,0.890487,...,0.230688,0.053007,0.683771,audio_features,1PZ1po0vZzESv0AJCURC72,spotify:track:1PZ1po0vZzESv0AJCURC72,https://api.spotify.com/v1/tracks/1PZ1po0vZzES...,https://api.spotify.com/v1/audio-analysis/1PZ1...,0.495154,4
1346,0CkmD2L3xMeZTXOwlOCsVm,"Infinity (feat. ILIRA, iiola & Tom Cane)",Wilkinson,"Infinity (feat. ILIRA, iiola & Tom Cane)",2023-03-31,0.548387,drum-and-bass,,0.436345,0.871398,...,0.152381,0.083894,0.851109,audio_features,0CkmD2L3xMeZTXOwlOCsVm,spotify:track:0CkmD2L3xMeZTXOwlOCsVm,https://api.spotify.com/v1/tracks/0CkmD2L3xMeZ...,https://api.spotify.com/v1/audio-analysis/0Ckm...,0.260807,4
1403,49o6YjBAnjwPKLcXwIH53Z,Crawl Outta Love,ILLENIUM,Awake,2017-09-21,0.580645,dubstep,,0.387064,0.723707,...,0.064021,0.170234,0.411603,audio_features,49o6YjBAnjwPKLcXwIH53Z,spotify:track:49o6YjBAnjwPKLcXwIH53Z,https://api.spotify.com/v1/tracks/49o6YjBAnjwP...,https://api.spotify.com/v1/audio-analysis/49o6...,0.303009,4
1353,49o6YjBAnjwPKLcXwIH53Z,Crawl Outta Love,ILLENIUM,Awake,2017-09-21,0.580645,dub,,0.387064,0.723707,...,0.064021,0.170234,0.411603,audio_features,49o6YjBAnjwPKLcXwIH53Z,spotify:track:49o6YjBAnjwPKLcXwIH53Z,https://api.spotify.com/v1/tracks/49o6YjBAnjwP...,https://api.spotify.com/v1/audio-analysis/49o6...,0.303009,4


### Manhattan Distance

Description: Manhattan distance (or "L1 distance") measures the absolute sum of differences across features. This can sometimes perform better when features are sparse or have high variability.


When to Use: Good for high-dimensional spaces or sparse features

In [34]:
from sklearn.metrics.pairwise import manhattan_distances

def manhattan_similarity(df, feature_columns):
    # Normalize the features
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(df[feature_columns])

    # Compute Manhattan distance matrix
    distance_matrix = manhattan_distances(scaled_features)

    # Convert distances to similarity
    similarity_matrix = 1 / (1 + distance_matrix)

    return similarity_matrix

# Example usage:
manhattan_similarity_matrix = manhattan_similarity(tracks_with_features_df, weighted_similarity_features)
manhattan_similarity_matrix

manhattan_similarity_df = recommend_songs('7BlvPctRnjjJUjBnrySJ7b', manhattan_similarity_matrix, tracks_prepared_df)
manhattan_similarity_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
1317,6tC2iHfUlzB2W4ntXXL2BH,Propane Nightmares,Pendulum,In Silico,2008-05-09,0.548387,drum-and-bass,,0.365503,0.96785,...,0.180952,0.219164,0.851158,audio_features,6tC2iHfUlzB2W4ntXXL2BH,spotify:track:6tC2iHfUlzB2W4ntXXL2BH,https://api.spotify.com/v1/tracks/6tC2iHfUlzB2...,https://api.spotify.com/v1/audio-analysis/6tC2...,0.409691,4
324,0bKs1y9PTFBddM9qj0JGvb,Puritania,Dimmu Borgir,Puritanical Euphoric Misanthropia,2001-03-12,0.494624,black-metal,,0.311088,0.970864,...,0.419048,0.166157,0.5351,audio_features,0bKs1y9PTFBddM9qj0JGvb,spotify:track:0bKs1y9PTFBddM9qj0JGvb,https://api.spotify.com/v1/tracks/0bKs1y9PTFBd...,https://api.spotify.com/v1/audio-analysis/0bKs...,0.219972,5
513,3m1JFLpKLYgMi1jf6Bu16P,Let Forever Be,The Chemical Brothers,Surrender,1999-01-01,0.526882,breakbeat,,0.258727,0.873408,...,0.093122,0.406728,0.620973,audio_features,3m1JFLpKLYgMi1jf6Bu16P,spotify:track:3m1JFLpKLYgMi1jf6Bu16P,https://api.spotify.com/v1/tracks/3m1JFLpKLYgM...,https://api.spotify.com/v1/audio-analysis/3m1J...,0.294556,4
1110,5OjCsHeByDYEGxMrb1z8KQ,Flying Whales,Gojira,From Mars to Sirius,2005-09-07,0.602151,death-metal,,0.273101,0.926657,...,0.178836,0.203874,0.466877,audio_features,5OjCsHeByDYEGxMrb1z8KQ,spotify:track:5OjCsHeByDYEGxMrb1z8KQ,https://api.spotify.com/v1/tracks/5OjCsHeByDYE...,https://api.spotify.com/v1/audio-analysis/5OjC...,0.635672,4
491,1erRDf98WX0VllciGPYO1H,Arise,Sepultura,Arise,1991-03-20,0.526882,brazil,,0.38193,0.962826,...,0.074709,0.16208,0.564216,audio_features,1erRDf98WX0VllciGPYO1H,spotify:track:1erRDf98WX0VllciGPYO1H,https://api.spotify.com/v1/tracks/1erRDf98WX0V...,https://api.spotify.com/v1/audio-analysis/1erR...,0.237528,4
331,7dQrAIlUHD9DpA3wUxpaDW,Apotheosis,Lorna Shore,Pain Remains,2022-10-14,0.451613,black-metal,,0.217659,0.832215,...,0.150265,0.20999,0.681481,audio_features,7dQrAIlUHD9DpA3wUxpaDW,spotify:track:7dQrAIlUHD9DpA3wUxpaDW,https://api.spotify.com/v1/tracks/7dQrAIlUHD9D...,https://api.spotify.com/v1/audio-analysis/7dQr...,0.381875,4
309,1PZ1po0vZzESv0AJCURC72,Sun//Eater,Lorna Shore,Pain Remains,2022-10-14,0.537634,black-metal,,0.25154,0.890487,...,0.230688,0.053007,0.683771,audio_features,1PZ1po0vZzESv0AJCURC72,spotify:track:1PZ1po0vZzESv0AJCURC72,https://api.spotify.com/v1/tracks/1PZ1po0vZzES...,https://api.spotify.com/v1/audio-analysis/1PZ1...,0.495154,4
1346,0CkmD2L3xMeZTXOwlOCsVm,"Infinity (feat. ILIRA, iiola & Tom Cane)",Wilkinson,"Infinity (feat. ILIRA, iiola & Tom Cane)",2023-03-31,0.548387,drum-and-bass,,0.436345,0.871398,...,0.152381,0.083894,0.851109,audio_features,0CkmD2L3xMeZTXOwlOCsVm,spotify:track:0CkmD2L3xMeZTXOwlOCsVm,https://api.spotify.com/v1/tracks/0CkmD2L3xMeZ...,https://api.spotify.com/v1/audio-analysis/0Ckm...,0.260807,4
345,5wWRdIjndDOh1j4OXAPpdD,Deathcrush,Mayhem,Deathcrush,1993,0.462366,black-metal,,0.224846,0.909577,...,0.315344,0.212029,0.594253,audio_features,5wWRdIjndDOh1j4OXAPpdD,spotify:track:5wWRdIjndDOh1j4OXAPpdD,https://api.spotify.com/v1/tracks/5wWRdIjndDOh...,https://api.spotify.com/v1/audio-analysis/5wWR...,0.259602,3
311,3iubkenxO8JUJNp7phyVlb,My Meds Aren't Working,Dystopia,Dystopia,2008-04-19,0.494624,black-metal,,0.295688,0.793031,...,0.198942,0.163099,0.670572,audio_features,3iubkenxO8JUJNp7phyVlb,spotify:track:3iubkenxO8JUJNp7phyVlb,https://api.spotify.com/v1/tracks/3iubkenxO8JU...,https://api.spotify.com/v1/audio-analysis/3iub...,0.30243,4


### Pearson Correlation

Description: Pearson correlation measures the linear correlation between two sets of variables (features in this case). It gives a similarity score based on how the values of different features increase or decrease together.


When to Use: Best when you believe there is a linear relationship between features

In [37]:
def pearson_similarity(df, feature_columns):
    # Compute the Pearson correlation matrix
    correlation_matrix = df[feature_columns].corr().values

    # Convert correlation matrix to a similarity matrix (scale between [0, 1])
    similarity_matrix = (correlation_matrix + 1) / 2

    return similarity_matrix

# Example usage:
pearson_similarity_matrix = pearson_similarity(tracks_with_features_df, weighted_similarity_features)
pearson_similarity_matrix

pearson_similarity_df = recommend_songs('7BlvPctRnjjJUjBnrySJ7b', pearson_similarity_matrix, tracks_prepared_df)
pearson_similarity_df

IndexError: index 1146 is out of bounds for axis 0 with size 7

### Jaccard Similarity
Description: Jaccard similarity measures the similarity between two sets by looking at the ratio of common features to total features. It's typically used for binary data but can be adapted to measure categorical or multi-label features, such as genre.


When to Use: Useful when working with binary or categorical data (like genre, mood tags, etc.).

In [40]:
from sklearn.metrics import jaccard_score

def jaccard_similarity(df, feature_columns):
    # Compute Jaccard similarity matrix for the one-hot encoded genre features
    genre_similarity_matrix = cosine_similarity(df[feature_columns])

    return genre_similarity_matrix

# Example usage with one-hot encoded genre columns:
one_hot_genres = pd.get_dummies(tracks_with_features_df['genre_source'])
tracks_with_genres_df = pd.concat([tracks_with_features_df, one_hot_genres], axis=1)
jaccard_similarity_matrix = jaccard_similarity(tracks_with_genres_df, one_hot_genres.columns)
jaccard_similarity_matrix

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.]])

## Evaluation Metrics

####  Diversity

1 Diversity


Diversity ensures the recommendations aren’t too similar to each other. You can measure this by comparing the pairwise cosine similarities between the recommended tracks.

In [41]:
def compute_diversity(recommendations_df, feature_columns):
    """
    Computes diversity by calculating pairwise cosine similarity between recommended tracks.

    Args:
    - recommendations_df (pd.DataFrame): DataFrame of recommended tracks.
    - feature_columns (list): List of feature columns used for comparison.

    Returns:
    - diversity (float): Average pairwise cosine similarity (lower is more diverse).
    """
    features = recommendations_df[feature_columns]
    pairwise_similarities = cosine_similarity(features)

    # Take the upper triangle of the similarity matrix (excluding the diagonal) to avoid redundancy
    upper_tri_indices = np.triu_indices_from(pairwise_similarities, k=1)
    avg_similarity = np.mean(pairwise_similarities[upper_tri_indices])

    return 1 - avg_similarity  # A higher diversity is represented by lower average similarity


#### Coverage

Coverage measures the proportion of the entire dataset that your recommendation model can meaningfully recommend.

In [42]:
def compute_coverage(recommendations_df, total_df):
    """
    Computes coverage as the proportion of unique tracks recommended.

    Args:
    - recommendations_df (pd.DataFrame): DataFrame of recommended tracks.
    - total_df (pd.DataFrame): The entire dataset of tracks.

    Returns:
    - coverage (float): The percentage of tracks recommended relative to the dataset size.
    """
    unique_recommended_tracks = recommendations_df['track_id'].nunique()
    total_tracks = total_df['track_id'].nunique()

    return unique_recommended_tracks / total_tracks


#### Popularity Bias

Popularity bias measures how much the recommendations favor popular tracks.



In [43]:
def compute_popularity_bias(recommendations_df):
    """
    Computes the average popularity score of the recommended tracks.

    Args:
    - recommendations_df (pd.DataFrame): DataFrame of recommended tracks.

    Returns:
    - avg_popularity (float): Average popularity score.
    """
    return recommendations_df['popularity'].mean()


#### Artist Variety

This checks whether the recommendations come from a variety of different artists.

In [44]:
def compute_artist_variety(recommendations_df):
    """
    Computes the variety of artists in the recommended tracks.

    Args:
    - recommendations_df (pd.DataFrame): DataFrame of recommended tracks.

    Returns:
    - artist_variety (float): The proportion of unique artists in the recommendations.
    """
    unique_artists = recommendations_df['artist_name'].nunique()
    total_recommendations = len(recommendations_df)

    return unique_artists / total_recommendations


### Evaluate Recommendations Main Method

In [45]:
def evaluate_recommendations(recommendations_df, total_df, feature_columns):
    """
    Evaluates recommendations using various metrics such as diversity, coverage, popularity bias, and artist variety.

    Args:
    - recommendations_df (pd.DataFrame): DataFrame of recommended tracks.
    - total_df (pd.DataFrame): The entire dataset of tracks.
    - feature_columns (list): List of feature columns used for similarity comparison.

    Returns:
    - evaluation (dict): Dictionary containing evaluation metrics.
    """
    evaluation = {
        'diversity': compute_diversity(recommendations_df, feature_columns),
        'coverage': compute_coverage(recommendations_df, total_df),
        'popularity_bias': compute_popularity_bias(recommendations_df),
        'artist_variety': compute_artist_variety(recommendations_df),
    }

    return evaluation

In [46]:
cosine_similarity_features = [
    'danceability',
    'energy',
    'valence',
    'loudness',
    'key',
    'speechiness'
]


cos_sim_results = evaluate_recommendations(cosine_rec_df, tracks_prepared_df, cosine_similarity_features)
cos_sim_results

{'diversity': 0.0025058800751338595,
 'coverage': 0.006321112515802781,
 'popularity_bias': 60.7,
 'artist_variety': 1.0}

In [47]:
weighted_similarity_features = list(feature_weights.keys())

weighted_sim_results = evaluate_recommendations(weighted_sim_rec_df, tracks_prepared_df, weighted_similarity_features)
weighted_sim_results

{'diversity': 0.010983808793516237,
 'coverage': 0.006321112515802781,
 'popularity_bias': 0.5473118279569892,
 'artist_variety': 0.9}

In [48]:
knn_features = list(feature_weights.keys())

knn_results = evaluate_recommendations(knn_recommendations, tracks_prepared_df, weighted_similarity_features)
knn_results

{'diversity': 0.018833437707238443,
 'coverage': 0.0050568900126422255,
 'popularity_bias': 0.5086021505376344,
 'artist_variety': 0.7}

In [49]:
euclidean_features = list(feature_weights.keys())

euclidean_results = evaluate_recommendations(euclidean_distance_rec_df, tracks_prepared_df, weighted_similarity_features)
euclidean_results

{'diversity': 0.011061117766022699,
 'coverage': 0.005689001264222503,
 'popularity_bias': 0.5333333333333334,
 'artist_variety': 0.8}

In [50]:
manhattan_results = evaluate_recommendations(manhattan_similarity_df, tracks_prepared_df, weighted_similarity_features)
manhattan_results

{'diversity': 0.011568843278761287,
 'coverage': 0.006321112515802781,
 'popularity_bias': 0.5193548387096774,
 'artist_variety': 0.9}

## Collaborative Filtering
Collaborative filtering leverages user preferences across a large set of users, analyzing patterns of co-listened songs. It's one of the most popular techniques for recommendations and can be useful when you move toward incorporating user data.

Types:

User-based collaborative filtering: Recommends songs based on what similar users have enjoyed.
Item-based collaborative filtering: Recommends songs based on what similar songs were listened to by others.
Example (Using Matrix Factorization with surprise library):

In [None]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Assuming you have user-song ratings data (e.g., from Spotify listens)
reader = Reader(rating_scale=(1, 5))  # Scale depends on how you collect feedback
data = Dataset.load_from_df(df[['user_id', 'song_id', 'rating']], reader)

trainset, testset = train_test_split(data, test_size=0.2)

# Use SVD for matrix factorization
algo = SVD()
algo.fit(trainset)

predictions = algo.test(testset)
accuracy.rmse(predictions)


## Matrix Factorization (Latent Factor Models)
Matrix factorization aims to find hidden factors in the data, which can capture underlying patterns in how users interact with songs. This is effective for implicit data (e.g., listen counts instead of explicit ratings).

Approach:

Use techniques like SVD (Singular Value Decomposition) or ALS (Alternating Least Squares).
You can use implicit feedback (e.g., play count) to build a matrix and decompose it to find relationships between users and songs.

### Matrix Decomposition (content based)

In [51]:
from sklearn.decomposition import TruncatedSVD

def apply_svd_to_features(df, n_components=10):
    """
    Apply Singular Value Decomposition (SVD) to the audio feature matrix to reduce dimensionality and extract latent features.

    Args:
    - df (pd.DataFrame): DataFrame with track features.
    - n_components (int): Number of latent features to extract.

    Returns:
    - latent_matrix (np.array): The reduced latent matrix.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness',
                       'instrumentalness', 'liveness', 'valence', 'tempo', 'popularity']

    # Apply SVD
    svd = TruncatedSVD(n_components=n_components)
    latent_matrix = svd.fit_transform(df[feature_columns])

    return latent_matrix

# Example usage
latent_features_matrix = apply_svd_to_features(tracks_with_features_df)
latent_features_matrix

array([[ 1.49373118,  0.45495234, -0.59121968, ..., -0.04451885,
        -0.14888442, -0.05421065],
       [ 1.34711965,  0.56160783, -0.39563741, ..., -0.11322665,
        -0.17325537, -0.02150168],
       [ 1.61677512, -0.07432722, -0.59565599, ..., -0.06563303,
        -0.0482518 ,  0.06995466],
       ...,
       [ 1.71990178, -0.15750962,  0.41161746, ..., -0.10671043,
        -0.01258124, -0.03154619],
       [ 1.86849695, -0.4097134 , -0.33251152, ..., -0.11400179,
         0.01655628, -0.01699808],
       [ 1.75222268,  0.34855471, -0.05166054, ..., -0.07527831,
        -0.19099088, -0.03553652]])

In [52]:
tracks_with_features_df

Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1HMQmOWrkieKYWlFsjUP3D,Bloom - Bonus Track,The Paper Kites,Woodland,2013-03-05,0.763441,acoustic,,0.606776,0.415263,...,0.096296,0.423038,0.469641,audio_features,1HMQmOWrkieKYWlFsjUP3D,spotify:track:1HMQmOWrkieKYWlFsjUP3D,https://api.spotify.com/v1/tracks/1HMQmOWrkieK...,https://api.spotify.com/v1/audio-analysis/1HMQ...,0.255254,4
1,6uHvbKL0Yi37AuvNRmUfMw,Paint,The Paper Kites,Young North,2013-03-05,0.720430,acoustic,,0.373717,0.306756,...,0.107937,0.394495,0.358716,audio_features,6uHvbKL0Yi37AuvNRmUfMw,spotify:track:6uHvbKL0Yi37AuvNRmUfMw,https://api.spotify.com/v1/tracks/6uHvbKL0Yi37...,https://api.spotify.com/v1/audio-analysis/6uHv...,0.227876,4
2,7jIAttgQTpLDoNtykIQXjH,Blister In The Sun,Violent Femmes,Violent Femmes,1983-01-01,0.709677,acoustic,,0.745380,0.536832,...,0.052593,0.899083,0.473975,audio_features,7jIAttgQTpLDoNtykIQXjH,spotify:track:7jIAttgQTpLDoNtykIQXjH,https://api.spotify.com/v1/tracks/7jIAttgQTpLD...,https://api.spotify.com/v1/audio-analysis/7jIA...,0.158983,4
3,4E6cwWJWZw2zWf7VFbH7wf,Love Song,Sara Bareilles,Little Voice,2007-07-03,0.731183,acoustic,,0.598563,0.787003,...,0.176720,0.584098,0.601977,audio_features,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf,https://api.spotify.com/v1/tracks/4E6cwWJWZw2z...,https://api.spotify.com/v1/audio-analysis/4E6c...,0.328156,4
4,1jyddn36UN4tVsJGtaJfem,You Are the Best Thing,Ray LaMontagne,Gossip In The Grain,2008-10-13,0.677419,acoustic,,0.593429,0.727726,...,0.159788,0.902141,0.834530,audio_features,1jyddn36UN4tVsJGtaJfem,spotify:track:1jyddn36UN4tVsJGtaJfem,https://api.spotify.com/v1/tracks/1jyddn36UN4t...,https://api.spotify.com/v1/audio-analysis/1jyd...,0.287796,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1748,4HrIZyZIqd7kqShDXEyN1n,Laranjinha,Wesley Safadão,Arrocha Safadão,2024-09-06,0.569892,forro,,0.741273,0.687538,...,0.047196,0.940877,0.675048,audio_features,4HrIZyZIqd7kqShDXEyN1n,spotify:track:4HrIZyZIqd7kqShDXEyN1n,https://api.spotify.com/v1/tracks/4HrIZyZIqd7k...,https://api.spotify.com/v1/audio-analysis/4HrI...,0.204254,4
1749,4dGx53NJZJyuuLP5ownH1p,Mentira Estampada,Wesley Safadão,Arrocha Safadão,2024-09-06,0.580645,forro,,0.722793,0.773942,...,0.081270,0.880734,0.674505,audio_features,4dGx53NJZJyuuLP5ownH1p,spotify:track:4dGx53NJZJyuuLP5ownH1p,https://api.spotify.com/v1/tracks/4dGx53NJZJyu...,https://api.spotify.com/v1/audio-analysis/4dGx...,0.200110,4
1750,1eyzqe2QqGZUmfcPZtrIyt,Midnight City,M83,"Hurry Up, We're Dreaming",2011,0.688172,french,,0.540041,0.712655,...,0.167196,0.326198,0.513697,audio_features,1eyzqe2QqGZUmfcPZtrIyt,spotify:track:1eyzqe2QqGZUmfcPZtrIyt,https://api.spotify.com/v1/tracks/1eyzqe2QqGZU...,https://api.spotify.com/v1/audio-analysis/1eyz...,0.302153,4
1751,5ZduaRci3iNUiDfJbBfAaf,Give It To Me - Full Vocal Mix,Matt Sassari,Give It To Me (Full Vocal Mix),2021-10-22,0.827957,french,,0.897331,0.870393,...,0.151323,0.740061,0.616516,audio_features,5ZduaRci3iNUiDfJbBfAaf,spotify:track:5ZduaRci3iNUiDfJbBfAaf,https://api.spotify.com/v1/tracks/5ZduaRci3iNU...,https://api.spotify.com/v1/audio-analysis/5Zdu...,0.094907,4




### Matrix factorization (content-based)



In [53]:
from sklearn.decomposition import NMF

# Choose the number of latent components
n_components = 20  # You can experiment with this number

# Initialize the NMF model
nmf_model = NMF(n_components=n_components, init='random', random_state=42)

column_list = [
    track_id	track_name	artist_name	album_name	release_date	popularity	genre_source	playlist_source	danceability	energy	key	loudness	mode	speechiness	acousticness	instrumentalness	liveness	valence	tempo	type
]

# Fit the model to the scaled features
W = nmf_model.fit_transform(tracks_with_features_df.iloc)
H = nmf_model.components_


ValueError: could not convert string to float: '1HMQmOWrkieKYWlFsjUP3D'

In [54]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Compute the cosine similarity matrix between latent features
latent_similarity_matrix = cosine_similarity(W)

def recommend_songs_nmf(song_id, df, latent_similarity_matrix, top_n=10):
    song_index = df[df['track_id'] == song_id].index[0]
    similarity_scores = latent_similarity_matrix[song_index]
    # Get indices of the most similar songs
    similar_indices = similarity_scores.argsort()[::-1][1:top_n+1]
    return df.iloc[similar_indices]

# Example usage
recommendations_nmf = recommend_songs_nmf('7BlvPctRnjjJUjBnrySJ7b', tracks_with_features_df, latent_similarity_matrix)
recommendations_nmf


NameError: name 'W' is not defined

### Deep Learning Method with autoencoder

In [55]:
import tensorflow as tf
from tensorflow.keras import layers

def build_autoencoder(input_dim):
    """
    Builds a basic autoencoder model for feature representation learning.

    Args:
    - input_dim (int): Number of input features.

    Returns:
    - autoencoder (tf.keras.Model): The autoencoder model.
    """
    # Encoder
    input_layer = layers.Input(shape=(input_dim,))
    encoded = layers.Dense(64, activation='relu')(input_layer)
    encoded = layers.Dense(32, activation='relu')(encoded)

    # Decoder
    decoded = layers.Dense(64, activation='relu')(encoded)
    decoded = layers.Dense(input_dim, activation='sigmoid')(decoded)

    # Autoencoder
    autoencoder = tf.keras.Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')

    return autoencoder

# Example usage
input_dim = len(tracks_with_features_df.columns)  # Number of features in the dataset
autoencoder = build_autoencoder(input_dim)
autoencoder.summary()


In [56]:
# Extract the features for training
features = tracks_with_features_df[['danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness',
                                    'instrumentalness', 'liveness', 'valence', 'tempo', 'popularity']].values

# Train the autoencoder
autoencoder.fit(features, features, epochs=50, batch_size=256, shuffle=True, validation_split=0.2)


Epoch 1/50


ValueError: Input 0 of layer "functional" is incompatible with the layer: expected shape=(None, 26), found shape=(None, 11)

### Deep Learning Method 2

In [70]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

features = tracks_prepared_df[['danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness',
                                    'instrumentalness', 'liveness', 'valence', 'tempo', 'popularity']].values

# Define the size of the input and latent space
input_dim = features.shape[1]
encoding_dim = 20  # Size of the latent space

# Input layer
input_layer = Input(shape=(input_dim,))

# Encoder layers
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)
encoded = Dense(encoding_dim, activation='relu')(encoded)

# Decoder layers
decoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

# Autoencoder model
autoencoder = Model(inputs=input_layer, outputs=decoded)

# Encoder model to extract latent features
encoder = Model(inputs=input_layer, outputs=encoded)

# Compile the model
autoencoder.compile(optimizer='adam', loss='mse')


In [71]:
# Train the autoencoder
history = autoencoder.fit(
    features, features,
    epochs=200,
    batch_size=256,
    shuffle=True,
    validation_split=0.1
)


Epoch 1/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 0.1038 - val_loss: 0.0949
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0930 - val_loss: 0.0791
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0790 - val_loss: 0.0596
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0628 - val_loss: 0.0466
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0517 - val_loss: 0.0437
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0470 - val_loss: 0.0423
Epoch 7/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0439 - val_loss: 0.0409
Epoch 8/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0423 - val_loss: 0.0384
Epoch 9/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [72]:
# Get the latent representations of the tracks
latent_features = encoder.predict(features)
latent_features

[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[2.0031817 , 0.2576646 , 0.        , ..., 1.8104788 , 3.1701078 ,
        0.        ],
       [1.9841547 , 0.20245212, 0.        , ..., 1.8651583 , 2.4706252 ,
        0.        ],
       [1.7164402 , 0.6794693 , 0.        , ..., 0.9478439 , 3.4692073 ,
        0.        ],
       ...,
       [1.53116   , 0.78003573, 0.        , ..., 1.4571259 , 2.3623722 ,
        0.        ],
       [1.5664101 , 0.9899046 , 0.        , ..., 0.8601847 , 3.9079962 ,
        0.        ],
       [2.0694659 , 0.40149623, 0.        , ..., 1.9134965 , 2.8876293 ,
        0.        ]], dtype=float32)

In [81]:
# Compute cosine similarity between latent features
latent_similarity_matrix_dl = cosine_similarity(latent_features)

def recommend_songs_dl(song_id, df, latent_similarity_matrix, top_n=10):
    song_index = df[df['track_id'] == song_id].index[0]
    similarity_scores = latent_similarity_matrix[song_index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n+1]
    return df.iloc[similar_indices]

# Example usage
recommendations_dl = recommend_songs_dl('7BlvPctRnjjJUjBnrySJ7b', tracks_with_features_df, latent_similarity_matrix_dl)
recommendations_dl


Unnamed: 0,track_id,track_name,artist_name,album_name,release_date,popularity,genre_source,playlist_source,danceability,energy,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
309,1PZ1po0vZzESv0AJCURC72,Sun//Eater,Lorna Shore,Pain Remains,2022-10-14,0.537634,black-metal,,0.25154,0.890487,...,0.230688,0.053007,0.683771,audio_features,1PZ1po0vZzESv0AJCURC72,spotify:track:1PZ1po0vZzESv0AJCURC72,https://api.spotify.com/v1/tracks/1PZ1po0vZzES...,https://api.spotify.com/v1/audio-analysis/1PZ1...,0.495154,4
331,7dQrAIlUHD9DpA3wUxpaDW,Apotheosis,Lorna Shore,Pain Remains,2022-10-14,0.451613,black-metal,,0.217659,0.832215,...,0.150265,0.20999,0.681481,audio_features,7dQrAIlUHD9DpA3wUxpaDW,spotify:track:7dQrAIlUHD9DpA3wUxpaDW,https://api.spotify.com/v1/tracks/7dQrAIlUHD9D...,https://api.spotify.com/v1/audio-analysis/7dQr...,0.381875,4
1573,5soMJpcVhSrGrB4prvPL6P,Rip & Tear,Mick Gordon,Doom (Original Game Soundtrack),2016-09-28,0.623656,electronic,,0.519507,0.883455,...,0.161905,0.142712,0.5283,audio_features,5soMJpcVhSrGrB4prvPL6P,spotify:track:5soMJpcVhSrGrB4prvPL6P,https://api.spotify.com/v1/tracks/5soMJpcVhSrG...,https://api.spotify.com/v1/audio-analysis/5soM...,0.326523,4
1599,4COR2ZPEyUn0lsbAouRWxA,Bfg Division,Mick Gordon,Doom (Original Game Soundtrack),2016-09-28,0.591398,electronic,,0.291581,0.722702,...,0.150265,0.039042,0.568893,audio_features,4COR2ZPEyUn0lsbAouRWxA,spotify:track:4COR2ZPEyUn0lsbAouRWxA,https://api.spotify.com/v1/tracks/4COR2ZPEyUn0...,https://api.spotify.com/v1/audio-analysis/4COR...,0.698947,3
324,0bKs1y9PTFBddM9qj0JGvb,Puritania,Dimmu Borgir,Puritanical Euphoric Misanthropia,2001-03-12,0.494624,black-metal,,0.311088,0.970864,...,0.419048,0.166157,0.5351,audio_features,0bKs1y9PTFBddM9qj0JGvb,spotify:track:0bKs1y9PTFBddM9qj0JGvb,https://api.spotify.com/v1/tracks/0bKs1y9PTFBd...,https://api.spotify.com/v1/audio-analysis/0bKs...,0.219972,5
345,5wWRdIjndDOh1j4OXAPpdD,Deathcrush,Mayhem,Deathcrush,1993,0.462366,black-metal,,0.224846,0.909577,...,0.315344,0.212029,0.594253,audio_features,5wWRdIjndDOh1j4OXAPpdD,spotify:track:5wWRdIjndDOh1j4OXAPpdD,https://api.spotify.com/v1/tracks/5wWRdIjndDOh...,https://api.spotify.com/v1/audio-analysis/5wWR...,0.259602,3
312,3thCp5S3lfRDikpfhXsuil,Loneliness,Decalius,Dehumanizing Loneliness,2023-10-13,0.537634,black-metal,,0.212526,0.729735,...,0.097354,0.189602,0.73381,audio_features,3thCp5S3lfRDikpfhXsuil,spotify:track:3thCp5S3lfRDikpfhXsuil,https://api.spotify.com/v1/tracks/3thCp5S3lfRD...,https://api.spotify.com/v1/audio-analysis/3thC...,0.424485,4
322,6hPoIXMBQuX7Af4XyqBcSX,In Death,Angelmaker,Angelmaker,2019-05-31,0.44086,black-metal,,0.565708,0.966845,...,0.373545,0.153925,0.562587,audio_features,6hPoIXMBQuX7Af4XyqBcSX,spotify:track:6hPoIXMBQuX7Af4XyqBcSX,https://api.spotify.com/v1/tracks/6hPoIXMBQuX7...,https://api.spotify.com/v1/audio-analysis/6hPo...,0.142313,4
326,6WBdwdLLif4kuG3s6ot8uB,Wolverine Blues,Entombed,Wolverine Blues,1993-10-04,0.462366,black-metal,,0.35729,0.974882,...,0.120635,0.381244,0.744724,audio_features,6WBdwdLLif4kuG3s6ot8uB,spotify:track:6WBdwdLLif4kuG3s6ot8uB,https://api.spotify.com/v1/tracks/6WBdwdLLif4k...,https://api.spotify.com/v1/audio-analysis/6WBd...,0.14008,1
311,3iubkenxO8JUJNp7phyVlb,My Meds Aren't Working,Dystopia,Dystopia,2008-04-19,0.494624,black-metal,,0.295688,0.793031,...,0.198942,0.163099,0.670572,audio_features,3iubkenxO8JUJNp7phyVlb,spotify:track:3iubkenxO8JUJNp7phyVlb,https://api.spotify.com/v1/tracks/3iubkenxO8JU...,https://api.spotify.com/v1/audio-analysis/3iub...,0.30243,4
