In [1]:
!pip install spotipy



In [2]:
import base64
import requests
import datetime
from urllib.parse import urlencode
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os

# Spotify application credentials are read from the environment so that no
# secret is stored in the notebook. The variable names are the ones spotipy
# itself looks up. Export both before starting the kernel:
#   SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET
# NOTE(review): the credentials that used to be hard-coded here have been
# exposed and should be rotated in the Spotify developer dashboard.
spotify_api_key = os.environ.get("SPOTIPY_CLIENT_ID")
spotify_api_key_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")


class SpotifyAPI(object):
    """Small client for the Spotify search endpoint.

    Authenticates with the ``client_credentials`` grant: the client id and
    secret are sent as an HTTP Basic header to ``token_url`` and the returned
    bearer token is cached on the instance until it expires.
    """

    # Cached bearer token and its bookkeeping; refreshed by perform_auth().
    access_token = None
    access_token_expires = datetime.datetime.now()
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"
    # Seconds to wait for Spotify before giving up, so a stalled connection
    # cannot hang the caller forever.
    request_timeout = 10

    def __init__(self, client_id, client_secret, *args, **kwargs):
        """Store the application credentials used for authentication."""
        # In case we want to inherit from somewhere else
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret

    @staticmethod
    def _is_success(status_code):
        """Return True for any 2xx HTTP status code (200-299 inclusive)."""
        return 200 <= status_code < 300

    def get_client_credentials(self):
        """
        Returns "client_id:client_secret" as a base64 encoded string.

        Raises:
            Exception: if either credential is missing.
        """
        client_id = self.client_id
        client_secret = self.client_secret
        if client_secret is None or client_id is None:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        return base64.b64encode(client_creds.encode()).decode()

    def get_token_headers(self):
        """Return the HTTP Basic authorization header for the token request."""
        client_creds_b64 = self.get_client_credentials()
        return {"Authorization": f"Basic {client_creds_b64}"}

    def get_token_data(self):
        """Return the form body for the token request."""
        return {"grant_type": "client_credentials"}

    def perform_auth(self):
        """Request a new access token and cache it on the instance.

        Returns:
            True when a token was obtained.

        Raises:
            Exception: if the token endpoint answers with a non-2xx status.
        """
        r = requests.post(
            self.token_url,
            data=self.get_token_data(),
            headers=self.get_token_headers(),
            timeout=self.request_timeout,
        )
        if not self._is_success(r.status_code):
            raise Exception("Could not authenticate client.")
        data = r.json()
        now = datetime.datetime.now()
        expires = now + datetime.timedelta(seconds=data["expires_in"])
        self.access_token = data["access_token"]
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True

    def get_access_token(self):
        """Return the cached token, authenticating first if it is missing or expired.

        Authentication is attempted at most once per call, so a response whose
        token is already expired cannot cause unbounded recursion.
        """
        now = datetime.datetime.now()
        if self.access_token is None or self.access_token_expires < now:
            self.perform_auth()
        return self.access_token

    def get_resource_header(self):
        """Return the Bearer authorization header for API requests."""
        access_token = self.get_access_token()
        return {"Authorization": f"Bearer {access_token}"}

    def base_search(self, query_params):
        """Call the search endpoint with an already url-encoded query string.

        Returns:
            The decoded JSON response, or an empty dict on a non-2xx status.
        """
        headers = self.get_resource_header()
        endpoint = "https://api.spotify.com/v1/search"
        lookup_url = f"{endpoint}?{query_params}"
        resp = requests.get(lookup_url, headers=headers, timeout=self.request_timeout)
        if not self._is_success(resp.status_code):
            return {}
        return resp.json()

    def search(
        self, query=None, operator=None, operator_query=None, search_type="artist"
    ):
        """Build a search query and run it through base_search().

        Args:
            query: a query string, or a dict whose items are joined as
                space-separated "key:value" filters.
            operator: optional "or" / "not" (case-insensitive); any other
                value is ignored.
            operator_query: string appended after the operator; ignored
                unless it is a str.
            search_type: item type to search for; lower-cased before sending.

        Raises:
            Exception: if query is None.
        """
        if query is None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join(f"{k}:{v}" for k, v in query.items())
        if operator is not None and operator_query is not None:
            if operator.lower() in ("or", "not"):
                operator = operator.upper()
                if isinstance(operator_query, str):
                    query = f"{query} {operator} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower()})
        return self.base_search(query_params)

In [3]:
# Client-credentials manager that spotipy uses to obtain and refresh tokens.
auth_manager = SpotifyClientCredentials(
    client_id=spotify_api_key,
    client_secret=spotify_api_key_secret,
)

In [4]:
# Notebook-level spotipy client that authenticates through `auth_manager`.
spotipy_instance = spotipy.Spotify(auth_manager=auth_manager)

In [5]:
def get_id_and_year(song_name, artist_name):
    """
    Look up a track on Spotify and return its id and release year.

    The first search hit for the given track/artist pair is used.

    Args:
        song_name: track title to search for.
        artist_name: artist to search for.

    Returns:
        ``(song_id, song_year)`` on success, where ``song_year`` is an int.
        On failure a human-readable error message (str) is returned instead,
        so callers should check the type before unpacking.

    Example:

    song_id, song_year = get_id_and_year('margaritaville','jimmy buffett')

    print("Song ID is:", song_id)
    print("Release year is:", song_year)

    Output:
    Song ID is: 4EEjMyQub6tgFVshlM9j1M
    Release year is: 1987

    """

    spotify = SpotifyAPI(spotify_api_key, spotify_api_key_secret)
    token = spotify.get_access_token()
    if token is None:
        return "Failed to get Spotify token"

    try:
        song_data = spotify.search(
            {"track": song_name, "artist": artist_name}, search_type="track"
        )
        first_match = song_data["tracks"]["items"][0]
        song_id = first_match["id"]
        # Spotify reports release_date with day, month or year precision
        # ("1987-02-01", "1987-02" or "1987"); the year is always the first
        # four characters, so do not require a full date.
        song_year = int(first_match["album"]["release_date"][:4])
    except Exception:
        # No hit, unexpected payload or request failure: report it to the
        # caller as a message rather than raising.
        return "There was an error finding your song. Please enter another song."

    return song_id, song_year

In [6]:
# Exploratory call: run a raw track search so the response payload can be inspected.
spotify = SpotifyAPI(spotify_api_key, spotify_api_key_secret)
song_data = spotify.search(
    {"track": "Margaritaville", "artist": "Jimmy Buffet"},
    search_type="track",
)

In [7]:
def retrieve_audio_features(spotify_id):
    """
    Fetch the Spotify audio features for a single track id.

    A new spotipy client (client-credentials auth) is created on every call
    and asked for the features of ``[spotify_id]``; whatever spotipy returns
    is passed back unchanged.

    Example:

    retrieve_audio_features('4EEjMyQub6tgFVshlM9j1M')

    returns a list holding one dict per requested track, e.g.
    [{'danceability': 0.611,
    'energy': 0.578,
    'key': 1,
    'loudness': -14.171,
    'mode': 1,
    'speechiness': 0.0676,
    'acousticness': 0.0598,
    'instrumentalness': 0.0219,
    'liveness': 0.0983,
    'valence': 0.884,
    'tempo': 100.625,
    'type': 'audio_features',
    'id': '4N0TP4Rmj6QQezWV88ARNJ',
    'uri': 'spotify:track:4N0TP4Rmj6QQezWV88ARNJ',
    'track_href': 'https://api.spotify.com/v1/tracks/4N0TP4Rmj6QQezWV88ARNJ',
    'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4N0TP4Rmj6QQezWV88ARNJ',
    'duration_ms': 266133,
    'time_signature': 4}]

    """
    client = spotipy.Spotify(
        auth_manager=SpotifyClientCredentials(spotify_api_key, spotify_api_key_secret)
    )
    return client.audio_features(tracks=[spotify_id])

In [8]:
song_name = 'margaritaville'
artist_name = 'Jimmy Buffet'

# get_id_and_year returns an error-message string instead of a tuple when the
# lookup fails; surface that message rather than a confusing unpacking error.
lookup_result = get_id_and_year(song_name, artist_name)
if isinstance(lookup_result, str):
    raise ValueError(lookup_result)
song_id, song_year = lookup_result

In [9]:
# Audio features used by the recommender, in the order the model expects them.
feature_names = (
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
    "duration_ms",
    "time_signature",
)

# The lookup returns a list; its first element holds the requested track's features.
audio_features_dict = retrieve_audio_features(song_id)
track_features = audio_features_dict[0]

(
    danceability,
    energy,
    key,
    loudness,
    mode,
    speechiness,
    acousticness,
    instrumentalness,
    liveness,
    valence,
    tempo,
    duration_ms,
    time_signature,
) = (track_features[name] for name in feature_names)

# Feature vector for the user's song: the 13 audio features followed by the release year.
user_song_features = [
    danceability,
    energy,
    key,
    loudness,
    mode,
    speechiness,
    acousticness,
    instrumentalness,
    liveness,
    valence,
    tempo,
    duration_ms,
    time_signature,
    song_year,
]

# Recommendations

In [11]:
import pandas as pd

In [31]:
# Read in spotify data from csv
spotify_df = pd.read_csv("spotify_songs_dataset.csv")

# Filter df for songs with release year equal to user input.
# A separate cursor (`target_year` / `next_year` / `previous_year`) is used so
# that the notebook-level `song_year` is not modified by running this cell.
target_year = user_song_features[-1]
song_year_list = [target_year]
df = spotify_df[spotify_df['year'] == target_year]

# If less than 100 songs with matching year, add more songs one year at a
# time: later years first, then earlier years once the dataset's last year is
# reached. Both loops are bounded by the years actually present in the data,
# so the cell cannot spin forever on a year the dataset does not cover.
first_year = spotify_df['year'].min()
last_year = spotify_df['year'].max()

next_year = target_year
while len(df) < 100 and next_year < last_year:
    next_year += 1
    song_year_list.append(next_year)
    df = spotify_df[spotify_df['year'].isin(song_year_list)]

previous_year = target_year
while len(df) < 100 and previous_year > first_year:
    previous_year -= 1
    song_year_list.append(previous_year)
    df = spotify_df[spotify_df['year'].isin(song_year_list)]

# Sample 100 random songs from the selected years (fewer if the whole dataset is smaller)
df = df.sample(min(100, len(df))).reset_index()

Unnamed: 0,index,id,name,album,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,433995,3NyYWAxR3Thd99ZxpXegVh,If Heaven Never Was Promised To Me,"I Am Willing, Lord",['Heritage Singers'],0.359,0.462,5,-9.691,1,0.0352,0.662,0.0,0.0778,0.545,135.927,174226,4.0,1977
1,545387,6fkmYkHaWsctfpwWA8CK7Y,Mary Don'T You Weep,The Best Of The Caravans,['The Caravans'],0.529,0.235,8,-17.696,0,0.048,0.883,3e-06,0.128,0.432,92.626,393200,4.0,1977
2,439479,1pimWzwUERYZbAFr5tL0Xl,Elevation,Marquee Moon,['Television'],0.531,0.588,9,-10.215,0,0.0289,0.15,0.295,0.081,0.66,110.366,305267,4.0,1977


In [32]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [33]:
def euclidian_distance(point1, point2):
    """Return the Euclidean (L2) distance between two array-like points.

    Both arguments are converted to numpy arrays and subtracted with normal
    broadcasting; the norm of the difference is returned.
    """
    delta = np.asarray(point1) - np.asarray(point2)
    return np.linalg.norm(delta)

In [37]:
# Normalize numerical audio features
scaler = MinMaxScaler()
df_normalized = scaler.fit_transform(df[['danceability', 'energy',
       'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
       'time_signature']])

# Convert user_song_features to df for MinMaxScaler
user_song_features_df = pd.DataFrame([user_song_features[:-1]], columns=['danceability', 'energy',
       'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
       'time_signature'])

# Compute euclidian distance as a similarity metric
input_song_normalized = scaler.transform(user_song_features_df)
df['euc_dist'] = ""
for i, features in enumerate(df_normalized):
  df.at[i, 'euc_dist'] = euclidian_distance(input_song_normalized, features)

In [40]:
# Keep the ten candidates with the smallest distance to the user's song.
top_ten = df.sort_values("euc_dist").iloc[:10]

In [45]:
# Pull the display columns of the ten closest songs out as plain lists.
song_list = top_ten['name'].tolist()
artist_list = top_ten['artists'].tolist()
album_list = top_ten['album'].tolist()

In [47]:
# One (song, artist, album) tuple per recommended track, closest first.
recommendations = [
    (song, artist, album)
    for song, artist, album in zip(song_list, artist_list, album_list)
]
recommendations

[('Starlight', "['Electric Light Orchestra']", 'Out Of The Blue'),
 ('Make Yourself At Home (Lullabye For A Wayward Husband)',
  "['Midnight Well']",
  'Midnight Well'),
 ('Blue Eyed Darling',
  "['Jimmy Martin', 'Bob Osborne']",
  'Thirty Years Of Bluegrass'),
 ('The Dreaded Lergy', "['Buddy Defranco']", 'Lush Life'),
 ('Lay Your Money Down', "['Bread']", 'Lost Without Your Love'),
 ('Brand New Goodbye Song', "['Waylon Jennings']", "Ol' Waylon"),
 ('Just The Way You Are', "['Billy Joel']", 'The Stranger (Legacy Edition)'),
 ('In The Real World',
  "['The Alan Parsons Project']",
  'The Definitive Collection'),
 ("I'Ve Got The Melody (Deep In My Heart)",
  "['Kenny Loggins']",
  'Celebrate Me Home'),
 ('Mr. Brother', "['Mother Freedom Band']", 'Cutting The Chord')]

In [None]:
def find_recommendations(user_song_features, song_id):
    """Recommend up to ten songs that sound similar to the user's song.

    Candidates are drawn from the songs dataset, restricted to the release
    year of the user's song. If that year has fewer than 100 songs the window
    is widened one year at a time (later years first, then earlier years)
    until 100 candidates are available or the dataset is exhausted. A random
    sample of at most 100 candidates is min-max normalised and ranked by
    Euclidean distance to the user's song.

    Args:
        user_song_features: sequence of the 13 audio features (in the order of
            ``feature_columns`` below) followed by the release year.
        song_id: Spotify id of the user's song; it is excluded from the
            candidates so it is never recommended back.

    Returns:
        A list of ``(name, artists, album)`` tuples, closest first. The list
        is empty when the dataset has no candidate songs.
    """
    feature_columns = [
        "danceability",
        "energy",
        "key",
        "loudness",
        "mode",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "duration_ms",
        "time_signature",
    ]
    pool_size = 100

    # Read in spotify data from csv
    spotify_df = pd.read_csv(
        "https://raw.githubusercontent.com/dankositzke/spotify-song-recommender/heroku-deployment/app_folder/assets/spotify_songs_dataset.csv",
        sep=",",
    )

    # Remove input song from df so we do not recommend the input song back to the user
    spotify_df = spotify_df[spotify_df["id"] != song_id]

    # Filter df for songs with release year equal to user input
    song_year = user_song_features[-1]
    years = spotify_df["year"]
    song_year_list = [song_year]
    df = spotify_df[years == song_year]

    # If less than 100 songs with matching year, add later years first. The
    # loop stops at the last year in the dataset, so it always terminates.
    last_year = years.max()
    next_year = song_year
    while len(df) < pool_size and next_year < last_year:
        next_year += 1
        song_year_list.append(next_year)
        df = spotify_df[years.isin(song_year_list)]

    # Still short (e.g. the song is newer than the dataset): add earlier years.
    first_year = years.min()
    previous_year = song_year
    while len(df) < pool_size and previous_year > first_year:
        previous_year -= 1
        song_year_list.append(previous_year)
        df = spotify_df[years.isin(song_year_list)]

    if len(df) == 0:
        return []

    # Sample up to 100 random songs from the selected years
    df = df.sample(min(pool_size, len(df))).reset_index()

    # Normalize numerical audio features
    scaler = MinMaxScaler()
    df_normalized = scaler.fit_transform(df[feature_columns])

    # Convert user_song_features to df for MinMaxScaler
    user_song_features_df = pd.DataFrame(
        [user_song_features[:-1]], columns=feature_columns
    )

    # Compute euclidian distance as a similarity metric, find 10 most similar songs
    input_song_normalized = scaler.transform(user_song_features_df)
    df["euc_dist"] = np.linalg.norm(df_normalized - input_song_normalized, axis=1)
    top_ten = df.sort_values(by="euc_dist").head(10)

    return list(zip(top_ten["name"], top_ten["artists"], top_ten["album"]))

In [None]:
# Run the packaged recommendation pipeline for the song looked up earlier in the notebook.
recommendations = find_recommendations(user_song_features, song_id)