In [21]:
# !pip install textblob
# !pip install spotipy
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth # this not used in this notebook but it allows username authentication

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
import re
from tqdm import tqdm


In [5]:
# Define environment variables
client_id = '' #replace with your client ID
client_secret = ''#replace with your client secret
playlist_personal = '' 
playlist_compare = ''

In [6]:
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [7]:
a = sp.audio_features(tracks=[playlist_personal,playlist_compare])

In [8]:
genre_lst = sp.recommendation_genre_seeds()['genres']

In [9]:
genre_lst

['acoustic',
 'afrobeat',
 'alt-rock',
 'alternative',
 'ambient',
 'anime',
 'black-metal',
 'bluegrass',
 'blues',
 'bossanova',
 'brazil',
 'breakbeat',
 'british',
 'cantopop',
 'chicago-house',
 'children',
 'chill',
 'classical',
 'club',
 'comedy',
 'country',
 'dance',
 'dancehall',
 'death-metal',
 'deep-house',
 'detroit-techno',
 'disco',
 'disney',
 'drum-and-bass',
 'dub',
 'dubstep',
 'edm',
 'electro',
 'electronic',
 'emo',
 'folk',
 'forro',
 'french',
 'funk',
 'garage',
 'german',
 'gospel',
 'goth',
 'grindcore',
 'groove',
 'grunge',
 'guitar',
 'happy',
 'hard-rock',
 'hardcore',
 'hardstyle',
 'heavy-metal',
 'hip-hop',
 'holidays',
 'honky-tonk',
 'house',
 'idm',
 'indian',
 'indie',
 'indie-pop',
 'industrial',
 'iranian',
 'j-dance',
 'j-idol',
 'j-pop',
 'j-rock',
 'jazz',
 'k-pop',
 'kids',
 'latin',
 'latino',
 'malay',
 'mandopop',
 'metal',
 'metal-misc',
 'metalcore',
 'minimal-techno',
 'movies',
 'mpb',
 'new-age',
 'new-release',
 'opera',
 'pagode',

In [10]:
len(genre_lst)

126

In [11]:
# Empty lists for desired features
track_name = []
artist_name = []
album_name = []
genre = []
duration_ms = []
popularity = []
explicit = []
track_id = []
artist_id = []

In [12]:
# Iterate through each genre
for g in genre_lst:
    # Requests are limited to 50 units, so we need multiple API requests to get 1000 songs per genre
    for i in range(0,1000,50):
        q = 'genre:'+str(g)
        # Store API request results in a variable for extraction
        genre_results = sp.search(q=q, type='track', limit=50,offset=i)
        # Iterate through tracks and store relevant information in lists
        for i, t in enumerate(genre_results['tracks']['items']):
            track_name.append(t['name'])
            artist_name.append(t['artists'][0]['name'])
            album_name.append(t['album']['name'])
            genre.append(g)
            duration_ms.append(t['duration_ms'])
            popularity.append(t['popularity'])
            explicit.append(t['explicit'])
            track_id.append(t['id'])
            artist_id.append(t['artists'][0]['id'])

In [22]:
# Initialize DataFrame with data
df = pd.DataFrame({'track_name':track_name,'artist_name':artist_name,
                   'album_name':album_name,'genre':genre,'duration_ms':duration_ms,
                   'popularity':popularity,'explicit':explicit,
                   'track_id' : track_id,'artist_id':artist_id})

In [23]:
df.head()

Unnamed: 0,track_name,artist_name,album_name,genre,duration_ms,popularity,explicit,track_id,artist_id
0,P.Y.T. (Pretty Young Thing),Karizma Duo,Acoustic Cover Album,acoustic,185266,0,False,61SsC1KU762IMSJ1kEO5cQ,4AG4GdKn7FmC3EPk8m6dxg
1,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo
2,I'm Yours - Acoustic Covers Versions of Popula...,Vendredi,Covers & Chill Lounge Covers To Relax - Covers...,acoustic,146052,0,False,2WKxNLe2BKwDqxCVYchag4,728y8xDi9WMeYqKeuYgDF0
3,Barrel House Woman,Leroy Carr,Evil Devil Woman Blues (Blues People 1933 - 1934),acoustic,176146,0,False,32TV9fNwocQohmITCehS5A,2FtoVJBkfbE89zyZJ0E9eR
4,Stuck in a Moment,Daniel Robinson,Stuck in a Moment,acoustic,259393,16,False,4UJEMuXmnOXUCSAx3xJcgu,1tlNtqr5jF7Z8pR9i4RWsa


In [24]:
df.to_csv('../../../data/csv/myplaylist_data.csv')

In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112950 entries, 0 to 112949
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   track_name   112950 non-null  object
 1   artist_name  112950 non-null  object
 2   album_name   112950 non-null  object
 3   genre        112950 non-null  object
 4   duration_ms  112950 non-null  int64 
 5   popularity   112950 non-null  int64 
 6   explicit     112950 non-null  bool  
 7   track_id     112950 non-null  object
 8   artist_id    112950 non-null  object
dtypes: bool(1), int64(2), object(6)
memory usage: 7.0+ MB


In [26]:
df['genre'].unique()


array(['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient',
       'anime', 'black-metal', 'bluegrass', 'blues', 'brazil',
       'breakbeat', 'british', 'cantopop', 'chicago-house', 'children',
       'chill', 'classical', 'club', 'comedy', 'country', 'dance',
       'dancehall', 'death-metal', 'deep-house', 'detroit-techno',
       'disco', 'drum-and-bass', 'dub', 'dubstep', 'edm', 'electro',
       'electronic', 'emo', 'folk', 'forro', 'french', 'funk', 'garage',
       'german', 'gospel', 'goth', 'grindcore', 'groove', 'grunge',
       'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle',
       'heavy-metal', 'hip-hop', 'honky-tonk', 'house', 'idm', 'indian',
       'indie', 'indie-pop', 'industrial', 'iranian', 'j-dance', 'j-idol',
       'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino',
       'malay', 'mandopop', 'metal', 'metalcore', 'minimal-techno', 'mpb',
       'new-age', 'opera', 'pagode', 'party', 'piano', 'pop', 'pop-film',
       'power-pop', '

In [27]:
# Pull audio features
tracks = df.track_id.to_list()
audio_features = []
batchsize = 100

# Iterate over 100 song batches (due to API limit per request)
for i in tqdm(range(0,len(tracks),batchsize)):
    batch = tracks[i:i+batchsize]
    # Collect features for 100 tracks
    feature_results = sp.audio_features(batch)
    # Store individual track info in list
    for track in feature_results:
        if track is not None:
            audio_features.append(track)

100%|██████████████████████████████████████████████████████████████████████████████| 1130/1130 [03:24<00:00,  5.51it/s]


In [28]:
audio_features[0]


{'danceability': 0.802,
 'energy': 0.311,
 'key': 9,
 'loudness': -7.537,
 'mode': 0,
 'speechiness': 0.0341,
 'acousticness': 0.763,
 'instrumentalness': 0,
 'liveness': 0.11,
 'valence': 0.492,
 'tempo': 120.217,
 'type': 'audio_features',
 'id': '61SsC1KU762IMSJ1kEO5cQ',
 'uri': 'spotify:track:61SsC1KU762IMSJ1kEO5cQ',
 'track_href': 'https://api.spotify.com/v1/tracks/61SsC1KU762IMSJ1kEO5cQ',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/61SsC1KU762IMSJ1kEO5cQ',
 'duration_ms': 185267,
 'time_signature': 4}

In [29]:
af_df = pd.DataFrame.from_dict(data=audio_features,orient='columns')

In [30]:
af_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.802,0.311,9,-7.537,0,0.0341,0.763,0.0,0.11,0.492,120.217,audio_features,61SsC1KU762IMSJ1kEO5cQ,spotify:track:61SsC1KU762IMSJ1kEO5cQ,https://api.spotify.com/v1/tracks/61SsC1KU762I...,https://api.spotify.com/v1/audio-analysis/61Ss...,185267,4
1,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334,audio_features,40BZMRed29k0DIr8QmlFtT,spotify:track:40BZMRed29k0DIr8QmlFtT,https://api.spotify.com/v1/tracks/40BZMRed29k0...,https://api.spotify.com/v1/audio-analysis/40BZ...,103200,4
2,0.654,0.243,10,-8.965,1,0.0693,0.9,0.0,0.114,0.639,75.057,audio_features,2WKxNLe2BKwDqxCVYchag4,spotify:track:2WKxNLe2BKwDqxCVYchag4,https://api.spotify.com/v1/tracks/2WKxNLe2BKwD...,https://api.spotify.com/v1/audio-analysis/2WKx...,146053,4
3,0.799,0.527,0,-8.746,1,0.0422,0.966,3.5e-05,0.0878,0.837,107.356,audio_features,32TV9fNwocQohmITCehS5A,spotify:track:32TV9fNwocQohmITCehS5A,https://api.spotify.com/v1/tracks/32TV9fNwocQo...,https://api.spotify.com/v1/audio-analysis/32TV...,176147,4
4,0.48,0.198,10,-12.65,1,0.0479,0.945,0.0,0.104,0.308,145.501,audio_features,4UJEMuXmnOXUCSAx3xJcgu,spotify:track:4UJEMuXmnOXUCSAx3xJcgu,https://api.spotify.com/v1/tracks/4UJEMuXmnOXU...,https://api.spotify.com/v1/audio-analysis/4UJE...,259394,4


In [31]:
# Remove unwanted features
af_df.drop(['type','uri','track_href','analysis_url','duration_ms','time_signature'],axis=1,inplace=True)

In [32]:
# Match join feature name
af_df.rename(columns={'id':'track_id'}, inplace=True)

In [33]:
final = pd.merge(df,af_df,how='inner',on='track_id')

In [34]:
final.head()

Unnamed: 0,track_name,artist_name,album_name,genre,duration_ms,popularity,explicit,track_id,artist_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,P.Y.T. (Pretty Young Thing),Karizma Duo,Acoustic Cover Album,acoustic,185266,0,False,61SsC1KU762IMSJ1kEO5cQ,4AG4GdKn7FmC3EPk8m6dxg,0.802,0.311,9,-7.537,0,0.0341,0.763,0.0,0.11,0.492,120.217
1,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334
2,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334
3,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334
4,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334


In [35]:
final.to_csv('../../../data/csv/mySpotifyData.csv')

In [36]:
final.shape

(248696, 20)