In [3]:
import sys
!{sys.executable} -m pip install spotipy


Collecting spotipy
  Downloading spotipy-2.19.0-py3-none-any.whl (27 kB)
Installing collected packages: spotipy
Successfully installed spotipy-2.19.0


In [167]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

In [6]:
#Connecting to Spotify API

# Credentials
client_id = ''
client_secret = ''
username = ''

# Scope:  https://developer.spotify.com/web-api/using-scopes/
scope = 'user-library-read playlist-modify-public playlist-read-private'

redirect_uri = ''

def sign_in():
    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret) 
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    token = util.prompt_for_user_token(username, scope, client_id, client_secret, redirect_uri)

    if token:
        return spotipy.Spotify(auth=token)
    else:
        print("Can't get token for", username)
        
sp = sign_in()

In [99]:
#retrieving tracks from playlists imported from Spotify API

def tracks_from_playlists(playlist_uris, sp):
    
    sp = sign_in()
    
    all_uris = []
    
    for uri in playlist_uris:
        
        results = sp.user_playlist_tracks(playlist_id=uri)
        playlist_tracks = results['items']

        # Loop to ensure all tracks are captured
        while results['next']:
            results = sp.next(results)
            playlist_tracks.extend(results['items'])
        
        # Save playlist track URI's to list
        track_uris = []
        
        for track in playlist_tracks:
            # Skip tracks that have no data
            try:
                track_uri = track['track']['uri']
                track_uris.append(track_uri)
            except TypeError:
                pass
        
        # Append playlist track URI's to full list
        all_uris.extend(track_uris)
    
    return list(set(all_uris))


In [100]:
#Extracting features from tracks

def get_audio_features(track_list, spot):
    
    spot = sign_in() # Establish Spotify connection
    
    track_list = list(track_list)
    
    audio_df = pd.DataFrame(sp.audio_features(track_list[0]))
    start = 1
    while start < len(track_list):
        end = min(start + 100, len(track_list))
        
        # For subsets with no audio data, loop through one by one and pass on tracks missing data
        try:
            audio_df = pd.concat([audio_df, pd.DataFrame(sp.audio_features(track_list[start:end]))])
        except AttributeError:
            for i in range(start, end):
                try:
                    audio_df = pd.concat([audio_df, pd.DataFrame(sp.audio_features(track_list[i]))])
                except AttributeError:
                    pass
            
        start += 100
    
    return audio_df.reset_index(drop=True)

In [10]:
playlists_deep_house = ['spotify:playlist:6vDGVr652ztNWKZuHvsFvx',\
                        'spotify:playlist:37i9dQZF1DX2TRYkJECvfC',\
                        'spotify:playlist:37i9dQZF1DX5xiztvBdlUf',\
                        'spotify:playlist:4wjWoRltrA5qZ0uNsnzdWp',\
                        'spotify:playlist:4UJaDEATfh5FoNFyVcMBHV',\
                        'spotify:playlist:2GOq4YducOjWm88a4IkCQq',\
                        'spotify:playlist:5h1oEk4W9KVMHkOd8WWWlC',\
                        'spotify:playlist:3oRNodhtGLVnZl0Q32FJHB',\
                        'spotify:playlist:7qKZ5hzM099puSI6MKiUyX',\
                        'spotify:playlist:6n7XGSMHS4Xhv0NWL88in9',\
                        'spotify:playlist:0ekUBqyN6aMDI90qsn96Gz',\
                        'spotify:playlist:0e7ZClzZLXcgtRvhdmfIID',\
                        'spotify:playlist:2t5U3e6mHq4T2fKXknS4dd',\
                        'spotify:playlist:0ypuLGVb7lGs0WGu8a9PHg',\
                        'spotify:playlist:5CLCReqqp7OTLDMcvB1oLw',\
                        'spotify:playlist:2b1gr7NkoWDHatEQvhGsjy',\
                        'spotify:playlist:3dHfJPYowP6ZHUixqA4pUK',\
                        'spotify:playlist:5fwNQf619fTX4jcK7GlJ4l',\
                        'spotify:playlist:6r0LRTsaL5DQoZkwheYiw6',\
                        'spotify:playlist:0zWcfPRGU4GI2sQSIuwu4j',\
                        'spotify:playlist:2J8VHkLSdlvT5jey7pvBf7',\
                        'spotify:playlist:0sjYcaKmksfAcZVV9Zid3G',\
                        'spotify:playlist:3GoE2qRab6zBwaO9r0b0nv',\
                        'spotify:playlist:48IubOKEvNWeEZ1vECS0q3',\
                        'spotify:playlist:3QGZ3W7vJbnnkWnbeh2j6M',\
                        'spotify:playlist:5tmnS4UuyxxW41M7c3wDVB',\
                        'spotify:playlist:0uBddqxhSNKw38usJ8j1y6',\
                        'spotify:playlist:2QOeH7Fn7oU7pz0ZyL2cAq',\
                        'spotify:playlist:4JuVgEhvl4WgNGk8nFvruv',\
                        'spotify:playlist:1KIL5f54E1i2f4mSPTqu5B']

In [11]:
playlists_dnb = ['spotify:playlist:068WHS0zOWsqvn2uIBYb5D',\
                 'spotify:playlist:0Zarq4BVkFkZOWkmqsfrjA',\
                 'spotify:playlist:3OYBpiQl7zOac5j5nGl3mu',\
                 'spotify:playlist:4Xb7allH5pITXzHc1nadT7',\
                 'spotify:playlist:37i9dQZF1DX1jlzMTQ2PY5',\
                 'spotify:playlist:37i9dQZF1DX4to60X6goeK',\
                 'spotify:playlist:5XGbuIRSb5INv66b817DJH',\
                 'spotify:playlist:37i9dQZF1DX8jnAPF7Iiqp',\
                 'spotify:playlist:5ABMzUESx7K7EyowE5kFCl',\
                 'spotify:playlist:1K0VIne9ejeBhISlO43gRA',\
                 'spotify:playlist:2vAT9MoHmHiOZ89zyk1uqd',\
                 'spotify:playlist:37i9dQZF1DX5wDmLW735Yd',\
                 'spotify:playlist:30sfJJYyp9raHP4HEYbACe',\
                 'spotify:playlist:7CXXmPbwLrRJkr7EP6Fmrt',\
                 'spotify:playlist:3gqEaRQUN0xYi9kHexWQpY',\
                 'spotify:playlist:1CDdeNMmQ9jKQks8XD854G',\
                 'spotify:playlist:19y0UVk0bcrJWEqMwBHosj',\
                 'spotify:playlist:4oOZJEq1TBUti6PSouTo5M',\
                 'spotify:playlist:0CjxEzL8Vm9IC3Or12KrAP',\
                 'spotify:playlist:0oP76q6CyLk41eVDO0AxV1',\
                 'spotify:playlist:05CWFSX7qOugRLcpy9VQED',\
                 'spotify:playlist:6PwLxu5DoY39rGQiOq5GP2',\
                 'spotify:playlist:4Oa99cnfYN2NmaUSTxiZaS',\
                 'spotify:playlist:0W8KE1NZFTl8gpd7zHoQD2',\
                 'spotify:playlist:0ydFX2Mwe0sX1bbHgma93g',\
                 'spotify:playlist:1jnbdkFbVzMmDqUFnHSWid',\
                 'spotify:playlist:2LP3pEcFPfVm15Pld7gwz0',\
                 'spotify:playlist:5osGKmqCMhajgNmNIXAXmM',\
                 'spotify:playlist:6yFDqUY09Hd3Ix3AamPYX4',\ 
                 'spotify:playlist:5MG037sSypV015Ns1U6UwA',\
                 'spotify:playlist:501tF4mo7f0rwwK8pJFSm6',\ 
                 'spotify:playlist:6dDMrOGa5BJDDSbolmy9IC']

In [91]:
playlists_dubstep = ['spotify:playlist:3ObJ6Qra3CkV0gNCRTtK0c',\
                       'spotify:playlist:6xo6Pr528QIucumzKcMXOu', 'spotify:playlist:37i9dQZF1DX4arVIN5Cg4U',\
                     'spotify:playlist:6Qu7co7czjjXwSEuCyCOAD',\
                     'spotify:playlist:5szak97F9g5KufvE9gI3wZ',\
                     'spotify:playlist:2V0gyT0QmFdURZMO6V8uBC',\
                     'spotify:playlist:37i9dQZF1DX5Q27plkaOQ3',\
                     'spotify:playlist:0KEWfN1VI1gQIuZwnnyZrV',\
                     'spotify:playlist:7r4NzA8aEKdNJcsLwRvNv8',\
                     'spotify:playlist:1r3wxe2Bxdu5PE6qOkBPut',\
                     'spotify:playlist:2ZBRrHz00yrqhCIUWpXgBI',\
                     'spotify:playlist:5BLf2cCU57iKnVBYnXYZaO',\
                     'spotify:playlist:3TMIAE4WfGH6NVo3FVJJke',\
                     'spotify:playlist:09I0tyeHnd4G6vnt8iBYDM',\
                     'spotify:playlist:3fRGztTdMolxw0nNRnZdj1',\
                     'spotify:playlist:3quLyj3mybW3hHTt9UVGtw',\
                     'spotify:playlist:2sjVpVZpcje4dIznYcQfAC',\
                     'spotify:playlist:5iExAknaGP8JmScz4mdgp1',\
                     'spotify:playlist:1AsVFc0487EvJwDqEIJHBa',\
                     'spotify:playlist:5wNRJwSnBImUuZXJW1TiAj',\
                     'spotify:playlist:3IA3ITmQpz3ljORSuLKI9g',\
                     'spotify:playlist:1VBPcr2mWcjUBI89omHbq0',\
                     'spotify:playlist:5XfRIAnUvliBwA78AfI21J',\
                     'spotify:playlist:3vKsFlHqEkwbDLzUdm7k85',\
                     'spotify:playlist:6E4ExdbRj89LWi61AOHjxo',\
                     'spotify:playlist:6phIrRw2XU4sz3rYS7lSnv',\
                     'spotify:playlist:45sWJkHfnQ3oJ1QxNiRSGN',\
                     'spotify:playlist:2M4LqkvwUoZUTwaAfy2UKP',\
                     'spotify:playlist:372Tvhb55xHYHlbGcwr2rv']


In [13]:
playlists_hardstyle = ['spotify:playlist:3bGSAHGYFEDxyEj7uXe0qq',\
                       'spotify:playlist:37i9dQZF1DX0pH2SQMRXnC',\
                       'spotify:playlist:3M5bphHgTe5oTebqMUkczJ',\
                       'spotify:playlist:4yHL0Vok6DofsmOEAUemTs',\
                       'spotify:playlist:4FK8PNOzHWhZbgxvBIWxfr',\
                       'spotify:playlist:2haltdKf2U7JhvCpeI3bpM',\
                       'spotify:playlist:7ecjOhrAkcnWMJaJxMUiSK',\
                       'spotify:playlist:3LHR9Ko2cIqfyOugDU6Sbr',\
                       'spotify:playlist:37i9dQZF1DX7NhGf24haor',\
                       'spotify:playlist:0jvp2XkmT8EAe8aKprzPSN',\
                       'spotify:playlist:6jxElHkfE4i59rYA0iw7Nh',\
                       'spotify:playlist:3TPVoqbqXyeYoiLr6f5Qzw',\
                       'spotify:playlist:2MSHhxtmzMyBHjSVD0rcuN',\
                       'spotify:playlist:2NY34sL3vaST2m41bbGj9I',\
                       'spotify:playlist:4TGyWrMDsSEc1flS0XwCLR',\
                       'spotify:playlist:0945N3eI9uOdY3dKnesoC8',\
                       'spotify:playlist:2LVvgP7VJN5xAkmPdl5J4q',\
                       'spotify:playlist:0J6rTAWgzATvixpbmoUyBw',\
                       'spotify:playlist:3aR2n0XpRNlrWose8kx82S',\
                       'spotify:playlist:2LPRY9zd2ou5nzBijiHhsT',\
                       'spotify:playlist:02yGrg25tSif38IKdnu5BC',\
                       'spotify:playlist:6yLZGml16kzULqUBjhWdKP',\
                       'spotify:playlist:6bgAF9cZlt373xe3ZglWhv',\
                       'spotify:playlist:1P6ue4O5NIFkfDgF9GFrkU',\
                       'spotify:playlist:0r2Ah8Iq0F87jTIoSUPUFF',\
                       'spotify:playlist:5hFd2Eakmovb6RAstuclNO',\
                       'spotify:playlist:5SPe4piyussZvC5wS03gye',\
                       'spotify:playlist:1QINLBaqnigRe2O5bnZrRS',\
                       'spotify:playlist:2vGynBrYFZZahXI2yHrlua',\
                       'spotify:playlist:3VdI2oR2OSTlggFuZf9DB4']                      ]

In [92]:
playlists_prog_house = ['spotify:playlist:5CMvAWTlDPdZnkleiTHyyo',\
                        'spotify:playlist:626UDFY53J9Fma3om0Jkx8',\
                        'spotify:playlist:7LVIOFEgmTbDYnwGv162vN',\
                        'spotify:playlist:5S33MuzhWWNW57z49eBSui',\
                        'spotify:playlist:27VY0PivrvmGgNvFx23sU5',\
                        'spotify:playlist:6JcBwspcgUBohikbK8vSzn',\
                        'spotify:playlist:3YeajqIAyj4wpRF28r200x', 'spotify:playlist:0twgzXVsIw5TTU3rTM35MS',\
                        'spotify:playlist:05rymuboflZFqbz6qp2umN',\
                        'spotify:playlist:3Wcdyxo643EZOQL1ES6M4s',\
                        'spotify:playlist:4KRKrZA5Xd2Yg2DdqGkExt',\
                        'spotify:playlist:2FfELJaBcybxCRkRJOv9HC',\
                        'spotify:playlist:3aibqawnjfomPVR0Y8DaJX',\
                        'spotify:playlist:6PCvHPLlCmRB40ecbX2ujS',\
                        'spotify:playlist:5naf36PQw7BxiiTRBYudRv',\
                        'spotify:playlist:6EH7SkbiK6hO5fo5HMedPX',\
                        'spotify:playlist:528Hzmamt58Sjz2tVeiTyX',\
                        'spotify:playlist:67Wb4vEhaQ8toEleqzHL2i',\
                        'spotify:playlist:5M28rzdeQFlbMiymhMo1tC',\
                        'spotify:playlist:3aIJD4EEfC26itf5RFttVs',\
                        'spotify:playlist:5P1sC87GZ8F2d5zIo5NCJI',\
                        'spotify:playlist:0YYfBqObHKDQgp03QgFpAF',\
                        'spotify:playlist:77vGgwlmUAyaOcGouhK5X8',\
                        'spotify:playlist:7bDn5o2sR24ZxpWHzhm2Qz',\
                        'spotify:playlist:1R0QuM6uXZQRIojoxJK2Zj',\
                        'spotify:playlist:2ATOfdwJgpYlkJ0zsexZYw',\
                        'spotify:playlist:1oFzUnIGNXglnGfPuZefc5',\
                        'spotify:playlist:0GVRyHxMZr6xjO0HoqilPe',\
                        'spotify:playlist:4wdVFpxzUIr3nldYvt1DNg']

In [67]:
playlists_techno = ['spotify:playlist:18vUeZ9BdtMRNV6gI8RnR6',\
                    'spotify:playlist:37i9dQZF1DX6J5NfMJS675',\
                    'spotify:playlist:6MJSGcF4iV79gyo8xZpd8U',\
                    'spotify:playlist:3Y3NX4qhBo5rxvU60MgUoh',\
                    'spotify:playlist:3xZiKQnlj2sG9s2mrQK1lZ',\
                    'spotify:playlist:4C80pHNiCw3R7vcA5xvgPO',\
                    'spotify:playlist:48yJRrbHtz1DwDEQEZuohc',\
                    'spotify:playlist:2n9IW8aEnZ0C0o2e3etOdx',\
                    'spotify:playlist:6JqrgEHEMAzgfyuQTXYReT',\
                    'spotify:playlist:6VQ0Gn0ISUX4bAl9LVsQNK',\
                    'spotify:playlist:5KlUhhSR7sZOdl8Hxy3Guz',\
                    'spotify:playlist:37i9dQZF1DX8ZMwsPgxIOs',\
                    'spotify:playlist:0B3WoheGNqol1B69LM9Y8n',\
                    'spotify:playlist:0iDXvLH6Y6euU2bTxIjgxC',\
                    'spotify:playlist:1irK0GrzY5R1ZTSKvD0egn',\
                    'spotify:playlist:1wTBf9md3Qx1vEXJJ9VRpH',\
                    'spotify:playlist:47km1wjUFyMh3LZ1J2R5D7',\
                    'spotify:playlist:6sMkUAotBUAiXq1dP5s9nG',\
                    'spotify:playlist:7LS2grTMmboABdGoy1Nbf4',\
                    'spotify:playlist:7mwPa6HjqoiUrsk3C2Hitk',\
                    'spotify:playlist:4JFZfMDsV30FAs3NR8y9Cq',\
                    'spotify:playlist:7Anb1HtKdhvK3Pb1d36f22',\
                    'spotify:playlist:3iSSSGjtYzad5IisyBrm2U',\
                    'spotify:playlist:5qUiLFZRY0CdQJJNnvDMtY',\
                    'spotify:playlist:0G2fxTnXZCwCfE5e3paeqq',\
                    'spotify:playlist:7q0AoI7HnxTxfMpnH4Q92Z',\
                    'spotify:playlist:1Ho106dv2ZNq07uVWFXFed',\
                    'spotify:playlist:4teaXY1gkkq3Fv6ksOfmNC',\
                    'spotify:playlist:7qNw8Sn2N5lbHYnirZIni6',\
                    'spotify:playlist:7sCdZfCThcJRLjcQuV5qWe']                   ]

In [81]:
playlists_trance = ['spotify:playlist:0Q3ugz23LAXFg2PvXJ8hMx',\
                    'spotify:playlist:37i9dQZF1DXbtYAdenGE9U',\
                    'spotify:playlist:69gyd1SXiVhvfsyYS078f0',\
                    'spotify:playlist:78AFAJFvRzboZfEDnAkkFn',\
                    'spotify:playlist:5dQ4RlPHRjGDZQwsWnpdJ2',\
                    'spotify:playlist:1coHeAMbS3luasCXzfnm7b',\
                    'spotify:playlist:1qxzbI2JYHwt1Uvcw6ZLPL',\
                    'spotify:playlist:5VlGm8N7DJaMA7mCFOp6GV',\
                    'spotify:playlist:0uVIGYfnUAkOT5REqtQICx',\
                    'spotify:playlist:4wM86bbXb5U6IxOAXh5Wev',\
                    'spotify:playlist:260cw4PvjDjcWuCi5duiEf',\
                    'spotify:playlist:2iTMQ36cvjhxgIYGv7uld5',\
                    'spotify:playlist:3dFiXOwSqPw3Qi59No6tfh',\
                    'spotify:playlist:3TxOh9fFiyyOA8aTh7IaqC',\
                    'spotify:playlist:12bgX5upoSMOSFr1HWpL02',\
                    'spotify:playlist:5O4EoYxZlrkWJegYMMEMm3',\
                    'spotify:playlist:7cEc9ewYHymDkAHvyQf344',\
                    'spotify:playlist:3MKHH3b7ExUNwm228Za7wM',\
                    'spotify:playlist:4nWDc1cjcL1yPkR3Z0uYeU',\
                    'spotify:playlist:2Ps64TVbfmfX6jxWJnlX1j',\
                    'spotify:playlist:1RX2XgpOYPAHPptQZZq5aF',\
                    'spotify:playlist:2lrCh3HJpqVhx6Ia3HDdzm',\
                    'spotify:playlist:5XZXBrQQ6D7aEjhJ2RC6Io',\
                    'spotify:playlist:5qzG29K5RdtdFpvvpsMuY6',\
                    'spotify:playlist:5QafFMGgQKGwqgV7k3qHy6',\
                    'spotify:playlist:03JQeIhJa8jiLYLmwL7mBT',\
                    'spotify:playlist:1t027lkuoGFJ6CQPs3WXSb',\
                    'spotify:playlist:79BWXGe8jwjFr4vm6Md3qQ',\
                    'spotify:playlist:72DnoMQTeGRgLbLk7WlKol']


In [104]:
# Building the dataset of tracks
track_uris_deep_house = tracks_from_playlists(playlists_deep_house, sp)
track_uris_dnb = tracks_from_playlists(playlists_dnb, sp)
track_uris_dubstep = tracks_from_playlists(playlists_dubstep, sp)
track_uris_hardstyle = tracks_from_playlists(playlists_hardstyle, sp)
track_uris_prog_house = tracks_from_playlists(playlists_prog_house, sp)
track_uris_techno = tracks_from_playlists(playlists_techno, sp)
track_uris_trance = tracks_from_playlists(playlists_trance, sp)

deep_house_tracks_unshuff = np.array(track_uris_deep_house[:])
dnb_tracks_unshuff = np.array(track_uris_dnb[:])
dubstep_tracks_unshuff = np.array(track_uris_dubstep[:])
hardstyle_tracks_unshuff = np.array(track_uris_hardstyle[:])
prog_house_tracks_unshuff = np.array(track_uris_prog_house[:])
techno_tracks_unshuff = np.array(track_uris_techno[:])
trance_tracks_unshuff = np.array(track_uris_trance[:])

np.random.shuffle(deep_house_tracks_unshuff)
np.random.shuffle(dnb_tracks_unshuff)
np.random.shuffle(dubstep_tracks_unshuff)
np.random.shuffle(hardstyle_tracks_unshuff)
np.random.shuffle(prog_house_tracks_unshuff)
np.random.shuffle(techno_tracks_unshuff)
np.random.shuffle(trance_tracks_unshuff)

deep_house_tracks = np.array(deep_house_tracks_unshuff[0:2000])
dnb_tracks = np.array(dnb_tracks_unshuff[0:2000])
dubstep_tracks = np.array(dubstep_tracks_unshuff[0:2000])
hardstyle_tracks = np.array(hardstyle_tracks_unshuff[0:2000])
prog_house_tracks = np.array(prog_house_tracks_unshuff[0:2000])
techno_tracks = np.array(techno_tracks_unshuff[0:2000])
trance_tracks = np.array(trance_tracks_unshuff[0:2000])


print('Deep House Tracks:        ' + str(len(deep_house_tracks)))
print('Drum and Bass Tracks:     ' + str(len(dnb_tracks)))
print('Dubstep Tracks:           ' + str(len(dubstep_tracks)))
print('Hardstyle Tracks:         ' + str(len(hardstyle_tracks)))
print('Progressive House Tracks: ' + str(len(prog_house_tracks)))
print('Techno Tracks:            ' + str(len(techno_tracks)))
print('Trance Tracks:            ' + str(len(trance_tracks)))

Deep House Tracks:        2000
Drum and Bass Tracks:     2000
Dubstep Tracks:           2000
Hardstyle Tracks:         2000
Progressive House Tracks: 2000
Techno Tracks:            2000
Trance Tracks:            2000


In [105]:
# Get audio features data for all tracks in the dataset
audio_dnb = get_audio_features(dnb_tracks, sp)
audio_deep_house = get_audio_features(deep_house_tracks, sp)
audio_dubstep = get_audio_features(dubstep_tracks, sp)
audio_hardstyle = get_audio_features(hardstyle_tracks, sp)
audio_prog_house = get_audio_features(prog_house_tracks, sp)
audio_techno = get_audio_features(techno_tracks, sp)
audio_trance = get_audio_features(trance_tracks, sp)

Expected id of type track but found type Believe+In+Me+%281996%29 spotify:local:Alaska:Early+Ambient+DnB:Believe+In+Me+%281996%29:435
Expected id of type track but found type Believe+In+Me+%281996%29 spotify:local:Alaska:Early+Ambient+DnB:Believe+In+Me+%281996%29:435
Expected id of type track but found type Beyond+The+Fundamental+%28Big+Bud+Remix%29+%281999%29 spotify:local:Omni+Trio:Early+Ambient+DnB:Beyond+The+Fundamental+%28Big+Bud+Remix%29+%281999%29:496
Expected id of type track but found type Beyond+The+Fundamental+%28Big+Bud+Remix%29+%281999%29 spotify:local:Omni+Trio:Early+Ambient+DnB:Beyond+The+Fundamental+%28Big+Bud+Remix%29+%281999%29:496
Expected id of type track but found type Revolution spotify:local:Neil+Trix:Early+Ambient+DnB:Revolution:462
Expected id of type track but found type Revolution spotify:local:Neil+Trix:Early+Ambient+DnB:Revolution:462
Expected id of type track but found type Sketch+%28Omni+Music%29 spotify:local:Pete+Rann+%26+Tidal:Early+DnB+from+%2795+Onwa

In [106]:
# Add genre tag and combine dataframes
audio_dnb['genre'] = 'dnb'
audio_deep_house['genre'] = 'deep_house'
audio_dubstep['genre'] = 'dubstep'
audio_hardstyle['genre'] = 'hardstyle'
audio_prog_house['genre'] = 'prog_house'
audio_techno['genre'] = 'techno'
audio_trance['genre'] = 'trance'


In [107]:
all_genres = pd.concat([audio_dnb, audio_deep_house, audio_dubstep, audio_hardstyle,
                        audio_prog_house, audio_techno, audio_trance])

In [108]:
all_genres.head()

Unnamed: 0,0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence,genre
0,,0.0224,https://api.spotify.com/v1/audio-analysis/7GWP...,0.491,299333.0,0.894,7GWP1uguKGMEX2JXW2TqOw,0.869,9.0,0.367,-8.828,0.0,0.0667,164.177,4.0,https://api.spotify.com/v1/tracks/7GWP1uguKGME...,audio_features,spotify:track:7GWP1uguKGMEX2JXW2TqOw,0.895,dnb
1,,0.0112,https://api.spotify.com/v1/audio-analysis/0iON...,0.624,386227.0,0.992,0iONLeOXEQkEIvJPgq7nRP,0.894,7.0,0.543,-5.796,0.0,0.129,169.877,4.0,https://api.spotify.com/v1/tracks/0iONLeOXEQkE...,audio_features,spotify:track:0iONLeOXEQkEIvJPgq7nRP,0.882,dnb
2,,0.000413,https://api.spotify.com/v1/audio-analysis/1acp...,0.429,275862.0,0.993,1acpIlnD71Anfcy2RXZkm3,0.771,10.0,0.434,-2.33,1.0,0.4,174.002,4.0,https://api.spotify.com/v1/tracks/1acpIlnD71An...,audio_features,spotify:track:1acpIlnD71Anfcy2RXZkm3,0.226,dnb
3,,0.0087,https://api.spotify.com/v1/audio-analysis/365w...,0.544,288000.0,0.973,365wSjnv3SsxQB6wXznlAa,0.107,0.0,0.739,-0.599,1.0,0.0748,175.004,4.0,https://api.spotify.com/v1/tracks/365wSjnv3Ssx...,audio_features,spotify:track:365wSjnv3SsxQB6wXznlAa,0.801,dnb
4,,0.0026,https://api.spotify.com/v1/audio-analysis/71xC...,0.543,273103.0,0.831,71xCEFFeTnFz7EGgydKsQx,0.669,4.0,0.168,-4.736,0.0,0.0359,87.002,4.0,https://api.spotify.com/v1/tracks/71xCEFFeTnFz...,audio_features,spotify:track:71xCEFFeTnFz7EGgydKsQx,0.0379,dnb


In [109]:
cols = ['uri', 'genre', 'duration_ms', 'acousticness', 'danceability', 'energy',
        'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo',
        'time_signature', 'valence']

all_genres_droppedcols_raw = all_genres[cols]
all_genres_droppedcols_raw.drop_duplicates(subset='uri', keep=False, inplace=True)

In [110]:
# adjusting tempo for half time and double time tracks
def adjust_tempo(tempo):

    # Divide any tempo over 200 by 2
    if tempo > 200:
        tempo /= 2
        return tempo
    
    # Multiply any tempo under 100 by 2
    elif tempo < 100:
        tempo *= 2
        return tempo
    
    # Leave all other tempos alone
    else:
        return tempo
 

In [120]:
#ensuring data integrity and dismissing outliers using logical mask

loudness_mean = all_genres_droppedcols_raw['loudness'].mean()
loudness_std = all_genres_droppedcols_raw['loudness'].std()


mask = ((all_genres_droppedcols_raw['duration_ms'] >= 135000) &
        (all_genres_droppedcols_raw['duration_ms'] <= 600000) &
        (all_genres_droppedcols_raw['acousticness'] < 0.95) &
        (all_genres_droppedcols_raw['danceability'] > 0.15) &
        (all_genres_droppedcols_raw['energy'] > 0.10) &
        (all_genres_droppedcols_raw['loudness'] >= loudness_mean - 3 * loudness_std) &
        (all_genres_droppedcols_raw['loudness'] <= loudness_mean + 3 * loudness_std)&
       (all_genres_droppedcols_raw['key'] != 'NaN'))

all_genres = all_genres_droppedcols_raw[mask]


In [121]:
all_genres_droppedcols_raw['tempo'] = all_genres_droppedcols_raw['tempo'].map(adjust_tempo)

In [122]:
all_genres_droppedcols_raw['genre'].value_counts()

dnb           2000
deep_house    2000
dubstep       2000
hardstyle     2000
prog_house    2000
techno        2000
trance        2000
Name: genre, dtype: int64

In [126]:
all_genres_droppedcols_raw.head()
sorted(all_genres_droppedcols_raw['key'].unique())

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, nan]

In [135]:
newdf = all_genres_droppedcols_raw[all_genres_droppedcols_raw.key.notnull()]
sorted(newdf['key'].unique())

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]

In [124]:
#Creating Dummy variables for categorical variables key and time_sig

def dummies(df):
    
    for i in sorted(df['key'].unique()):
        df['key_' + str(int(i))] = np.where(df['key'] == i, 1, 0)
    
    for i in sorted(df['time_signature'].unique()):
        df['time_sig_' + str(int(i))] = np.where(df['time_signature'] == i, 1, 0)
        
    df.drop(columns=['key', 'time_signature'], inplace=True)
    
    return df

In [136]:
all_genres_balanced = dummies(newdf)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['key_' + str(int(i))] = np.where(df['key'] == i, 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['time_sig_' + str(int(i))] = np.where(df['time_signature'] == i, 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [137]:
all_genres_balanced.head()

Unnamed: 0,uri,genre,duration_ms,acousticness,danceability,energy,instrumentalness,liveness,loudness,mode,...,key_6,key_7,key_8,key_9,key_10,key_11,time_sig_1,time_sig_3,time_sig_4,time_sig_5
0,spotify:track:7GWP1uguKGMEX2JXW2TqOw,dnb,299333.0,0.0224,0.491,0.894,0.869,0.367,-8.828,0.0,...,0,0,0,1,0,0,0,0,1,0
1,spotify:track:0iONLeOXEQkEIvJPgq7nRP,dnb,386227.0,0.0112,0.624,0.992,0.894,0.543,-5.796,0.0,...,0,1,0,0,0,0,0,0,1,0
2,spotify:track:1acpIlnD71Anfcy2RXZkm3,dnb,275862.0,0.000413,0.429,0.993,0.771,0.434,-2.33,1.0,...,0,0,0,0,1,0,0,0,1,0
3,spotify:track:365wSjnv3SsxQB6wXznlAa,dnb,288000.0,0.0087,0.544,0.973,0.107,0.739,-0.599,1.0,...,0,0,0,0,0,0,0,0,1,0
4,spotify:track:71xCEFFeTnFz7EGgydKsQx,dnb,273103.0,0.0026,0.543,0.831,0.669,0.168,-4.736,0.0,...,0,0,0,0,0,0,0,0,1,0


In [138]:
model_data = all_genres_balanced.drop(columns='uri')
model_data.head()

Unnamed: 0,genre,duration_ms,acousticness,danceability,energy,instrumentalness,liveness,loudness,mode,speechiness,...,key_6,key_7,key_8,key_9,key_10,key_11,time_sig_1,time_sig_3,time_sig_4,time_sig_5
0,dnb,299333.0,0.0224,0.491,0.894,0.869,0.367,-8.828,0.0,0.0667,...,0,0,0,1,0,0,0,0,1,0
1,dnb,386227.0,0.0112,0.624,0.992,0.894,0.543,-5.796,0.0,0.129,...,0,1,0,0,0,0,0,0,1,0
2,dnb,275862.0,0.000413,0.429,0.993,0.771,0.434,-2.33,1.0,0.4,...,0,0,0,0,1,0,0,0,1,0
3,dnb,288000.0,0.0087,0.544,0.973,0.107,0.739,-0.599,1.0,0.0748,...,0,0,0,0,0,0,0,0,1,0
4,dnb,273103.0,0.0026,0.543,0.831,0.669,0.168,-4.736,0.0,0.0359,...,0,0,0,0,0,0,0,0,1,0


In [158]:
rs = 13
audio_metrics = ['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'liveness', 
                 'loudness', 'speechiness', 'tempo', 'valence']

dummy_vars = ['key_0', 'key_1', 'key_2', 'key_3', 'key_4', 'key_5', 'key_6', 'key_7', 'key_8', 'key_9', 'key_10', 'key_11',
              'time_sig_1', 'time_sig_3', 'time_sig_4', 'time_sig_5', 'mode']

X = model_data[audio_metrics + dummy_vars]
Y = model_data['genre']
X, x_test, Y, y_test = train_test_split(X, Y, test_size=0.20, random_state=rs)
x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.25, random_state=rs)

In [159]:
#scaled down data
st_scale = StandardScaler()
x_train_st = st_scale.fit_transform(x_train[audio_metrics])
x_val_st = st_scale.fit_transform(x_val[audio_metrics])
x_test_st = st_scale.transform(x_test[audio_metrics])
x_train_st = np.hstack((x_train_st, x_train[dummy_vars].to_numpy()))
x_val_st = np.hstack((x_val_st, x_val[dummy_vars].to_numpy()))
x_test_st = np.hstack((x_test_st, x_test[dummy_vars].to_numpy()))

In [164]:
#Logistic Regression
unique_vals = model_data['genre'].unique()
lr_y_train = y_train.replace(to_replace=unique_vals, value= list(range(len(unique_vals))))
lr_y_val = y_val.replace(to_replace=unique_vals, value= list(range(len(unique_vals))))
lr_y_test = y_test.replace(to_replace=unique_vals, value= list(range(len(unique_vals))))

lr_classifier = LogisticRegression(multi_class='multinomial', solver='sag', max_iter=1000, random_state=rs)
lr_classifier.fit(x_train_st, lr_y_train)

y_pred_train = lr_classifier.predict(x_train_st)
y_pred_val = lr_classifier.predict(x_val_st)

lr_train_score = accuracy_score(lr_y_train, y_pred_train, normalize = True)
lr_val_score = accuracy_score(lr_y_val, y_pred_val, normalize = True)

print('Training Score:   ' + str(lr_train_score))
print('Validation Score: ' + str(lr_val_score))

Training Score:   0.6180431414610893
Validation Score: 0.6157969978556112


In [166]:
#Random Forest Classifier
rf_classifier = RandomForestClassifier(random_state=rs)
rf_classifier.fit(x_train, y_train)

y_pred_train = rf_classifier.predict(x_train)
y_pred_val = rf_classifier.predict(x_val)

rf_train_score = accuracy_score(y_train, y_pred_train, normalize = True)
rf_val_score = accuracy_score(y_val, y_pred_val, normalize = True)

print('Training Score:   ' + str(rf_train_score))
print('Validation Score: ' + str(rf_val_score))

Training Score:   0.9952329877249434
Validation Score: 0.724446032880629


In [168]:
#Support Vector Classifier
svm_classifier = svm.SVC()
svm_classifier.fit(x_train_st, y_train)

y_pred_train = svm_classifier.predict(x_train_st)
y_pred_val = svm_classifier.predict(x_val_st)

svm_train_score = accuracy_score(y_train, y_pred_train, normalize = True)
svm_val_score = accuracy_score(y_val, y_pred_val, normalize = True)

print('Training Score:   ' + str(svm_train_score))
print('Validation Score: ' + str(svm_val_score))

Training Score:   0.7512811345489214
Validation Score: 0.6954967834167263


In [170]:
print('Logistic Regression')
print('Training Score:   ' + str(lr_train_score))
print('Validation Score: ' + str(lr_val_score))
print()
print('Random Forest')
print('Training Score:   ' + str(rf_train_score))
print('Validation Score: ' + str(rf_val_score))
print()
print('Support Vector')
print('Training Score:   ' + str(svm_train_score))
print('Validation Score: ' + str(svm_val_score))

Logistic Regression
Training Score:   0.6180431414610893
Validation Score: 0.6157969978556112

Random Forest
Training Score:   0.9952329877249434
Validation Score: 0.724446032880629

Support Vector
Training Score:   0.7512811345489214
Validation Score: 0.6954967834167263
