In [1]:
import configparser
import json
import spotipy
import spotipy.util as util
import pandas as pd
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyClientCredentials
import bamboolib
import numpy as np

import warnings
# Suppress every warning for the rest of the session: no category, module or
# message filter is given, so this applies to all of them.
warnings.filterwarnings("ignore")

In [2]:
# Load the Spotify app credentials and the stored access token.
# The encoding is stated explicitly so the JSON is decoded as UTF-8 instead of
# whatever the platform's locale default happens to be.
with open('../data/raw/spotify_creds.json', encoding='utf-8') as f:
    spotify_creds = json.load(f)

with open('../data/raw/spotify_token.json', encoding='utf-8') as f:
    spotify_token = json.load(f)

In [3]:
# Unpack the settings read from the credentials file, in the order the
# connection helper expects them.
client_id, client_secret, username, scope, redirect_uri = (
    spotify_creds[key]
    for key in (
        'client_id',
        'client_secret',
        'username',
        'saved_library_scope',
        'saved_library_redirect_url',
    )
)
# Access token read from the token file.
token = spotify_token['all_access_token']


def connect_to_spotify_api(client_id, client_secret, username, scope, redirect_uri,
                           access_token=None):
    """Build the ``spotipy.Spotify`` client used by this notebook.

    A bearer token takes priority: ``access_token`` if given, otherwise the
    module-level ``token``. Only when no token is available is an app-only
    client created from ``client_id`` / ``client_secret``, after printing a
    notice that names ``username``.

    Parameters
    ----------
    client_id, client_secret
        Spotify app credentials; used only for the app-only fallback.
    username
        Only used in the "can't get token" notice.
    scope, redirect_uri
        Not used by this function; kept so existing calls keep working.
    access_token : optional
        Token to authenticate with. Defaults to the module-level ``token``.

    Returns
    -------
    spotipy.Spotify
        A token-authenticated client, or the app-only fallback client.
    """
    bearer = token if access_token is None else access_token
    if bearer:
        # No client-credentials manager is built on this path: it would be
        # thrown away, and constructing it should not be a precondition for
        # token-based access.
        return spotipy.Spotify(auth=bearer)

    print("Can't get token for", username)
    client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
    return spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Shared client that the helper functions in this notebook reach for as `sp`.
sp = connect_to_spotify_api(
    client_id=client_id,
    client_secret=client_secret,
    username=username,
    scope=scope,
    redirect_uri=redirect_uri,
)

In [4]:
def search_artist_album(artist_NAME):
    """Look up an artist by name and list their albums and singles.

    Searches for ``artist_NAME`` with the module-level ``sp`` client, takes the
    first artist in the result, and pages through that artist's releases
    (``album_type='single,album'``, ``country='US'``) until no ``next`` page
    remains.

    Parameters
    ----------
    artist_NAME
        Free-text artist query passed to the search call.

    Returns
    -------
    pandas.DataFrame
        One row per release with the columns ``name``, ``url`` (the last path
        segment of the release's Spotify URL), ``release_date``, ``type``,
        ``group`` and ``total_tracks``.

    Raises
    ------
    IndexError
        If the search returns no artist at all.
    """
    columns = ['name', 'url', 'release_date', 'type', 'group', 'total_tracks']

    search = sp.search(q=artist_NAME, type="artist", limit=10)
    hits = search['artists']['items']
    if not hits:
        # Same exception type the bare `[0]` lookup used to raise, but with a
        # message that says which query came back empty.
        raise IndexError(f"No Spotify artist found for {artist_NAME!r}")
    artist_key = hits[0]['external_urls']['spotify'].split('/')[-1]

    page = sp.artist_albums(artist_key, album_type='single,album', country='US')
    albums = list(page['items'])
    while page['next']:
        page = sp.next(page)
        albums.extend(page['items'])

    records = [
        {
            'name': album['name'],
            'url': album['external_urls']['spotify'].split('/')[-1],
            'release_date': album['release_date'],
            'type': album['album_type'],
            'group': album['album_group'],
            'total_tracks': album['total_tracks'],
        }
        for album in albums
    ]
    # Passing `columns` keeps the schema stable even when there are no releases.
    return pd.DataFrame(records, columns=columns)

In [12]:
# Album/single catalogue for the artist analysed in this notebook.
df = search_artist_album(artist_NAME='r3hab')

In [6]:
def find_album_tracks(album_url):
    """Return the track listing of one album as a DataFrame.

    The tracks are fetched through the module-level ``sp`` client, following
    ``next`` links until every page has been read.

    The result has one row per track and two columns: ``track`` (the track
    name) and ``url`` (the last path segment of the track's Spotify URL).
    """
    page = sp.album_tracks(album_url)
    items = page['items']
    # Items from later pages are appended onto the first page's list.
    while page['next']:
        page = sp.next(page)
        items.extend(page['items'])

    listing = pd.DataFrame()
    listing['track'] = [item['name'] for item in items]
    listing['url'] = [
        item['external_urls']['spotify'].split('/')[-1] for item in items
    ]
    return listing

In [7]:
def find_all_tracks_features(album, artist_NAME):
    """Collect audio features and artist credits for every track on the given
    releases, then keep only the tracks attributed to one artist.

    For each row of ``album`` the track listing is fetched with
    ``find_album_tracks``; for each track the module-level ``sp`` client is
    asked for its audio features and its artist list.

    Parameters
    ----------
    album : pandas.DataFrame
        Releases to scan. The ``url``, ``release_date`` and ``name`` columns
        are read (the frame returned by ``search_artist_album`` has them).
    artist_NAME
        Text matched case-insensitively, as a plain substring, against the
        artist credits and the track titles.

    Returns
    -------
    pandas.DataFrame
        Columns ``acousticness``, ``danceability``, ``energy``,
        ``speechiness``, ``valence``, ``BPM`` (the ``tempo`` feature),
        ``track``, ``album``, ``release date``, ``url`` and ``artists``
        (comma-joined names), indexed ``0..n-1``. A row is dropped when

        * its artist credits do not contain ``artist_NAME``;
        * its title contains ``mix`` but not ``artist_NAME``;
        * its title contains ``Radio Edit``;
        * a later row has the same title (only the last one is kept).

        A track for which no audio features come back is kept with empty
        feature values. With no releases or no tracks the result is an empty
        frame that still has the columns above.
    """
    # Audio-feature key -> output column name (tempo is exposed as BPM).
    feature_columns = {
        'acousticness': 'acousticness',
        'danceability': 'danceability',
        'energy': 'energy',
        'speechiness': 'speechiness',
        'valence': 'valence',
        'tempo': 'BPM',
    }
    output_columns = list(feature_columns.values()) + [
        'track', 'album', 'release date', 'url', 'artists',
    ]

    records = []
    for album_url, release_date, album_name in zip(
            album['url'], album['release_date'], album['name']):
        listing = find_album_tracks(album_url)
        for track_name, track_url in zip(listing['track'], listing['url']):
            features = sp.audio_features(track_url)
            # Guard against an empty or None entry so that one track without
            # audio features does not abort the whole run.
            track_features = (features[0] if features else None) or {}
            record = {
                column: track_features.get(key)
                for key, column in feature_columns.items()
            }

            song = sp.track(track_url)
            record.update({
                'track': track_name,
                'album': album_name,
                'release date': release_date,
                'url': track_url,
                'artists': ','.join(artist['name'] for artist in song['artists']),
            })
            records.append(record)

    # One frame built from all records; `columns` fixes the schema even when
    # nothing was collected.
    tracks = pd.DataFrame(records, columns=output_columns)
    if tracks.empty:
        return tracks

    def _contains(column, text):
        # Case-insensitive plain-substring match; missing values count as no match.
        return tracks[column].str.contains(text, case=False, regex=False, na=False)

    credited_to_artist = _contains('artists', artist_NAME)
    # Titles containing "mix" are kept only when they also name the artist.
    own_or_not_a_mix = _contains('track', artist_NAME) | ~_contains('track', 'mix')
    not_radio_edit = ~_contains('track', 'Radio Edit')

    tracks = tracks.loc[credited_to_artist & own_or_not_a_mix & not_radio_edit]
    # Same title on several releases: keep the last occurrence only.
    tracks = tracks.drop_duplicates(subset=['track'], keep='last')
    # Give the result a unique 0..n-1 index.
    return tracks.reset_index(drop=True)

In [14]:
# Per-track audio features for the catalogue in `df`, filtered to the artist.
df_2 = find_all_tracks_features(album=df, artist_NAME='r3hab')

In [15]:
# Show the resulting per-track feature table as the cell output.
df_2

Unnamed: 0,acousticness,danceability,energy,speechiness,valence,BPM,track,album,release date,url,artists
0,0.031900,0.416,0.700,0.0375,0.1000,141.934,Eyes Closed,The Wave,2018-08-25,1dmmZ4gkN8U6fYT8cOqAPE,R3HAB
0,0.109000,0.670,0.458,0.0366,0.3140,87.994,Cherry Blossom,The Wave,2018-08-25,6WRTNDVblJW6Dstku4aHix,R3HAB
0,0.161000,0.722,0.706,0.0506,0.5170,124.013,Wrong Move,The Wave,2018-08-25,3CG3RfisS4Ihe0qCs8x230,"R3HAB,THRDL!FE,Olivia Holt"
0,0.109000,0.629,0.573,0.0277,0.1690,93.009,Back to You,The Wave,2018-08-25,4p7fb6fjHMcyEebumewT2p,"R3HAB,SAKIMA"
0,0.048100,0.591,0.703,0.0618,0.0338,114.960,Belle,The Wave,2018-08-25,2nzpyRpQbh3TVdxU9A0ScV,R3HAB
...,...,...,...,...,...,...,...,...,...,...,...
0,0.000028,0.774,0.597,0.0536,0.4220,128.001,Rock This Place,Rock This Place,2009-11-16,6ID8xGS5VHWe071wjCV1FK,R3HAB
0,0.001610,0.801,0.678,0.0784,0.1360,127.989,Casablanca Epic,Cities EP,2009-09-22,1YxThtrf9JrGWtEyVM66tP,"Koen Groeneveld,R3HAB"
0,0.000497,0.440,0.435,0.1030,0.1120,185.066,Dubai Shuffle,Cities EP,2009-09-22,00j6u5kd2WlRMvqXf2cbcD,"Koen Groeneveld,R3HAB"
0,0.095500,0.773,0.888,0.0577,0.4200,128.009,Looping New York,Cities EP,2009-09-22,31uC9QGVZykiBOjHXNV0Ex,"Koen Groeneveld,R3HAB"
