# Extracting Features From the Original Dataset

This notebook is used to extract features from the original dataset, which gives us limited information about the songs. Here, we use the "ari.py" script to extract a set of features about each song, along with the popularities of both the artist and song itself, along with genres

In [1]:
#Import from the other file
from scripts.ari import ari_to_features
import pandas as pd
from tqdm import tqdm
import re
import os



In [37]:
#Load the raw_data from the repo
dataPath = '../data/raw_data.csv'
df = pd.read_csv(dataPath)
df.head()

Unnamed: 0.1,Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,name
0,0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks
1,1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone,Throwbacks
2,2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit),Throwbacks
3,3,3,Justin Timberlake,spotify:track:1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,267266,Justified,Throwbacks
4,4,4,Shaggy,spotify:track:1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,227600,Hot Shot,Throwbacks


In [38]:
#Edit the track-uris to a more usable format
df["track_uri"] = df["track_uri"].apply(lambda x: re.findall(r'\w+$', x)[0])
df["track_uri"]

0        0UaMYEvWZi0ZqiDOoHU3YI
1        6I9VzXrHxO9rA9A5euc8Ak
2        0WqIKmW4BTrj3eJFmnCKMv
3        1AWQoqb9bSvzTjaLralEkT
4        1lzr43nnXAijIGYnCT8M8H
                  ...          
67498    5uCax9HTNlzGybIStD3vDh
67499    0P1oO2gREMYUCoOkzYAyFu
67500    2oM4BuruDnEvk59IvIXCwn
67501    4Ri5TTUgjM96tbQZd5Ua7V
67502    5RVuBrXVLptAEbGJdSDzL5
Name: track_uri, Length: 67503, dtype: object

In [39]:
testDF = df
#feature = ari_to_features(df["track_uri"])
#feature_df_test = pd.DataFrame(feature)
#feature_df_test.head()

In [9]:
ari_to_features('0UaMYEvWZi0ZqiDOoHU3YI')

{'danceability': 0.904,
 'energy': 0.813,
 'key': 4,
 'loudness': -7.105,
 'mode': 0,
 'speechiness': 0.121,
 'acousticness': 0.0311,
 'instrumentalness': 0.00697,
 'liveness': 0.0471,
 'valence': 0.81,
 'tempo': 125.461,
 'type': 'audio_features',
 'id': '0UaMYEvWZi0ZqiDOoHU3YI',
 'uri': 'spotify:track:0UaMYEvWZi0ZqiDOoHU3YI',
 'track_href': 'https://api.spotify.com/v1/tracks/0UaMYEvWZi0ZqiDOoHU3YI',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0UaMYEvWZi0ZqiDOoHU3YI',
 'duration_ms': 226864,
 'time_signature': 4,
 'artist_pop': 71,
 'genres': 'dance_pop hip_hop hip_pop pop_rap r&b rap urban_contemporary virginia_hip_hop',
 'track_pop': 67}

## Included Features

The code cell below gives an example of the features extracted from each track, showing the kind of information that is used to cluster the data further on.

In [4]:
import os
client_id = 'e5d27af30b04420baec4921a610d3b4e'
secret = '830c1e45c2644274a06c163782ee54f7'
redirect = 'http://127.0.0.1:8080'
os.environ['SPOTIPY_CLIENT_ID'] = 'e5d27af30b04420baec4921a610d3b4e'
os.environ['SPOTIPY_CLIENT_SECRET'] = '830c1e45c2644274a06c163782ee54f7'
os.environ['SPOTIPY_REDIRECT_URI'] = 'http://127.0.0.1:8080'

In [55]:
#Shows the list of all songs sung by the artist or the band
import argparse
import logging

from spotipy.oauth2 import SpotifyClientCredentials
import spotipy

logger = logging.getLogger('examples.artist_discography')
logging.basicConfig(level='INFO')


def get_args():
    parser = argparse.ArgumentParser(description='Shows albums and tracks for '
                                     'given artist')
    parser.add_argument('-a', '--artist', required=True,
                        help='Name of Artist')
    return parser.parse_args()


def get_artist(name):
    results = sp.search(q='artist:' + name, type='artist')
    items = results['artists']['items']
    if len(items) > 0:
        return items[0]
    else:
        return None


def show_album_tracks(album):
    tracks = []
    results = sp.album_tracks(album['id'])
    tracks.extend(results['items'])
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    for i, track in enumerate(tracks):
        logger.info('%s. %s', i+1, track['name'])
    
    return tracks
        
    


def show_artist_albums(artist):
    albums = []
    trks = []
    results = sp.artist_albums(artist['id'], album_type='album')
    albums.extend(results['items'])
    while results['next']:
        results = sp.next(results)
        albums.extend(results['items'])
    logger.info('Total albums: %s', len(albums))
    unique = set()  # skip duplicate albums
    for album in albums:
        name = album['name'].lower()
        if name not in unique:
            logger.info('ALBUM: %s', name)
            unique.add(name)
            trks.extend(show_album_tracks(album))
    
    return trks


def show_artist(artist):
    logger.info('====%s====', artist['name'])
    logger.info('Popularity: %s', artist['popularity'])
    if len(artist['genres']) > 0:
        logger.info('Genres: %s', ','.join(artist['genres']))
        

def get_all_tracks(artist):
    
    art_d = get_artist(artist)
    tracks_df = pd.DataFrame(show_artist_albums(art_d))
    return tracks_df
    

def main():
    args = get_args()
    artist = get_artist(args.artist)
    show_artist(artist)
    show_artist_albums(artist)



# if __name__ == '__main__':
#     client_credentials_manager = SpotifyClientCredentials()
#     sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
#     main()

In [65]:
client_credentials_manager=SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [57]:
# Green - Hiroshi Yoshimura
# Wasser - Inoyamaland
# Chysis - Green-House
# The Plateaux of Mirror - Brian Eno
# The endless talking- Haruomi Hosono
# Horizon - Emily A Sprague
# # Blade Runner Blues - Vangelis

In [58]:
columns_order = ['id','name','followers','genres','popularity','type']

In [59]:
name = ['Hiroshi Yoshimura','Inoyamaland',
        'Green-House','Brian Eno','Haruomi Hosono',
        'Emily A Sprague', 'Vangelis']
artist_info = [get_artist(n) for n in name]
artist_df = pd.DataFrame(artist_info)[columns_order]

In [60]:
temp_trk_dfs = [get_all_tracks(artist) for artist in name]
trk_df = pd.concat(temp_trk_dfs)

INFO:examples.artist_discography:Total albums: 5
INFO:examples.artist_discography:ALBUM: green (sfx version)
INFO:examples.artist_discography:1. Creek
INFO:examples.artist_discography:2. Feel
INFO:examples.artist_discography:3. Sheep
INFO:examples.artist_discography:4. Sleep
INFO:examples.artist_discography:5. Green
INFO:examples.artist_discography:6. Feet
INFO:examples.artist_discography:7. Street
INFO:examples.artist_discography:8. Teevee
INFO:examples.artist_discography:ALBUM: green
INFO:examples.artist_discography:1. CREEK
INFO:examples.artist_discography:2. FEEL
INFO:examples.artist_discography:3. SHEEP
INFO:examples.artist_discography:4. SLEEP
INFO:examples.artist_discography:5. GREEN
INFO:examples.artist_discography:6. FEET
INFO:examples.artist_discography:7. STREET
INFO:examples.artist_discography:8. TEEVEE
INFO:examples.artist_discography:ALBUM: music for nine post cards
INFO:examples.artist_discography:1. Water Copy
INFO:examples.artist_discography:2. Clouds
INFO:examples.art

INFO:examples.artist_discography:4. Wind On Wind
INFO:examples.artist_discography:5. Announcement
INFO:examples.artist_discography:6. Wind On Water
INFO:examples.artist_discography:7. A Near Find In Rip Pop
INFO:examples.artist_discography:8. A Fearful Proper Din
INFO:examples.artist_discography:9. A Darn Psi Inferno
INFO:examples.artist_discography:10. Evening Star
INFO:examples.artist_discography:11. An Iron Frappe
INFO:examples.artist_discography:12. Softy Gun Poison
INFO:examples.artist_discography:13. An Index Of Metals
INFO:examples.artist_discography:14. Test Loop I
INFO:examples.artist_discography:15. Test Loop II
INFO:examples.artist_discography:16. Loop Only: A Radical Representative Of Pinsnip
INFO:examples.artist_discography:17. Loop Only: Wind On Water
INFO:examples.artist_discography:18. Loop Only: A Darn Psi Inferno
INFO:examples.artist_discography:19. Loop Only: Softy Gun Poison
INFO:examples.artist_discography:20. Loop Only: Wind On Water Reversed
INFO:examples.artist_

INFO:examples.artist_discography:7. Cells & Bells
INFO:examples.artist_discography:ALBUM: someday world
INFO:examples.artist_discography:1. The Satellites
INFO:examples.artist_discography:2. Daddy's Car
INFO:examples.artist_discography:3. A Man Wakes Up
INFO:examples.artist_discography:4. Witness
INFO:examples.artist_discography:5. Strip It Down
INFO:examples.artist_discography:6. Mother Of A Dog
INFO:examples.artist_discography:7. Who Rings The Bell
INFO:examples.artist_discography:8. When I Built This World
INFO:examples.artist_discography:9. To Us All
INFO:examples.artist_discography:ALBUM: lux
INFO:examples.artist_discography:1. LUX 1
INFO:examples.artist_discography:2. LUX 2
INFO:examples.artist_discography:3. LUX 3
INFO:examples.artist_discography:4. LUX 4
INFO:examples.artist_discography:ALBUM: drums between the bells
INFO:examples.artist_discography:1. bless this space
INFO:examples.artist_discography:2. glitch
INFO:examples.artist_discography:3. dreambirds
INFO:examples.artist

INFO:examples.artist_discography:9. One Fine Day
INFO:examples.artist_discography:10. Poor Boy
INFO:examples.artist_discography:11. The Lighthouse
INFO:examples.artist_discography:ALBUM: eno/wyatt/davies: music for airports
INFO:examples.artist_discography:1. 1/1
INFO:examples.artist_discography:2. 1/2
INFO:examples.artist_discography:3. 2/1
INFO:examples.artist_discography:4. 2/2
INFO:examples.artist_discography:ALBUM: harmonia & eno '76 - tracks and traces
INFO:examples.artist_discography:1. Welcome
INFO:examples.artist_discography:2. Atmosphere
INFO:examples.artist_discography:3. Vamos Companeros
INFO:examples.artist_discography:4. By the Riverside
INFO:examples.artist_discography:5. Luneburg Heath
INFO:examples.artist_discography:6. Sometimes in Autumn
INFO:examples.artist_discography:7. Weird Dream
INFO:examples.artist_discography:8. Almost
INFO:examples.artist_discography:9. Les Demoiselles
INFO:examples.artist_discography:10. When Shade Was Born
INFO:examples.artist_discography:

INFO:examples.artist_discography:17. Alhondiga Variation (Bonus Track)
INFO:examples.artist_discography:ALBUM: wrong way up
INFO:examples.artist_discography:1. Lay My Love
INFO:examples.artist_discography:2. One Word
INFO:examples.artist_discography:3. In The Backroom
INFO:examples.artist_discography:4. Empty Frame
INFO:examples.artist_discography:5. Cordoba
INFO:examples.artist_discography:6. Spinning Away
INFO:examples.artist_discography:7. Footsteps
INFO:examples.artist_discography:8. Been There Done That
INFO:examples.artist_discography:9. Crime in the Desert
INFO:examples.artist_discography:10. The River
INFO:examples.artist_discography:ALBUM: wrong way up [expanded edition]
INFO:examples.artist_discography:1. Lay My Love
INFO:examples.artist_discography:2. One Word
INFO:examples.artist_discography:3. In The Backroom
INFO:examples.artist_discography:4. Empty Frame
INFO:examples.artist_discography:5. Cordoba
INFO:examples.artist_discography:6. Spinning Away
INFO:examples.artist_dis

INFO:examples.artist_discography:2. Steal Away - Remastered 2004
INFO:examples.artist_discography:3. The Plateaux Of Mirror - Remastered 2004
INFO:examples.artist_discography:4. Above Chiangmai - Remastered 2004
INFO:examples.artist_discography:5. An Arc Of Doves - Remastered 2004
INFO:examples.artist_discography:6. Not Yet Remembered - Remastered 2004
INFO:examples.artist_discography:7. The Chill Air - Remastered 2004
INFO:examples.artist_discography:8. Among Fields Of Crystal - Remastered 2004
INFO:examples.artist_discography:9. Wind In Lonely Fences - Remastered 2004
INFO:examples.artist_discography:10. Failing Light - Remastered 2004
INFO:examples.artist_discography:ALBUM: ambient 1: music for airports (remastered 2004)
INFO:examples.artist_discography:1. 1/1 - Remastered 2004
INFO:examples.artist_discography:2. 2/1 - Remastered 2004
INFO:examples.artist_discography:3. 1/2 - Remastered 2004
INFO:examples.artist_discography:4. 2/2 - Remastered 2004
INFO:examples.artist_discography:A

INFO:examples.artist_discography:7. Angel On My Shoulder (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:8. Honey Moon (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:9. Roochoo Gumbo (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:10. 北京ダック (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:11. 香港ブルース (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:12. Sports Men (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:13. Cow Cow Boogie (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:14. Ain't Nobody Here But Us Chickens (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:15. Pom Pom 蒸気 (Live at The Mayan Theatre, Los Angeles, July,2019)
INFO:examples.artist_discography:16. Body Snatchers (Live at The Mayan T

INFO:examples.artist_discography:5. 星めぐりの歌 - ハンドベル・ヴァージョン
INFO:examples.artist_discography:6. 青い玉 - A
INFO:examples.artist_discography:7. TV-CM「街に、生きる力。篇」 - Bタイプ
INFO:examples.artist_discography:8. 天気輪の柱 ピアノ・サスペンス
INFO:examples.artist_discography:9. 45分 - 別ヴァージョン
INFO:examples.artist_discography:10. 燐光の原
INFO:examples.artist_discography:11. TV-CM「街に、生きる力。篇」 - Cタイプ
INFO:examples.artist_discography:12. アルビレオ~星の観測所
INFO:examples.artist_discography:13. 銀河の海原
INFO:examples.artist_discography:14. 主よ、みもとに近づかん - <賛美歌(プロテスタント)320番> ベーシック・トラック・ヴァージョン
INFO:examples.artist_discography:15. イメージ・ソング「銀河鉄道の夜」 - デモ・ヴァージョン
INFO:examples.artist_discography:ALBUM: 万引き家族「オリジナル・サウンドトラック」
INFO:examples.artist_discography:1. Shoplifters
INFO:examples.artist_discography:2. Yuri's Going Home
INFO:examples.artist_discography:3. Living Sketch
INFO:examples.artist_discography:4. Shota & Yuri 1
INFO:examples.artist_discography:5. Yuri & Shota's Shoplifting
INFO:examples.artist_discography:6. The Park
INFO:examples.

INFO:examples.artist_discography:2. HONEY MOON (2020 Remastering)
INFO:examples.artist_discography:3. DEIRA (2020 Remastering)
INFO:examples.artist_discography:4. QUIET LODGE EDIT (2020 Remastering)
INFO:examples.artist_discography:5. MEDICINE MIX (2020 Remastering)
INFO:examples.artist_discography:6. SAND STORM EDIT (2020 Remastering)
INFO:examples.artist_discography:7. MABUI DANCE #2 (2020 Remastering)
INFO:examples.artist_discography:8. AIWOIWAIAOU (2020 Remastering)
INFO:examples.artist_discography:9. ARMENIAN ORIENTATION (2020 Remastering)
INFO:examples.artist_discography:10. AMBIENT MEDITATION #3 (2020 Remastering)
INFO:examples.artist_discography:ALBUM: medicine compilation from the quiet lodge
INFO:examples.artist_discography:1. LAUGHTER MEDITATION
INFO:examples.artist_discography:2. HONEY MOON
INFO:examples.artist_discography:3. DEIRA
INFO:examples.artist_discography:4. QUIET LODGE EDIT
INFO:examples.artist_discography:5. MEDICINE MIX
INFO:examples.artist_discography:6. SAND S

INFO:examples.artist_discography:17. Pour Melia
INFO:examples.artist_discography:ALBUM: rosetta
INFO:examples.artist_discography:1. Origins (Arrival)
INFO:examples.artist_discography:2. Starstuff
INFO:examples.artist_discography:3. Infinitude
INFO:examples.artist_discography:4. Exo Genesis
INFO:examples.artist_discography:5. Celestial Whispers
INFO:examples.artist_discography:6. Albedo 0.06
INFO:examples.artist_discography:7. Sunlight
INFO:examples.artist_discography:8. Rosetta
INFO:examples.artist_discography:9. Philae's Descent
INFO:examples.artist_discography:10. Mission Accomplie (Rosetta's Waltz)
INFO:examples.artist_discography:11. Perihelion
INFO:examples.artist_discography:12. Elegy
INFO:examples.artist_discography:13. Return To The Void
INFO:examples.artist_discography:ALBUM: vangelis: delectus (remastered)
INFO:examples.artist_discography:1. Come On - Remastered
INFO:examples.artist_discography:2. We Are All Uprooted - Remastered
INFO:examples.artist_discography:3. Sunny Eart

INFO:examples.artist_discography:4. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 3 - Voice
INFO:examples.artist_discography:5. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 4 - Voice
INFO:examples.artist_discography:6. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 5 - Voice
INFO:examples.artist_discography:7. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 6 - Voice
INFO:examples.artist_discography:8. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 7 - Voice
INFO:examples.artist_discography:9. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 8 - Voice
INFO:examples.artist_discography:10. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 9 - Voice
INFO:examples.artist_discography:11. Mythodea - Music for the NASA Mission: 2001 Mars Odyssey: Movement 10 - Voice
INFO:examples.artist_discography:ALBUM: el greco - original motion picture soundtrack by vang

INFO:examples.artist_discography:12. Tears in Rain
INFO:examples.artist_discography:ALBUM: 1492: conquest of paradise
INFO:examples.artist_discography:1. Opening
INFO:examples.artist_discography:2. Conquest of Paradise
INFO:examples.artist_discography:3. Monastery of La Rabida
INFO:examples.artist_discography:4. City of Isabel
INFO:examples.artist_discography:5. Light and Shadow
INFO:examples.artist_discography:6. Deliverance
INFO:examples.artist_discography:7. West Across the Ocean Sea
INFO:examples.artist_discography:8. Eternity
INFO:examples.artist_discography:9. Hispanola
INFO:examples.artist_discography:10. Moxica and the Horse
INFO:examples.artist_discography:11. Twenty Eighth Parallel
INFO:examples.artist_discography:12. Pinta, Nina, Santa Maria
INFO:examples.artist_discography:ALBUM: page of life
INFO:examples.artist_discography:1. Wisdom Chain
INFO:examples.artist_discography:2. Page of Life
INFO:examples.artist_discography:3. Money
INFO:examples.artist_discography:4. Jazzy Bo

INFO:examples.artist_discography:1. Spiral
INFO:examples.artist_discography:2. Ballad
INFO:examples.artist_discography:3. Dervish D.
INFO:examples.artist_discography:4. To the Unknown Man
INFO:examples.artist_discography:5. 3 + 3
INFO:examples.artist_discography:6. To the Unknown Man - Pt. 2
INFO:examples.artist_discography:ALBUM: albedo 0.39
INFO:examples.artist_discography:1. Pulstar - Audio
INFO:examples.artist_discography:2. Freefall
INFO:examples.artist_discography:3. Mare Tranquillitatis
INFO:examples.artist_discography:4. Main Sequence
INFO:examples.artist_discography:5. Sword of Orion
INFO:examples.artist_discography:6. Alpha
INFO:examples.artist_discography:7. Nucleogenesis, Pt. 1
INFO:examples.artist_discography:8. Nucleogenesis, Pt. 2
INFO:examples.artist_discography:9. Albedo 0.39
INFO:examples.artist_discography:ALBUM: heaven and hell
INFO:examples.artist_discography:1. Heaven and Hell, Pt. I
INFO:examples.artist_discography:2. Heaven and Hell, Pt. II
INFO:examples.artist_

In [61]:
trk_df.shape

(1755, 14)

In [63]:
trk_df

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,external_urls,href,id,is_local,name,preview_url,track_number,type,uri
0,[{'external_urls': {'spotify': 'https://open.s...,"[AT, CH, DE, DK, FI, NO, SE]",1,362160,False,{'spotify': 'https://open.spotify.com/track/2L...,https://api.spotify.com/v1/tracks/2LiqK1oqzrqn...,2LiqK1oqzrqnxH6zaDLqAW,False,Creek,https://p.scdn.co/mp3-preview/9fd20c3659b6b8a3...,1,track,spotify:track:2LiqK1oqzrqnxH6zaDLqAW
1,[{'external_urls': {'spotify': 'https://open.s...,"[AT, CH, DE, DK, FI, NO, SE]",1,273866,False,{'spotify': 'https://open.spotify.com/track/17...,https://api.spotify.com/v1/tracks/17kSn9eA0hG6...,17kSn9eA0hG6HTAFvp76BS,False,Feel,https://p.scdn.co/mp3-preview/f9dc36a431cdf99b...,2,track,spotify:track:17kSn9eA0hG6HTAFvp76BS
2,[{'external_urls': {'spotify': 'https://open.s...,"[AT, CH, DE, DK, FI, NO, SE]",1,332133,False,{'spotify': 'https://open.spotify.com/track/0C...,https://api.spotify.com/v1/tracks/0CR4jQApsVlV...,0CR4jQApsVlVUFF7dYhkNC,False,Sheep,https://p.scdn.co/mp3-preview/a65b37bb9f7ee497...,3,track,spotify:track:0CR4jQApsVlVUFF7dYhkNC
3,[{'external_urls': {'spotify': 'https://open.s...,"[AT, CH, DE, DK, FI, NO, SE]",1,410333,False,{'spotify': 'https://open.spotify.com/track/2g...,https://api.spotify.com/v1/tracks/2guvKppGQ7Fs...,2guvKppGQ7Fsgi4ZAGILc6,False,Sleep,https://p.scdn.co/mp3-preview/39f6de0b41aefb55...,4,track,spotify:track:2guvKppGQ7Fsgi4ZAGILc6
4,[{'external_urls': {'spotify': 'https://open.s...,"[AT, CH, DE, DK, FI, NO, SE]",1,321666,False,{'spotify': 'https://open.spotify.com/track/0H...,https://api.spotify.com/v1/tracks/0HEzGwip1s4s...,0HEzGwip1s4seBM0s4fQhW,False,Green,https://p.scdn.co/mp3-preview/3062c65f5387ec92...,5,track,spotify:track:0HEzGwip1s4seBM0s4fQhW
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
481,[{'external_urls': {'spotify': 'https://open.s...,[GR],1,260399,False,{'spotify': 'https://open.spotify.com/track/5V...,https://api.spotify.com/v1/tracks/5VgfP2tl4RlY...,5VgfP2tl4RlYi3xDkqMQep,False,Let It Happen - Remastered,,6,track,spotify:track:5VgfP2tl4RlYi3xDkqMQep
482,[{'external_urls': {'spotify': 'https://open.s...,[GR],1,74640,False,{'spotify': 'https://open.spotify.com/track/0J...,https://api.spotify.com/v1/tracks/0JFhtvlaum1u...,0JFhtvlaum1uwWi6YDpufY,False,The City - Remastered,,7,track,spotify:track:0JFhtvlaum1uwWi6YDpufY
483,[{'external_urls': {'spotify': 'https://open.s...,[GR],1,264893,False,{'spotify': 'https://open.spotify.com/track/38...,https://api.spotify.com/v1/tracks/38ePpIzT2XKF...,38ePpIzT2XKFINmNV9jHLC,False,My Face In The Rain - Remastered,,8,track,spotify:track:38ePpIzT2XKFINmNV9jHLC
484,[{'external_urls': {'spotify': 'https://open.s...,[GR],1,176760,False,{'spotify': 'https://open.spotify.com/track/1L...,https://api.spotify.com/v1/tracks/1LAq5oi7ZTsR...,1LAq5oi7ZTsR4f5kM1RYcd,False,Watch Out - Remastered,,9,track,spotify:track:1LAq5oi7ZTsR4f5kM1RYcd


In [36]:
artist_df.sort_values('popularity',ascending=False)

Unnamed: 0,id,name,followers,genres,popularity,type
3,7MSUfLeTdDEoZiJPDSBXgi,Brian Eno,"{'href': None, 'total': 614258}","[ambient, art pop, art rock, compositional amb...",61,artist
6,4P70aqttdpJ9vuYFDmf7f6,Vangelis,"{'href': None, 'total': 523679}","[cyberpunk, synthesizer]",59,artist
4,370nbSkMB9kDWyTypwWYak,Haruomi Hosono,"{'href': None, 'total': 175582}","[classic j-rock, fourth world, japanese folk, ...",45,artist
0,1DGpHnPOpMYY780hcQHmPB,Hiroshi Yoshimura,"{'href': None, 'total': 73858}","[ambient, fourth world, j-ambient]",40,artist
2,0M6QGBKWICr8dxhh3UJW45,Green-House,"{'href': None, 'total': 18687}","[fourth world, spectra]",39,artist
1,3nYCvyP4RxuKyEKygqxWHy,INOYAMALAND,"{'href': None, 'total': 6139}","[fourth world, j-ambient]",32,artist
5,3GeWutjuNRg9uRqiIejRT9,Emily A. Sprague,"{'href': None, 'total': 11852}","[ambient, electra, experimental ambient, exper...",29,artist


In [37]:
test_artist = 'Hiroshi Yoshimura'
artist_d = get_artist(test_artist)
tracks_df = pd.DataFrame(show_artist_albums(artist_d))

INFO:examples.artist_discography:Total albums: 5
INFO:examples.artist_discography:ALBUM: green (sfx version)
INFO:examples.artist_discography:1. Creek
INFO:examples.artist_discography:2. Feel
INFO:examples.artist_discography:3. Sheep
INFO:examples.artist_discography:4. Sleep
INFO:examples.artist_discography:5. Green
INFO:examples.artist_discography:6. Feet
INFO:examples.artist_discography:7. Street
INFO:examples.artist_discography:8. Teevee
INFO:examples.artist_discography:ALBUM: green
INFO:examples.artist_discography:1. CREEK
INFO:examples.artist_discography:2. FEEL
INFO:examples.artist_discography:3. SHEEP
INFO:examples.artist_discography:4. SLEEP
INFO:examples.artist_discography:5. GREEN
INFO:examples.artist_discography:6. FEET
INFO:examples.artist_discography:7. STREET
INFO:examples.artist_discography:8. TEEVEE
INFO:examples.artist_discography:ALBUM: music for nine post cards
INFO:examples.artist_discography:1. Water Copy
INFO:examples.artist_discography:2. Clouds
INFO:examples.art

In [72]:
tracks_df.shape

(5, 14)

## Extraction

Below here, we extract features from each track using the Spotify API and the associated URI. This is done in 3 sections, due to the extremely long runtime of this process. We build a DataFrame containing these features.

In [73]:
uri_list = trk_df['id'].unique()
featureLIST = []

for i in tqdm([uri for uri in uri_list]):
    try:
        featureLIST.append(ari_to_features(i))
    except:
        continue

100%|█████████████████████████████████████████████████████████████████████████████████████| 1755/1755 [20:39<00:00,  1.42it/s]


In [74]:
feature_df =  pd.DataFrame(featureLIST)

In [75]:
feature_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,artist_pop,genres,track_pop
0,0.6370,0.7910,0,-17.057,1,0.0610,0.810,0.8860,0.9420,0.8710,...,audio_features,2LiqK1oqzrqnxH6zaDLqAW,spotify:track:2LiqK1oqzrqnxH6zaDLqAW,https://api.spotify.com/v1/tracks/2LiqK1oqzrqn...,https://api.spotify.com/v1/audio-analysis/2Liq...,362160,4,40,ambient fourth_world j-ambient,10
1,0.2840,0.2590,7,-19.314,1,0.0363,0.988,0.9590,0.3430,0.0339,...,audio_features,17kSn9eA0hG6HTAFvp76BS,spotify:track:17kSn9eA0hG6HTAFvp76BS,https://api.spotify.com/v1/tracks/17kSn9eA0hG6...,https://api.spotify.com/v1/audio-analysis/17kS...,273867,3,40,ambient fourth_world j-ambient,9
2,0.4300,0.1880,2,-15.965,1,0.0545,0.962,0.7460,0.4150,0.5600,...,audio_features,0CR4jQApsVlVUFF7dYhkNC,spotify:track:0CR4jQApsVlVUFF7dYhkNC,https://api.spotify.com/v1/tracks/0CR4jQApsVlV...,https://api.spotify.com/v1/audio-analysis/0CR4...,332133,4,40,ambient fourth_world j-ambient,7
3,0.3840,0.0415,0,-21.981,1,0.0436,0.988,0.0436,0.0701,0.0964,...,audio_features,2guvKppGQ7Fsgi4ZAGILc6,spotify:track:2guvKppGQ7Fsgi4ZAGILc6,https://api.spotify.com/v1/tracks/2guvKppGQ7Fs...,https://api.spotify.com/v1/audio-analysis/2guv...,410333,3,40,ambient fourth_world j-ambient,7
4,0.3310,0.0360,9,-26.835,1,0.0499,0.991,0.7710,0.3730,0.0716,...,audio_features,0HEzGwip1s4seBM0s4fQhW,spotify:track:0HEzGwip1s4seBM0s4fQhW,https://api.spotify.com/v1/tracks/0HEzGwip1s4s...,https://api.spotify.com/v1/audio-analysis/0HEz...,321667,4,40,ambient fourth_world j-ambient,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1749,0.2570,0.4700,2,-17.097,0,0.0436,0.199,0.0121,0.1660,0.5420,...,audio_features,5VgfP2tl4RlYi3xDkqMQep,spotify:track:5VgfP2tl4RlYi3xDkqMQep,https://api.spotify.com/v1/tracks/5VgfP2tl4RlY...,https://api.spotify.com/v1/audio-analysis/5Vgf...,260400,4,59,cyberpunk synthesizer,6
1750,0.6070,0.7910,2,-16.991,1,0.0299,0.163,0.8170,0.3970,0.0395,...,audio_features,0JFhtvlaum1uwWi6YDpufY,spotify:track:0JFhtvlaum1uwWi6YDpufY,https://api.spotify.com/v1/tracks/0JFhtvlaum1u...,https://api.spotify.com/v1/audio-analysis/0JFh...,74640,4,59,cyberpunk synthesizer,3
1751,0.0666,0.1610,1,-20.571,1,0.0396,0.779,0.8340,0.1180,0.0373,...,audio_features,38ePpIzT2XKFINmNV9jHLC,spotify:track:38ePpIzT2XKFINmNV9jHLC,https://api.spotify.com/v1/tracks/38ePpIzT2XKF...,https://api.spotify.com/v1/audio-analysis/38eP...,264893,4,59,cyberpunk synthesizer,3
1752,0.2770,0.4270,8,-21.335,1,0.0339,0.402,0.9100,0.3920,0.1210,...,audio_features,1LAq5oi7ZTsR4f5kM1RYcd,spotify:track:1LAq5oi7ZTsR4f5kM1RYcd,https://api.spotify.com/v1/tracks/1LAq5oi7ZTsR...,https://api.spotify.com/v1/audio-analysis/1LAq...,176760,4,59,cyberpunk synthesizer,2


In [6]:
first_half = df["track_uri"].unique()[:10000]
second_half = df["track_uri"].unique()[10000:20000]
third_half = df["track_uri"].unique()[20000:]
dataLIST = [first_half,second_half,third_half]

In [7]:
featureLIST = []

for i in tqdm([uri for uri in dataLIST[0]]):
    try:
        featureLIST.append(ari_to_features(i))
    except:
        continue


100%|██████████████████████████████████████████████████████████████████████████| 10000/10000 [1:22:58<00:00,  2.01it/s]


In [8]:
for i in tqdm([uri for uri in dataLIST[1]]):
    try:
        featureLIST.append(ari_to_features(i))
    except:
        continue

100%|██████████████████████████████████████████████████████████████████████████| 10000/10000 [1:23:07<00:00,  2.00it/s]


In [9]:
for i in tqdm([uri for uri in dataLIST[2]]):
    try:
        featureLIST.append(ari_to_features(i))
    except:
        continue

  4%|███▎                                                                        | 622/14443 [05:09<3:37:43,  1.06it/s]HTTP Error for GET to https://api.spotify.com/v1/tracks/656TZlNdVe90zHvmebFt9U with Params: {'market': None} returned 404 due to non existing id
 61%|█████████████████████████████████████████████▊                             | 8830/14443 [1:13:22<18:50,  4.96it/s]HTTP Error for GET to https://api.spotify.com/v1/tracks/5GiU7GOYjDH2yp7fMf9w9j with Params: {'market': None} returned 404 due to non existing id
100%|██████████████████████████████████████████████████████████████████████████| 14443/14443 [2:00:09<00:00,  2.00it/s]


In [10]:
#Preview the DataFrame
featureDF = pd.DataFrame(featureLIST)
featureDF

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,artist_pop,genres,track_pop
0,0.904,0.813,4,-7.105,0,0.1210,0.03110,0.006970,0.0471,0.810,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
1,0.774,0.838,5,-3.914,0,0.1140,0.02490,0.025000,0.2420,0.924,...,audio_features,6I9VzXrHxO9rA9A5euc8Ak,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,https://api.spotify.com/v1/tracks/6I9VzXrHxO9r...,https://api.spotify.com/v1/audio-analysis/6I9V...,198800,4,84,dance_pop pop post-teen_pop,83
2,0.664,0.758,2,-6.583,0,0.2100,0.00238,0.000000,0.0598,0.701,...,audio_features,0WqIKmW4BTrj3eJFmnCKMv,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,https://api.spotify.com/v1/tracks/0WqIKmW4BTrj...,https://api.spotify.com/v1/audio-analysis/0WqI...,235933,4,86,dance_pop pop r&b,25
3,0.892,0.714,4,-6.055,0,0.1410,0.20100,0.000234,0.0521,0.817,...,audio_features,1AWQoqb9bSvzTjaLralEkT,spotify:track:1AWQoqb9bSvzTjaLralEkT,https://api.spotify.com/v1/tracks/1AWQoqb9bSvz...,https://api.spotify.com/v1/audio-analysis/1AWQ...,267267,4,82,dance_pop pop,79
4,0.853,0.606,0,-4.596,1,0.0713,0.05610,0.000000,0.3130,0.654,...,audio_features,1lzr43nnXAijIGYnCT8M8H,spotify:track:1lzr43nnXAijIGYnCT8M8H,https://api.spotify.com/v1/tracks/1lzr43nnXAij...,https://api.spotify.com/v1/audio-analysis/1lzr...,227600,4,75,pop_rap reggae_fusion,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34435,0.669,0.228,2,-12.119,1,0.0690,0.79200,0.065000,0.0944,0.402,...,audio_features,3uCHI1gfOUL5j5swEh0TcH,spotify:track:3uCHI1gfOUL5j5swEh0TcH,https://api.spotify.com/v1/tracks/3uCHI1gfOUL5...,https://api.spotify.com/v1/audio-analysis/3uCH...,189184,4,47,unknown,27
34436,0.493,0.727,1,-5.031,1,0.2170,0.08730,0.000000,0.1290,0.289,...,audio_features,0P1oO2gREMYUCoOkzYAyFu,spotify:track:0P1oO2gREMYUCoOkzYAyFu,https://api.spotify.com/v1/tracks/0P1oO2gREMYU...,https://api.spotify.com/v1/audio-analysis/0P1o...,263680,4,39,australian_r&b,37
34437,0.702,0.524,7,-10.710,1,0.0793,0.33200,0.055300,0.2980,0.265,...,audio_features,2oM4BuruDnEvk59IvIXCwn,spotify:track:2oM4BuruDnEvk59IvIXCwn,https://api.spotify.com/v1/tracks/2oM4BuruDnEv...,https://api.spotify.com/v1/audio-analysis/2oM4...,189213,4,55,canadian_contemporary_r&b modern_alternative_rock,49
34438,0.509,0.286,8,-14.722,1,0.1230,0.40200,0.000012,0.1310,0.259,...,audio_features,4Ri5TTUgjM96tbQZd5Ua7V,spotify:track:4Ri5TTUgjM96tbQZd5Ua7V,https://api.spotify.com/v1/tracks/4Ri5TTUgjM96...,https://api.spotify.com/v1/audio-analysis/4Ri5...,194720,4,4,unknown,16


## Finalising and Export

We finally merge the feature DataFrame with the original dataset, as this also contains useful information in the artist name and track name. This is then exported, as our processed data.

In [76]:
trk_df.columns

Index(['artists', 'available_markets', 'disc_number', 'duration_ms',
       'explicit', 'external_urls', 'href', 'id', 'is_local', 'name',
       'preview_url', 'track_number', 'type', 'uri'],
      dtype='object')

In [78]:
feature_df.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature', 'artist_pop', 'genres', 'track_pop'],
      dtype='object')

In [80]:
final_df = trk_df.merge(feature_df, on='id')
final_df.to_csv('../data/song_features.csv')

In [82]:
!pwd

/Users/anadwi/Documents/Projects/B_Ragi/B_Ragi/notebooks


In [11]:
new_df = pd.merge(testDF,featureDF, left_on = "track_uri", right_on= "id")

In [12]:
new_df.to_csv('../data/processed_data.csv')