# Metallica Spotify Track Data

This notebook uses the Python package `spotipy` to connect to the Spotify API and build a table of track-specific attributes. The API credentials `CLIENT_ID` and `CLIENT_SECRET` are not included in the code; supply your own Spotify credentials in a `config.py` file.

In [13]:
import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import config

In [14]:
# Spotify authentication with app-level client credentials (no user login).
# CLIENT_ID and CLIENT_SECRET are read from the local config module.
client_credentials_manager = SpotifyClientCredentials(
    client_id=config.CLIENT_ID,
    client_secret=config.CLIENT_SECRET,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [15]:
# Accumulator for the per-track rows: a plain list that receives one dict per
# track and is converted into a DataFrame once all tracks have been collected.
met_data = []

In [16]:
# Get unique albums: fetch every album of the artist, then keep the first
# Spotify ID seen for each distinct (case-insensitive) album title.
met_uri = 'spotify:artist:2ye2Wgw4gimLv2eAKyk1NB'

# The API is paginated; keep following the 'next' link until it is empty.
results = sp.artist_albums(met_uri, album_type='album')
albums = results['items']
while results['next']:
    results = sp.next(results)
    albums.extend(results['items'])

# names holds the lowercased titles already seen; album_ids holds the matching
# IDs in the same order.
names, album_ids = [], []
for album in albums:
    title = album['name'].lower()
    if title in names:
        # Same title listed again: keep only the first occurrence.
        continue
    names.append(title)
    album_ids.append(album['id'])

In [17]:
# Construct track-specific data: one dict per track on every unique album,
# combining album metadata, track metadata, popularity and audio features.
#
# The list is (re)created here so the cell is idempotent: running it a second
# time rebuilds met_data instead of appending duplicate rows.
met_data = []

# Fields copied from the audio-features payload into each row.
feature_keys = ['danceability', 'energy', 'key', 'loudness', 'mode',
                'speechiness', 'acousticness', 'instrumentalness', 'liveness',
                'valence', 'tempo', 'duration_ms', 'time_signature']

for album_id in album_ids:
    results = sp.album_tracks(album_id)
    album_info = sp.album(album_id)

    # Track listings are paginated; follow 'next' until every page is read.
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])

    for track in tracks:
        track_row = {
            'spotify_id': track['id'],
            'spotify_uri': track['uri'],
            'album': album_info['name'],
            'name': track['name'],
            'release_date': album_info['release_date'],
            'track_number': track['track_number'],
        }

        # Popularity is looked up on the full track object. The lookup is best
        # effort: on any failure the row records NaN (previously the NaN was
        # assigned to a throwaway variable and never stored in the row).
        try:
            track_row['popularity'] = sp.track(track['id'])['popularity']
        except Exception:
            track_row['popularity'] = float('nan')

        # audio_features returns a list with one entry per requested ID. Guard
        # against an empty result or a None entry (no analysis available for
        # the track) and record NaN instead of raising a TypeError.
        feature_list = sp.audio_features(track['id'])
        features = feature_list[0] if feature_list else None
        for key in feature_keys:
            track_row[key] = features[key] if features else float('nan')

        met_data.append(track_row)

In [18]:
# Column order of the final table; each name is a key written into the
# per-track row dicts.
cols = [
    'spotify_id', 'spotify_uri', 'album', 'name', 'popularity',
    'release_date', 'track_number',
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]

# Build the DataFrame in a single step from the accumulated list of row dicts.
metallica_data = pd.DataFrame(data=met_data, columns=cols)

In [9]:
# Write the track table to a CSV file; index=False leaves the DataFrame's row
# index out of the file.
metallica_data.to_csv(path_or_buf='metallica_songs.csv', index=False)

In [21]:
# Show the dtype of every column as a quick sanity check of the assembled table.
metallica_data.dtypes

spotify_id           object
spotify_uri          object
album                object
name                 object
popularity            int64
release_date         object
track_number          int64
danceability        float64
energy              float64
key                   int64
loudness            float64
mode                  int64
speechiness         float64
acousticness        float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
duration_ms           int64
time_signature        int64
dtype: object