# Querying Spotipy for Audio Features + Analysis

In [1]:
import json
import numpy as np
import pandas as pd
import pickle
import re
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import sys
import time

In [2]:
# Credentials are no longer stored in the notebook. With no arguments,
# SpotifyClientCredentials reads SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# from the environment, so export both before starting the kernel.
# NOTE(review): the client ID/secret pair that used to be hard-coded in this
# cell has been exposed and should be rotated in the Spotify developer dashboard.
client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

#### Example Retrievals

In [6]:
# Example: request the audio features for a single track ID.
sp.audio_features('62bOmKYxYg7dhrC6gH9vFn')

[{'danceability': 0.61,
  'energy': 0.926,
  'key': 8,
  'loudness': -4.843,
  'mode': 0,
  'speechiness': 0.0479,
  'acousticness': 0.031,
  'instrumentalness': 0.0012,
  'liveness': 0.0821,
  'valence': 0.861,
  'tempo': 172.638,
  'type': 'audio_features',
  'id': '62bOmKYxYg7dhrC6gH9vFn',
  'uri': 'spotify:track:62bOmKYxYg7dhrC6gH9vFn',
  'track_href': 'https://api.spotify.com/v1/tracks/62bOmKYxYg7dhrC6gH9vFn',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/62bOmKYxYg7dhrC6gH9vFn',
  'duration_ms': 200400,
  'time_signature': 4}]

In [3]:
# Fetch the audio analysis for one track, then remove the four `*string`
# entries from its 'track' section.
example = sp.audio_analysis('62bOmKYxYg7dhrC6gH9vFn')
for string_key in ('codestring', 'echoprintstring', 'rhythmstring', 'synchstring'):
    del example['track'][string_key]

In [4]:
# Write `example` to disk as JSON.
with open('../data/example_song_analysis.json', 'w') as example_file:
    json.dump(example, example_file)

#### Retrieving Song List from Spotipy Notebook

In [4]:
# Read-only binary mode is all a load needs; 'rb+' opened the file for update
# and so also required write permission on it.
# NOTE(review): pickle.load can execute arbitrary code — only load a pickle
# that this project produced itself.
with open('../pickle/song_list.pkl', 'rb') as f:
    song_list = pickle.load(f)

In [7]:
# ID of the first track of the first entry in song_list.
song_list[0]['tracks'][0]['id']

'62bOmKYxYg7dhrC6gH9vFn'

#### Retrieving Audio Features for Every Song in `song_list`

In [34]:
# Check the type of a song_list entry; the fetch loop below only processes dict entries.
type(song_list[0])

dict

In [35]:
def fetch_audio_features(track_ids, max_attempts=3, retry_wait=5):
    """Request audio features for a list of track IDs, retrying on failure.

    Parameters
    ----------
    track_ids : list
        Track IDs passed straight to ``sp.audio_features``.
    max_attempts : int
        Total number of attempts before the last error is re-raised.
    retry_wait : int or float
        Seconds to sleep between a failed attempt and the next one.

    Returns
    -------
    Whatever ``sp.audio_features(track_ids)`` returns.

    Raises
    ------
    Exception
        The error from the final attempt, if every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return sp.audio_features(track_ids)
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still stop the cell instead of triggering a retry.
        except Exception:
            if attempt == max_attempts:
                raise
            time.sleep(retry_wait)


# One element per dict entry in song_list: the audio-features response for
# all of that entry's tracks.
song_feat = []

for entry in song_list:
    # Non-dict entries carry no 'tracks' list to query; skip them.
    if not isinstance(entry, dict):
        continue
    id_list = [track['id'] for track in entry['tracks']]
    # NOTE(review): the audio-features endpoint is documented as accepting at
    # most 100 IDs per call — confirm no entry has more tracks than that.
    song_feat.append(fetch_audio_features(id_list))
    time.sleep(1)  # pause between requests

In [33]:
# NOTE(review): this cell's execution count (33) is lower than the fetch
# cell's (35), so the value shown presumably comes from an earlier run —
# re-run or remove.
len(song_feat)

241

In [36]:
# Number of per-entry feature lists collected by the fetch loop.
len(song_feat)

2444

In [38]:
# Persist the collected audio features. Write-only binary mode is sufficient;
# the file is never read back through this handle.
with open('../pickle/song_feat.pkl', 'wb') as f:
    pickle.dump(song_feat, f)

In [39]:
# Inspect the feature list collected for one entry (index 209).
song_feat[209]

[{'danceability': 0.836,
  'energy': 0.728,
  'key': 10,
  'loudness': -12.058,
  'mode': 0,
  'speechiness': 0.234,
  'acousticness': 0.00569,
  'instrumentalness': 0.000819,
  'liveness': 0.0649,
  'valence': 0.599,
  'tempo': 100.842,
  'type': 'audio_features',
  'id': '0dNiLb9FEHrRK7VFDJctiR',
  'uri': 'spotify:track:0dNiLb9FEHrRK7VFDJctiR',
  'track_href': 'https://api.spotify.com/v1/tracks/0dNiLb9FEHrRK7VFDJctiR',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0dNiLb9FEHrRK7VFDJctiR',
  'duration_ms': 319907,
  'time_signature': 4},
 {'danceability': 0.891,
  'energy': 0.319,
  'key': 4,
  'loudness': -16.099,
  'mode': 0,
  'speechiness': 0.333,
  'acousticness': 0.0933,
  'instrumentalness': 0.000104,
  'liveness': 0.0482,
  'valence': 0.687,
  'tempo': 92.649,
  'type': 'audio_features',
  'id': '1Z4mVNtYGnpjHUoZVo3IMU',
  'uri': 'spotify:track:1Z4mVNtYGnpjHUoZVo3IMU',
  'track_href': 'https://api.spotify.com/v1/tracks/1Z4mVNtYGnpjHUoZVo3IMU',
  'analysis_url': 

#### Retrieving Audio Analysis for Every Song in `song_list`

In [5]:
def fetch_audio_analysis(track_id, max_attempts=3, retry_wait=5):
    """Request the audio analysis for one track ID, retrying on failure.

    Parameters
    ----------
    track_id : str
        Track ID passed straight to ``sp.audio_analysis``.
    max_attempts : int
        Total number of attempts before the last error is re-raised.
    retry_wait : int or float
        Seconds to sleep between a failed attempt and the next one.

    Returns
    -------
    Whatever ``sp.audio_analysis(track_id)`` returns.

    Raises
    ------
    Exception
        The error from the final attempt, if every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return sp.audio_analysis(track_id)
        # `Exception` rather than a bare `except:` so that interrupting this
        # long-running cell (KeyboardInterrupt) stops it instead of being
        # swallowed and retried.
        except Exception:
            if attempt == max_attempts:
                raise
            time.sleep(retry_wait)


# One single-key dict per track: {track_id: audio_analysis_response}.
song_analysis = []

for entry in song_list:
    # Only dict entries carry a 'tracks' list.
    if not isinstance(entry, dict):
        continue
    for track in entry['tracks']:
        song_analysis.append({track['id']: fetch_audio_analysis(track['id'])})
        time.sleep(1)  # pause between requests

KeyboardInterrupt: 

In [11]:
# Save whatever has been collected in song_analysis so far as JSON.
with open('../data/song_analysis.json', 'w') as analysis_file:
    json.dump(song_analysis, analysis_file)

### Checking Progress on Song Analysis

I tried retrieving the audio analysis overnight for every track I had collected, but have only gathered 2,698 tracks so far. I need to work out what fraction of the requested tracks actually came back with an analysis, to decide whether it is worth gathering the rest.

In [14]:
# Number of per-track analysis dicts collected so far.
len(song_analysis)

2698

In [16]:
# Reload the song list. Read-only binary mode is all a load needs; 'rb+'
# opened the file for update and so also required write permission on it.
# NOTE(review): pickle.load can execute arbitrary code — only load a pickle
# that this project produced itself.
with open('../pickle/song_list.pkl', 'rb') as f:
    song_list = pickle.load(f)

#### Creating Array of Song Titles + Unique ID

I'll need this for a ton of different things, including checking my progress on grabbing the audio analysis.

In [32]:
# Name of the first track of the first entry in song_list.
song_list[0]['tracks'][0]['name']

'Bye Bye Bye'

In [35]:
# Flatten song_list into one {track name: track id} record per track,
# taking tracks only from entries that are dicts.
master_song_list = [
    {song['name']: song['id']}
    for item in song_list
    if isinstance(item, dict)
    for song in item['tracks']
]

In [5]:
# Read master_song_list back from its JSON file.
with open('../data/master_song_list.json', 'r') as master_file:
    master_song_list = json.load(master_file)

##### Size of `master_song_list`

In [7]:
# Number of {name: id} records in the master list.
len(master_song_list)

23888

In [8]:
# Estimated total size in KB of the audio-analysis JSON for every indexed
# track. The count is derived from the list itself rather than hard-coded.
ANALYSIS_JSON_KB = 403  # size of one audio-analysis JSON, in KB
len(master_song_list) * ANALYSIS_JSON_KB

9626864

One Spotify audio analysis JSON (not including `echoprintstring`, `codestring`, etc.) is 403 KB. Multiplying that by the number of songs indexed gives about 9.6 GB for that information alone, which means I'll need some serious compute and storage to complete this.

##### Where Did I End Up At?

In [46]:
# Peek at the first record: a single (track name, track ID) pair.
master_song_list[0].items()

dict_items([('Bye Bye Bye', '62bOmKYxYg7dhrC6gH9vFn')])

In [48]:
# Print the position in master_song_list of every record holding this track ID.
# enumerate() gives the true position of each record; list.index() rescanned
# the list on every hit and returned the first *equal* dict, which is the
# wrong position when the same {name: id} record appears more than once.
target_id = '0qYok0f8O5DE8yJSo146dn'
for position, song in enumerate(master_song_list):
    for track_id in song.values():
        if track_id == target_id:
            print(track_id, position)

0qYok0f8O5DE8yJSo146dn 2697


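That track sits at index 2697 of `master_song_list`, which matches the 2,698 analyses collected so far. So it looks like every song up to that point is included in the audio analysis, with no tracks skipped.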

##### Dumping Master Song List to `.json`

In [40]:
# Persist the master list as JSON. Plain write mode matches the notebook's
# other JSON dumps; the file is never read back through this handle.
with open('../data/master_song_list.json', 'w') as f:
    json.dump(master_song_list, f)

In [13]:
# Look up the track record for the same ID that was searched for in master_song_list.
sp.track('0qYok0f8O5DE8yJSo146dn')

{'album': {'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6bOYtKnpLPQSfMpS2ilotK'},
    'href': 'https://api.spotify.com/v1/artists/6bOYtKnpLPQSfMpS2ilotK',
    'id': '6bOYtKnpLPQSfMpS2ilotK',
    'name': 'Bobby Vinton',
    'type': 'artist',
    'uri': 'spotify:artist:6bOYtKnpLPQSfMpS2ilotK'}],
  'available_markets': ['AD',
   'AR',
   'AT',
   'AU',
   'BE',
   'BG',
   'BO',
   'BR',
   'CA',
   'CH',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DE',
   'DK',
   'DO',
   'EC',
   'EE',
   'ES',
   'FI',
   'FR',
   'GB',
   'GR',
   'GT',
   'HK',
   'HN',
   'HU',
   'ID',
   'IE',
   'IL',
   'IS',
   'IT',
   'JP',
   'LI',
   'LT',
   'LU',
   'LV',
   'MC',
   'MT',
   'MX',
   'MY',
   'NI',
   'NL',
   'NO',
   'NZ',
   'PA',
   'PE',
   'PH',
   'PL',
   'PT',
   'PY',
   'RO',
   'SE',
   'SG',
   'SK',
   'SV',
   'TH',
   'TR',
   'TW',
   'US',
   'UY',
   'VN',
   'ZA'],
  'external_urls': {'spotify': 'https://ope