# Querying Spotipy for Audio Features + Analysis

In [2]:
import json
import numpy as np
import pandas as pd
import pickle
import re
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import sys
import time
from sqlalchemy import create_engine

In [9]:
client_credentials_manager = SpotifyClientCredentials(client_id="2414300dd5be4a3cb0e9d83ecebe3964",
                                                          client_secret="f9c2e12e1f0247a4a74cf8e821a69aa7")
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [1]:
# sp.audio_analysis('62bOmKYxYg7dhrC6gH9vFn')

#### Example Retrievals

In [6]:
sp.audio_features('62bOmKYxYg7dhrC6gH9vFn')

[{'danceability': 0.61,
  'energy': 0.926,
  'key': 8,
  'loudness': -4.843,
  'mode': 0,
  'speechiness': 0.0479,
  'acousticness': 0.031,
  'instrumentalness': 0.0012,
  'liveness': 0.0821,
  'valence': 0.861,
  'tempo': 172.638,
  'type': 'audio_features',
  'id': '62bOmKYxYg7dhrC6gH9vFn',
  'uri': 'spotify:track:62bOmKYxYg7dhrC6gH9vFn',
  'track_href': 'https://api.spotify.com/v1/tracks/62bOmKYxYg7dhrC6gH9vFn',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/62bOmKYxYg7dhrC6gH9vFn',
  'duration_ms': 200400,
  'time_signature': 4}]

In [4]:
with open('../data/example_song_analysis.json', 'w') as f:
    json.dump(example, f)

#### Retrieving Song List from Spotipy Notebook

In [13]:
with open('../pickle/song_list.pkl', 'rb+') as f:
    song_list = pickle.load(f)

In [3]:
# song_list[500:1000]

#### Retrieving Audio Features for Every Song in `song_list`

In [34]:
type(song_list[0])

dict

In [35]:
song_feat = []

for i in song_list:
    if isinstance(i, dict):
        id_list = []
        for k in i['tracks']:
            id_list.append(k['id'])
    else:
        continue
    try:
        song_feat.append(sp.audio_features(id_list))
        time.sleep(1)
    except:
        time.sleep(5)
        song_feat.append(sp.audio_features(id_list))

In [36]:
len(song_feat)

2444

In [38]:
with open('../pickle/song_feat.pkl', 'wb+') as f:
    pickle.dump(song_feat, f)

In [33]:
# song_feat[209]

#### Retrieving Audio Analysis for Every Song in `song_list`

I'll break this up in batches, just to make sure I don't overload the RAM:

In [28]:
for i in master_song_list[:3]:
    for k, v in i.items():

{'Bye Bye Bye': '62bOmKYxYg7dhrC6gH9vFn'}
{'This I Promise You': '46n2EGFnPC3tzWCN1Aqe26'}
{"It's Gonna Be Me": '2AW37v0bDyuOzGP3XnmFuA'}


In [4]:
# song_list[764]

In [43]:
for i in song_list[1500:]:
    if isinstance(i, dict):
        for k in i['tracks']:
            try:
                analysis = sp.audio_analysis(k['id'])
            except:
                time.sleep(5)
                analysis = sp.audio_analysis(k['id'])
            with open('../data/audio_analysis/{}.json'.format(k['id']), 'w') as f:
                json.dump(analysis, f)

#### Checking to See Which Songs Weren't Grabbed in Audio Analysis

In [47]:
df = pd.read_csv('../data/analysis_list.txt', delimiter=" ", header=None)

In [53]:
df = df.apply(lambda x: x.str.rstrip('.json'), 1)

In [82]:
df.shape

(23129, 1)

In [64]:
engine = create_engine('postgresql://postgres:glide-mortuary-pod-cloy-belong@ec2-54-244-70-11.us-west-2.compute.amazonaws.com:5432/postgres')
engine.connect()

<sqlalchemy.engine.base.Connection at 0x7fa6daec65f8>

In [65]:
song_id_list = pd.read_sql("""
                            SELECT * FROM song_list
                            """, con=engine)

In [84]:
no_aa = song_id_list[~song_id_list['song_id'].isin(df[0])]    

In [85]:
no_aa.shape

(1506, 2)

Looks like 1506 titles weren't reterieved in the Audio Analysis API pull. Further, some of the .json's that I did pull don't seem to have the same feature set, wherein certain features (like `rhythmstring`) are note included.

#### Creating Array of Song Titles + Unique ID

I'll need this for a ton of different things, including checking my progress on grabbing the audio analysis.

In [32]:
song_list[0]['tracks'][0]['name']

'Bye Bye Bye'

In [35]:
master_song_list = []

for entry in song_list:
    if isinstance(entry, dict):
        for track in entry['tracks']:
            master_song_list.append(dict({track['name'] : track['id']}))

In [23]:
with open('../data/master_song_list.json', 'r') as f:
    master_song_list = json.load(f)

##### Size of `master_song_list`

In [7]:
len(master_song_list)

23888

In [8]:
23888 * 403

9626864

##### Dumping Master Song List to `.json`

In [40]:
with open('../data/master_song_list.json', 'w+') as f:
    json.dump(master_song_list, f)