## Getting data from Spotify API to get features for prediction

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import spotipy
import spotipy.util as util
import json

from functions import *
import pickle

In [2]:
# Load the Spotify API credentials from a local JSON file.
# JSON only accepts double-quoted strings, so the keys and values in
# credentials.json must use double quotes or json.load raises an error.
with open('credentials.json') as credentials_file:
    # The with-block closes the file on exit; no explicit close() is needed.
    credentials = json.load(credentials_file)

In [3]:
# Build a client-credentials auth object from the client_id / client_secret
# entries of the loaded credentials dictionary.
token = util.oauth2.SpotifyClientCredentials(client_id=credentials['client_id'],
                                             client_secret=credentials['client_secret'])
# Request an access token from the auth object.
# NOTE(review): the return type of get_access_token() may differ between spotipy
# releases (plain string vs. dict) — confirm against the installed version.
cache_token = token.get_access_token()
# TODO: check how much access the default token allows and whether more access
# is needed to do more interesting things.


In [4]:
# Create the API client. Handing the credentials manager to the client (instead
# of a one-off token string) lets spotipy request a fresh access token when the
# current one expires, so long-running loops of API calls keep working.
sp = spotipy.Spotify(client_credentials_manager=token)

### Loading in data and search queries to use the API

In [5]:
# Read data/billboards.csv and normalise the two column names used below.
data = pd.read_csv('data/billboards.csv').rename(
    columns={'Artist': 'artist', 'Song': 'song'}
)
# One lower-cased "<artist> <song>" string per row, used as the search text.
data['search_queries'] = (data['artist'] + ' ' + data['song']).str.lower()

In [6]:
def get_features(df):
    """Search Spotify for every query in ``df.search_queries``.

    For each query string, a track search with ``limit=1`` is issued through
    the module-level ``sp`` client, the returned items are passed to
    ``clean_search_results``, and the query text is stored on the resulting
    dictionary under ``'search_query'``.

    This is a generator that yields exactly once: a single list holding one
    cleaned dictionary per query, in the same order as the queries.
    """
    def _lookup(query):
        # Run one search and tag the cleaned result with the query that produced it.
        response = sp.search(q=query, type='track', limit=1)
        record = clean_search_results(response['tracks']['items'])  # returns a dictionary
        record['search_query'] = query
        return record

    yield [_lookup(query) for query in df.search_queries.values]

In [9]:
# Run every search first and only then open the output file: opening with 'wb'
# truncates output.pkl immediately, so a failed API call part-way through would
# otherwise leave an empty pickle behind.
results_list = list(get_features(data))  # get_features yields a single list of result dicts
with open('output.pkl', 'wb') as output_file:
    # Persist the list of per-query dictionaries; the with-block closes the file.
    pickle.dump(results_list[0], output_file)

In [11]:
# Number of result dictionaries in the single list produced by get_features (one per search query).
len(results_list[0])

828

In [21]:
# Reload the raw search results, convert them to a DataFrame and persist that.
# Context managers make sure both files are closed (and the written data
# flushed) before anything reads them again.
with open('output.pkl', 'rb') as results_file:
    test = pickle.load(results_file)
test_df = pd.DataFrame(test)
with open('spotify_api_search_results.pkl', 'wb') as frame_file:
    pickle.dump(test_df, frame_file)

In [24]:
# Reload the saved search-results DataFrame; the with-block closes the file handle.
# NTS: I need to look back and review what popularity is.
with open('spotify_api_search_results.pkl', 'rb') as frame_file:
    test_df = pickle.load(frame_file)

In [60]:
from collections import Counter

# Compare the number of distinct track ids with the total number of rows.
track_ids = test_df['id'].values
len(Counter(track_ids)), len(track_ids)

(796, 828)

In [28]:
# The audio-features endpoint accepts up to 100 track ids per request, so fetch
# in batches instead of issuing one HTTP request per track. Each call returns a
# list with one entry per requested id (None where no features are available),
# so flattening the batches still gives one entry per row of test_df, in order.
AUDIO_FEATURES_BATCH_SIZE = 100
track_id_list = list(test_df['id'].values)
audio_features = [
    sp.audio_features(track_id_list[start:start + AUDIO_FEATURES_BATCH_SIZE])
    for start in range(0, len(track_id_list), AUDIO_FEATURES_BATCH_SIZE)
]

In [48]:
# Concatenate the per-request result lists into one flat list of feature entries.
flat_list = []
for features_chunk in audio_features:
    flat_list.extend(features_chunk)

In [70]:
# Some entries in flat_list are None (no audio features came back for that
# track). They are filtered out because building the DataFrame from the
# unfiltered list did not work with the missing entries.
audio_features_df = pd.DataFrame([song for song in flat_list if song is not None])
# Build the frame once and write it through a context manager so the file is closed.
with open('audio_features.pkl', 'wb') as features_file:
    pickle.dump(audio_features_df, features_file)


In [71]:
# Fetch the full track object for a single hard-coded track id and display it.
sp.track('6i0V12jOa3mr6uu4WYhUBr')

{'album': {'album_type': 'single',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3YQKmKGau1PzlVlkL1iodx'},
    'href': 'https://api.spotify.com/v1/artists/3YQKmKGau1PzlVlkL1iodx',
    'id': '3YQKmKGau1PzlVlkL1iodx',
    'name': 'Twenty One Pilots',
    'type': 'artist',
    'uri': 'spotify:artist:3YQKmKGau1PzlVlkL1iodx'}],
  'available_markets': ['AD',
   'AE',
   'AR',
   'AT',
   'AU',
   'BE',
   'BG',
   'BH',
   'BO',
   'BR',
   'CA',
   'CH',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DE',
   'DK',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'FI',
   'FR',
   'GB',
   'GR',
   'GT',
   'HK',
   'HN',
   'HU',
   'ID',
   'IE',
   'IL',
   'IS',
   'IT',
   'JO',
   'JP',
   'KW',
   'LB',
   'LI',
   'LT',
   'LU',
   'LV',
   'MA',
   'MC',
   'MT',
   'MX',
   'MY',
   'NI',
   'NL',
   'NO',
   'NZ',
   'OM',
   'PA',
   'PE',
   'PH',
   'PL',
   'PS',
   'PT',
   'PY',
   'QA',
   'RO',
   'SA',
   'SE',
   'SG',
   'SK',
  

In [20]:
# Fetch the detailed audio analysis for a single hard-coded track id and display it.
sp.audio_analysis('6i0V12jOa3mr6uu4WYhUBr')

{'meta': {'analyzer_version': '4.0.0',
  'platform': 'Linux',
  'detailed_status': 'OK',
  'status_code': 0,
  'timestamp': 1486043141,
  'analysis_time': 7.72221,
  'input_process': 'libvorbisfile L+R 44100->22050'},
 'track': {'num_samples': 4320036,
  'duration': 195.92,
  'sample_md5': '',
  'offset_seconds': 0,
  'window_seconds': 0,
  'analysis_sample_rate': 22050,
  'analysis_channels': 1,
  'end_of_fade_in': 6.69315,
  'start_of_fade_out': 189.60254,
  'loudness': -9.348,
  'tempo': 90.024,
  'tempo_confidence': 0.681,
  'time_signature': 4,
  'time_signature_confidence': 1.0,
  'key': 4,
  'key_confidence': 0.67,
  'mode': 0,
  'mode_confidence': 0.628,
  'codestring': 'eJxVmguW5SYMRLfiJZg_7H9jubdwT3cyOcmAbRD6lErijXNarWP1533OOXvt-ranllKf8tZa9-lzPbWfmXErpQ2ez9r_jfm21fd9-j6z9TUnw5nHZZ1Sdn3a2P1ZhT-jjvdpZ-58_ZbWDh_WMXk8S537lKf3Mp9xTm-ztv300e9i--xe3meUiqjljPGO2p7RfkQt_vcZ41sc0epioYksrc3R61x8vZGcxd85WOCZ7355u7dTdmGfWVd91rvWOHONZzYW48AvS73rmYut91kHhZ3ue3l5crKXYavu3Ptbexk8aadm59k2OlqL

In [None]:
## I think I also remember being able to get features on the artist too. I should explore what other things are available in the API.

### NTS: I want to get the release date of the songs too 