## Getting data from the Spotify API to build features for prediction

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import spotipy
import spotipy.util as util
import json

from functions import *
import pickle

In [2]:
# Load the Spotify API credentials from a local JSON file.
# JSON only accepts double-quoted strings, so single-quoted values in the file
# will make json.load raise.
with open('credentials.json') as credentials_file:
    # the `with` block closes the file on exit, so no explicit close() is needed
    credentials = json.load(credentials_file)

In [3]:
# Client-credentials manager built from the id/secret stored in `credentials`.
token = util.oauth2.SpotifyClientCredentials(
    client_id=credentials['client_id'],
    client_secret=credentials['client_secret'],
)
# Ask the manager for an access token to authenticate the API client with.
cache_token = token.get_access_token()
# TODO: check which scopes/endpoints this default token allows, and whether a
# different auth flow is needed for anything beyond basic lookups.


In [4]:
# Spotify API client, authenticated with the access token obtained above.
sp = spotipy.Spotify(auth=cache_token)

### Loading the data and building search queries for the API

In [5]:
# Read the Billboard chart data and lower-case the two column names used below.
data = pd.read_csv('data/billboards.csv').rename(
    columns={'Artist': 'artist', 'Song': 'song'}
)
# One lower-cased "<artist> <song>" string per row, used as the search text.
data['search_queries'] = (data['artist'] + ' ' + data['song']).str.lower()

In [59]:
def get_features(df):
    """Search Spotify for every query in ``df`` and collect the cleaned results.

    For each value in ``df.search_queries`` this runs a one-result track search
    through the module-level ``sp`` client, passes the returned items to
    ``clean_search_results`` (expected to return a dict, per the original
    note), and tags that dict with the query under ``'search_query'``.

    Note that this is a generator: it yields exactly once, and the yielded
    value is the full list of per-query dicts (an empty list when ``df`` has
    no queries).
    """
    collected = []
    for query in df.search_queries.values:
        # limit=1: only the top track match for this query is requested
        response = sp.search(q=query, type='track', limit=1)
        record = clean_search_results(response['tracks']['items'])
        record['search_query'] = query
        collected.append(record)
    # hand back a copy of the accumulated list as the single yielded item
    yield list(collected)

In [60]:
# Fetch first, write second: output.pkl is only opened (and truncated) once the
# API calls have succeeded, so a failed request cannot leave an empty pickle.
results_list = list(get_features(data.head()))  # testing on the head only
with open('output.pkl', 'wb') as output_file:
    # get_features yields a single list, so element 0 holds the per-track dicts;
    # the `with` block closes the file, no explicit close() needed
    pickle.dump(results_list[0], output_file)

In [61]:
# Inspect the fetched results: a list wrapping the single list that get_features yields.
results_list

[[{'id': '1nX9KhK3Fff27SnrIor2Yb',
   'name': '4 AM',
   'artists': '2 Chainz, Travis Scott',
   'popularity': 72,
   'explicit': True,
   'duration_ms': 255560,
   'search_query': '2 chainz 4 am'},
  {'id': '6H0AwSQ20mo62jGlPGB8S6',
   'name': "It's A Vibe",
   'artists': '2 Chainz, Ty Dolla $ign, Trey Songz, Jhene Aiko',
   'popularity': 76,
   'explicit': True,
   'duration_ms': 210200,
   'search_query': "2 chainz it's a vibe"},
  {'id': '365wwIjijQdlRJEjUWTidq',
   'name': 'PROUD',
   'artists': '2 Chainz, YG, Offset',
   'popularity': 61,
   'explicit': True,
   'duration_ms': 234666,
   'search_query': '2 chainz proud'},
  {'id': '',
   'name': '',
   'artists': '',
   'popularity': '',
   'explicit': '',
   'duration_ms': '',
   'search_query': '2 chainz x gucci mane x quavo good drank'},
  {'id': '5S1IUPueD0xE0vj4zU3nSf',
   'name': 'Bigger Than You (feat. Drake & Quavo)',
   'artists': '2 Chainz, Drake, Quavo',
   'popularity': 75,
   'explicit': True,
   'duration_ms': 22589

In [62]:
# Reload the pickled results; `with` makes sure the file handle is closed.
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open('output.pkl', 'rb') as pickled_file:
    test = pickle.load(pickled_file)

In [63]:
# Tabulate the unpickled per-track dicts as a DataFrame for a quick look.
pd.DataFrame(test) # TODO: look up how Spotify defines the 'popularity' field

Unnamed: 0,artists,duration_ms,explicit,id,name,popularity,search_query
0,"2 Chainz, Travis Scott",255560.0,True,1nX9KhK3Fff27SnrIor2Yb,4 AM,72.0,2 chainz 4 am
1,"2 Chainz, Ty Dolla $ign, Trey Songz, Jhene Aiko",210200.0,True,6H0AwSQ20mo62jGlPGB8S6,It's A Vibe,76.0,2 chainz it's a vibe
2,"2 Chainz, YG, Offset",234666.0,True,365wwIjijQdlRJEjUWTidq,PROUD,61.0,2 chainz proud
3,,,,,,,2 chainz x gucci mane x quavo good drank
4,"2 Chainz, Drake, Quavo",225893.0,True,5S1IUPueD0xE0vj4zU3nSf,Bigger Than You (feat. Drake & Quavo),75.0,"2 chainz, drake bigger > you"
