In [1]:
import json
import os
import time
import warnings

import numpy as np
import pandas as pd
import requests
import spotipy
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Spotify API credentials. They are read from the environment so that real
# secrets never have to be typed into (and committed with) the notebook; the
# original 'xxx' placeholders are kept only as a fallback.
cid = os.environ.get('SPOTIPY_CLIENT_ID', 'xxx')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'xxx')

auth_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)

# Shared client object used by the helper functions in this notebook.
sp = spotipy.Spotify(auth_manager=auth_manager)
sp.trace = False

## 1. Getting Track IDs From Playlists

In [5]:
def getTrackIDs(user, playlist_id):
    """Return the track IDs embedded in a playlist object.

    Only the items contained in the object returned by ``sp.user_playlist``
    are read; no paging is done here (see ``get_playlist_tracks`` for
    playlists with more than 100 tracks).

    Args:
        user: Playlist owner, forwarded to ``sp.user_playlist``.
        playlist_id: Playlist identifier, forwarded to ``sp.user_playlist``.

    Returns:
        list: Track IDs in playlist order. Items without a track, or whose
        track has no ID, are skipped.
    """
    playlist = sp.user_playlist(user, playlist_id)

    track_ids = []
    for item in playlist['tracks']['items']:
        track = item.get('track')
        # An item can carry no track object, or a track without an ID; neither
        # can be looked up afterwards, so leave them out instead of crashing
        # or returning None entries.
        if track and track.get('id'):
            track_ids.append(track['id'])
    return track_ids

When a playlist has more than 100 tracks, the results have to be fetched page by page:

In [5]:
def get_playlist_tracks(username, playlist_id):
    """Return the track IDs of a playlist, following every result page.

    The first page comes from ``sp.user_playlist_tracks``; while a page has a
    truthy ``'next'`` entry, the following page is requested with ``sp.next``.

    Args:
        username: Playlist owner, forwarded to ``sp.user_playlist_tracks``.
        playlist_id: Playlist identifier, forwarded to
            ``sp.user_playlist_tracks``.

    Returns:
        list: Track IDs in playlist order. Items without a track, or whose
        track has no ID, are skipped.
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    track_ids = []

    while True:
        for item in page['items']:
            track = item.get('track')
            # Skip items that carry no track object or no ID: they would
            # otherwise raise here or end up as None entries in the result.
            if track and track.get('id'):
                track_ids.append(track['id'])

        if not page['next']:
            break
        page = sp.next(page)

    return track_ids

In [6]:
# Owner and URI of the playlist whose track IDs are collected.
playlist_owner = 'caroshima'
playlist_uri = 'spotify:playlist:0UhnBVrbtoGacKvWndLGWN'
track_ids = get_playlist_tracks(playlist_owner, playlist_uri)

In [None]:
# Display the collected track IDs as the cell output.
track_ids

## 2. Getting Track Features

In [3]:
def getTrackFeatures(id):
    """Collect metadata and audio features for a single track.

    Args:
        id: Track identifier passed to ``sp.track`` and ``sp.audio_features``.
            The parameter name shadows the built-in ``id`` inside this
            function; it is kept so existing callers keep working.

    Returns:
        list: ``[track_name, album, artist, release_date, length, popularity,
        danceability, energy, key, loudness, mode, speechiness, acousticness,
        instrumentalness, liveness, valence, tempo, time_signature]``.

    Raises:
        ValueError: If no audio features are returned for the track.
    """
    feature_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )

    track_info = sp.track(id)
    features_info = sp.audio_features(id)

    # The features come back as a list with one entry per requested track.
    # Guard against a missing/None entry so the failure is explicit instead of
    # an opaque "'NoneType' object is not subscriptable".
    features = features_info[0] if features_info else None
    if not features:
        raise ValueError('No audio features available for track {!r}'.format(id))

    album_info = track_info['album']
    track_data = [
        track_info['name'],
        album_info['name'],
        # NOTE(review): this is the first artist of the album, not of the
        # track itself - confirm that is the intended value.
        album_info['artists'][0]['name'],
        album_info['release_date'],
        track_info['duration_ms'],
        track_info['popularity'],
    ]
    track_data.extend(features[key] for key in feature_keys)

    # Short pause after each lookup (presumably to stay under API rate limits).
    time.sleep(0.1)

    return track_data

In [None]:
# Spot-check the feature extraction on a single track. The variable is not
# called `id`: assigning to that name at cell level would rebind the built-in
# `id` for the rest of the kernel session.
sample_track_id = '3K0FE7QAh4R4Gi2jKWIPoP'
getTrackFeatures(sample_track_id)

### Trying to extract track ids from Spotify search (failed)

In [87]:
#response = requests.get('https://api.spotify.com/v1/search?query=genre%3Ajazz&type=track&offset=0&limit=50')
#response.status_code
#soup = BeautifulSoup(response.text, 'lxml')

def search_func(q, limit=50, offset=0, market=None):
    """Run a Spotify track search and return the raw result items.

    Args:
        q: Search query string forwarded to ``sp.search``.
        limit: Maximum number of items requested (default 50, as before).
        offset: Index of the first item requested (default 0, as before).
        market: Market forwarded to ``sp.search`` (default ``None``, as before).

    Returns:
        The ``["tracks"]["items"]`` entry of the search response, unchanged.
        No track IDs are extracted here.
    """
    response = sp.search(q, limit=limit, offset=offset, type='track', market=market)
    return response["tracks"]["items"]

In [88]:
# Query string handed to search_func in the next cell.
q = 'genre:jazz'

In [None]:
# Run the search; the returned raw track items are shown as the cell output.
search_func(q)

## 3. List of Track IDs to DF

In [9]:
def id_to_df(track_ids):
    """Build a DataFrame with one row of metadata and audio features per track.

    Each ID is passed to ``getTrackFeatures``. A track whose lookup raises is
    left out of the result and reported with a warning, so failures are
    visible instead of silently shrinking the data set.

    Args:
        track_ids: Iterable of track IDs.

    Returns:
        pandas.DataFrame: One row per successfully processed track, with the
        18 columns listed in ``columns`` below. Empty (but with those
        columns) when nothing could be processed.
    """
    columns = [
        'track_name', 'album', 'artist', 'release_date', 'length', 'popularity',
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    ]

    track_info_list = []
    for track_id in track_ids:
        # Pause before every lookup (presumably to stay under API rate limits).
        time.sleep(0.1)
        try:
            track_info_list.append(getTrackFeatures(track_id))
        except Exception as exc:
            # Catch Exception rather than everything, so that interrupting a
            # long run (KeyboardInterrupt) still stops the loop.
            warnings.warn(
                'Skipping track {!r}: {}: {}'.format(track_id, type(exc).__name__, exc),
                stacklevel=2,
            )

    return pd.DataFrame(track_info_list, columns=columns)

In [10]:
# Build the feature table for the playlist tracks collected in `track_ids`.
testingmodel_df = id_to_df(track_ids)

In [12]:
# Save the table as a pickle file in the current working directory.
testingmodel_df.to_pickle("testingmodel.pkl")

In [13]:
# Save the table as CSV. index=False keeps the row index out of the file, so
# reading it back does not produce a stray "Unnamed: 0" column.
testingmodel_df.to_csv("testingmodel.csv", index=False)

## 4. DF To SQL

In [114]:
from sqlalchemy import create_engine
  
# SQLite engine created from a URL without a file path (presumably an
# in-memory database that is discarded with the kernel - confirm against the
# SQLAlchemy docs).
engine = create_engine('sqlite://')
  
# NOTE(review): `practice_df` is not defined in any cell visible here - confirm
# where it comes from, otherwise this cell fails on a fresh kernel.
# Writes the frame to the table 'practicing_df_to_sql' through `engine`.
practice_df.to_sql('practicing_df_to_sql', con = engine)

In [116]:
# Read every row of the table written above back into a DataFrame; the result
# is shown as the cell output.
query = 'SELECT * FROM practicing_df_to_sql;'
        
pd.read_sql(query, engine)

Unnamed: 0,index,track_name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,0,Mad Lucas,Last Splash,The Breeders,1993-08-30,276866,0,0.251,0.2510,2,-19.192,1,0.0559,0.22500,0.761000,0.2000,0.1770,90.527,4
1,1,Range Life,"Crooked Rain, Crooked Rain: LA's Desert Origins",Pavement,1994,294933,6,0.555,0.7240,9,-9.429,1,0.0288,0.04450,0.000014,0.1860,0.7240,112.812,4
2,2,Angeles,Either/Or,Elliott Smith,1997-02-25,176826,53,0.530,0.0993,4,-23.548,0,0.0549,0.86000,0.200000,0.1530,0.5880,118.139,4
3,3,Sugar for the Pill,Slowdive,Slowdive,2017-05-05,270918,60,0.533,0.5450,7,-7.681,1,0.0243,0.20700,0.797000,0.1130,0.4190,98.014,4
4,4,Suffering Jukebox,"Lookout Mountain, Lookout Sea",Silver Jews,2008-06-17,261226,0,0.475,0.5050,7,-10.859,1,0.0361,0.22600,0.481000,0.0896,0.3100,127.784,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,116,Lo Boob Oscillator,Lo Boob Oscillator,Stereolab,1995,396637,50,0.358,0.8690,4,-6.076,1,0.0449,0.00241,0.102000,0.1290,0.7970,123.670,4
117,117,Avant Garde M.O.R.,The Groop Played Space Age Batchelor Pad Music,Stereolab,1993-03-22,249560,0,0.512,0.3280,4,-17.515,1,0.0291,0.58100,0.940000,0.0998,0.3010,86.882,4
118,118,Emperor Tomato Ketchup,Emperor Tomato Ketchup,Stereolab,1996-03-18,277333,18,0.687,0.9100,1,-10.968,0,0.0528,0.04380,0.007340,0.1840,0.7660,151.052,4
119,119,Changer,Stunning Debut Album,Stereolab,1991,292557,8,0.341,0.7230,7,-4.957,1,0.0302,0.00904,0.878000,0.0849,0.3740,107.356,4
