# Song Analysis Using Spotify API

In [2]:
#import libraries
import os
import random
import sys

import numpy as np
import pandas as pd
import spotipy
import spotipy.util as util
from bs4 import BeautifulSoup as bs
from sklearn.metrics import pairwise_distances
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
#Spotify authorization scope
#NOTE(review): `scope` is not referenced again in this notebook as shown; the
#connection below is built from client credentials only - confirm it is still needed
scope = 'user-library-read'

In [4]:
#Spotify API credentials
#read from environment variables (the names spotipy itself falls back to) so
#that `cid` and `secret` exist on a fresh kernel and no secret is hardcoded;
#previously they were restored with `%store -r spotify_cid` / `%store -r spotify_secret`
cid = os.environ.get('SPOTIPY_CLIENT_ID')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET')

In [6]:
#open the Spotify connection via the Spotipy wrapper, authenticating with
#the client id / secret pair (no user login involved)
client_cred = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_cred)

### Functions

In [7]:
def get_track_info(track,artist):
    '''
    function returns dictionary with track's info (including audio features)
    for the first Spotify search hit matching the given track and artist
    parameters:
        track-->str, track title used in the search query
        artist-->str, artist name used in the search query
    returns:
        dict with keys track, artist, track_id, release_date, url, dance,
        energy, loud, speech, acoust, live, valence, tempo
    raises:
        IndexError if the search returns no tracks
    '''
    #search Spotify API for general song info
    info_json = sp.search(q='artist:' + artist + ' track:' + track)

    #fail with a readable message instead of a bare "list index out of range"
    items = info_json['tracks']['items']
    if not items:
        raise IndexError('no Spotify track found for ' + track + ' by ' + artist)
    top_hit = items[0]   #only the first search hit is used
    album = top_hit['album']

    #create dictionary with song info
    info = {'track':top_hit['name'],
            #artist name is read from the album's artist list
            'artist':album['artists'][0]['name'],
            'track_id':top_hit['id'],
            'release_date':album['release_date'],
            #NOTE(review): this looks like the first track artist's Spotify link,
            #not the track's own link - confirm that is intended
            'url': top_hit['artists'][0]['external_urls']['spotify']}

    #add audio features to dictionary (short key --> Spotify feature name)
    audio_info = sp.audio_features(info['track_id'])[0]
    feature_names = (('dance','danceability'),
                     ('energy','energy'),
                     ('loud','loudness'),
                     ('speech','speechiness'),
                     ('acoust','acousticness'),
                     ('live','liveness'),
                     ('valence','valence'),
                     ('tempo','tempo'))
    for key, feature in feature_names:
        info[key] = audio_info[feature]

    return info

In [49]:
def get_sim_songs(id_list, limit=100):
    '''
    function returns list of tup (track, artist, track_id, url) of songs
    recommended by Spotify for the seed tracks
    parameters:
        id_list-->iterable of song ids (str) for seed tracks
        limit-->int, number of recommendations to request (default 100)
    '''
    #ask Spotify for recommendations based on the seed tracks
    #NOTE(review): Spotify's API documents caps on both the number of seeds and
    #`limit` per request - confirm before passing larger values
    song_recs = sp.recommendations(limit=limit,seed_tracks=list(id_list))['tracks']

    #create list of tup (track, artist, track_id, url); only the first listed artist is kept
    sim_songs = [(s['name'],s['artists'][0]['name'],s['id'], s['external_urls']['spotify']) for s in song_recs]

    return sim_songs

In [None]:
def get_df(track_list):
    '''
    function returns dataframe with audio features for available songs
    songs whose lookup fails are reported on stdout and left out
    parameters:
        track_list-->list of tup (track, artist) of songs
    '''
    d = []
    for track, artist in track_list:
        try:
            #lookup uses lowercase names with apostrophes removed
            d.append(get_track_info(track.lower().replace("'",""),
                                    artist.lower().replace("'","")))
        except Exception as err:
            #best effort: skip the song but say why; catching Exception (not a
            #bare except) lets KeyboardInterrupt / SystemExit stop the loop
            print(track + ' by ' + artist + ' is not available ('
                  + type(err).__name__ + ': ' + str(err) + ')')

    return pd.DataFrame(data=d)

In [None]:
#seed tracks (rock/alternative), one (track, artist) pair per line
rock_songs = [
    ('Planet Zero', 'SHINEDOWN'),
    ('Black Summer', 'Red hot chili peppers'),
    ('Love dies young', 'foo fighters'),
    ('so called life', 'three days grace'),
    ("Love Sux", 'Avril Lavigne'),
]

#look up every seed on Spotify and collect the results in a dataframe
df_rock = get_df(rock_songs)

In [50]:
#first batch of Spotify recommendations, seeded with the ids of the rock/alternative tracks
all_songs = get_sim_songs(df_rock.track_id)

In [54]:
#pick 5 songs (with replacement) from the recommendations collected so far;
#`all_songs` is the list built in the previous cell
random_songs = random.choices(all_songs , k = 5)

In [60]:
#keep only the track id (position 2 of each song tuple) of the sampled songs
new_songs = [i[2] for i in random_songs]

In [72]:
#snowball crawl: keep feeding 5 randomly chosen, already collected songs back
#into Spotify's recommender until enough unique songs have been gathered
TARGET_UNIQUE_SONGS = 30000   #stop once this many distinct songs are known
MAX_STALLED_ROUNDS = 50       #give up after this many consecutive rounds without a new song

all_songs = get_sim_songs(df_rock.track_id)
seen_songs = set(all_songs)   #maintained incrementally instead of rebuilding set(all_songs) every pass
stalled_rounds = 0
while len(seen_songs) < TARGET_UNIQUE_SONGS:
    random_songs = random.choices(all_songs , k = 5)
    new_songs = [i[2] for i in random_songs]   #position 2 of each tuple is the track id
    batch = get_sim_songs(new_songs)
    all_songs.extend(batch)

    known_before = len(seen_songs)
    seen_songs.update(batch)
    stalled_rounds = 0 if len(seen_songs) > known_before else stalled_rounds + 1
    if stalled_rounds >= MAX_STALLED_ROUNDS:
        #without this guard the loop never ends once the recommender stops returning unseen songs
        print('stopping early with ' + str(len(seen_songs)) + ' unique songs: no new songs in the last '
              + str(MAX_STALLED_ROUNDS) + ' rounds')
        break


In [76]:
#drop duplicate (track, artist, id, url) tuples collected during the crawl
unique_songs = set(all_songs)

In [80]:
#build the dataframe from the unique songs; a set has no stable iteration
#order, so sort by track id (tuple position 2) to get the same row order on every run
song_df = pd.DataFrame(sorted(unique_songs, key=lambda song: song[2]),
                       columns = ['track','artist','id','url'])

In [81]:
#persist the collected songs as csv; with to_csv's defaults the dataframe index
#is written as the first column
#NOTE(review): the relative ../Data directory presumably has to exist already - confirm
song_df.to_csv('../Data/rock_songs_30k.csv')

In [2]:
#snapshot the installed package versions into requirements.txt (shell command run from the notebook)
! pip3 freeze > requirements.txt 