# Spotify Song Features

## Libraries

In [1]:
# main
import pandas as pd
import numpy as np

# spotify API
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API
## Initialize API
The Spotify API has to be initialized with the client_id and the client_secret. As the client_secret contains sensitive information, I have created a local file, which is read with Python and looped through, assigning each line of the document to the corresponding variable.

In [4]:
import config

In [5]:
# Initialize the spotipy client with the credentials stored in the local config module
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)


# Obtaining Playlist

For this initial project, we will find a big Spotify playlist (10K songs) to ensure that our sample contains a wide variety of features, so that the predicted output can be as accurate as possible.

To obtain all the information, I will build a function that retrieves everything available for that playlist.

In [7]:
def get_playlist_tracks(username,playlist_id):
    """
    Input: Username owning the playlist and ID (or URI) of the playlist we want to fetch
    Output: List with the items of every page of the playlist

    Uses the notebook-level spotipy client `sp`. The first page is requested with
    `sp.user_playlist_tracks`; the following pages are fetched with `sp.next` for
    as long as the current page reports a truthy 'next' value.
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    # The first page's item list doubles as the accumulator for all later pages.
    collected = page['items']
    while True:
        if not page['next']:
            return collected
        page = sp.next(page)
        collected += page['items']

# Download every item of the chosen playlist
playlist = get_playlist_tracks(
    username="spotify",
    playlist_id="spotify:playlist:1G8IpkZKobrIlXcVPoSIuf",
)

Once we have the playlist, the next thing we have to do is retain only the content that offers us value for this project. In our case, it will be song_names, artist_names, song_features and duration_ms.

Note that, to obtain the song_features, we first have to retrieve the song_URI; after that, another function will pull that information.

In [9]:
def playlist_info(playlist, batch_size=100):
    """
    Input: The playlist stored from the previous function (list of playlist items) and,
           optionally, how many track URIs are sent per audio-features request
    Output: The parts from the playlist that are useful for us: song names, artist names,
            durations (ms) and audio features, returned as four lists aligned by position

    Local tracks (truthy "is_local") are skipped. Entries whose track data is missing
    or malformed (e.g. "track" is None, or the artist list is empty) are skipped as a
    whole, so the four lists always stay the same length.

    Audio features are requested through the notebook-level spotipy client `sp` in
    batches of `batch_size` URIs. The default of 100 keeps the original threshold;
    Spotify documents 100 as the maximum number of ids per audio-features request,
    so confirm against the API reference before raising it.

    Raises: ValueError if batch_size is smaller than 1
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    song_names = []
    artist_names = []
    duration = []
    features = []
    pending_uris = []

    for result in playlist:
        try:
            if result["is_local"]:
                continue
            track = result["track"]
            # Read every field before touching the output lists, so that a broken
            # entry cannot leave the lists with different lengths.
            name = track["name"]
            artist = track["artists"][0]["name"]
            length_ms = track["duration_ms"]
            uri = track["uri"]
        except (TypeError, IndexError):
            # Best effort: ignore items without usable track data.
            continue

        song_names.append(name)
        artist_names.append(artist)
        duration.append(length_ms)
        pending_uris.append(uri)

        # Flush a full batch; counting the pending URIs directly keeps the batch
        # size exact even when some playlist entries were skipped.
        if len(pending_uris) >= batch_size:
            features.extend(sp.audio_features(pending_uris))
            pending_uris = []

    # Request the remainder only if there is one: when the number of kept tracks is
    # a multiple of batch_size (or zero) there is nothing left to ask for.
    if pending_uris:
        features.extend(sp.audio_features(pending_uris))

    return song_names, artist_names, duration, features

# Unpack the four aligned lists returned for the downloaded playlist
(songs, artists, duration, feature) = playlist_info(playlist=playlist)

As we have the URI of each song, it's now time to obtain the final result, the song_features. After looking into the different features, I will select the ones that I consider to have the most impact on predicting a song based on the user input.

In [10]:
def features_df_creation(info_features, song_names=None, artist_names=None, durations=None):
    """
    Input: List with one audio-features dict per song (the features list returned by
           playlist_info) and, optionally, the matching lists of song names, artist
           names and durations
    Output: DF containing the song, artist, duration and the song_features that we chose

    When song_names, artist_names or durations are not given, the notebook-level
    variables `songs`, `artists` and `duration` are used instead, which keeps the
    original single-argument call working.

    An entry of info_features that is None (presumably returned by the API for a
    track without audio features; verify) produces NaN in every feature column of
    that row instead of raising, so rows stay aligned with the song lists.
    """
    # Selected features, in the column order of the resulting DataFrame.
    feature_columns = (
        "danceability",
        "energy",
        "acousticness",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "valence",
        "tempo",
    )

    if song_names is None:
        song_names = songs
    if artist_names is None:
        artist_names = artists
    if durations is None:
        durations = duration

    # Materialise once: the features are traversed once per selected column.
    info_features = list(info_features)

    columns = {"song": song_names, "artist": artist_names, "duration": durations}
    for key in feature_columns:
        columns[key] = [
            float("nan") if entry is None else entry[key]
            for entry in info_features
        ]

    return pd.DataFrame(columns)

# Build the final table from the audio features gathered above
complete_list_song_artist_features = features_df_creation(info_features=feature)

In [13]:
# Show the resulting DataFrame as the cell output
complete_list_song_artist_features

Unnamed: 0,song,artist,duration,danceability,energy,acousticness,instrumentalness,liveness,loudness,speechiness,valence,tempo
0,Like a Rolling Stone,Bob Dylan,369600,0.482,0.721,0.731000,0.000000,0.1890,-6.839,0.0321,0.557,95.263
1,Smells Like Teen Spirit,Nirvana,301240,0.485,0.863,0.000012,0.016200,0.1380,-9.027,0.0495,0.767,116.835
2,A Day In The Life - Remastered,The Beatles,337413,0.364,0.457,0.290000,0.000106,0.9220,-14.162,0.0675,0.175,163.219
3,Good Vibrations (Mono),The Beach Boys,219146,0.398,0.413,0.082200,0.000025,0.0891,-10.934,0.0388,0.331,133.574
4,Johnny B Goode,Chuck Berry,160892,0.518,0.756,0.735000,0.000062,0.3170,-10.851,0.0915,0.968,166.429
...,...,...,...,...,...,...,...,...,...,...,...,...
9942,Into The Valley,Skids,199466,0.332,0.707,0.000012,0.006120,0.1100,-12.698,0.0361,0.652,144.815
9943,Tonight's Da Night,Redman,201800,0.464,0.749,0.224000,0.000046,0.3510,-8.564,0.4800,0.879,181.121
9944,Figure 8,FKA twigs,183040,0.694,0.710,0.527000,0.001350,0.0697,-9.793,0.3400,0.415,119.964
9945,Like An Angel,The Mighty Lemon Drops,222160,0.332,0.800,0.000368,0.001600,0.0850,-9.746,0.0326,0.832,149.240


In [12]:
# Persist the table as a CSV file one directory above the notebook, without the index column
complete_list_song_artist_features.to_csv(
    path_or_buf="../spotify_playlist.csv",
    index=False,
)