# Generating Spotify Music Data for recommendation System

This Notebook showcases how to grab music data from a spotify playlist. The playlist chosen for this notebook is a playlist that consists of random music from a variety of genres and artist with the intention of creating a recommendation system.

In [1]:
import json
import pandas as pd

In [2]:
def get_keys(path):
    with open("/Users/Jonathan/Documents/Flatiron/phase_5/P5_spotify_recommendations/.secret/spotify_api.json") as f:
        return json.load(f)

In [3]:
keys = get_keys("/Users/Jonathan/.secret/spotify_api.json")

client = keys['client']
api_key = keys['api_key']

In [4]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

auth_manager = SpotifyClientCredentials(client_id = client, client_secret = api_key)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [5]:
# https://github.com/MaxHilsdorf/introduction_to_spotipy/blob/master/introduction_to_spotipy.ipynb
# https://towardsdatascience.com/how-to-create-large-music-datasets-using-spotipy-40e7242cc6a6
# https://stackoverflow.com/questions/39086287/spotipy-how-to-read-more-than-100-tracks-from-a-playlist

def analyze_playlist(creator, playlist_id):
    
    # Create empty dataframe
    playlist_features_list = ["artist", "artist_id", "popularity", "album", "track_name", "track_id", 
                             "danceability", "energy", "key", "loudness", "mode", "speechiness",
                             "instrumentalness", "liveness", "valence", "tempo", "duration_ms", "time_signature"]
    playlist_df = pd.DataFrame(columns = playlist_features_list)
    
    # Create empty dict
    playlist_features = {}
    
    # Loop through every track in the playlist, extract features and append the features to the playlist df
    results = sp.user_playlist_tracks(creator,playlist_id)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    results = tracks   
    
    for track in results:
        # Get metadata
        playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        playlist_features['artist_id'] = track['track']['artists'][0]['id']
        playlist_features['popularity'] = track['track']['popularity']
        playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]
        playlist_features["track_id"] = track["track"]["id"]
        
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[6:]:
            playlist_features[feature] = audio_features[feature]
        
             
        # Concat the dfs
        track_df = pd.DataFrame(playlist_features, index = [0])
        playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)
    
    return playlist_df

In [7]:
playlist_df = analyze_playlist("Meek Music Mouse", "3O9P4HKyiYJ2npdqdJEsrd")

In [8]:
playlist_df.head()

Unnamed: 0,artist,artist_id,popularity,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Bon Jovi,58lV9VcRSjABbAbfWS6skp,8,Slippery When Wet,Livin' On A Prayer,0J6mQxEZnlRt9ymzFntA6z,0.534,0.887,0,-3.777,1,0.0345,9.9e-05,0.325,0.72,122.494,249293,4
1,Bon Jovi,58lV9VcRSjABbAbfWS6skp,0,Bon Jovi Greatest Hits,Wanted Dead Or Alive,4Zj9VM4fO1KwkU3lFaECsW,0.257,0.819,7,-3.562,1,0.0422,0.015,0.324,0.267,150.354,308560,4
2,Queen,1dfeR4HaWDbWqFHLkxsg1d,64,News Of The World (Deluxe Remastered Version),We Are The Champions - Remastered 2011,7ccI9cStQbQdystvc6TvxD,0.268,0.459,7,-6.948,0,0.0346,0.0,0.119,0.172,64.223,179200,4
3,Queen,1dfeR4HaWDbWqFHLkxsg1d,69,News Of The World (Deluxe Remastered Version),We Will Rock You - Remastered 2011,54flyrjcdnQdco7300avMJ,0.693,0.497,2,-7.316,1,0.119,0.0,0.258,0.473,81.308,122067,4
4,Bon Jovi,58lV9VcRSjABbAbfWS6skp,0,Cross Road,You Give Love A Bad Name,7LBJui5MdjfCd8YZr4xaqA,0.556,0.945,0,-4.347,0,0.0496,4e-06,0.394,0.781,122.875,224307,4


In [9]:
playlist_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6514 entries, 0 to 6513
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   artist            6514 non-null   object 
 1   artist_id         6514 non-null   object 
 2   popularity        6514 non-null   object 
 3   album             6514 non-null   object 
 4   track_name        6514 non-null   object 
 5   track_id          6514 non-null   object 
 6   danceability      6514 non-null   float64
 7   energy            6514 non-null   float64
 8   key               6514 non-null   object 
 9   loudness          6514 non-null   float64
 10  mode              6514 non-null   object 
 11  speechiness       6514 non-null   float64
 12  instrumentalness  6514 non-null   float64
 13  liveness          6514 non-null   float64
 14  valence           6514 non-null   float64
 15  tempo             6514 non-null   float64
 16  duration_ms       6514 non-null   object 


In [11]:
playlist_df.to_csv("spotify_playlist.csv", index = False)