# Obtain Playlist Data
Based on original work from [makispl/Spotify-Data-Analysis](https://github.com/makispl/Spotify-Data-Analysis)

In [1]:
# # Import the libraries
import os
import pandas as pd
# import numpy as np
# import json
# import matplotlib.pyplot as plt
# import seaborn as sns
# %config InlineBackend.figure_format ='retina'
import spotipy
import spotipy.util as util

from spotipy.oauth2 import SpotifyClientCredentials

## Authorization Flow

In [2]:
# Declare the credentials
cid = os.environ['SPOTIFY_CLIENT_ID']
secret = os.environ['SPOTIFY_CLIENT_SECRET']
redirect_uri='http://localhost:7777/callback'
username = 'gustavoarjones'

In [3]:
# Authorization flow
scope = 'user-top-read'
token = util.prompt_for_user_token(username, scope, client_id=cid, client_secret=secret, redirect_uri=redirect_uri)

if token:
    sp = spotipy.Spotify(auth=token)
else:
    print("Can't get token for", username)

## Extract the User's Playlists

In [4]:
# Fetch the user's playlists
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret) 
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [5]:
def fetch_playlists(sp, username):
    """
    Returns the user's playlists.
    """
        
    id = []
    name = []
    num_tracks = []
    
    # Make the API request
    playlists = sp.user_playlists(username)
    for playlist in playlists['items']:
        id.append(playlist['id'])
        name.append(playlist['name'])
        num_tracks.append(playlist['tracks']['total'])

    # Create the final df   
    df_playlists = pd.DataFrame({"id":id, "name": name, "#tracks": num_tracks})
    return df_playlists

In [6]:
fetch_playlists(sp,username)

Unnamed: 0,id,name,#tracks
0,1dCVgJWis8lvuOXXTcI6GR,Len — Steal My Sunshine -,311
1,37i9dQZF1DWXm9R2iowygp,Los 90 España,50
2,2lrSd3Nk2Pe1j9sByZzwzK,,101
3,6ut2MhSOHD8n6rCHwurUQl,Eletro Pop Para Academia,154
4,37i9dQZF1DX9FSYmdU9mbT,O Melhor da MPB,70
5,37i9dQZF1DX4AyFl3yqHeK,Bossa Nova,58
6,7rrdUecavssG4xE8cekMO7,Baby Sleep,200
7,0Y0Q2zBIHq8Ood3XSLCv5D,Rolling Stones - Greatest Hits,43
8,4FO6rgOkMYPlIg8Zt7CK2v,🤔 Minha música,299
9,4OEocAgEWye7WiirTcm90M,2020 REMIXED HITS | TOP 50 GLOBAL - 2019 HITS ...,37


## Extract the User's Playlists' Tracks

In [7]:
def fetch_playlist_tracks(sp, username, playlist_id):
    """
    Returns the tracks for the given playlist.
    """
        
    offset = 0
    tracks = []
    
    # Make the API request
    while True:
        content = sp.user_playlist_tracks(username, playlist_id, fields=None, limit=100, offset=offset, market=None)
        tracks += content['items']
        
        if content['next'] is not None:
            offset += 100
        else:
            break
    
    track_id = []
    track_name = []
    
    for track in tracks:
        track_id.append(track['track']['id'])
        track_name.append(track['track']['name'])
    
    # Create the final df
    df_playlists_tracks = pd.DataFrame({"track_id":track_id, "track_name": track_name})
    return df_playlists_tracks

In [8]:
fetch_playlist_tracks(sp, username, '4FO6rgOkMYPlIg8Zt7CK2v')

Unnamed: 0,track_id,track_name
0,2LawezPeJhN4AWuSB0GtAU,Have You Ever Seen The Rain
1,2TDDvKi8n44EaLVJXd2GkP,Break It Down Again
2,2xGKkcdKQUyqKKfOhd58gP,Stay
3,7lo43I6slQYrmMZQYb99PC,A Visita
4,6IBYNkdYt6UePLysFn6xu9,O Que É O Que É?
...,...,...
294,3INsYP1Y8GG4qJvBsKCdXC,Don't Stop Me Now - ...Revisited
295,3AnQw2w00WUDHMKmtXGvVx,The Sea
296,72CPoBQn1XmRhJXS1Vy7O2,We No Speak Americano (JT Radio Edit)
297,4VqPOruhp5EdPBeR92t6lQ,Uprising


## Extract the Tracks' Audio Features

In [9]:
def fetch_audio_features(sp, username, playlist_id):
    playlist = fetch_playlist_tracks(sp, username, playlist_id)
    index = 0
    audio_features = []
    
    while index < playlist.shape[0]:
        audio_features += sp.audio_features(playlist.iloc[index:index + 50, 0])
        index += 50
    
    features_list = []
    for features in audio_features:
        features_list.append([features['danceability'],
                              features['energy'], features['tempo'],
                              features['loudness'], features['valence'],
                              features['speechiness'], features['instrumentalness'],
                              features['liveness'], features['acousticness']])
    
    df_audio_features = pd.DataFrame(features_list, columns=['danceability', 'energy',
                                                             'tempo', 'loudness', 'valence',
                                                             'speechiness', 'instrumentalness',
                                                             'liveness', 'acousticness'])
    
    df_playlist_audio_features = pd.concat([playlist, df_audio_features], axis=1)
    df_playlist_audio_features.set_index('track_id', inplace=True, drop=True)
        
    return df_playlist_audio_features

In [10]:
df = fetch_audio_features(sp, username, '4FO6rgOkMYPlIg8Zt7CK2v')
df.shape

(299, 10)

In [11]:
df

Unnamed: 0_level_0,track_name,danceability,energy,tempo,loudness,valence,speechiness,instrumentalness,liveness,acousticness
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2LawezPeJhN4AWuSB0GtAU,Have You Ever Seen The Rain,0.741,0.697,116.109,-7.028,0.774,0.0277,0.000023,0.1330,0.066400
2TDDvKi8n44EaLVJXd2GkP,Break It Down Again,0.678,0.744,108.989,-7.001,0.484,0.0245,0.432000,0.1190,0.012400
2xGKkcdKQUyqKKfOhd58gP,Stay,0.581,0.525,119.090,-11.357,0.652,0.0290,0.000030,0.1310,0.314000
7lo43I6slQYrmMZQYb99PC,A Visita,0.740,0.581,103.429,-6.466,0.969,0.0362,0.068900,0.1990,0.659000
6IBYNkdYt6UePLysFn6xu9,O Que É O Que É?,0.672,0.694,111.829,-8.867,0.939,0.0409,0.000000,0.2470,0.105000
...,...,...,...,...,...,...,...,...,...,...
3INsYP1Y8GG4qJvBsKCdXC,Don't Stop Me Now - ...Revisited,0.465,0.917,155.061,-6.331,0.390,0.2370,0.020000,0.2860,0.015200
3AnQw2w00WUDHMKmtXGvVx,The Sea,0.535,0.761,149.968,-7.589,0.248,0.0373,0.114000,0.0485,0.016500
72CPoBQn1XmRhJXS1Vy7O2,We No Speak Americano (JT Radio Edit),0.902,0.805,124.996,-5.005,0.737,0.0464,0.082000,0.0916,0.072500
4VqPOruhp5EdPBeR92t6lQ,Uprising,0.602,0.905,128.019,-4.046,0.411,0.0775,0.064000,0.1170,0.000202


In [12]:
df.to_csv('minha-musica-playlist.csv')

# END
-----

In [13]:
df = df2
df['score'] = df['danceability'] * 30 + df['energy'] * 20 + df['tempo'] * 20 + df['loudness'] * 10 + df['valence'] * 20


pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
df.sort_values('score', ascending=False)

NameError: name 'df2' is not defined

In [None]:
df['score'].plot.line();

## Create the New Playlist
Next, we are going to create a new playlist and add to it all the tracks we ended up selecting:

In [None]:
# Authorization flow

scope = "playlist-modify-public"
token = util.prompt_for_user_token(username, scope, client_id=cid, client_secret=secret, redirect_uri=redirect_uri)

if token:
    sp = spotipy.Spotify(auth=token)
else:
    print("Can't get token for", username)

In [None]:
def create_playlist(sp, username, playlist_name, playlist_description):
    playlists = sp.user_playlist_create(username, playlist_name, description = playlist_description)

In [None]:
create_playlist(sp, username, 'Minha Musica Machine Learning', 'Organizing playlist with ML')

In [None]:
# Find the new playlist's id
fetch_playlists(sp,username)

In [None]:
def enrich_playlist(sp, username, playlist_id, playlist_tracks):
    index = 0
    results = []
    
    while index < len(playlist_tracks):
        results += sp.user_playlist_add_tracks(username, playlist_id, tracks = playlist_tracks[index:index + 100])
        index += 100

In [None]:
sorted_tracks = df.sort_values('score', ascending=False)

In [None]:
sorted_tracks['score'].plot()

In [None]:
list_track = sorted_tracks.index

In [None]:
enrich_playlist(sp, username, '4Eu5xldDR0xSxZXuUj8Wu6', list_track)

## Conclusion
We have come a long way so far; we processed hundreds of tracks, inspected their audio features and finally selected the most befitting for the party ones, using Python and only. In this way, we managed to accomplish the initially stated missions:

✔️ we demonstrated how plain (descriptive) statistics and coding - when properly combined - can figure out time-consuming activities like this.

✔️ we "tasted" (once again, after the previous project of Twitter Sentiment Analysis on the next James Bond movie) the importance of having an API available to request, so as to extract meaningful data.
## Additional Analysis
There is quite a number of additional analyses to be performed, expanding this one. You are welcome to extend and shape yours in any direction you may prefer. For instance, you can additionally try and remove the tracks that have significantly low `danceability` and `valence` audio features, and boost even more the playlist's `score`. Furthemore, you can apply each approach sequentially (on the dataframe that is produced each time).