# Spotify Playlist Analysis

Using the Spotify API, this project extracts some interesting information about a public playlist.

The **spotipy** package is used throughout the project to connect to and query the Spotify API. The spotipy documentation is available at: https://spotipy.readthedocs.io/en/2.19.0/

Interesting Spotipy functions to explore: 
- artist_top_tracks
- artist_albums
- album

In [1]:
# Imports and authentication
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import cred
import datetime
import pandas as pd
from collections import Counter
from itertools import chain

# Authentication - without user: the client ID and secret are read from the
# local `cred` module and handed to spotipy's client-credentials manager.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cred.client_id,
    client_secret=cred.client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [2]:
# Functions

# From a playlist link, this function returns the playlist ID
def get_id_playlist(playlist_link):
    """Extract the playlist ID from a playlist link.

    Accepts a plain link (".../playlist/<id>"), a link followed by a query
    string or fragment (".../playlist/<id>?si=..."), a link with a trailing
    slash, and a "spotify:playlist:<id>" URI.

    Args:
        playlist_link: Link (or URI) of the playlist, as a string.

    Returns:
        The last path segment of the link, without query string or fragment.
    """
    # Drop the query string / fragment first, so that a "/" inside them is
    # never mistaken for a path separator.
    path = playlist_link.strip().split("?", 1)[0].split("#", 1)[0]
    # A trailing "/" would otherwise make the last segment an empty string.
    last_segment = path.rstrip("/").rsplit("/", 1)[-1]
    # For "spotify:playlist:<id>" style input keep only the ID part.
    return last_segment.rsplit(":", 1)[-1]

# Build a URI of the form "spotify:<category>:<id>" from a category and an ID
def generate_uri(category, id):
    """Return the string "spotify:<category>:<id>" for the given parts."""
    return ":".join(("spotify", category, id))

# From a playlist link, get the main information of the playlist
def playlist_info(playlist_link):
    """Fetch the cover URL, name, description and owner of a playlist.

    Args:
        playlist_link: Link of the playlist; its ID is extracted with
            ``get_id_playlist``.

    Returns:
        A dict with the keys 'cover', 'name', 'description' and 'owner'.
        'cover' is the URL of the first cover image, or None when the
        cover-image request returns no images.
    """
    # Parse the ID once and reuse it for both API calls.
    playlist_id = get_id_playlist(playlist_link)
    playlist = sp.playlist(playlist_id)
    covers = sp.playlist_cover_image(playlist_id)

    return {
        # Guard against an empty image list instead of raising IndexError.
        'cover': covers[0]['url'] if covers else None,
        'name': playlist["name"],
        'description': playlist["description"],
        'owner': playlist["owner"]["display_name"],  # Other information of the owner also available
    }

# Turn one track object (the "track" entry of a playlist item) into a row dict
def _track_row(track):
    return {
        'id': track["id"],
        'name': track["name"],
        'album': track["album"]["name"],
        # Duration is truncated to whole seconds.
        'duration': datetime.timedelta(seconds=track["duration_ms"] // 1000),
        'popularity': track["popularity"],
        # All artist names of the track, joined as "A, B, C".
        'artist(s)': ", ".join(artist["name"] for artist in track["artists"]),
    }


# From a playlist link, creates a Pandas DataFrame with the tracks information
def create_playlist_df(playlist_link):
    """Build a DataFrame with one row per track of a playlist.

    Prints the total number of tracks reported by the API, then walks every
    result page (following "next") and collects the track information.

    Args:
        playlist_link: Link of the playlist; its ID is extracted with
            ``get_id_playlist``.

    Returns:
        A pandas DataFrame with the columns 'id', 'name', 'album',
        'duration' (a ``datetime.timedelta``), 'popularity' and 'artist(s)'.
        The columns are present even when the playlist has no tracks.
    """
    columns = ['id', 'name', 'album', 'duration', 'popularity', 'artist(s)']

    tracks = sp.playlist_tracks(get_id_playlist(playlist_link))
    print("Number of tracks on the playlist: " + str(tracks["total"]))

    tracks_playlist = []
    while True:
        for item in tracks["items"]:
            track = item["track"]
            # NOTE(review): the API can apparently return items whose "track"
            # is null (e.g. tracks that are no longer available) - skip them
            # instead of crashing on the first field access.
            if track is None:
                continue
            tracks_playlist.append(_track_row(track))
        if not tracks["next"]:
            break
        tracks = sp.next(tracks)

    # Passing the columns explicitly keeps the schema stable for an empty
    # playlist, so later column lookups do not raise KeyError.
    return pd.DataFrame(tracks_playlist, columns=columns)

# Return the top artists (5 by default) with most track appearances on a playlist
def top_artists_playlist(playlist_df, n=5):
    """Count how many tracks each artist appears on and return the top ``n``.

    The 'artist(s)' column is split on "," and each piece is stripped, so a
    track with several artists counts once for each of them. An artist whose
    own name contains a comma is therefore counted as several artists.

    Args:
        playlist_df: DataFrame with an 'artist(s)' column of comma-separated
            artist names.
        n: Maximum number of artists to return (default 5).

    Returns:
        A pandas Series (named 0) indexed by artist name, holding the number
        of appearances, sorted in descending order and cut to ``n`` entries.
    """
    names = chain.from_iterable(playlist_df['artist(s)'].str.split(','))
    counts = Counter(map(str.strip, names))

    # Build the Series directly: DataFrame.squeeze() would collapse a
    # single-artist result to a scalar, which has no sort_values().
    res = pd.Series(counts, name=0, dtype='int64')

    return res.sort_values(ascending=False).head(n)

# Return the top albums (5 by default) with most track appearances on a playlist
def top_albums_playlist(playlist_df, n=5):
    """Count the tracks per album and return the ``n`` most frequent albums.

    Args:
        playlist_df: DataFrame with an 'album' column.
        n: Maximum number of albums to return (default 5).

    Returns:
        A pandas Series named 'album', indexed by album name, holding the
        number of tracks per album, sorted in descending order and cut to
        ``n`` entries.
    """
    album_counts = playlist_df.groupby('album')['album'].count()
    return album_counts.sort_values(ascending=False).head(n)

In [6]:
# Build the track table for the playlist and print its top artists and albums
playlist_best_of_all = create_playlist_df("https://open.spotify.com/playlist/3F16u8SWwyeHTQzjMb5Nxg")
print(top_artists_playlist(playlist_best_of_all))
print("\n")
print(top_albums_playlist(playlist_best_of_all))
# NOTE(review): this is not the last expression of the cell, so the notebook
# presumably does not display it - wrap it in print()/display() if the
# preview is wanted.
playlist_best_of_all.head()

# Most popular track of the playlist. idxmax() returns an index *label*, so
# the row is looked up with .loc (label-based) rather than .iloc
# (position-based); the two only coincide for a default 0..n-1 index.
loc_max = playlist_best_of_all["popularity"].idxmax()
playlist_best_of_all.loc[loc_max]

Number of tracks on the playlist: 52
MC PR          4
MC Braz        3
Mc Frog        3
Mc Delux       3
DJ Jeeh FDC    2
Name: 0, dtype: int64


album
AUTOMOTIVO DAS UMBRELLA        1
AUTOMOTIVO EXTRADIMENSIONAL    1
O Porte da 40                  1
ONDA DO BERIMBAU               1
Pegada de Malvado              1
Name: album, dtype: int64


id            4NJKCE1TBC653P3dCnto0u
name                Simplesmente Ela
album               Simplesmente Ela
duration             0 days 00:02:41
popularity                        80
artist(s)                  Mc Gabzin
Name: 44, dtype: object