## Exploratory Data Analysis of Spotify Playlist
(By:- Vineet Singh Negi)

This notebook uses Spotipy to extract playlists.
Song data comes from the Spotify playlist that is provided while running.

Data features are gathered from the Spotify API (here)[https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features].

In [1]:
import spotipy
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyOAuth 
from spotipy.oauth2 import SpotifyClientCredentials
import time
from pprint import pprint
from datetime import date
import pandas as pd
import plotly.express as px

In [2]:
##==============================================
## Function to get Client Id and Client Secret
##==============================================

client_id = '' # <-- Enter your client_id.
client_secret = '' # <-- Enter your client secret
auth_manage = SpotifyClientCredentials(client_id = client_id, client_secret = client_secret)
sp = spotipy.Spotify(client_credentials_manager = auth_manage) # Spotify object to access API

In [3]:
##==================================
## Getting ID of songs in Playlist
##==================================

def getPlayListIDs (user, playlist_id):
    track_ids = []
    album = sp.user_playlist(user, playlist_id)
    for item in album ['tracks']['items']:
        track = item['track']
        track_ids.append(track['id'])
    return track_ids

In [4]:
# Drake Playlist URI: 37i9dQZF1DX7QOv5kjbU68, One Direction: 37i9dQZF1DX6p4TJxzMRDe
# Top Hindi Hits: 37i9dQZF1DX0XUfTFmNBRM
track_ids = getPlayListIDs('spotify', '37i9dQZF1DX0XUfTFmNBRM') # <-- Enter any playlist URI
print(len(track_ids))
pprint(track_ids)

50
['4QyX8CBSjcoq4iMZuvifyF',
 '4tzx5af2qpwgyjG6JQGthj',
 '2ncqKdTj6dz7tWoTMMrAtq',
 '0645eBDehHcqfiF15hscQV',
 '20LfqGqyGqjOsw9YA4nOTG',
 '00ake6KfnzbZ2MaRLGqsyX',
 '73K33p4Vyz9koXGqmL5eFs',
 '03hJuEQpEQERrHpjcXKWzJ',
 '7Ky9U9iHjGBiRax3joCRDS',
 '6FAYpZ4jve8vpvTwUvjK6H',
 '2vPrBucKCfKmafHhSfJ2pt',
 '5PUXKVVVQ74C3gl5vKy9Li',
 '51DEaelXeJJ6cFFYbX8Hal',
 '6t7PuZfHAtNGheWisgUq3I',
 '1AmwIgy0WYLJJ5e24aCFTm',
 '0czcoKJbJt08NqKrvSbbz7',
 '2Y2l0h051Vk4qUG2ZH7KKy',
 '2VsWBRKYkZH9FdD7AE6hRQ',
 '3dpM3ijJz0vGzbkh5DcMXz',
 '1418IuVKQPTYqt7QNJ9RXN',
 '0CtZpaOhtzvLV3FfcsVpQo',
 '2ZD4aIEepqZsdxPxLSuUhm',
 '0OsuHhPPArdFzJGuIAacnN',
 '3SiQsAVS3CDJQzWPRyX1XP',
 '1pDvtLmrjzr5FSQItRLWxA',
 '4svSc7BE6r9xMEk0GosnVu',
 '5Z2GQ0eBKoatNYqbGayihP',
 '4VsP4Dm8gsibRxB5I2hEkw',
 '0UZLaEOSIc5kg0cc7eszqC',
 '0zlGnseLGzRIBA0TJcb3Bo',
 '0QT9B8Yfrz0vi66oXMrKJg',
 '7E0A86Yhc0ZyOl6aAPE5wN',
 '5kwldaZvtbiTmRYsmlyxAN',
 '1t39FBb0zsKv5krZaqZKCB',
 '6VBhH7CyP56BXjp8VsDFPZ',
 '1ByVM9F8HeekD3aGXatRWd',
 '0v77QqHiZUvfJkCk2rGRqw'

In [5]:
##=========================================
## Extracting the Track info and features
##=========================================

def getPlaylistFeatures(id):
    track_info = sp.track(id)
    features_info = sp.audio_features(id)

# Track Info
    name = track_info['name']
    album = track_info['album']['name']
    artist = track_info['album']['artists'][0]['name']
    release_date = track_info['album']['release_date']
    length = track_info['duration_ms']
    popularity = track_info['popularity']

# Track Features
    acousticness = features_info[0]['acousticness']
    danceability = features_info[0]['danceability']
    energy = features_info[0]['energy']
    instrumentalness = features_info[0]['instrumentalness']
    liveness = features_info[0]['liveness']
    loudness = features_info[0]['loudness']
    speechiness = features_info[0]['speechiness']
    tempo = features_info[0]['tempo']
    time_signature = features_info[0]['time_signature']

    track_data = [name, album, artist, release_date, length, popularity, acousticness,
                   danceability, energy, instrumentalness, liveness, loudness, speechiness, 
                   tempo, time_signature]
    return track_data

In [6]:
##==========================================================
## Appending the track features for each music into a list
##==========================================================

track_list = []
for i in range(len(track_ids)):
    time.sleep(.3)
    track_data = getPlaylistFeatures(track_ids[i])
    track_list.append(track_data)

playlist = pd.DataFrame(track_list, columns = ['Name', 'Album', 'Artist', 'Release_Date', 'Length', 
                                               'Popularity', 'Acousticness', 'Danceability', 
                                               'Energy', 'Instrumentness', 'Liveness',
                                               'Loudness', 'Speechness', 'Tempo',
                                               'Time_Signature'])

playlist.to_csv("playlist_features.csv")
playlist.head()

Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
0,"What Jhumka ? (From ""Rocky Aur Rani Kii Prem K...","What Jhumka ? (From ""Rocky Aur Rani Kii Prem K...",Various Artists,2023-07-12,213611,64,0.235,0.821,0.823,0.0,0.207,-5.455,0.1,107.953,4
1,"Zinda Banda (From ""Jawan"")","Zinda Banda (From ""Jawan"")",Anirudh Ravichander,2023-07-31,264000,72,0.00785,0.613,0.881,3e-06,0.155,-5.974,0.249,149.978,4
2,Mahiye Jinna Sohna,Mahiye Jinna Sohna,Darshan Raval,2023-06-22,181250,86,0.736,0.567,0.54,3.2e-05,0.164,-5.754,0.0406,92.027,4
3,"Janiye (from the Netflix Film ""Chor Nikal Ke B...","Janiye (from the Netflix Film ""Chor Nikal Ke B...",Vishal Mishra,2023-03-17,223390,80,0.44,0.578,0.444,0.0,0.146,-11.447,0.0644,76.032,4
4,Tere Vaaste,Zara Hatke Zara Bachke (Original Motion Pictur...,Sachin-Jigar,2023-05-26,189136,35,0.315,0.762,0.717,1.9e-05,0.0914,-8.316,0.0372,109.996,4


In [7]:
# Knowing the Datatype of Columns before working on it
playlist.dtypes

Name               object
Album              object
Artist             object
Release_Date       object
Length              int64
Popularity          int64
Acousticness      float64
Danceability      float64
Energy            float64
Instrumentness    float64
Liveness          float64
Loudness          float64
Speechness        float64
Tempo             float64
Time_Signature      int64
dtype: object

In [8]:
##=================================
## Getting Highest Populated Song
##=================================

high_po = playlist.loc[lambda playlist: playlist["Popularity"] >= 60].sort_values(by=["Popularity"], ascending = False)
high_po.head()

Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
2,Mahiye Jinna Sohna,Mahiye Jinna Sohna,Darshan Raval,2023-06-22,181250,86,0.736,0.567,0.54,3.2e-05,0.164,-5.754,0.0406,92.027,4
19,Maan Meri Jaan,Champagne Talk,King,2022-10-12,194653,85,0.354,0.698,0.505,0.0,0.0995,-8.242,0.0356,95.868,4
6,Malang Sajna,Malang Sajna,Sachet Tandon,2022-12-19,161041,84,0.232,0.739,0.716,0.0,0.33,-4.964,0.037,113.943,4
15,"O Bedardeya (From ""Tu Jhoothi Main Makkaar"")","O Bedardeya (From ""Tu Jhoothi Main Makkaar"")",Pritam,2023-03-04,313051,83,0.794,0.582,0.418,0.0,0.109,-8.701,0.0288,112.909,4
27,Kahani Suno 2.0,Kahani Suno 2.0,Kaifi Khalil,2022-05-31,173637,82,0.909,0.577,0.379,0.000586,0.0986,-13.235,0.0374,139.585,3


In [9]:
##================================
## Getting Lowest Populated Song
##================================

low_po = playlist.loc[lambda playlist: playlist["Popularity"] <= 60].sort_values(by=["Popularity"], ascending=True)
low_po.head()

Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
4,Tere Vaaste,Zara Hatke Zara Bachke (Original Motion Pictur...,Sachin-Jigar,2023-05-26,189136,35,0.315,0.762,0.717,1.9e-05,0.0914,-8.316,0.0372,109.996,4
14,"Ve Kamleya (From ""Rocky Aur Rani Kii Prem Kaha...","Ve Kamleya (From ""Rocky Aur Rani Kii Prem Kaha...",Various Artists,2023-07-18,246988,53,0.81,0.468,0.658,0.0333,0.12,-7.6,0.0668,87.607,4
37,Tu Mile Dil Khile,Tu Mile Dil Khile - Single,Asees Kaur,2023-01-19,197392,53,0.305,0.736,0.634,5e-06,0.08,-4.935,0.0307,93.96,4
25,Rang Lageya,Rang Lageya - Single,Mohit Chauhan,2021-03-17,227962,57,0.504,0.651,0.503,0.0,0.469,-8.729,0.0261,108.014,4
44,Meri Jaan,Gangubai Kathiawadi (Original Motion Picture S...,Sanjay Leela Bhansali,2022-02-19,238103,57,0.629,0.745,0.432,0.0,0.351,-8.092,0.0406,104.018,4


In [10]:
##===========================================
## Getting the oldest songs in the playlist
##===========================================

# Changing the 'Release_Date' column type from object to date
playlist["Release_Date"] = pd.to_datetime(playlist["Release_Date"])


# today = date.today()
# date = today.strftime("%Y/%m/%d")
# old_songs = playlist.loc[lambda playlist: playlist["Release_Date"] < date].sort_values(by=["Release_Date"], ascending=True)
# old_songs.head()

                            #OR

old_songs = playlist.sort_values(by="Release_Date", ascending=True)
old_songs.head()

Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
48,"Shayad (From ""Love Aaj Kal"")","Shayad (From ""Love Aaj Kal"")",Pritam,2020-01-22,247941,72,0.789,0.487,0.456,2e-06,0.141,-7.144,0.0319,136.088,4
41,Asal Mein,Asal Mein,Darshan Raval,2020-02-18,224240,80,0.721,0.656,0.551,1e-06,0.0947,-6.43,0.0299,114.106,4
28,Manjha,Manjha,Vishal Mishra,2020-03-17,191647,78,0.674,0.524,0.515,0.0,0.104,-7.478,0.0427,169.882,4
39,Chaand Baaliyan,Chaand Baaliyan,Aditya A,2020-03-28,103264,81,0.796,0.729,0.396,0.0,0.104,-8.98,0.0359,97.281,4
47,Tu Aake Dekhle,The Carnival,King,2020-09-21,270000,69,0.515,0.77,0.84,0.00452,0.145,-4.022,0.107,96.012,4


In [11]:
##================================================================
## Using the Tempo column against the Danceability column to
## determine the scatter plots of popular songs in the playlist.
##================================================================

fig = px.scatter(playlist, x = playlist['Tempo'], y = playlist['Danceability'], color = playlist['Popularity'], 
                size = playlist['Popularity'], title = 'Scatter Plot of Popular Songs using Tempo angainst Danceability')

fig.show()

In [12]:
##========================================================
## Artist whose song has high popularity greater than 70
##========================================================

pop_70 =  playlist.loc[lambda playlist: playlist["Popularity"] > 70].sort_values(by=["Popularity"], ascending=False)

fig = px.pie(pop_70, values = "Popularity", names = "Artist", 
             title = 'Artist whose song has high popularity greater than 70')
fig.update_traces(textposition = "inside", textinfo = "percent + label") 
fig.show()