### Load Libraries

In [2]:
import os
import sys

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from bs4 import BeautifulSoup
import requests

### Enter Spotify credentials

In [3]:
spotify_key = os.getenv('SPOTIFY_KEY')
spotify_secret = os.getenv('SPOTIFY_SECRET')

client_credentials_manager = SpotifyClientCredentials(client_id=spotify_key, 
                                                    client_secret=spotify_secret)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
top_50_us_playlist_uri = '37i9dQZEVXbLRQDuF5jeBp'

### Pull songs from playlists

In [5]:
for x in sp.playlist_tracks(top_50_us_playlist_uri)["items"]:
    print(x)

{'added_at': '2024-02-17T09:58:50Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'}, 'href': 'https://api.spotify.com/v1/users/', 'id': '', 'type': 'user', 'uri': 'spotify:user:'}, 'is_local': False, 'primary_color': None, 'track': {'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4xPQFgDA5M2xa0ZGo5iIsv'}, 'href': 'https://api.spotify.com/v1/artists/4xPQFgDA5M2xa0ZGo5iIsv', 'id': '4xPQFgDA5M2xa0ZGo5iIsv', 'name': '¥$', 'type': 'artist', 'uri': 'spotify:artist:4xPQFgDA5M2xa0ZGo5iIsv'}, {'external_urls': {'spotify': 'https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x'}, 'href': 'https://api.spotify.com/v1/artists/5K4W6rqBFWDnAN6FQUkS6x', 'id': '5K4W6rqBFWDnAN6FQUkS6x', 'name': 'Kanye West', 'type': 'artist', 'uri': 'spotify:artist:5K4W6rqBFWDnAN6FQUkS6x'}, {'external_urls': {'spotify': 'https://open.spotify.com/artist/7c0XG5cIJTrrAgEC3ULPiq'}, 'href': 'https://api.spotify.com/v1/artists/7c0XG5cIJT

In [6]:
playlist_top_50_us_dict = sp.playlist_tracks(top_50_us_playlist_uri)["items"]

In [33]:
playlist_top_50_us_dict[0]['track']['album']['uri']

'spotify:album:30zwjSQEodaUXCn11nmiVF'

In [8]:
def get_top_50_playlist_tracks(playist: dict) -> pd.DataFrame:

    tracks = []

    for track in playlist_top_50_us_dict:

        # Track ID
        track_id = track['track']['id']
        # Track URI
        track_uri = track['track']['uri']
        # Track Name
        track_name = track['track']['name']
        # Artist Info
        artists = track['track']['artists']
        track_artists = []
        tract_artist_id = []
        for artist in artists:
            artist_name = artist['name']
            artist_id = artist['id']
            track_artists.append([artist_name, artist_id])
        # Track Popularity
        track_popularity = track['track']['popularity']
        # Is Track Explicit
        track_explicit = track['track']['explicit']
        # Track Duration (ms)
        track_duration_ms  = track['track']['duration_ms']
        

        # Append results to list
        tracks.append([track_id, track_uri, track_name, track_artists, track_popularity, track_explicit, track_duration_ms])

        # Tracks DataFrame
        tracks_df_cols = ['track_id', 'track_uri', 'track_name', 'track_artists', 'track_popularity', 'track_explicit', 'track_duration_ms']
        tracks_df = pd.DataFrame(tracks, columns=tracks_df_cols)

    return tracks_df

In [9]:
tracks_df = get_top_50_playlist_tracks(playlist_top_50_us_dict)

In [10]:
tracks_df.head()

Unnamed: 0,track_id,track_uri,track_name,track_artists,track_popularity,track_explicit,track_duration_ms
0,3w0w2T288dec0mgeZZqoNN,spotify:track:3w0w2T288dec0mgeZZqoNN,CARNIVAL,"[[¥$, 4xPQFgDA5M2xa0ZGo5iIsv], [Kanye West, 5K...",33,True,264324
1,7AYmToZ5y67fBjq4RLFbuq,spotify:track:7AYmToZ5y67fBjq4RLFbuq,TEXAS HOLD 'EM,"[[Beyoncé, 6vWDO969PvNqNYHIOW5v0m]]",34,True,235636
2,6tNQ70jh4OwmPGpYy6R2o9,spotify:track:6tNQ70jh4OwmPGpYy6R2o9,Beautiful Things,"[[Benson Boone, 22wbnEMDvgVIAGdFeek6ET]]",97,False,180304
3,0mflMxspEfB0VbI1kyLiAv,spotify:track:0mflMxspEfB0VbI1kyLiAv,Stick Season,"[[Noah Kahan, 2RQXRUsr4IW1f3mKyKsy4B]]",98,False,182346
4,52eIcoLUM25zbQupAZYoFh,spotify:track:52eIcoLUM25zbQupAZYoFh,redrum,"[[21 Savage, 1URnnhqYAYcrqrcwql10ft]]",97,True,270697


In [11]:
# Saving track ID to list to pull audio data
all_track_uris = tracks_df['track_uri'].to_list()

In [12]:
# Audio Features list
audio_features = []

for track_uri in all_track_uris[0:4]:
    feat = sp.audio_features(track_uri)[0]
    track_acousticness = feat['acousticness']
    track_danceability = feat['danceability']
    track_energy = feat['energy']
    track_speechiness = feat['speechiness']
    track_instrumentalness = feat['instrumentalness']
    track_loudness = feat['loudness']
    track_tempo = feat['tempo']
    track_liveness = feat['liveness']
    track_valence = feat['valence']

    analysis = sp.audio_analysis(track_uri)['track']
    track_time_signature = analysis['time_signature']
    track_key = analysis['key']

    # Append results to list
    audio_features.append([track_uri, track_acousticness, track_danceability, track_energy, track_speechiness, track_instrumentalness, track_loudness, track_tempo, track_liveness, track_valence, track_time_signature, track_key])

    # Features DataFrame
    features_df_cols = ['track_uri', 'track_acousticness', 'track_danceability', 'track_energy', 'track_speechiness', 'track_instrumentalness', 'track_loudness', 'track_tempo', 'track_liveness', 'track_valence', 'track_time_signature', 'track_key']
    features_df = pd.DataFrame(audio_features, columns=features_df_cols)

In [13]:
features_df.head()

Unnamed: 0,track_uri,track_acousticness,track_danceability,track_energy,track_speechiness,track_instrumentalness,track_loudness,track_tempo,track_liveness,track_valence,track_time_signature,track_key
0,spotify:track:3w0w2T288dec0mgeZZqoNN,0.189,0.594,0.811,0.159,0,-5.746,148.144,0.339,0.311,4,1
1,spotify:track:7AYmToZ5y67fBjq4RLFbuq,0.588,0.725,0.709,0.072,0,-6.514,110.024,0.135,0.353,4,2
2,spotify:track:6tNQ70jh4OwmPGpYy6R2o9,0.151,0.472,0.471,0.0603,0,-5.692,105.029,0.14,0.219,3,10
3,spotify:track:0mflMxspEfB0VbI1kyLiAv,0.782,0.662,0.488,0.0682,0,-6.894,117.913,0.102,0.817,4,9


In [18]:
playlist_df = tracks_df.merge(features_df, how='inner', on='track_uri')

In [19]:
playlist_df.head()

Unnamed: 0,track_id,track_uri,track_name,track_artists,track_popularity,track_explicit,track_duration_ms,track_acousticness,track_danceability,track_energy,track_speechiness,track_instrumentalness,track_loudness,track_tempo,track_liveness,track_valence,track_time_signature,track_key
0,3w0w2T288dec0mgeZZqoNN,spotify:track:3w0w2T288dec0mgeZZqoNN,CARNIVAL,"[[¥$, 4xPQFgDA5M2xa0ZGo5iIsv], [Kanye West, 5K...",33,True,264324,0.189,0.594,0.811,0.159,0,-5.746,148.144,0.339,0.311,4,1
1,7AYmToZ5y67fBjq4RLFbuq,spotify:track:7AYmToZ5y67fBjq4RLFbuq,TEXAS HOLD 'EM,"[[Beyoncé, 6vWDO969PvNqNYHIOW5v0m]]",34,True,235636,0.588,0.725,0.709,0.072,0,-6.514,110.024,0.135,0.353,4,2
2,6tNQ70jh4OwmPGpYy6R2o9,spotify:track:6tNQ70jh4OwmPGpYy6R2o9,Beautiful Things,"[[Benson Boone, 22wbnEMDvgVIAGdFeek6ET]]",97,False,180304,0.151,0.472,0.471,0.0603,0,-5.692,105.029,0.14,0.219,3,10
3,0mflMxspEfB0VbI1kyLiAv,spotify:track:0mflMxspEfB0VbI1kyLiAv,Stick Season,"[[Noah Kahan, 2RQXRUsr4IW1f3mKyKsy4B]]",98,False,182346,0.782,0.662,0.488,0.0682,0,-6.894,117.913,0.102,0.817,4,9


In [20]:
playlist_df.shape

(4, 18)

In [1]:

import os

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from dataclasses import dataclass

@dataclass
class DataExtractionConfig:
    raw_data_path: str=os.path.join('data', 'raw', 'raw.csv')

class ExtractData:
    def __init__(self):
        self.extraction_config = DataExtractionConfig()

    def spotify_credentials(self) -> spotipy:
        spotify_key = os.getenv('SPOTIFY_KEY')
        spotify_secret = os.getenv('SPOTIFY_SECRET')

        client_credentials_manager = SpotifyClientCredentials(client_id=spotify_key, 
                                                    client_secret=spotify_secret)

        sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

        return sp
    
    def get_playlist_data(self, playlist_uri: str) -> pd.DataFrame:

        '''Create a DataFrame for a Spotify playlist

        This function pulls song and artist info from each song 
        on the given playlist

        Args:
            playlist_uri (str): The playlist_uri to pull info from

        Returns:
            DataFrame of playlist data
        '''
        sp = self.spotify_credentials()

        tracks = []

        #playlist_uri = '37i9dQZEVXbLRQDuF5jeBp'
        playlist = sp.playlist_tracks(playlist_uri)["items"]

        for track in playlist:

            # Track ID
            track_id = track['track']['id']
            # Track URI
            track_uri = track['track']['uri']
            # Track Name
            track_name = track['track']['name']
            # Artist Info
            artists = track['track']['artists']
            track_artists = []
            #track_artist_id = []
            for artist in artists:
                artist_name = artist['name']
                artist_id = artist['id']
                track_artists.append([artist_name, artist_id])
            # Track Popularity
            track_popularity = track['track']['popularity']
            # Is Track Explicit
            track_explicit = track['track']['explicit']
            # Track Duration (ms)
            track_duration_ms  = track['track']['duration_ms']
            # Album Name
            track_album_name = track['track']['album']['name']
            # Album URI
            track_album_uri = track['track']['album']['uri']

            # Append results to list
            tracks.append([track_id, track_uri, track_name, track_artists, track_popularity, track_explicit, track_duration_ms, track_album_name, track_album_uri])

            # Tracks DataFrame
            tracks_df_cols = ['track_id', 'track_uri', 'track_name', 'track_artists', 'track_popularity', 'track_explicit', 'track_duration_ms', 'track_album_name', 'track_album_uri']
            tracks_df = pd.DataFrame(tracks, columns=tracks_df_cols)

            # Pull track URIs to retrieve audio data
            all_track_uris = tracks_df['track_uri'].to_list()

            # Initiate audio features list
            audio_features = []
            # Loop through each track and pull audio features
            for track_uri in all_track_uris[0:4]:
                feat = sp.audio_features(track_uri)[0]
                track_acousticness = feat['acousticness']
                track_danceability = feat['danceability']
                track_energy = feat['energy']
                track_speechiness = feat['speechiness']
                track_instrumentalness = feat['instrumentalness']
                track_loudness = feat['loudness']
                track_tempo = feat['tempo']
                track_liveness = feat['liveness']
                track_valence = feat['valence']

                analysis = sp.audio_analysis(track_uri)['track']
                track_time_signature = analysis['time_signature']
                track_key = analysis['key']

                # Append results to list
                audio_features.append([track_uri, track_acousticness, track_danceability, track_energy, track_speechiness, track_instrumentalness, track_loudness, track_tempo, track_liveness, track_valence, track_time_signature, track_key])

                # Features DataFrame
                features_df_cols = ['track_uri', 'track_acousticness', 'track_danceability', 'track_energy', 'track_speechiness', 'track_instrumentalness', 'track_loudness', 'track_tempo', 'track_liveness', 'track_valence', 'track_time_signature', 'track_key']
                features_df = pd.DataFrame(audio_features, columns=features_df_cols)

            # Join track_df and feature_df
            playlist_df = tracks_df.merge(features_df, how='inner', on='track_uri')
        
        return playlist_df

In [2]:
extraction_config = DataExtractionConfig()

def spotify_credentials():
    spotify_key = os.getenv('SPOTIFY_KEY')
    spotify_secret = os.getenv('SPOTIFY_SECRET')

    client_credentials_manager = SpotifyClientCredentials(client_id=spotify_key, 
                                                client_secret=spotify_secret)

    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

    return sp

In [4]:
sp = spotify_credentials()

In [6]:
playlist_uri = '37i9dQZEVXbLRQDuF5jeBp'

In [7]:
tracks = []

#playlist_uri = '37i9dQZEVXbLRQDuF5jeBp'
playlist = sp.playlist_tracks(playlist_uri)["items"]

for track in playlist:

    # Track ID
    track_id = track['track']['id']
    # Track URI
    track_uri = track['track']['uri']
    # Track Name
    track_name = track['track']['name']
    # Artist Info
    artists = track['track']['artists']
    track_artists = []
    #track_artist_id = []
    for artist in artists:
        artist_name = artist['name']
        artist_id = artist['id']
        track_artists.append([artist_name, artist_id])
    # Track Popularity
    track_popularity = track['track']['popularity']
    # Is Track Explicit
    track_explicit = track['track']['explicit']
    # Track Duration (ms)
    track_duration_ms  = track['track']['duration_ms']
    # Album Name
    track_album_name = track['track']['album']['name']
    # Album URI
    track_album_uri = track['track']['album']['uri']

    # Append results to list
    tracks.append([track_id, track_uri, track_name, track_artists, track_popularity, track_explicit, track_duration_ms, track_album_name, track_album_uri])

    # Tracks DataFrame
    tracks_df_cols = ['track_id', 'track_uri', 'track_name', 'track_artists', 'track_popularity', 'track_explicit', 'track_duration_ms', 'track_album_name', 'track_album_uri']
    tracks_df = pd.DataFrame(tracks, columns=tracks_df_cols)

    # Pull track URIs to retrieve audio data
    all_track_uris = tracks_df['track_uri'].to_list()

    # Initiate audio features list
    audio_features = []
    # Loop through each track and pull audio features
    for track_uri in all_track_uris[0:4]:
        feat = sp.audio_features(track_uri)[0]
        track_acousticness = feat['acousticness']
        track_danceability = feat['danceability']
        track_energy = feat['energy']
        track_speechiness = feat['speechiness']
        track_instrumentalness = feat['instrumentalness']
        track_loudness = feat['loudness']
        track_tempo = feat['tempo']
        track_liveness = feat['liveness']
        track_valence = feat['valence']

        analysis = sp.audio_analysis(track_uri)['track']
        track_time_signature = analysis['time_signature']
        track_key = analysis['key']

        # Append results to list
        audio_features.append([track_uri, track_acousticness, track_danceability, track_energy, track_speechiness, track_instrumentalness, track_loudness, track_tempo, track_liveness, track_valence, track_time_signature, track_key])

        # Features DataFrame
        features_df_cols = ['track_uri', 'track_acousticness', 'track_danceability', 'track_energy', 'track_speechiness', 'track_instrumentalness', 'track_loudness', 'track_tempo', 'track_liveness', 'track_valence', 'track_time_signature', 'track_key']
        features_df = pd.DataFrame(audio_features, columns=features_df_cols)

    # Join track_df and feature_df
    playlist_df = tracks_df.merge(features_df, how='inner', on='track_uri')

In [8]:
playlist_df.head()

Unnamed: 0,track_id,track_uri,track_name,track_artists,track_popularity,track_explicit,track_duration_ms,track_album_name,track_album_uri,track_acousticness,track_danceability,track_energy,track_speechiness,track_instrumentalness,track_loudness,track_tempo,track_liveness,track_valence,track_time_signature,track_key
0,7AYmToZ5y67fBjq4RLFbuq,spotify:track:7AYmToZ5y67fBjq4RLFbuq,TEXAS HOLD 'EM,"[[Beyoncé, 6vWDO969PvNqNYHIOW5v0m]]",67,True,235636,TEXAS HOLD 'EM,spotify:album:2NXwHjhgaAdkDy6GPSxMAd,0.588,0.725,0.709,0.072,0,-6.514,110.024,0.135,0.353,4,2
1,3w0w2T288dec0mgeZZqoNN,spotify:track:3w0w2T288dec0mgeZZqoNN,CARNIVAL,"[[¥$, 4xPQFgDA5M2xa0ZGo5iIsv], [Kanye West, 5K...",81,True,264324,VULTURES 1,spotify:album:30zwjSQEodaUXCn11nmiVF,0.189,0.594,0.811,0.159,0,-5.746,148.144,0.339,0.311,4,1
2,6tNQ70jh4OwmPGpYy6R2o9,spotify:track:6tNQ70jh4OwmPGpYy6R2o9,Beautiful Things,"[[Benson Boone, 22wbnEMDvgVIAGdFeek6ET]]",98,False,180304,Beautiful Things,spotify:album:29aSKB1qPEbN0Qf9OPSQpw,0.151,0.472,0.471,0.0603,0,-5.692,105.029,0.14,0.219,3,10
3,0mflMxspEfB0VbI1kyLiAv,spotify:track:0mflMxspEfB0VbI1kyLiAv,Stick Season,"[[Noah Kahan, 2RQXRUsr4IW1f3mKyKsy4B]]",98,False,182346,Stick Season,spotify:album:50ZenUP4O2Q5eCy2NRNvuz,0.782,0.662,0.488,0.0682,0,-6.894,117.913,0.102,0.817,4,9


In [36]:
playlist_uri = '37i9dQZEVXbLRQDuF5jeBp'

In [39]:
playlist_df = ExtractData.get_playlist_data(playlist_uri=playlist_uri)

TypeError: ExtractData.get_playlist_data() missing 1 required positional argument: 'self'