In [1]:
import base64
import os
import time

import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

In [1]:
# Spotify API credentials.
# Read from the environment when set, so real secrets never need to be pasted
# into the notebook. The placeholder strings are the fallback and are not
# valid credentials.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'your_client_id')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'your_client_secret')

# Encode "<client_id>:<client_secret>" as base64 for the HTTP Basic
# Authorization header sent by get_access_token().
credentials = f"{client_id}:{client_secret}"
encoded_credentials = base64.b64encode(credentials.encode()).decode()

# Function to get Spotify access token
def get_access_token(session=None, timeout=10):
    """Request an access token using the client-credentials grant.

    Sends the module-level ``encoded_credentials`` as an HTTP Basic
    Authorization header to the Spotify accounts token endpoint.

    Args:
        session: Optional object exposing a ``post`` method compatible with
            ``requests.post`` (for example a ``requests.Session`` with a retry
            adapter mounted). When omitted, ``requests.post`` is used.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
        KeyError: If a successful response carries no ``access_token``.
    """
    auth_url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": f"Basic {encoded_credentials}"
    }
    data = {
        "grant_type": "client_credentials"
    }
    http = session if session is not None else requests
    # Without a timeout the call can block indefinitely on a stalled connection.
    response = http.post(auth_url, headers=headers, data=data, timeout=timeout)
    # Fail loudly on bad credentials instead of a KeyError on the error payload.
    response.raise_for_status()
    return response.json()['access_token']

# Function to search for a track and get the track ID
def get_track_id(track_name, artist_name, access_token, session=None, timeout=10):
    """Search for a track and return the ID of the first match.

    Args:
        track_name: Track title, used in the ``track:`` field filter.
        artist_name: Artist name, used in the ``artist:`` field filter.
        access_token: Bearer token for the Authorization header.
        session: Optional object exposing a ``get`` method compatible with
            ``requests.get`` (for example a ``requests.Session`` with a retry
            adapter mounted). When omitted, ``requests.get`` is used.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``id`` of the first returned track, or ``None`` when the search
        yields no items.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
    """
    search_url = "https://api.spotify.com/v1/search"
    headers = {
        "Authorization": f"Bearer {access_token}"
    }
    params = {
        "q": f"track:{track_name} artist:{artist_name}",
        "type": "track",
        "limit": 1
    }
    http = session if session is not None else requests
    response = http.get(search_url, headers=headers, params=params, timeout=timeout)
    # An expired token or rate limit must not be mistaken for "no such track".
    response.raise_for_status()
    payload = response.json()
    # Tolerate a missing or null 'tracks' / 'items' entry.
    items = (payload.get('tracks') or {}).get('items') or []
    if items:
        return items[0]['id']
    return None

# Function to get audio features for a track
def get_audio_features(track_id, access_token, session=None, timeout=10):
    """Fetch the audio-features object for a single track.

    Args:
        track_id: Track ID, interpolated into the endpoint URL.
        access_token: Bearer token for the Authorization header.
        session: Optional object exposing a ``get`` method compatible with
            ``requests.get`` (for example a ``requests.Session`` with a retry
            adapter mounted). When omitted, ``requests.get`` is used.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
    """
    features_url = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {
        "Authorization": f"Bearer {access_token}"
    }
    http = session if session is not None else requests
    response = http.get(features_url, headers=headers, timeout=timeout)
    # Surface HTTP errors rather than returning the error payload as features.
    response.raise_for_status()
    return response.json()

# HTTP session whose adapter retries on 429/5xx responses, up to five times
# with a backoff factor of 1.
# NOTE(review): nothing in this cell hands `session` to the API helper
# functions, so confirm requests are actually routed through it.
retries = Retry(total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retries))

# Read the source dataset and coerce 'Track' and 'Artist' to strings
# (missing artists become 'Unknown Artist' before the cast).
file_path = '../Most Streamed Spotify Songs 2024.csv'
data = pd.read_csv(file_path, encoding='ISO-8859-1')
data = data.assign(
    Track=data['Track'].astype(str),
    Artist=data['Artist'].fillna('Unknown Artist').astype(str),
)

# Only the title and artist columns are needed for the lookups
tracks = data.loc[:, ['Track', 'Artist']]

# Token used for every API call below
access_token = get_access_token()

# Accumulators: one dict per track with features, one (track, artist) tuple per miss
audio_features_list, not_found_tracks = [], []

# Function to save progress
def save_progress(audio_features_list, not_found_tracks):
    """Write both result collections to CSV files in the working directory.

    Args:
        audio_features_list: Records written to ``audio_features.csv``.
        not_found_tracks: (track, artist) pairs written to
            ``not_found_tracks.csv`` under the columns ``Track`` and ``Artist``.

    Both files are overwritten on every call and written without the index.
    """
    pd.DataFrame(audio_features_list).to_csv('audio_features.csv', index=False)
    pd.DataFrame(not_found_tracks, columns=['Track', 'Artist']).to_csv(
        'not_found_tracks.csv', index=False
    )

# Iterate over each track to get audio features

# Client-credentials tokens are short-lived (Spotify documents a one-hour
# lifetime) and a long run can outlast that, so the token is renewed once it
# reaches this age. The first token was requested just before this loop.
TOKEN_MAX_AGE_SECONDS = 50 * 60
REQUEST_PAUSE_SECONDS = 0.5  # pause between tracks to avoid hitting rate limits
SAVE_EVERY = 10              # checkpoint cadence, in processed tracks

token_acquired_at = time.monotonic()

# `processed` counts rows seen so far, so the checkpoint cadence does not
# depend on the DataFrame's index labels being consecutive integers.
for processed, (index, row) in enumerate(tracks.iterrows(), start=1):
    track_name = row['Track']
    artist_name = row['Artist']

    # Handle encoding issues
    track_name = track_name.encode('utf-8', 'ignore').decode('utf-8')
    artist_name = artist_name.encode('utf-8', 'ignore').decode('utf-8')

    try:
        # Renew the token before it expires mid-run
        if time.monotonic() - token_acquired_at >= TOKEN_MAX_AGE_SECONDS:
            access_token = get_access_token()
            token_acquired_at = time.monotonic()

        track_id = get_track_id(track_name, artist_name, access_token)
        if track_id:
            audio_features = get_audio_features(track_id, access_token)
            # An API error body (a dict with an 'error' key) or a non-dict
            # payload is not a feature record; keep it out of the results.
            if isinstance(audio_features, dict) and 'error' not in audio_features:
                audio_features['Track'] = track_name
                audio_features['Artist'] = artist_name
                audio_features_list.append(audio_features)
                print(f"Found and added: {track_name} by {artist_name}")
            else:
                not_found_tracks.append((track_name, artist_name))
                print(f"Audio features unavailable for {track_name} by {artist_name}: {audio_features}")
        else:
            not_found_tracks.append((track_name, artist_name))
            print(f"Track ID not found for {track_name} by {artist_name}")
    except requests.exceptions.RequestException as e:
        print(f"Error retrieving data for {track_name} by {artist_name}: {e}")
        not_found_tracks.append((track_name, artist_name))

    # Pause after every track, including failed ones, so errors don't turn
    # into a burst of back-to-back requests.
    time.sleep(REQUEST_PAUSE_SECONDS)

    # Periodically save progress
    if processed % SAVE_EVERY == 0:
        save_progress(audio_features_list, not_found_tracks)

# Final save at the end
save_progress(audio_features_list, not_found_tracks)

# Print summary
print(f"\nTotal tracks processed: {len(tracks)}")
print(f"Tracks found and added: {len(audio_features_list)}")
print(f"Tracks not found: {len(not_found_tracks)}")

print("Audio features have been saved to audio_features.csv")
print("Tracks not found have been saved to not_found_tracks.csv")


Found and added: MILLION DOLLAR BABY by Tommy Richman
Found and added: Not Like Us by Kendrick Lamar
Found and added: i like the way you kiss me by Artemas
Found and added: Flowers by Miley Cyrus
Found and added: Houdini by Eminem
Found and added: Lovin On Me by Jack Harlow
Found and added: Beautiful Things by Benson Boone
Found and added: Gata Only by FloyyMenor
Found and added: Danza Kuduro - Cover by MUSIC LAB JPN
Found and added: BAND4BAND (feat. Lil Baby) by Central Cee
Found and added: I Had Some Help (feat. Morgan Wallen) by Post Malone
Found and added: The Door by Teddy Swims
Found and added: LUNCH by Billie Eilish
Found and added: Like That by Future
Found and added: bathroom floor by Kids With Buns
Found and added: LALA by Myke Towers
Found and added: Fortnight (feat. Post Malone) by Taylor Swift
Found and added: greedy by Tate McRae
Found and added: BLUE by Billie Eilish
Found and added: As It Was by Harry Styles
Found and added: Paint The Town Red by Doja Cat
Found and adde

In [2]:
# Load the saved audio-features CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, whereas
# save_progress() writes 'audio_features.csv' relative to the working
# directory — confirm both refer to the same file.
file_path = 'C:/Users/Victor Cardenas/Documents/dataset_projects/spotify_streaming/audio_features.csv'
# NOTE(review): pandas' to_csv writes UTF-8 by default; if this file was
# produced by save_progress(), decoding it as latin1 would garble non-ASCII
# track and artist names — confirm the file's actual encoding.
audio_features = pd.read_csv(file_path, encoding='latin1')

# Display the dataframe (the rendered output is truncated for long frames)
audio_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track,Artist
0,0.852,0.697,1,-5.520,0,0.0439,0.097300,0.000370,0.0678,0.919,138.029,audio_features,5AJ9hqTS2wcFQCELCFRO7A,spotify:track:5AJ9hqTS2wcFQCELCFRO7A,https://api.spotify.com/v1/tracks/5AJ9hqTS2wcF...,https://api.spotify.com/v1/audio-analysis/5AJ9...,155152,4,MILLION DOLLAR BABY,Tommy Richman
1,0.898,0.472,1,-7.001,1,0.0776,0.010700,0.000000,0.1410,0.214,101.061,audio_features,6AI3ezQ4o3HUoP6Dhudph3,spotify:track:6AI3ezQ4o3HUoP6Dhudph3,https://api.spotify.com/v1/tracks/6AI3ezQ4o3HU...,https://api.spotify.com/v1/audio-analysis/6AI3...,274192,4,Not Like Us,Kendrick Lamar
2,0.599,0.946,11,-4.263,1,0.0447,0.000938,0.010600,0.0826,0.747,151.647,audio_features,2GxrNKugF82CnoRFbQfzPf,spotify:track:2GxrNKugF82CnoRFbQfzPf,https://api.spotify.com/v1/tracks/2GxrNKugF82C...,https://api.spotify.com/v1/audio-analysis/2Gxr...,142515,4,i like the way you kiss me,Artemas
3,0.706,0.691,0,-4.775,1,0.0633,0.058400,0.000070,0.0232,0.632,118.048,audio_features,7DSAEUvxU8FajXtRloy8M0,spotify:track:7DSAEUvxU8FajXtRloy8M0,https://api.spotify.com/v1/tracks/7DSAEUvxU8Fa...,https://api.spotify.com/v1/audio-analysis/7DSA...,200600,4,Flowers,Miley Cyrus
4,0.936,0.887,9,-2.760,0,0.0683,0.029200,0.000002,0.0582,0.889,127.003,audio_features,2HYFX63wP3otVIvopRS99Z,spotify:track:2HYFX63wP3otVIvopRS99Z,https://api.spotify.com/v1/tracks/2HYFX63wP3ot...,https://api.spotify.com/v1/audio-analysis/2HYF...,227239,4,Houdini,Eminem
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1725,0.502,0.168,1,-14.061,1,0.0486,0.885000,0.000000,0.1110,0.355,112.881,audio_features,219slQu3ALbTSAe9P22hvE,spotify:track:219slQu3ALbTSAe9P22hvE,https://api.spotify.com/v1/tracks/219slQu3ALbT...,https://api.spotify.com/v1/audio-analysis/219s...,228443,3,Happiest Year,Jaymes Young
1726,0.844,0.533,1,-9.612,1,0.5520,0.073500,0.000003,0.0953,0.230,140.078,audio_features,240audWazVjwvwh7XwfSZE,spotify:track:240audWazVjwvwh7XwfSZE,https://api.spotify.com/v1/tracks/240audWazVjw...,https://api.spotify.com/v1/audio-analysis/240a...,156081,4,For the Last Time,$uicideboy$
1727,0.574,0.729,9,-4.200,1,0.0473,0.349000,0.000002,0.6640,0.470,163.748,audio_features,4ILc7H2NfGbzbrHBclNOtB,spotify:track:4ILc7H2NfGbzbrHBclNOtB,https://api.spotify.com/v1/tracks/4ILc7H2NfGbz...,https://api.spotify.com/v1/audio-analysis/4ILc...,236000,4,Dil Meri Na Sune,Atif Aslam
1728,0.824,0.513,10,-6.263,0,0.3600,0.384000,0.000000,0.1090,0.179,123.023,audio_features,01JMnRUs2YOK6DDpdQASGY,spotify:track:01JMnRUs2YOK6DDpdQASGY,https://api.spotify.com/v1/tracks/01JMnRUs2YOK...,https://api.spotify.com/v1/audio-analysis/01JM...,203438,4,Grace (feat. 42 Dugg),Lil Baby


In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

# Spotify API credentials
client_id = 'your_client_id'
client_secret = 'your_client_secret'

# Authenticate with the Spotify API
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

# Replace this with your playlist ID (without query parameters)
playlist_id = '2OPncXZPbD73X3WbRCEk7x'

# The audio-features endpoint accepts at most 100 track IDs per request
MAX_IDS_PER_REQUEST = 100

# Get the tracks from the playlist
results = sp.playlist_tracks(playlist_id)
tracks = results['items']

# Get more tracks if the playlist has more than 100 tracks
while results['next']:
    results = sp.next(results)
    tracks.extend(results['items'])

# Extract track IDs, skipping playlist entries that have no track object or
# no Spotify ID (e.g. removed or local tracks), which cannot be looked up.
track_ids = [
    item['track']['id']
    for item in tracks
    if item.get('track') and item['track'].get('id')
]

# Get audio features in batches. A paginated playlist can exceed the per-request
# ID limit, and the API returns null entries for tracks it has no features for,
# so those are dropped.
audio_features = []
for start in range(0, len(track_ids), MAX_IDS_PER_REQUEST):
    batch = sp.audio_features(track_ids[start:start + MAX_IDS_PER_REQUEST]) or []
    audio_features.extend(feature for feature in batch if feature)

# Create a DataFrame
df = pd.DataFrame(audio_features, columns=['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                                           'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                                           'duration_ms'])

# Save to CSV (optional)
df.to_csv('spotify_playlist_audio_features.csv', index=False)

df.head()


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.708,0.737,1,-4.045,1,0.0436,0.0739,0.00162,0.0955,0.607,91.986,197920
1,0.706,0.691,0,-4.775,1,0.0633,0.0584,7e-05,0.0232,0.632,118.048,200600
2,0.687,0.606,7,-5.92,1,0.0262,0.178,0.000326,0.083,0.748,80.569,206307
3,0.578,0.654,1,-12.421,0,0.0562,0.121,4e-06,0.0673,0.621,129.513,216467
4,0.621,0.782,2,-5.548,1,0.044,0.0125,0.033,0.23,0.55,128.033,198938


In [6]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

# Spotify API credentials
client_id = 'your_client_id'
client_secret = 'your_client_secret'

# Authenticate with the Spotify API
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

# Replace this with your playlist ID (without query parameters)
playlist_id = '1zRN5JeaS7EE6JcIiUqAUh'

# The audio-features endpoint accepts at most 100 track IDs per request
MAX_IDS_PER_REQUEST = 100

# Get the tracks from the playlist
results = sp.playlist_tracks(playlist_id)
tracks = results['items']

# Get more tracks if the playlist has more than 100 tracks
while results['next']:
    results = sp.next(results)
    tracks.extend(results['items'])

# Keep only entries that carry a track object with a Spotify ID; removed or
# local tracks have neither and cannot be looked up.
playlist_tracks = [
    item['track']
    for item in tracks
    if item.get('track') and item['track'].get('id')
]

# Extract track IDs, names, and artist names (three parallel lists)
track_ids = [track['id'] for track in playlist_tracks]
track_names = [track['name'] for track in playlist_tracks]
artist_names = [', '.join(artist['name'] for artist in track['artists']) for track in playlist_tracks]

# Get audio features in batches and label each record with its track/artist.
# Labels are matched per batch so a short response cannot shift later rows.
# Null entries (tracks without features) are skipped.
audio_features = []
for start in range(0, len(track_ids), MAX_IDS_PER_REQUEST):
    stop = start + MAX_IDS_PER_REQUEST
    batch = sp.audio_features(track_ids[start:stop]) or []
    for name, artist, feature in zip(track_names[start:stop], artist_names[start:stop], batch):
        if not feature:
            continue
        feature['track'] = name
        feature['artist'] = artist
        audio_features.append(feature)

# Create a DataFrame
df = pd.DataFrame(audio_features, columns=['track', 'artist', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                                           'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                                           'duration_ms'])

# Save to CSV (optional)
df.to_csv('spotify_random_playlist.csv', index=False)

df.head()


Unnamed: 0,track,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,The Show,Kerris Dorsey,0.594,0.0658,9,-16.422,0,0.0627,0.951,0.0,0.0823,0.578,149.314,193787
1,A Little Bit of Everything,Dawes,0.572,0.162,5,-20.962,1,0.0409,0.434,0.0,0.208,0.262,73.099,342787
2,The Lady In Red,Chris de Burgh,0.548,0.294,10,-17.112,1,0.0333,0.332,3e-06,0.0392,0.491,76.698,256173
3,Baby Can I Hold You,Tracy Chapman,0.692,0.422,2,-10.533,1,0.0333,0.0811,0.0,0.0848,0.468,74.375,193120
4,You're Somebody Else,flora cash,0.713,0.452,1,-9.638,1,0.0295,0.841,0.0038,0.105,0.235,102.963,218883
