In [None]:
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time

# Spotify API credentials are read from the environment, not hardcoded.
client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')
auth_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Genres to pull songs from; change if needed.
pool_from = [
    'pop', 'rock', 'hip-hop', 'edm',
    'jazz', 'classical', 'country', 'r&b',
    'indie', 'blues', 'reggae', 'metal',
]

# Number of songs requested for each genre.
songs_per = 1000

# Song records collected across all genres.
all_songs = []

In [None]:
#Get audio features from the input track
def grab_features(track_id):
    """Fetch a fixed set of audio features for a single track.

    Args:
        track_id: Track identifier passed to ``sp.audio_features``.

    Returns:
        A dict with the keys ``danceability``, ``energy``, ``loudness``,
        ``speechiness``, ``acousticness``, ``instrumentalness``, ``valence``,
        ``tempo``, ``key`` and ``mode``. Every value is ``None`` when the
        lookup returns no feature data for the track.
    """
    feature_keys = (
        'danceability',
        'energy',
        'loudness',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'valence',
        'tempo',
        'key',
        'mode',
    )

    response = sp.audio_features(track_id)
    audio_features = response[0] if response else None
    if audio_features is None:
        # Subscripting a missing entry would raise TypeError; keep the key set
        # stable instead so the result can still be unpacked into a record.
        return dict.fromkeys(feature_keys)

    return {key: audio_features[key] for key in feature_keys}

#Get songs by genre
def get_songs(genre, limit=50, total_songs=songs_per):
    """Collect up to ``total_songs`` track records for ``genre`` via search.

    Pages through ``sp.search`` results ``limit`` tracks at a time until
    enough records are collected or a page comes back empty or short.

    Args:
        genre: Genre name inserted into the ``genre:`` search filter.
        limit: Page size sent with each search request.
        total_songs: Maximum number of records to return.

    Returns:
        A list of at most ``total_songs`` dicts with the keys ``name``,
        ``artist``, ``id``, ``release_date`` and ``genre``.
    """
    # NOTE(review): the search endpoint presumably caps how deep `offset` can
    # go, so very large `total_songs` values may fail — confirm against the
    # API reference.
    songs = []
    offset = 0
    while len(songs) < total_songs:
        results = sp.search(q=f'genre:{genre}', type='track', limit=limit, offset=offset)
        tracks = results['tracks']['items']
        for track in tracks:
            if track is None:
                # Defensive: skip empty entries rather than crash on subscript.
                continue
            # Audio features (see grab_features) are currently not added here.
            songs.append({
                'name': track['name'],
                'artist': ', '.join(artist['name'] for artist in track['artists']),
                'id': track['id'],
                'release_date': track['album']['release_date'],
                'genre': genre,  # Tag each record with the genre it was found under
            })

        offset += limit  # Move to the next batch of results
        # An empty page also ends the loop; otherwise limit=0 would spin forever.
        if not tracks or len(tracks) < limit:
            break  # No more songs to fetch

    # The last page can overshoot when total_songs is not a multiple of limit.
    return songs[:total_songs]

In [None]:
# Start from an empty list so re-running this cell does not append a second
# copy of every genre onto results left over from an earlier run.
all_songs = []
for genre in pool_from:
    print(f"Fetching songs for genre: {genre}")
    genre_songs = get_songs(genre=genre, limit=50, total_songs=songs_per)
    all_songs.extend(genre_songs)

# Convert to DataFrame
df_songs = pd.DataFrame(all_songs)

# Save to CSV file
df_songs.to_csv('small_set.csv', index=False)
