In [1]:
import os
import warnings

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth

#Spotipy documentation: https://spotipy.readthedocs.io/en/master/

In [2]:
#Spotify API credentials are read from the environment variables SPOTIPY_CLIENT_ID and
#SPOTIPY_CLIENT_SECRET so that no secret has to be saved inside the notebook.
#When a variable is not set the value falls back to an empty string.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', '')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', '')
#REDIRECT_URI and scope are defined here but are not passed to the auth manager below
REDIRECT_URI = 'http://localhost:7000/callback'
scope = "user-library-read"

#Keep the access token in memory only (MemoryCacheHandler) and authenticate
#with the client-credentials flow
cache_handler = spotipy.cache_handler.MemoryCacheHandler()
auth_manager = SpotifyClientCredentials(client_id = SPOTIPY_CLIENT_ID, client_secret=SPOTIPY_CLIENT_SECRET, cache_handler=cache_handler)

#Client object used for every API call in the rest of the notebook
sp = spotipy.Spotify(auth_manager = auth_manager)

In [3]:
#Testing finding genre
#Category id used for the lookup below; named before its first use
pop = '0JQ5DAqbMKFEC4WFtoNRpw'
results = sp.available_markets()
#Kept as the last expression so the notebook displays the category returned for market 'FI'
sp.category(pop, country = 'FI')

The next cell searches Spotify's Finnish market for a random song and returns its name, artist, popularity and audio features.

In [4]:
import random
import string
import pandas as pd
#Random wildcard search term: one random ASCII letter followed by '%'
random_character = random.choice(string.ascii_letters)
random_search = random_character + '%'
#Random position in the result list, so that repeated runs do not always return the top hit
offset = random.randint(1,1000)
year = '2022'
genre = 'hip-hop'

#NOTE(review): the query contains a space after 'genre:'; confirm that Spotify still
#applies the genre filter when written this way.
songs = sp.search(q = 'track:' + random_search + ' year:' + year + ' genre: ' +  genre, type = 'track', market = 'FI', offset = offset)

#An empty page would otherwise fail below with an IndexError that carries no context
if not songs['tracks']['items']:
    raise IndexError('Spotify search returned no tracks at offset ' + str(offset) + '; re-run the cell to draw a new random search')

#Extract the fields of interest from the first track of the returned page
first_song = songs['tracks']['items'][0]
first_song_name = first_song['name']
first_song_artist = first_song['artists'][0]['name']
first_song_popularity = first_song['popularity']
first_song_audio_features = sp.audio_features(first_song['id'])

#Displayed as the cell output: name, artist, popularity, audio features, total number of hits
first_song_name, first_song_artist, first_song_popularity, first_song_audio_features, songs['tracks']['total']



('Amatimies',
 'JVG',
 64,
 [{'danceability': 0.871,
   'energy': 0.687,
   'key': 7,
   'loudness': -4.417,
   'mode': 1,
   'speechiness': 0.155,
   'acousticness': 0.0721,
   'instrumentalness': 0.00141,
   'liveness': 0.357,
   'valence': 0.586,
   'tempo': 115.963,
   'type': 'audio_features',
   'id': '3UTyIQpogsEpo9IIY5wLS5',
   'uri': 'spotify:track:3UTyIQpogsEpo9IIY5wLS5',
   'track_href': 'https://api.spotify.com/v1/tracks/3UTyIQpogsEpo9IIY5wLS5',
   'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3UTyIQpogsEpo9IIY5wLS5',
   'duration_ms': 121961,
   'time_signature': 4}],
 10000)

# Next, we define a function that fetches data using the approach from the example above

In [9]:
def fetch_songs(sp, genre, year, number, columns, max_empty_fetches = 20):
    """Fetch songs of a specific genre and year together with their audio features.

    Random wildcard searches are issued against the 'FI' market until at least
    ``number`` unique tracks (unique by track id) have been collected.

    Parameters
    ----------
    sp : client object providing ``search`` and ``audio_features``
    genre : str
        Genre name inserted into the search query.
    year : str
        Year inserted into the search query (concatenated, so it must be a string).
    number : int
        Minimum number of unique songs to collect.
    columns : list of str
        Column names of the collected data. The first three receive the song
        name, artist name and popularity; the remaining ones are looked up by
        name in the audio-features dictionary of each track. Must contain
        'type', 'track_href', 'analysis_url' and 'time_signature', which are
        dropped from the result.
    max_empty_fetches : int or None, optional
        Number of consecutive searches that add no new song after which
        fetching stops with a RuntimeWarning. ``None`` disables the limit.

    Returns
    -------
    pandas.DataFrame
        One row per unique track in first-seen order, with a fresh 0..n-1
        index. Holds at least ``number`` rows unless fetching stopped early.
    """
    #Rows keyed by track id: duplicates are rejected as they arrive
    rows_by_id = {}
    stalled_fetches = 0

    #Fetch songs until at least `number` unique songs have been collected
    while len(rows_by_id) < number:
        found_before = len(rows_by_id)

        #Make random search by some random letter
        offset = random.randint(1,1000)
        random_character = random.choice(string.ascii_letters)
        random_search = random.choice([random_character + '%'
                                       ,'%' + random_character
                                       ,'%' + random_character + '%'])
        #NOTE(review): the query contains a space after 'genre:'; confirm that Spotify
        #still applies the genre filter when written this way.
        songs = sp.search(q = 'track:' + random_search + ' year:' + year + ' genre: ' +  genre, type = 'track', market = 'FI', offset = offset, limit = 50)

        #Ignore empty entries in the result page
        tracks = [song for song in songs['tracks']['items'] if song and song.get('id')]

        if tracks:
            #One request for the whole page (at most 50 ids) instead of one request per song
            features_per_track = sp.audio_features([song['id'] for song in tracks]) or []

            for song, features in zip(tracks, features_per_track):
                #Tracks without audio features come back as None: skip them
                if not features:
                    continue
                track_id = features.get('id') or song['id']
                if track_id in rows_by_id:
                    continue
                #Feature values are picked by column name, not by dictionary order
                rows_by_id[track_id] = ([song['name'], song['artists'][0]['name'], song['popularity']]
                                        + [features.get(column) for column in columns[3:]])

        if len(rows_by_id) > found_before:
            stalled_fetches = 0
            continue

        #Nothing new was found: give up after too many fruitless searches in a row
        stalled_fetches += 1
        if max_empty_fetches is not None and stalled_fetches >= max_empty_fetches:
            warnings.warn('fetch_songs stopped after ' + str(stalled_fetches)
                          + ' consecutive searches without new songs; returning '
                          + str(len(rows_by_id)) + ' of ' + str(number) + ' requested songs',
                          RuntimeWarning, stacklevel = 2)
            break

    #Build the DataFrame once from all collected rows
    df = pd.DataFrame(list(rows_by_id.values()), columns = columns)
    df = df.drop(['type', 'track_href', 'analysis_url', 'time_signature'], axis = 1)

    return df

#Column layout: three descriptive fields followed by every audio-feature key of the example song
columns = ['song_name', 'artist_name', 'popularity', *first_song_audio_features[0]]
#Test run, requests 100 rap songs from 2019
data = fetch_songs(sp, genre = 'rap', year = '2019', number = 100, columns = columns)

In [10]:
#A bare non-final expression is not displayed, so print the (rows, columns) tuple explicitly
print(data.shape)
#The last expression is rendered as the cell output
data

Unnamed: 0,song_name,artist_name,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,uri,duration_ms
0,Come Thru (with Usher),Summer Walker,70,0.679,0.728,5,-3.092,0,0.1540,0.1590,0.000000,0.1070,0.460,152.001,3TTMUI5dFcbeNSDTTDY9M8,spotify:track:3TTMUI5dFcbeNSDTTDY9M8,181013
1,UPPERS AND CHEAP LOVE,LIL DUSTY G,55,0.532,0.560,3,-9.979,0,0.1830,0.3390,0.000000,0.1670,0.296,80.411,6CJdA7LcUBxq8RApnM7epJ,spotify:track:6CJdA7LcUBxq8RApnM7epJ,155723
2,do u even miss me at all?,gianni & kyle,64,0.819,0.519,10,-7.160,0,0.0768,0.3840,0.000000,0.0898,0.452,100.039,1AHf5FSofKcUw8tyKkccKF,spotify:track:1AHf5FSofKcUw8tyKkccKF,177627
3,Gang Unit,Lil Loaded,46,0.917,0.682,6,-7.846,1,0.0740,0.1870,0.000000,0.0938,0.711,99.038,7pHQiDcMEoNV5zkZCoFNhC,spotify:track:7pHQiDcMEoNV5zkZCoFNhC,106736
4,Pull Up,Luh Kel,60,0.803,0.569,1,-6.366,1,0.0705,0.0957,0.000000,0.0976,0.260,95.009,2enuUNOuQe22b6LB31bimX,spotify:track:2enuUNOuQe22b6LB31bimX,123400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,That's My Baby (feat. Pouya),Yung Pinch,34,0.719,0.536,0,-5.548,1,0.0333,0.1310,0.001430,0.1070,0.277,139.924,6Vf8CK5MZXuDnRFHm8Lwco,spotify:track:6Vf8CK5MZXuDnRFHm8Lwco,223307
96,Eleven (feat. BIBI),twlv,46,0.502,0.576,6,-6.145,1,0.0466,0.1450,0.000000,0.1700,0.205,89.897,1UaHqU3pJSczM0SijabTtn,spotify:track:1UaHqU3pJSczM0SijabTtn,196520
97,Family Matters (feat. Arin Ray),Cordae,44,0.492,0.536,7,-9.932,1,0.3820,0.2510,0.000000,0.1950,0.381,84.947,6dp8BFpvGV7DdwxyDYL5y7,spotify:track:6dp8BFpvGV7DdwxyDYL5y7,210779
98,jag har fått nog av att aldrig få nog av dig,Albin Johnsén,30,0.596,0.373,10,-8.923,0,0.0414,0.5080,0.000000,0.0665,0.340,115.908,2FjQVkv19BPgnaHxQDMRe5,spotify:track:2FjQVkv19BPgnaHxQDMRe5,178817
