In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth

#Spotipy documentation: https://spotipy.readthedocs.io/en/master/

In [3]:
#Write here the client ID and secret ID from spotify API
SPOTIPY_CLIENT_ID = ''
SPOTIPY_CLIENT_SECRET = ''
REDIRECT_URI = 'http://localhost:7000/callback'
scope = "user-library-read"

cache_handler = spotipy.cache_handler.MemoryCacheHandler()
auth_manager = SpotifyClientCredentials(client_id = SPOTIPY_CLIENT_ID, client_secret=SPOTIPY_CLIENT_SECRET, cache_handler=cache_handler)
sp = spotipy.Spotify(auth_manager = auth_manager)

In [4]:
#Testing finding genre
results = sp.available_markets()
sp.category('0JQ5DAqbMKFEC4WFtoNRpw', country = 'FI')
pop = '0JQ5DAqbMKFEC4WFtoNRpw'

Next cell below finds a random song in the finnish market according to spotify, returns the name, popularity and audio features

In [5]:
import random
import string
import pandas as pd
random_character = random.choice(string.ascii_letters)
random_search = random_character + '%'
offset = random.randint(1,1000)
year = '2022'
genre = 'hip-hop'
    
#Test to fetch random songs with specific genre
songs = sp.search(q = 'track:' + random_search + ' year:' + year + ' genre: ' +  genre, type = 'track', market = 'FI')


first_song = songs['tracks']['items'][0]
first_song_name = first_song['name']
first_song_artist = first_song['artists'][0]['name']
first_song_popularity = first_song['popularity']
first_song_audio_features = sp.audio_features(first_song['id'])

first_song_name, first_song_artist, first_song_popularity, first_song_audio_features, songs['tracks']['total']



('Nonstop (feat. Sexmane & B.Baby)',
 'MD$',
 60,
 [{'danceability': 0.752,
   'energy': 0.562,
   'key': 6,
   'loudness': -6.784,
   'mode': 0,
   'speechiness': 0.0739,
   'acousticness': 0.162,
   'instrumentalness': 0.00319,
   'liveness': 0.187,
   'valence': 0.28,
   'tempo': 134.039,
   'type': 'audio_features',
   'id': '0nRssz1xxPRC6B22k5lG06',
   'uri': 'spotify:track:0nRssz1xxPRC6B22k5lG06',
   'track_href': 'https://api.spotify.com/v1/tracks/0nRssz1xxPRC6B22k5lG06',
   'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0nRssz1xxPRC6B22k5lG06',
   'duration_ms': 179272,
   'time_signature': 4}],
 4276)

# Next, we make a function to fetch data from using the example above

In [30]:
#function to fetch songs from a specific genre
#returns: dataframe with song name, artist name, and audio features
def fetch_songs(sp, genre, year, number, columns):
    #DF where the songs are stored
    df = pd.DataFrame(columns = columns)
    
    #Fetch songs until there are more than number of songs in the DataFrame
    while (df.shape[0] < number):
        
        #Create empty list for storing songs with one fetch
        song_data = []
        
        #Make random search by some random letter
        offset = random.randint(1,1000)
        random_character = random.choice(string.ascii_letters)
        random_search = random.choice([random_character + '%'
                                       ,'%' + random_character
                                       ,'%' + random_character + '%'])
        songs = sp.search(q = 'track:' + random_search + ' year:' + year + ' genre: ' +  genre, type = 'track', market = 'FI', offset = offset, limit = 50)
        
        #Go through all songs from the fetch and extract needed features
        for song in songs['tracks']['items']:
            name = song['name']
            artist = song['artists'][0]['name']
            popularity = song['popularity']
            audio_features = sp.audio_features(song['id'])
            
            song_data.append([name, artist, popularity] + list(audio_features[0].values()))
    
        #Concatenate the found songs to a dataframe and remove duplicates
        new_df = pd.DataFrame(columns = columns, data = song_data)
        df = pd.concat([df, new_df], ignore_index = True)
        df = df.drop_duplicates(subset = ['id'])
    
    df = df.drop(['type', 'track_href', 'analysis_url', 'time_signature'], axis = 1)
    
    return df

columns = ['song_name', 'artist_name', 'popularity'] + list(first_song_audio_features[0].keys())
#Test run, fetches 50 rap songs from 2019
data = fetch_songs(sp, 'rap', '2019', 50, columns)

## This function can be used to get a random sample of songs

In [32]:
data.shape
data.head

<bound method NDFrame.head of                                             song_name  \
0                                        La Vida Loca   
1                  Notas de Voz (feat. Random Preset)   
2                                       Niet Voor Mij   
3                         La Bamba (feat. Vale Lambo)   
4   Detroit To Inglewood (feat. Ice Burgandy, Dreg...   
5                             The Joker Vs. Pennywise   
6                                            Victoria   
7                                       Vie d’artiste   
8                                     Briiips, vol. 1   
9                                       Dernier verre   
10                                     Amor Vagabundo   
11                                     Videoüberwacht   
12                                             Violin   
13                                     Mi Segunda Vez   
14                                              Vuoto   
15                                        Pour la vie   
1

### Getting spotify featured playlist songs from Finland

The next step is to fetch all the songs from featured spotify playlists in finland, and extract their audio features that can be used to classify random songs

In [9]:
'''
Get all playlist ids, put them into a data frame
Get all songs from each playlist
Get average song for each playlist
Input: find closest playlist that fits to the song
'''

get_featured_playlists = sp.featured_playlists(country = 'FI')['playlists']['items']

playlists = {}

for playlist in get_featured_playlists:
    playlists[playlist['name']] = playlist['id']

playlists = pd.DataFrame.from_dict(playlists, orient ='index', columns = {'id'})
playlists.index.name = 'playlist_name'
playlists.reset_index(inplace = True)
playlists

Unnamed: 0,playlist_name,id
0,New Music Friday Suomi,37i9dQZF1DWXtcXUwhuzFM
1,It's Hits Suomi,37i9dQZF1DX64Cx4vTeaRB
2,Uuden suomalaisen musiikin lista,37i9dQZF1DX8txPsVVyl0W
3,#vainsuomihitit,37i9dQZF1DWUvzPS8uIABd
4,Nousussa,37i9dQZF1DXdJFpsr4Sn91
5,Ensisoitossa,37i9dQZF1DXbsb4MvKbVAY
6,100 Suomi,37i9dQZF1DWTw1ORfckhDu
7,Suomirock tänään,37i9dQZF1DWSTzzP5k4u3n
8,Iskelmä kuuluu kaikille,37i9dQZF1DWVOar3rtQpLZ
9,Metallia Suomesta,37i9dQZF1DX8oyW0kIcHwJ


In [10]:
playlist_songs = {}

#For each playlist, fetch the playlist id
for i in playlists.index:
    playlist_name = playlists['playlist_name'][i]
    id = playlists['id'][i]
    songs = sp.playlist_items(id)

    ids = []
    
    #For all the songs for each playlist, fetch the id and append it to a dictionary
    for song in songs['items']:
        song_id = song['track']['id']
        ids.append(song_id)
    
    playlist_songs[playlist_name] = ids
    

In [25]:
#Fetch all the songs and make dataframe for each playlist
song_data = []

#Go through each playlist
for playlist in playlist_songs.keys():
    playlist_name = playlist
    #Fetch the song id and extract correct data
    for song_id in playlist_songs[playlist]:
        song = sp.track(song_id)
        artist = song['artists'][0]['name']
        song_name = song['name']
        popularity = song['popularity']
        audio_features = sp.audio_features(song_id)
        
        song_data.append([playlist_name, song_name, artist, popularity] + list(audio_features[0].values()))
        
columns = ['playlist_name', 'song_name', 'artist', 'popularity'] + list(first_song_audio_features[0].keys())

#Put all of the data into a dataframe
featured_playlist_song_features = pd.DataFrame(data = song_data, columns = columns)

In [33]:
import csv

#Put all the songs into a csv
featured_playlist_song_features.to_csv('playlist_features_data.csv')

## Time to get the average song from each playlist

In [36]:
## Get all spotify playlist names
featured_playlist_song_features['playlist_name'].unique()
s

array(['New Music Friday Suomi', "It's Hits Suomi",
       'Uuden suomalaisen musiikin lista', '#vainsuomihitit', 'Nousussa',
       'Ensisoitossa', '100 Suomi', 'Suomirock tänään',
       'Iskelmä kuuluu kaikille', 'Metallia Suomesta',
       'Suomi-indie tänään', 'Tästä tulee hyvä päivä', 'EQUAL Suomi'],
      dtype=object)