In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import requests
import json
import pandas as pd
import getpass
import datetime

In [2]:
import getpass
c_id = getpass.getpass()

········


In [3]:
c_secret = getpass.getpass()

········


In [4]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=c_id,client_secret=c_secret))

# PART 1 - I use spotipy to get new data out of Spotify

I will first get data from Spotify playlists. Later on (if I have time), I'll see whether I can retrieve song data from other websites.

I'll start by investigating a single playlist, and then see whether I can automate the extraction for more songs.

In [5]:
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    """Return every item of a Spotify playlist, following the result pagination.

    Parameters
    ----------
    playlist_id : str
        ID of the playlist to read.

    Returns
    -------
    list
        The ``items`` entries of every result page, in the order they were returned.
    """
    # NOTE(review): recent spotipy releases flag `user_playlist_tracks` as deprecated
    # in favour of `playlist_items` -- confirm before upgrading the library.
    results = sp.user_playlist_tracks("spotify",playlist_id)
    # Copy the first page so that extending the list never mutates the API response.
    tracks = list(results['items'])
    while results['next'] is not None:
        results = sp.next(results)
        tracks.extend(results['items'])
        # Random pause between two page requests.
        sleep(randint(1,3))
    return tracks

In [None]:
# Playlist New Release
new_release = get_playlist_tracks("37i9dQZF1DXb5BKLTO7ULa")
new_release

In [None]:
type(new_release)

In [None]:
def flatten(input_list):
    """Concatenate the elements of every sub-iterable of ``input_list`` into one flat list."""
    flat = []
    for sublist in input_list:
        flat.extend(sublist)
    return flat

def get_name_artists_from_track(track):
    """Build one ``(track name, artist name, track uri)`` tuple per artist of a track.

    Parameters
    ----------
    track : dict or None
        Track object providing ``name``, ``uri`` and ``artists`` (a list of
        dicts that each have a ``name`` key).

    Returns
    -------
    list of tuple
        One tuple per artist; an empty list when ``track`` is ``None``.
    """
    # NOTE(review): Spotify playlist items can carry a null `track` (e.g. for
    # entries that are no longer available); skip them instead of crashing.
    if track is None:
        return []
    return [(track["name"], artist["name"], track["uri"]) for artist in track["artists"]]

def get_name_artist_from_playlist_item(playlist_item):
    """Return the (track name, artist name, track uri) tuples of the track held by a playlist item."""
    track = playlist_item['track']
    return get_name_artists_from_track(track)

def get_name_artists_from_playlist(input_playlist):
    """Turn a list of playlist items into a DataFrame with one row per (track, artist) pair.

    The resulting columns are ``track_name``, ``artist_name`` and ``track_uri``.
    """
    rows = flatten([get_name_artist_from_playlist_item(item) for item in input_playlist])
    return pd.DataFrame(rows, columns = ['track_name', 'artist_name', 'track_uri'])

In [None]:
df = get_name_artists_from_playlist(new_release)
df

## Get audio Features

### Exploration

In [None]:
df['track_uri'][0]

In [None]:
sp.audio_features(df['track_uri'][0])

In [None]:
sp.audio_features(df['track_uri'][0])[0]

In [None]:
sp.audio_features(df['track_uri'][0])[0]['danceability']

### Automation

In [None]:
# This was my initial plan to get only a limited list of features, but I then realized it would dramatically increase
# the number of necessary requests via the API. I thought afterwards that it is probably more efficient to request them all
# at once and split/treat them locally

# I will only keep the following track features
# feature_list = [ 'acousticness','danceability','energy','instrumentalness','liveness','loudness','speechiness','tempo','time_signature', 'valence']

# for feature in feature_list:
#     feature_values = []
#     for i in range(len(df['track_uri'])):
#         feature_values.append(sp.audio_features(df['track_uri'][i])[0][feature])
#         sleep(randint(1,3))
#     df[feature] = feature_values

# df

In [None]:
# Second trial, which should be more efficient

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris = df['track_uri'].tolist()
audio_features = []
for start in range(0, len(track_uris), 100):
    audio_features.extend(sp.audio_features(track_uris[start:start + 100]))
    sleep(randint(1,3))
# Raises if the API returned fewer entries than requested, so rows cannot get misaligned.
df['audio_features'] = audio_features

df

In [28]:
def flat_audio_feat(data):
    """Expand the ``audio_features`` column of ``data`` into one column per feature.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``audio_features`` column holding one dict of features
        per row (or ``None`` when no features are available for that row).

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``data`` followed by the feature
        columns, without ``audio_features`` and without the feature fields
        that are not kept (``analysis_url``, ``duration_ms``, ``id``, ``key``,
        ``mode``, ``track_href``, ``type``, ``uri``). ``data`` is not modified.
    """
    # Build the feature table from row records (instead of transposing a dict
    # of dicts) so each feature column gets its own dtype rather than `object`.
    # Rows without features become empty records, i.e. NaN in every feature column.
    records = [feat if isinstance(feat, dict) else {} for feat in data['audio_features']]
    flattened = pd.DataFrame(records, index=data.index)
    flattened.columns = [str(col) for col in flattened.columns]
    combined = pd.concat([data, flattened], axis=1)
    # Drop initial features column + features I do not want to keep.
    # errors='ignore' keeps this working when no feature was returned at all.
    unwanted = ['audio_features', 'analysis_url', 'duration_ms', 'id', 'key', 'mode', 'track_href', 'type', 'uri']
    return combined.drop(columns=unwanted, errors='ignore')

In [None]:
df_test = df.copy()

df_test = flat_audio_feat(df_test)
df_test

Great, it works !

## Create function

In [None]:
def get_audio_features_from_playlist(playlist_id):
    """Return the tracks of a playlist together with their audio features.

    Parameters
    ----------
    playlist_id : str
        ID of the Spotify playlist to read.

    Returns
    -------
    pandas.DataFrame
        One row per (track, artist) pair of the playlist, as produced by
        ``get_name_artists_from_playlist``, with the audio features expanded
        into columns by ``flat_audio_feat``.
    """
    #Get playlist tracks
    input_pl = get_playlist_tracks(playlist_id)

    # Get tracks name, artist and uri
    data = get_name_artists_from_playlist(input_pl)

    # Get audio features. `sp.audio_features` accepts a list of tracks (up to
    # 100 per call), so request them in batches rather than one track at a time.
    track_uris = data['track_uri'].tolist()
    audio_features_pl = []
    for start in range(0, len(track_uris), 100):
        audio_features_pl.extend(sp.audio_features(track_uris[start:start + 100]))
        sleep(randint(1,3))
    # Raises if the API returned fewer entries than requested, so rows cannot get misaligned.
    data['audio_features'] = audio_features_pl

    # Flatten audio features
    data = flat_audio_feat(data)

    return data

In [None]:
# Top Songs Global : https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF?si=69c1cf2b2c4d4f0d
top_songs_global = get_audio_features_from_playlist('37i9dQZEVXbNG2KDcFcKOF')
top_songs_global

It seems to work as well !

### Gather more song data from other playlists

In [None]:
# Soul Cuisine : https://open.spotify.com/playlist/37i9dQZF1DWX2Y4jtvVshZ?si=fc650a81149b4722
soul_cuisine = get_audio_features_from_playlist('37i9dQZF1DWX2Y4jtvVshZ')
soul_cuisine

In [None]:
print(df_test.shape)
print(top_songs_global.shape)
print(soul_cuisine.shape)

In [None]:
full_data = pd.concat([df_test, top_songs_global, soul_cuisine], axis=0)
full_data

In [None]:
# Beast Mode : https://open.spotify.com/playlist/37i9dQZF1DX76Wlfdnj7AP?si=584ee9ff9d184f52
# id : 37i9dQZF1DX76Wlfdnj7AP

In [None]:
# Motivation Mix : https://open.spotify.com/playlist/37i9dQZF1DXdxcBWuJkbcy?si=f7ac907b15f34f96
# id : 37i9dQZF1DXdxcBWuJkbcy

In [None]:
# Sunrise yoga : https://open.spotify.com/playlist/37i9dQZF1DXdVyc8LtLi96?si=93661d87b10b43bc
# id : 37i9dQZF1DXdVyc8LtLi96

In [None]:
# Peaceful Meditation : https://open.spotify.com/playlist/37i9dQZF1DWZqd5JICZI0u?si=5c76ed8c0dfd4343
# id : 37i9dQZF1DWZqd5JICZI0u

In [None]:
# Run Wild : https://open.spotify.com/playlist/37i9dQZF1DX35oM5SPECmN?si=9aac2257c07745e2
# id : 37i9dQZF1DX35oM5SPECmN

In [None]:
# Women of Pop : https://open.spotify.com/playlist/37i9dQZF1DX3WvGXE8FqYX?si=4f17556518894237
# id : 37i9dQZF1DX3WvGXE8FqYX

In [None]:
# Women of Hip-Hop : https://open.spotify.com/playlist/37i9dQZF1DX5l9rcXWdrth?si=d76edea72b274570
# id : 37i9dQZF1DX5l9rcXWdrth

In [None]:
# Women of Rock : https://open.spotify.com/playlist/37i9dQZF1DXd0ZFXhY0CRF?si=3117bfbf8dc849fa
# id : 37i9dQZF1DXd0ZFXhY0CRF

In [None]:
# Women of Indie : https://open.spotify.com/playlist/37i9dQZF1DWWW53w3AslFX?si=79a1f269277c405e
# id : 37i9dQZF1DWWW53w3AslFX

In [None]:
# Valentine's Day Love : https://open.spotify.com/playlist/37i9dQZF1DX4pAtJteyweQ?si=f30e5640654a45bd
# id : 37i9dQZF1DX4pAtJteyweQ

In [None]:
# Focus Modus : https://open.spotify.com/playlist/37i9dQZF1DWVRrbkzYIlbi?si=3920acf28b7a4889
# id : 37i9dQZF1DWVRrbkzYIlbi

In [None]:
# Peaceful Piano : https://open.spotify.com/playlist/37i9dQZF1DX4sWSpwq3LiO?si=376d5e8e8cc742fe
# id : 37i9dQZF1DX4sWSpwq3LiO

In [None]:
# Disney Hits : https://open.spotify.com/playlist/37i9dQZF1DX8C9xQcOrE6T?si=84aa0fd0269d47ab
# id : 37i9dQZF1DX8C9xQcOrE6T

In [None]:
playlist_id_list = ['37i9dQZF1DX76Wlfdnj7AP', '37i9dQZF1DXdxcBWuJkbcy', '37i9dQZF1DXdVyc8LtLi96', '37i9dQZF1DWZqd5JICZI0u', '37i9dQZF1DX35oM5SPECmN', '37i9dQZF1DX3WvGXE8FqYX', '37i9dQZF1DX5l9rcXWdrth', '37i9dQZF1DXd0ZFXhY0CRF', '37i9dQZF1DWWW53w3AslFX', '37i9dQZF1DX4pAtJteyweQ', '37i9dQZF1DWVRrbkzYIlbi', '37i9dQZF1DX4sWSpwq3LiO', '37i9dQZF1DX8C9xQcOrE6T']

In [None]:
# Fetch every playlist first, then concatenate once instead of re-copying
# the accumulated `full_data` frame on each iteration.
playlist_frames = []
for pl_id in playlist_id_list:
    playlist_frames.append(get_audio_features_from_playlist(pl_id))
full_data = pd.concat([full_data] + playlist_frames, axis=0)

In [None]:
full_data.shape

I'll export the data to a CSV file now, so that I do not have to rerun the requests every time.

In [None]:
full_data.to_csv('full_data.csv', index=False)

### Control data quality

In [None]:
full_data.isna().sum()

In [None]:
full_data[full_data['danceability'].isna()]

In [None]:
full_data = full_data.dropna()

In [None]:
full_data.to_csv('full_data.csv', index=False)

### New Set of playlists

In [None]:
# Pure Pop Punk https://open.spotify.com/playlist/37i9dQZF1DXasneILDRM7B?si=c464a9abd4854e31
# Ambient Essentials : https://open.spotify.com/playlist/37i9dQZF1DWUrPBdYfoJvz?si=9d92f129c19f4d40
# gloomcore : https://open.spotify.com/playlist/37i9dQZF1DXbENHm2OgowX?si=a7ec0bea4fd64027
# Top Christian & Gospel : https://open.spotify.com/playlist/37i9dQZF1DXcb6CQIjdqKy?si=b9fe214ed75b4190
# Rainbow Mix : https://open.spotify.com/playlist/37i9dQZF1DX3lPnCj14MCY?si=915d3d3bfcb54460
# Classic Roadtrip Songs : https://open.spotify.com/playlist/37i9dQZF1DX9wC1KY45plY?si=9bba8553b4e241bb
# 70s Road Trip : https://open.spotify.com/playlist/37i9dQZF1DWWiDhnQ2IIru?si=876c21b38dd746ee
# Home for Christmas : https://open.spotify.com/playlist/37i9dQZF1DX4YPvoFXY6lj?si=0e1e7afeeec245f5
# Let's Get Funky : https://open.spotify.com/playlist/37i9dQZF1DWVKqzBo1JsCN?si=6b985610ab9f43c3

In [None]:
playlist_id_list_2 = ['37i9dQZF1DXasneILDRM7B','37i9dQZF1DWUrPBdYfoJvz','37i9dQZF1DXbENHm2OgowX','37i9dQZF1DXcb6CQIjdqKy','37i9dQZF1DX3lPnCj14MCY','37i9dQZF1DX9wC1KY45plY','37i9dQZF1DWWiDhnQ2IIru','37i9dQZF1DX4YPvoFXY6lj','37i9dQZF1DWVKqzBo1JsCN']

In [None]:
# Fetch every playlist first, then concatenate once instead of re-copying
# the accumulated `full_data` frame on each iteration.
playlist_frames_2 = []
for pl_id in playlist_id_list_2:
    playlist_frames_2.append(get_audio_features_from_playlist(pl_id))
full_data = pd.concat([full_data] + playlist_frames_2, axis=0)

In [None]:
full_data.shape

In [None]:
full_data.isna().sum()

In [None]:
full_data.to_csv('full_data.csv', index=False)

# Try to retrieve data from title and Artist (using data from previous lab)

In [6]:
from bs4 import BeautifulSoup
import requests

In [7]:
def list_scraping(genre, year, timeout=30):
    """Scrape the song titles and artists of one playback.fm chart page.

    Parameters
    ----------
    genre : str
        Genre segment of the chart URL (e.g. ``'rock'``).
    year : int or str
        Year segment of the chart URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns ``track_name`` and ``artist_name``; empty when the page
        contains no matching elements.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout`` seconds.
    ValueError
        Raised by pandas if the page yields a different number of titles and artists.
    """
    # create URL
    url_scrap = "https://playback.fm/charts/"+ genre +"/"+ str(year)
    # Create request; without a timeout a stalled connection would hang forever
    resp_scrap = requests.get(url_scrap, timeout=timeout)
    # Parse the response
    soup_scrap = BeautifulSoup(resp_scrap.content, "html.parser")

    # Get list of titles and artists
    # NOTE(review): titles and artists are selected independently and paired by
    # position -- this assumes the page lists exactly one artist link per song.
    title_scrap = [song.get_text().replace('\n','') for song in soup_scrap.select("span.song > a")]
    artist_scrap = [artist.get_text().replace('\n','') for artist in soup_scrap.select("a.artist")]

    # create dataframe
    data = pd.DataFrame({"track_name":title_scrap, "artist_name":artist_scrap })

    return data

In [None]:
test_df = list_scraping('rock', 2000)
test_df

In [None]:
type(test_df)

In [8]:
genre_list = ['rock', 'country', 'rnb', 'brasil']
year_range = range(2000,2022)

In [9]:
# Collect the scraped pages and concatenate them once, instead of growing the
# frame inside the loop starting from an empty DataFrame.
scraped_frames = []

for genre in genre_list:
    for year in year_range:
        temp_df = list_scraping(genre, year)
        # Keep only the genre/year pages that actually returned songs
        if len(temp_df['track_name']) > 0:
            scraped_frames.append(temp_df)

if scraped_frames:
    playback_fm = pd.concat(scraped_frames, axis=0)
else:
    playback_fm = pd.DataFrame(columns = ['track_name','artist_name'])

playback_fm.shape

(8200, 2)

In [10]:
playback_fm = playback_fm.reset_index(drop=True)

In [None]:
playback_fm

In [11]:
playback_fm.to_csv('playback_fm.csv')

In [None]:
# Let's try to find the uri for one song
print(playback_fm['track_name'][0])
print(playback_fm['artist_name'][0])

In [None]:
trial = sp.search(q='artist:' + playback_fm['artist_name'][0], type='artist')
trial

In [None]:
res = sp.search(q='track:' + playback_fm['track_name'][0], type='track')
res

In [None]:
res_track_keys = res['tracks'].keys()
res_track_keys

In [None]:
res['tracks']['next']

In [None]:
type(res)

In [None]:
res.keys()

In [None]:
res_tracks_items = res['tracks']['items']
res_tracks_items

In [None]:
res_track_items_df = pd.DataFrame(res['tracks']['items'])
res_track_items_df

I see here in the name column that some of the songs returned by Spotipy only contain the title we searched for, without being exact matches.

In [None]:
name_searched = playback_fm['track_name'][0].lower()
name_searched

In [None]:
res_track_items_df['name'] = res_track_items_df['name'].apply(lambda x: x.lower())
res_track_items_df = res_track_items_df[res_track_items_df['name'] == name_searched].reset_index(drop = True)
res_track_items_df

In [None]:
res_track_items_df['artists']

From this, I can extract the list of artists and check if one matches with what we were looking for. I can then retrieve the Spotify id of the right song.

In [None]:
res_track_items_df['artists'][0][0]['name']

In [None]:
artist_searched = playback_fm['artist_name'][0].lower()
artist_searched

In [None]:
for i in range(len(res_track_items_df['artists'])):
    art = res_track_items_df['artists'][i][0]['name'].lower()
    song_id = res_track_items_df['id'][i]
    print(art)
    print(song_id)

Let's make this a function.

In [12]:
def search_song_id(row):
    """Look up the Spotify track id of a song by searching on its title.

    Spotify is searched for the track title only. The first result whose name
    equals the title and whose first listed artist equals the artist (both
    compared case-insensitively) is returned.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the strings ``track_name`` and ``artist_name``.

    Returns
    -------
    str or None
        The Spotify id of the matching track, or ``None`` when the search
        returns nothing or no result matches.
    """
    # get name of the track and artist
    track_searched = row['track_name'].lower()
    artist_searched = row['artist_name'].lower()

    # Get data from Spotify
    # NOTE: only the first page (at most 50 results) is inspected; following
    # `res['tracks']['next']` to read further pages is not implemented.
    res = sp.search(q='track:' + track_searched, type='track', limit=50)
    items = res['tracks']['items']
    # Random pause after every request.
    sleep(randint(1,3))

    # initiate empty track_id
    track_id = None

    # Keep only exactly matching track names, then compare the first listed
    # artist with the one we search. An empty result list simply yields None.
    for item in items:
        if item['name'].lower() != track_searched:
            continue
        if item['artists'][0]['name'].lower() == artist_searched:
            track_id = item['id']
            break
    return track_id

In [13]:
# Let's try the function on a smaller dataframe.
test_data = playback_fm[playback_fm['artist_name']==playback_fm['artist_name'][0]].copy()
test_data

Unnamed: 0,track_name,artist_name
0,Loser,3 Doors Down
2,Kryptonite,3 Doors Down
121,Duck and Run,3 Doors Down
194,Be Like That,3 Doors Down
200,When I'm Gone,3 Doors Down
391,Here without You,3 Doors Down
416,Let Me Go,3 Doors Down
532,Let Me Go,3 Doors Down
578,Live For Today,3 Doors Down
810,It's Not My Time,3 Doors Down


In [14]:
test_data['track_id'] = test_data.apply(search_song_id, axis=1)
test_data

Unnamed: 0,track_name,artist_name,track_id
0,Loser,3 Doors Down,1aC1IBAETh2XYGOE8kCC6t
2,Kryptonite,3 Doors Down,6ZOBP3NvffbU4SZcrnt1k6
121,Duck and Run,3 Doors Down,0s8OsDxW96cl290PcsI2mX
194,Be Like That,3 Doors Down,3PbHle8KhJp6GuO1jLdEmQ
200,When I'm Gone,3 Doors Down,3WbphvawbMZ8FyqDxYGdSQ
391,Here without You,3 Doors Down,3NLrRZoMF0Lx6zTlYqeIo4
416,Let Me Go,3 Doors Down,0nNVR2iDM3eVzEgMi78vQm
532,Let Me Go,3 Doors Down,0nNVR2iDM3eVzEgMi78vQm
578,Live For Today,3 Doors Down,1fZLcY8rJze7s8ly8SZLqH
810,It's Not My Time,3 Doors Down,0uybt73QFXaLCoxuVf6fhm


#### Yippie !!! It seems to work !! 

Now I can apply it to the full PlayBack FM data set, but it is going to take forever...

I will first upload a version of the lab to GitHub and then let it run all night.

# New trial

I have just discovered how to search on song and artist at the same time. This significantly reduces the size of the response every time.
https://stackoverflow.com/questions/38664235/how-to-look-up-spotify-ids-song-track-ids-in-bulk

In [None]:
trial_2 = sp.search(q='track:' + playback_fm['track_name'][0] + ' artist:' + playback_fm['artist_name'][0], type='track')
trial_2

In [None]:
trial_2['tracks'].keys()

In [None]:
trial_2['tracks']['items'][0].keys()

In [None]:
trial_2['tracks']['items'][0]['name']

In [None]:
trial_2['tracks']['items'][0]['artists'][0]['name']

In [None]:
trial_2['tracks']['items'][1]['artists'][0]['name']

In [None]:
trial_2['tracks']['items'][2]['artists'][0]['name']

In [None]:
trial_2['tracks']['items'][1]['name']

In [None]:
for i in range(len(trial_2['tracks']['items'])):
    print(trial_2['tracks']['items'][i]['name'], trial_2['tracks']['items'][i]['artists'][0]['name'], trial_2['tracks']['items'][i]['id'])

In [None]:
print(playback_fm['track_name'][1])
print(playback_fm['artist_name'][1])
trial_3 = sp.search(q='track:' + playback_fm['track_name'][1] + ' artist:' + playback_fm['artist_name'][1], type='track')
for i in range(len(trial_3['tracks']['items'])):
    print(trial_3['tracks']['items'][i]['name'], trial_3['tracks']['items'][i]['artists'][0]['name'], trial_3['tracks']['items'][i]['id'])

In [None]:
sp.search(q='track:' + 'this wont exist' + ' artist:' + 'wont exist either', type='track', limit=50)

In [15]:
def search_song_id_2(row):
    """Look up the Spotify track id of a song by searching on title and artist.

    Spotify is searched with both the track title and the artist name. The
    first result whose name equals the title and whose first listed artist
    equals the artist (both compared case-insensitively) is returned.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the strings ``track_name`` and ``artist_name``.

    Returns
    -------
    str or None
        The Spotify id of the matching track, or ``None`` when the search
        returns nothing or no result matches.
    """
    # get name of the track and artist
    track_searched = row['track_name'].lower()
    artist_searched = row['artist_name'].lower()

    # Get data from Spotify
    # NOTE: only the first page (at most 50 results) is inspected; following
    # `res['tracks']['next']` to read further pages is not implemented.
    res = sp.search(q='track:' + track_searched + ' artist:' + artist_searched, type='track', limit=50)
    items = res['tracks']['items']
    # Random pause after every request, including searches without any result,
    # so that a run of misses does not hit the API back-to-back.
    sleep(randint(1,3))

    # initiate empty track_id
    track_id = None

    # Keep only exactly matching track names, then compare the first listed
    # artist with the one we search. An empty result list simply yields None.
    for item in items:
        if item['name'].lower() != track_searched:
            continue
        if item['artists'][0]['name'].lower() == artist_searched:
            track_id = item['id']
            break
    return track_id

In [16]:
# Let's try the function on a smaller dataframe.
test_data_2 = playback_fm[playback_fm['artist_name']==playback_fm['artist_name'][0]].copy()
# Apply the search to this cell's own frame rather than to `test_data` from the earlier trial.
test_data_2['track_id'] = test_data_2.apply(search_song_id_2, axis=1)
test_data_2

Unnamed: 0,track_name,artist_name,track_id
0,Loser,3 Doors Down,1aC1IBAETh2XYGOE8kCC6t
2,Kryptonite,3 Doors Down,6ZOBP3NvffbU4SZcrnt1k6
121,Duck and Run,3 Doors Down,0s8OsDxW96cl290PcsI2mX
194,Be Like That,3 Doors Down,3PbHle8KhJp6GuO1jLdEmQ
200,When I'm Gone,3 Doors Down,3WbphvawbMZ8FyqDxYGdSQ
391,Here without You,3 Doors Down,3NLrRZoMF0Lx6zTlYqeIo4
416,Let Me Go,3 Doors Down,0nNVR2iDM3eVzEgMi78vQm
532,Let Me Go,3 Doors Down,0nNVR2iDM3eVzEgMi78vQm
578,Live For Today,3 Doors Down,1fZLcY8rJze7s8ly8SZLqH
810,It's Not My Time,3 Doors Down,0uybt73QFXaLCoxuVf6fhm


This new version is more efficient, it has found all the id's for the test_set.

Let's apply this now to the full PlayBack_FM set.

In [17]:
playback_fm

Unnamed: 0,track_name,artist_name
0,Loser,3 Doors Down
1,Hanging by a Moment,Lifehouse
2,Kryptonite,3 Doors Down
3,No Leaf Clover,Metallica with Michael Kamen conducting The Sa...
4,Otherside,Red Hot Chili Peppers
...,...,...
8195,Pra Sempre,CPM 22
8196,Tô Mal,César Menotti & Fabiano
8197,A Paz Desse Amor,Paula Fernandes
8198,Eu Ligo Pra Você,Zé Neto & Cristiano


In [18]:
playback_fm['track_id'] = playback_fm.apply(search_song_id_2, axis=1)

KeyboardInterrupt: 

In [19]:
import numpy as np
pb_fm_subframes = np.array_split(playback_fm,9)
pb_fm_subframes

[              track_name                                        artist_name
 0                  Loser                                       3 Doors Down
 1    Hanging by a Moment                                          Lifehouse
 2             Kryptonite                                       3 Doors Down
 3         No Leaf Clover  Metallica with Michael Kamen conducting The Sa...
 4              Otherside                              Red Hot Chili Peppers
 ..                   ...                                                ...
 907      Know Your Enemy                                          Green Day
 908     Sound Of Madness                                          Shinedown
 909                Break                                   Three Days Grace
 910              Cyanide                                          Metallica
 911     Kings and Queens                                 30 Seconds to Mars
 
 [912 rows x 2 columns],
                                              tra

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_0 = pb_fm_subframes[0].copy()
pb_fm_0['track_id'] = pb_fm_0.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_0

In [None]:
pb_fm_0 = pb_fm_0.dropna().reset_index(drop=True)
pb_fm_0

In [None]:
# Let's get the audio feature now for this first part of the PlayBack FM data

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_0 = ['spotify:track:' + track_id for track_id in pb_fm_0['track_id']]
audio_features_pb_fm_0 = []

for start in range(0, len(track_uris_0), 100):
    audio_features_pb_fm_0.extend(sp.audio_features(track_uris_0[start:start + 100]))
    sleep(randint(1,3))

pb_fm_0['audio_features'] = audio_features_pb_fm_0

pb_fm_0 = flat_audio_feat(pb_fm_0)

pb_fm_0

In [None]:
pb_fm_0.to_csv('pb_fm_0.csv')

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_1 = pb_fm_subframes[1].copy()
pb_fm_1['track_id'] = pb_fm_1.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_1 = pb_fm_1.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_1 = ['spotify:track:' + track_id for track_id in pb_fm_1['track_id']]
audio_features_pb_fm_1 = []

for start in range(0, len(track_uris_1), 100):
    audio_features_pb_fm_1.extend(sp.audio_features(track_uris_1[start:start + 100]))
    sleep(randint(1,3))

pb_fm_1['audio_features'] = audio_features_pb_fm_1

pb_fm_1 = flat_audio_feat(pb_fm_1)

In [None]:
pb_fm = pd.concat([pb_fm_0, pb_fm_1], axis=0)

In [None]:
# The index only holds the (repeated) per-chunk row numbers, so it is not
# exported -- consistent with the later exports of this file.
pb_fm.to_csv('pb_fm.csv', index=False)

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_2 = pb_fm_subframes[2].copy()
pb_fm_2['track_id'] = pb_fm_2.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_2 = pb_fm_2.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_2 = ['spotify:track:' + track_id for track_id in pb_fm_2['track_id']]
audio_features_pb_fm_2 = []

for start in range(0, len(track_uris_2), 100):
    audio_features_pb_fm_2.extend(sp.audio_features(track_uris_2[start:start + 100]))
    sleep(randint(1,3))

pb_fm_2['audio_features'] = audio_features_pb_fm_2

pb_fm_2 = flat_audio_feat(pb_fm_2)

In [None]:
pb_fm = pd.concat([pb_fm, pb_fm_2], axis=0)
# The index only holds the (repeated) per-chunk row numbers, so it is not
# exported -- consistent with the later exports of this file.
pb_fm.to_csv('pb_fm.csv', index=False)

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_3 = pb_fm_subframes[3].copy()
pb_fm_3['track_id'] = pb_fm_3.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_3 = pb_fm_3.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_3 = ['spotify:track:' + track_id for track_id in pb_fm_3['track_id']]
audio_features_pb_fm_3 = []

for start in range(0, len(track_uris_3), 100):
    audio_features_pb_fm_3.extend(sp.audio_features(track_uris_3[start:start + 100]))
    sleep(randint(1,3))

pb_fm_3['audio_features'] = audio_features_pb_fm_3

pb_fm_3 = flat_audio_feat(pb_fm_3)

pb_fm = pd.concat([pb_fm, pb_fm_3], axis=0)
pb_fm.to_csv('pb_fm.csv', index=False)

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_4 = pb_fm_subframes[4].copy()
pb_fm_4['track_id'] = pb_fm_4.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_4 = pb_fm_4.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_4 = ['spotify:track:' + track_id for track_id in pb_fm_4['track_id']]
audio_features_pb_fm_4 = []

for start in range(0, len(track_uris_4), 100):
    audio_features_pb_fm_4.extend(sp.audio_features(track_uris_4[start:start + 100]))
    sleep(randint(1,3))

pb_fm_4['audio_features'] = audio_features_pb_fm_4

pb_fm_4 = flat_audio_feat(pb_fm_4)

pb_fm = pd.concat([pb_fm, pb_fm_4], axis=0)
pb_fm.to_csv('pb_fm.csv', index=False)

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_5 = pb_fm_subframes[5].copy()
pb_fm_5['track_id'] = pb_fm_5.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_5 = pb_fm_5.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_5 = ['spotify:track:' + track_id for track_id in pb_fm_5['track_id']]
audio_features_pb_fm_5 = []

for start in range(0, len(track_uris_5), 100):
    audio_features_pb_fm_5.extend(sp.audio_features(track_uris_5[start:start + 100]))
    sleep(randint(1,3))

pb_fm_5['audio_features'] = audio_features_pb_fm_5

pb_fm_5 = flat_audio_feat(pb_fm_5)

pb_fm = pd.concat([pb_fm, pb_fm_5], axis=0)
pb_fm.to_csv('pb_fm.csv', index=False)

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_6 = pb_fm_subframes[6].copy()
pb_fm_6['track_id'] = pb_fm_6.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_6 = pb_fm_6.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_6 = ['spotify:track:' + track_id for track_id in pb_fm_6['track_id']]
audio_features_pb_fm_6 = []

for start in range(0, len(track_uris_6), 100):
    audio_features_pb_fm_6.extend(sp.audio_features(track_uris_6[start:start + 100]))
    sleep(randint(1,3))

pb_fm_6['audio_features'] = audio_features_pb_fm_6

pb_fm_6 = flat_audio_feat(pb_fm_6)

pb_fm = pd.concat([pb_fm, pb_fm_6], axis=0)
pb_fm.to_csv('pb_fm.csv', index=False)

In [None]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_7 = pb_fm_subframes[7].copy()
pb_fm_7['track_id'] = pb_fm_7.apply(search_song_id_2, axis=1)

In [None]:
pb_fm_7 = pb_fm_7.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_7 = ['spotify:track:' + track_id for track_id in pb_fm_7['track_id']]
audio_features_pb_fm_7 = []

for start in range(0, len(track_uris_7), 100):
    audio_features_pb_fm_7.extend(sp.audio_features(track_uris_7[start:start + 100]))
    sleep(randint(1,3))

pb_fm_7['audio_features'] = audio_features_pb_fm_7

pb_fm_7 = flat_audio_feat(pb_fm_7)

pb_fm = pd.concat([pb_fm, pb_fm_7], axis=0)
pb_fm.to_csv('pb_fm.csv', index=False)

In [21]:
# Work on a copy so the new column is not written into the element of
# `pb_fm_subframes` (which may be a view of `playback_fm`).
pb_fm_8 = pb_fm_subframes[8].copy()
pb_fm_8['track_id'] = pb_fm_8.apply(search_song_id_2, axis=1)

In [22]:
pb_fm_8

Unnamed: 0,track_name,artist_name,track_id
7289,Por Um Gole A Mais,Bruno & Marrone,6gtz6ZrxOGySZgeNDECih1
7290,Wake Up,Hilary Duff,7qhUtJDCu1tYrdkIMPVYRc
7291,Quero Só Você,AfroReggae,
7292,Breakaway,Kelly Clarkson,61Qhe2mHSLhUE04QeK4lkD
7293,Déjà Vu,Pitty,5Gbeo74EGXQmObATXwPjzd
...,...,...,...
8195,Pra Sempre,CPM 22,
8196,Tô Mal,César Menotti & Fabiano,2ainqIGj3V7Vfv5lDYPbzH
8197,A Paz Desse Amor,Paula Fernandes,5GqGypWJIx3nLLFrvOrqcj
8198,Eu Ligo Pra Você,Zé Neto & Cristiano,4qQBnUPsHRdz2uPFLdflde


In [23]:
pb_fm = pd.read_csv('pb_fm.csv')

In [24]:
pb_fm

Unnamed: 0,track_name,artist_name,track_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Loser,3 Doors Down,1aC1IBAETh2XYGOE8kCC6t,0.493,0.880,-6.424,0.0693,0.010800,0.000027,0.2260,0.389,147.913,4
1,Hanging by a Moment,Lifehouse,0wqOReZDnrefefEsrIGeR4,0.541,0.864,-4.915,0.0357,0.001180,0.000000,0.0896,0.435,124.557,4
2,Kryptonite,3 Doors Down,6ZOBP3NvffbU4SZcrnt1k6,0.545,0.865,-5.708,0.0286,0.006640,0.000011,0.1680,0.543,99.009,4
3,Otherside,Red Hot Chili Peppers,64BbK9SFKH2jk86U3dGj2P,0.458,0.795,-3.265,0.0574,0.003160,0.000202,0.0756,0.513,123.229,4
4,Hemorrhage (In My Hands),Fuel,1sjrDQXqAa9V07FjKIlAQ4,0.301,0.823,-4.072,0.0433,0.000144,0.000309,0.0833,0.372,152.017,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4662,So Sick,Ne-Yo,6brl7bwOHmGFkNw3MBqssT,0.452,0.574,-8.336,0.3100,0.246000,0.000000,0.1890,0.580,92.791,4
4663,Sorry,Madonna,55whbebR4olz2HHcRQa2kx,0.589,0.877,-6.229,0.0460,0.001210,0.013000,0.1690,0.443,132.971,4
4664,Eu Sei,Papas da Língua,5YQPOIAt3U7DMAswTVvzax,0.440,0.613,-8.234,0.0301,0.590000,0.000000,0.9350,0.397,108.087,4
4665,O Inferno São Os Outros,Titãs,35jkaBY09RzxjCotqypr4v,0.416,0.975,-5.220,0.1210,0.004140,0.000002,0.9390,0.666,159.939,4


In [29]:
pb_fm_8 = pb_fm_8.dropna().reset_index(drop=True)

# `sp.audio_features` accepts a list of tracks (up to 100 per call), so the
# features are requested in batches instead of one request (and one pause) per track.
track_uris_8 = ['spotify:track:' + track_id for track_id in pb_fm_8['track_id']]
audio_features_pb_fm_8 = []

for start in range(0, len(track_uris_8), 100):
    audio_features_pb_fm_8.extend(sp.audio_features(track_uris_8[start:start + 100]))
    sleep(randint(1,3))

pb_fm_8['audio_features'] = audio_features_pb_fm_8

pb_fm_8 = flat_audio_feat(pb_fm_8)

pb_fm = pd.concat([pb_fm, pb_fm_8], axis=0)
pb_fm.to_csv('pb_fm.csv', index=False)

In [31]:
pb_fm.shape

(5112, 13)