# Setup

In [1]:
import os
import re
import requests
from time import sleep

import numpy as np
import pandas as pd
import spotipy

from bs4 import BeautifulSoup
from dotenv import load_dotenv, find_dotenv
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from tqdm.auto import tqdm

# Web Scraping

## List of viral songs on TikTok

### Get response

https://www.popsugar.com/entertainment/popular-tiktok-songs-47289804?stream_view=1#photo-47289832

In [2]:
# Download the PopSugar article that lists the viral TikTok songs.
# The last line displays the response object so the HTTP status can be checked.
url = 'https://www.popsugar.com/entertainment/popular-tiktok-songs-47289804?stream_view=1#photo-47289832'
response = requests.get(url)
response

<Response [200]>

### Get desired content

In [3]:
# Parse the HTML of the downloaded page
content_popsugar = BeautifulSoup(response.text)

# Publication date of the post: text of the first <time> element, without line breaks
# or surrounding whitespace
date_popsugar = content_popsugar.find('time').text.replace('\n', '').strip()

# Keep only the <span class="count-copy"> elements, which hold the songs and artists
html_popsugar = content_popsugar.find_all('span', attrs={'class': 'count-copy'})

In [4]:
# Convert the list 'html_popsugar' to a pandas DataFrame.
# After the double quotes are removed each entry reads '<song> by <artists>'. The text is split
# on the LAST ' by ' only, so a song title that itself contains ' by ' still yields exactly
# the two fields expected by the 'columns' argument.
df_base = pd.DataFrame([song.text.replace('"', '').strip().rsplit(' by ', 1) for song in html_popsugar],
                       columns=['song', 'artists'])

# Check the result
df_base

Unnamed: 0,song,artists
0,Roxanne,Arizona Zervas
1,Say So,Doja Cat
2,My Oh My,Camila Cabello feat. DaBaby
3,Moon,Kid Francescoli
4,Vibe,Cookiee Kawaii
...,...,...
64,What the Hell,Avril Lavigne
65,Towards the Sun,Rihanna
66,I Think I'm OKAY,"Machine Gun Kelly, YUNGBLUD, and Travis Barker"
67,Myself,Bazzi


### Data Cleaning

In [5]:
# Split the credited artists into a list. Recognised separators:
#   ' and ' (optionally preceded by commas), ', ' and ' feat. ' / ' Feat. '
# The pattern is a raw string and the dot of 'feat.' is escaped so that it matches a literal
# '.' only (unescaped, it would match any character).
df_base['artists_list'] = [re.split(r',* and |, * | [Ff]eat\. ', artists.strip())
                           for artists in df_base['artists']]

# Number of artists credited on each song
df_base['number_artists'] = df_base['artists_list'].apply(len)

In [6]:
# How many songs are credited to one, two, three... artists
df_base['number_artists'].value_counts()

1    48
2    18
3     3
Name: number_artists, dtype: int64

In [7]:
# Spread the first three artists over the columns 'artist_1', 'artist_2' and 'artist_3'.
# A position the song does not have is filled with 'no-artist'. The test is
# 'more artists than this position' for every column, so a song with more than
# three artists still gets its third artist.
for position in range(3):
    df_base[f'artist_{position + 1}'] = df_base['artists_list'].apply(
        lambda names, position=position: names[position] if len(names) > position else 'no-artist'
    )

In [8]:
df_base.head()

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist


### Make backup dataframe

In [9]:
# Keep a copy of the scraped and cleaned table before the API columns are added
df_base_raw_bck = df_base.copy()

## List of music genre

A list of music genres is needed to filter the tags that will be obtained from the Last.fm API.

https://musicbrainz.org/genres

### Get the response

In [10]:
# Download the MusicBrainz genre page.
# NOTE: this reuses the names 'url' and 'response' from the PopSugar request above.
# The last line displays the response object so the HTTP status can be checked.
url = 'https://musicbrainz.org/genres'
response = requests.get(url)
response

<Response [200]>

### Get desired content

In [11]:
# Parse the HTML of the downloaded genre page
musicbrainz_content = BeautifulSoup(response.content)

# Create a list with the music genres listed in the url:
# the text of every <bdi> element found in the page
musicbrainz_genre = [genre.text for genre in musicbrainz_content.find_all('bdi')]

# Spotify

https://spotipy.readthedocs.io/en/2.14.0/#module-spotipy.client

https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/

## Connecting to the API

In [82]:
# Load the variables of the .env file located by find_dotenv() into the environment
load_dotenv(find_dotenv())

True

In [83]:
# Spotify application credentials, read from the environment (None if a variable is not set)
cid = os.getenv('spotify_p03_key')
csecret = os.getenv('spotify_p03_secret')

# Client authenticated with the client-credentials manager; used for every Spotify query below
cc_manager = SpotifyClientCredentials(client_id=cid, client_secret=csecret)
sp = spotipy.Spotify(client_credentials_manager=cc_manager)

## Songs

### Search information about each song

In [14]:
# Search in the API wrapper: one track search per song title, keeping up to 50 candidates.
# The titles are read straight from the 'song' column instead of combining index labels
# with positional '.iloc' access, which is only correct for a default 0..n-1 index.
spotify_songs = [sp.search(q=song, type='track', limit=50) for song in tqdm(df_base['song'])]

HBox(children=(FloatProgress(value=0.0, max=69.0), HTML(value='')))




In [15]:
len(spotify_songs)

69

In [16]:
# Store the raw search result of each song next to the song it was queried for
df_base['spotify_search'] = spotify_songs

In [17]:
df_base.head()

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3,spotify_search
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...


In [18]:
def get_spotify_track_info(df):
    """Attach Spotify track metadata to every song of ``df``.

    For each row, the candidates stored in the ``spotify_search`` column are
    scanned in order and the first one is kept that satisfies all of:

    * the lower-cased song title is contained in the candidate's name,
    * the candidate has exactly ``number_artists`` artists,
    * the candidate's first artist is one of the names in ``artists_list``
      (case-insensitive).

    Rows are paired by position, so the function works for any index, not
    only for the default ``0..n-1`` one.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``song`` (str), ``artists_list`` (list of
        str), ``number_artists`` (int) and ``spotify_search`` (dict with the
        candidates under ``['tracks']['items']``).

    Returns
    -------
    pandas.DataFrame
        The same object, modified in place with the new columns ``sp_id``,
        ``sp_duration_ms``, ``sp_popularity``, ``sp_release_date`` and
        ``sp_explicit``. Rows without an acceptable candidate hold the
        string ``'not-found'`` in all five columns.
    """
    not_found = 'not-found'

    # Output column -> how to read its value from a Spotify track object
    extractors = {
        'sp_id': lambda track: track['id'],
        'sp_duration_ms': lambda track: track['duration_ms'],
        'sp_popularity': lambda track: track['popularity'],
        'sp_release_date': lambda track: track['album']['release_date'],
        'sp_explicit': lambda track: track['explicit'],
    }
    collected = {column: [] for column in extractors}

    rows = zip(df['song'], df['artists_list'], df['number_artists'], df['spotify_search'])
    for song, artists, total_artists, search in rows:
        song_name = song.lower()
        wanted_artists = {artist.lower() for artist in artists}

        # First candidate that passes the three checks; None if the search
        # returned nothing or no candidate qualifies
        match = next(
            (
                track for track in search['tracks']['items']
                if song_name in track['name'].lower()
                and len(track['artists']) == total_artists
                and track['artists'][0]['name'].lower() in wanted_artists
            ),
            None,
        )

        for column, extract in extractors.items():
            collected[column].append(not_found if match is None else extract(match))

    # Inplace
    for column, values in collected.items():
        df[column] = values

    return df

In [19]:
# Add the 'sp_*' track columns to 'df_base' (the function modifies the frame in place)
# and preview the result
get_spotify_track_info(df_base)
df_base.head()

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3,spotify_search,sp_id,sp_duration_ms,sp_popularity,sp_release_date,sp_explicit
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,696DnlkuDOXcMAnKlTgXXK,163636,89,2019-10-10,True
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3Dv1eDb0MEgF93GpLXlucZ,237893,89,2019-11-07,True
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3yOlyBJuViE2YSGn3nVE1K,170746,83,2019-12-06,False
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,24upABZ8A0sAepfu91sEYr,390638,70,2017-03-03,False
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,4gOgQTv9RYYFZ1uQNnlk3q,83940,73,2019-03-29,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,What the Hell,Avril Lavigne,[Avril Lavigne],1,Avril Lavigne,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,2z4U9d5OAA4YLNXoCgioxo,220706,74,2011-03-08,False
65,Towards the Sun,Rihanna,[Rihanna],1,Rihanna,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,1UuZhGTon3gzXQAJzNa2A4,273293,55,2015-03-23,False
66,I Think I'm OKAY,"Machine Gun Kelly, YUNGBLUD, and Travis Barker","[Machine Gun Kelly, YUNGBLUD, Travis Barker]",3,Machine Gun Kelly,YUNGBLUD,Travis Barker,{'tracks': {'href': 'https://api.spotify.com/v...,2gTdDMpNxIRFSiu7HutMCg,169397,81,2019-07-05,True
67,Myself,Bazzi,[Bazzi],1,Bazzi,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,5YLHLxoZsodDWjqSgjhBf3,167552,77,2018-04-12,False


In [21]:
df_base.head()

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3,spotify_search,sp_id,sp_duration_ms,sp_popularity,sp_release_date,sp_explicit
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,696DnlkuDOXcMAnKlTgXXK,163636,89,2019-10-10,True
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3Dv1eDb0MEgF93GpLXlucZ,237893,89,2019-11-07,True
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3yOlyBJuViE2YSGn3nVE1K,170746,83,2019-12-06,False
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,24upABZ8A0sAepfu91sEYr,390638,70,2017-03-03,False
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,4gOgQTv9RYYFZ1uQNnlk3q,83940,73,2019-03-29,True


### Find the audio features for each song

In [25]:
# Search in the API wrapper: request the audio features of every matched track.
# Songs without a Spotify id keep the marker 'not-found' instead of an API response.
spotify_audio_features = [
    'not-found' if track_id == 'not-found' else sp.audio_features(track_id)
    for track_id in tqdm(df_base['sp_id'])
]

HBox(children=(FloatProgress(value=0.0, max=69.0), HTML(value='')))




In [27]:
len(spotify_audio_features)

69

In [41]:
def get_spotify_audio_features(df, audio_features: list):
    """Attach the Spotify audio features to every song of ``df``.

    ``audio_features`` is paired with the rows of ``df`` by position, so the
    function works for any index, not only for the default ``0..n-1`` one.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``sp_id`` holding the Spotify track id, or
        the string ``'not-found'`` for songs that were not matched.
    audio_features : list
        One entry per row of ``df``: either the string ``'not-found'`` or a
        list whose first element is the dict of audio features for the track
        (or ``None`` when no features are available).

    Returns
    -------
    pandas.DataFrame
        The same object, modified in place with one ``sp_<feature>`` column
        per audio feature. Rows without usable features hold the string
        ``'not-found'`` in all of these columns.

    Raises
    ------
    ValueError
        If ``audio_features`` has fewer entries than ``df`` has rows.
    """
    not_found = 'not-found'
    feature_names = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )

    if len(audio_features) < len(df):
        raise ValueError(
            f'audio_features has {len(audio_features)} entries for {len(df)} rows'
        )

    collected = {name: [] for name in feature_names}

    for track_id, response in zip(df['sp_id'], audio_features):
        features = None
        # A usable response is a non-empty list; its first element may still be
        # None when no features exist for the id, which is treated as not found
        # instead of failing on the key lookup
        if track_id != not_found and response != not_found and response:
            features = response[0]

        for name in feature_names:
            collected[name].append(not_found if features is None else features[name])

    # Inplace
    for name in feature_names:
        df[f'sp_{name}'] = collected[name]

    return df

In [43]:
# Add the audio-feature columns to 'df_base' (the function modifies the frame in place)
# and preview the result
get_spotify_audio_features(df_base, spotify_audio_features)
df_base.head()

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3,spotify_search,sp_id,sp_duration_ms,...,sp_key,sp_loudness,sp_mode,sp_speechiness,sp_acousticness,sp_instrumentalness,sp_liveness,sp_valence,sp_tempo,sp_time_signature
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,696DnlkuDOXcMAnKlTgXXK,163636,...,6,-5.616,0,0.148,0.0522,0.0,0.46,0.457,116.735,5
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3Dv1eDb0MEgF93GpLXlucZ,237893,...,11,-4.577,0,0.158,0.256,3.57e-06,0.0904,0.786,110.962,4
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3yOlyBJuViE2YSGn3nVE1K,170746,...,8,-6.024,1,0.0296,0.018,1.29e-05,0.0887,0.383,105.046,4
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,24upABZ8A0sAepfu91sEYr,390638,...,7,-10.002,1,0.0345,0.288,0.856,0.102,0.0584,117.986,4
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,4gOgQTv9RYYFZ1uQNnlk3q,83940,...,10,-8.719,1,0.344,0.0635,0.00932,0.118,0.175,159.947,4


## Artists

In [51]:
# One row per distinct artist: flatten the per-song artist lists, then drop repeated names
df_artist = pd.DataFrame(
    [name for names in df_base['artists_list'] for name in names],
    columns=['artist'],
).drop_duplicates()
df_artist.head()

Unnamed: 0,artist
0,Arizona Zervas
1,Doja Cat
2,Camila Cabello
3,DaBaby
4,Kid Francescoli


In [55]:
# Search in the API wrapper: one artist search per distinct artist name
spotify_artists = [sp.search(q=name, type='artist') for name in tqdm(df_artist['artist'])]

HBox(children=(FloatProgress(value=0.0, max=91.0), HTML(value='')))




In [81]:
spotify_artists[3]['artists']['items'][:2]

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/4r63FhuTkUYltbVAg5TQnk'},
  'followers': {'href': None, 'total': 4291213},
  'genres': ['north carolina hip hop', 'rap'],
  'href': 'https://api.spotify.com/v1/artists/4r63FhuTkUYltbVAg5TQnk',
  'id': '4r63FhuTkUYltbVAg5TQnk',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/f68192e6516d89a77a2b16904725a77b75b42056',
    'width': 640},
   {'height': 320,
    'url': 'https://i.scdn.co/image/f88e08cf9132c7facc2ee9fbdd1be3924b5c5a74',
    'width': 320},
   {'height': 160,
    'url': 'https://i.scdn.co/image/1b6dd2116962f6d9741d0181708e31006b5048a7',
    'width': 160}],
  'name': 'DaBaby',
  'popularity': 95,
  'type': 'artist',
  'uri': 'spotify:artist:4r63FhuTkUYltbVAg5TQnk'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/7MCrEuHBgUcjP8eMxM2IFC'},
  'followers': {'href': None, 'total': 144},
  'genres': [],
  'href': 'https://api.spotify.com/v1/artists/7MCrEuHBgUcjP8eMxM2IFC',
  'id': '7MCrE

## Playlists

In [86]:
# Search in the API wrapper: playlists matching the query 'tiktok', at most 50 results
spotify_tiktok = sp.search(q='tiktok', type='playlist', limit=50)

In [87]:
len(spotify_tiktok['playlists']['items'])

50

# Last.fm

https://www.last.fm/api/show/track.getInfo



headers = {'user-agent': lastfm_user}

payload = {
    'api_key': lastfm_key,
    'method': 'track.getInfo'
}

response = requests.get('http://ws.audioscrobbler.com/2.0/', headers=headers, params=payload)
response

## Connecting to the API and collecting data

Example of how to connect to the API and collect data

```python
response = lastfm_get({
    'method': 'track.getInfo',
    'track': 'My Oh My',
    'artist': 'Camila Cabello'
})
```

In [88]:
# Last.fm API key, read from the environment (None if the variable is not set)
lastfm_key = os.getenv('lastfm_p03_key')
# Value sent as the 'user-agent' header of every Last.fm request
lastfm_user = 'gnakasato'

In [89]:
# Function to connect to the API and get data

def lastfm_get(payload, timeout=30):
    """Send a GET request to the Last.fm API and return the raw response.

    Parameters
    ----------
    payload : dict
        Query parameters of the call, e.g. ``{'method': 'track.getInfo',
        'track': ..., 'artist': ...}``. The dict is not modified.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up, so that
        a stalled connection cannot block the notebook indefinitely.

    Returns
    -------
    requests.Response
        The response of the API. No ``format`` parameter is sent, so the body
        stays in the markup format that the cells below parse with
        BeautifulSoup (``find_all('tag')``).
    """
    # Define the headers and the url
    headers = {'user-agent': lastfm_user}
    url = 'http://ws.audioscrobbler.com/2.0/'

    # Add the API key to a copy of the payload, leaving the caller's dict untouched.
    # The previous version also sent the key a second time under a parameter literally
    # named 'format=json'. That entry is dropped rather than turned into
    # format=json, because the downstream parsing expects markup, not JSON.
    params = dict(payload)
    params['api_key'] = lastfm_key

    return requests.get(url, headers=headers, params=params, timeout=timeout)

In [92]:
# Create an auxiliary list: one parsed API response per song, in the row order of 'df_base'
lastfm_track_info = []

# Connect to the API and collect data.
# The song title and its first artist are read by column name rather than by position.
for song, artist in tqdm(zip(df_base['song'], df_base['artist_1']), total=len(df_base)):

    # Connect to the API.
    # The parameter is named 'autocorrect'; '[0|1]' in the API documentation only lists
    # its allowed values, so sending 'autocorrect[0|1]' never enabled the correction.
    response = lastfm_get({'method': 'track.getInfo',
                           'track': song,
                           'artist': artist,
                           'autocorrect': 1})

    # Convert the data into a more amicable format
    track_info = BeautifulSoup(response.text)

    # Add the data to the auxiliary list
    lastfm_track_info.append(track_info)

    # Wait 1 second to check the next track
    sleep(1)

HBox(children=(FloatProgress(value=0.0, max=69.0), HTML(value='')))




In [94]:
# Create a list with the <tag> elements of each track.
# A response whose text is just 'Track not found' means the API does not know the track;
# its entry is the marker 'not-found' instead of a list of elements.
lastfm_tags = [
    'not-found' if track.text.replace('\n\n', '') == 'Track not found' else track.find_all('tag')
    for track in lastfm_track_info
]

# Check the result for the first two tracks
lastfm_tags[:2]

[[<tag><name>2019</name>
  <url>https://www.last.fm/tag/2019</url>
  </tag>,
  <tag><name>2010s</name>
  <url>https://www.last.fm/tag/2010s</url>
  </tag>,
  <tag><name>arizona zervas</name>
  <url>https://www.last.fm/tag/arizona+zervas</url>
  </tag>,
  <tag><name>Hip-Hop</name>
  <url>https://www.last.fm/tag/Hip-Hop</url>
  </tag>,
  <tag><name>rap</name>
  <url>https://www.last.fm/tag/rap</url>
  </tag>],
 [<tag><name>pop</name>
  <url>https://www.last.fm/tag/pop</url>
  </tag>,
  <tag><name>Disco</name>
  <url>https://www.last.fm/tag/Disco</url>
  </tag>,
  <tag><name>rap</name>
  <url>https://www.last.fm/tag/rap</url>
  </tag>,
  <tag><name>Hip-Hop</name>
  <url>https://www.last.fm/tag/Hip-Hop</url>
  </tag>,
  <tag><name>female vocalists</name>
  <url>https://www.last.fm/tag/female+vocalists</url>
  </tag>]]

The idea is to build a nested list that holds one list of tags per track.

In [101]:
# Create auxiliary lists
tracks_tags = []  # Final list: one list of genre tags per track, in the row order of 'df_base'
tags_exist = []  # Every genre tag kept, one entry per (track, tag) occurrence

# Set of known genres, so each membership test does not scan the whole list
genre_names = set(musicbrainz_genre)

# Get the tag names for each track from the raw (messy) data
for track_tag in lastfm_tags:

    # If the track was not found in the API
    if track_tag == 'not-found':
        tracks_tags.append(['not-found'])

    # If the track was found, but there is no tag related to the track
    elif len(track_tag) == 0:
        tracks_tags.append(['no-tag'])

    # If the track was found and there are tags related to the track
    else:

        # Clean tags of this track. The list is created once per track and every tag is
        # processed exactly once; re-processing all earlier tags on each step would append
        # the same tag to 'tags_exist' several times.
        each_track_tags = []

        for each_tag in track_tag:

            # Tag name in lower case, with hyphens turned into spaces
            # (e.g. 'Hip-Hop' becomes 'hip hop')
            tag = each_tag.find('name').text.lower().replace('-', ' ')

            # Keep the tag only if it is one of the genres listed in 'musicbrainz_genre'
            if tag in genre_names:
                each_track_tags.append(tag)
                tags_exist.append(tag)

        # Add the list of clean tags of this track to the final list
        # (it is empty when none of the track's tags is a known genre)
        tracks_tags.append(each_track_tags)

# Add a column in 'df_base' with the tags found in the Last.fm API
df_base['lastfm_tags'] = tracks_tags

In [102]:
df_base

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3,spotify_search,sp_id,sp_duration_ms,...,sp_loudness,sp_mode,sp_speechiness,sp_acousticness,sp_instrumentalness,sp_liveness,sp_valence,sp_tempo,sp_time_signature,lastfm_tags
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,696DnlkuDOXcMAnKlTgXXK,163636,...,-5.616,0,0.148,0.0522,0,0.46,0.457,116.735,5,[hip hop]
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3Dv1eDb0MEgF93GpLXlucZ,237893,...,-4.577,0,0.158,0.256,3.57e-06,0.0904,0.786,110.962,4,"[pop, disco, hip hop]"
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,3yOlyBJuViE2YSGn3nVE1K,170746,...,-6.024,1,0.0296,0.018,1.29e-05,0.0887,0.383,105.046,4,[pop]
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,24upABZ8A0sAepfu91sEYr,390638,...,-10.002,1,0.0345,0.288,0.856,0.102,0.0584,117.986,4,"[chillout, indie pop]"
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,4gOgQTv9RYYFZ1uQNnlk3q,83940,...,-8.719,1,0.344,0.0635,0.00932,0.118,0.175,159.947,4,[no-tag]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,What the Hell,Avril Lavigne,[Avril Lavigne],1,Avril Lavigne,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,2z4U9d5OAA4YLNXoCgioxo,220706,...,-3.689,0,0.0548,0.00472,0.0127,0.14,0.877,149.976,4,"[pop rock, pop, rock]"
65,Towards the Sun,Rihanna,[Rihanna],1,Rihanna,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,1UuZhGTon3gzXQAJzNa2A4,273293,...,-6.207,0,0.0392,0.0531,0,0.152,0.263,170.18,4,[pop]
66,I Think I'm OKAY,"Machine Gun Kelly, YUNGBLUD, and Travis Barker","[Machine Gun Kelly, YUNGBLUD, Travis Barker]",3,Machine Gun Kelly,YUNGBLUD,Travis Barker,{'tracks': {'href': 'https://api.spotify.com/v...,2gTdDMpNxIRFSiu7HutMCg,169397,...,-4.718,1,0.0379,0.0257,0,0.313,0.277,119.921,4,[]
67,Myself,Bazzi,[Bazzi],1,Bazzi,no-artist,no-artist,{'tracks': {'href': 'https://api.spotify.com/v...,5YLHLxoZsodDWjqSgjhBf3,167552,...,-5.513,0,0.072,0.465,1.12e-06,0.0338,0.902,195.918,4,"[pop, hip hop]"


# Final Dataframe

In [103]:
# Final table: everything collected so far, minus the raw Spotify search payload
df = df_base.drop(columns=['spotify_search'])
df.head()

Unnamed: 0,song,artists,artists_list,number_artists,artist_1,artist_2,artist_3,sp_id,sp_duration_ms,sp_popularity,...,sp_loudness,sp_mode,sp_speechiness,sp_acousticness,sp_instrumentalness,sp_liveness,sp_valence,sp_tempo,sp_time_signature,lastfm_tags
0,Roxanne,Arizona Zervas,[Arizona Zervas],1,Arizona Zervas,no-artist,no-artist,696DnlkuDOXcMAnKlTgXXK,163636,89,...,-5.616,0,0.148,0.0522,0.0,0.46,0.457,116.735,5,[hip hop]
1,Say So,Doja Cat,[Doja Cat],1,Doja Cat,no-artist,no-artist,3Dv1eDb0MEgF93GpLXlucZ,237893,89,...,-4.577,0,0.158,0.256,3.57e-06,0.0904,0.786,110.962,4,"[pop, disco, hip hop]"
2,My Oh My,Camila Cabello feat. DaBaby,"[Camila Cabello, DaBaby]",2,Camila Cabello,DaBaby,no-artist,3yOlyBJuViE2YSGn3nVE1K,170746,83,...,-6.024,1,0.0296,0.018,1.29e-05,0.0887,0.383,105.046,4,[pop]
3,Moon,Kid Francescoli,[Kid Francescoli],1,Kid Francescoli,no-artist,no-artist,24upABZ8A0sAepfu91sEYr,390638,70,...,-10.002,1,0.0345,0.288,0.856,0.102,0.0584,117.986,4,"[chillout, indie pop]"
4,Vibe,Cookiee Kawaii,[Cookiee Kawaii],1,Cookiee Kawaii,no-artist,no-artist,4gOgQTv9RYYFZ1uQNnlk3q,83940,73,...,-8.719,1,0.344,0.0635,0.00932,0.118,0.175,159.947,4,[no-tag]
