https://www.standardmedia.co.ke/sci-tech/article/2001386004/ai-robot-develops-the-perfect-tiktok-song-and-it-s-very-unusual

# Setup

In [119]:
import os
import re
import requests
from time import sleep

import numpy as np
import pandas as pd
import spotipy
import tqdm
from bs4 import BeautifulSoup
from dotenv import load_dotenv, find_dotenv
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth

# Web Scrapping

## List of viral songs on TikTok

In [2]:
url = 'https://www.buzzfeednews.com/article/laurenstrapagiel/most-viral-tiktok-songs-of-2020'

In [3]:
response = requests.get(url)
response

<Response [200]>

In [4]:
content_buzzfeed = BeautifulSoup(response.text)
#content

In [5]:
html_buzzfeed = content_buzzfeed.find_all('h2', limit=13)
html_buzzfeed

[<h2>1. "Laxed (SIREN BEAT)" by Jawsh 685</h2>,
 <h2>2. "Roses (Imanbek Remix)" by SAINt JHN and Imanbek</h2>,
 <h2>3. "Boss Bitch" by Doja Cat</h2>,
 <h2>4. "Supalonely" by BENEE feat. Gus Dapperton</h2>,
 <h2>5. "Stunnin'" by Curtis Waters feat. Harm Franklin</h2>,
 <h2>6. "Lottery'" by K CAMP</h2>,
 <h2>7. "ROXANNE'" by Arizona Zervas</h2>,
 <h2>8. "Bored in the House'" by Curtis Roach</h2>,
 <h2>9. "Sober Up" by AJR feat. Rivers Cuomo</h2>,
 <h2>10. "Prom Queen" by Beach Bunny</h2>,
 <h2>11. "Vibe" by Cookiee Kawaii</h2>,
 <h2>12. "Renee" by SALES</h2>,
 <h2>13. "Savage" by Megan Thee Stallion</h2>]

In [6]:
date_buzzfeed = content_buzzfeed.find('p', attrs={'class': 'news-article-header__timestamps-posted'}).text.replace('\n', '').strip()
date_buzzfeed

'Posted on July 23, 2020, at 3:24 p.m. ET'

In [7]:
url = 'https://www.popsugar.com/entertainment/popular-tiktok-songs-47289804?stream_view=1#photo-47289832'

In [8]:
response = requests.get(url)
response

<Response [200]>

In [9]:
content_popsugar = BeautifulSoup(response.text)
#content_popsugar

In [10]:
html_popsugar = content_popsugar.find_all('span', attrs={'class': 'count-copy'})
#html_popsugar

In [11]:
date_popsugar = content_popsugar.find('time').text.replace('\n', '').strip()
date_popsugar

'March 27, 2020'

In [12]:
popsugar_df = pd.DataFrame([re.split(' by | feat. ', song.text.replace('"', '').strip()) for song in html_popsugar], 
                             columns=['song', 'artist', 'feat'])
popsugar_df

Unnamed: 0,song,artist,feat
0,Roxanne,Arizona Zervas,
1,Say So,Doja Cat,
2,My Oh My,Camila Cabello,DaBaby
3,Moon,Kid Francescoli,
4,Vibe,Cookiee Kawaii,
...,...,...,...
64,What the Hell,Avril Lavigne,
65,Towards the Sun,Rihanna,
66,I Think I'm OKAY,"Machine Gun Kelly, YUNGBLUD, and Travis Barker",
67,Myself,Bazzi,


In [13]:
popsugar_df_raw = popsugar_df.copy()

## List of music genre

https://musicbrainz.org/genres

In [128]:
url = 'https://musicbrainz.org/genres'

In [129]:
# Get response
response = requests.get(url)

# Check response
response

<Response [200]>

In [130]:
musicbrainz_content = BeautifulSoup(response.content)
#musicbrainz_content

In [133]:
musicbrainz_genre = [genre.text for genre in musicbrainz_content.find_all('bdi')]

# Spotify

https://spotipy.readthedocs.io/en/2.14.0/#module-spotipy.client

https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/

## Connecting to the API

In [218]:
load_dotenv(find_dotenv())

True

In [219]:
cid = os.getenv('spotify_p03_key')
csecret = os.getenv('spotify_p03_secret')

In [220]:
cc_manager = SpotifyClientCredentials(client_id=cid, client_secret=csecret)
sp = spotipy.Spotify(client_credentials_manager=cc_manager)

## Songs

In [17]:
# Search in the API wrapper
spotify_search = [sp.search(q=song, type='track') for song in popsugar_df['song']]

In [18]:
popsugar_df['spotify_id'] = [track['tracks']['items'][0]['id'] if len(track['tracks']['items']) != 0 else 'not-found' 
                             for track in spotify_search]

In [19]:
popsugar_df['popularity'] = [track['tracks']['items'][0]['popularity'] if len(track['tracks']['items']) != 0 
                             else 'not-found' for track in spotify_search]

In [20]:
popsugar_df['release_date'] = [track['tracks']['items'][0]['album']['release_date'] if len(track['tracks']['items']) != 0 
                               else 'not-found' for track in spotify_search]

In [21]:
popsugar_df['explicit'] = [track['tracks']['items'][0]['explicit'] if len(track['tracks']['items']) != 0 
                           else 'not-found' for track in spotify_search]

In [22]:
popsugar_df.head()

Unnamed: 0,song,artist,feat,spotify_id,popularity,release_date,explicit
0,Roxanne,Arizona Zervas,,696DnlkuDOXcMAnKlTgXXK,89,2019-10-10,True
1,Say So,Doja Cat,,3Dv1eDb0MEgF93GpLXlucZ,89,2019-11-07,True
2,My Oh My,Camila Cabello,DaBaby,3yOlyBJuViE2YSGn3nVE1K,83,2019-12-06,False
3,Moon,Kid Francescoli,,0JP9xo3adEtGSdUEISiszL,86,2018-03-16,True
4,Vibe,Cookiee Kawaii,,0fySG6A6qLE8IvDpayb5bM,80,2019-09-27,True


In [23]:
spotipy_au_feat = [sp.audio_features(id) if id != 'not-found' else 'not-found' for id in popsugar_df.spotify_id]

In [24]:
spotipy_au_feat[0][0]

{'danceability': 0.621,
 'energy': 0.601,
 'key': 6,
 'loudness': -5.616,
 'mode': 0,
 'speechiness': 0.148,
 'acousticness': 0.0522,
 'instrumentalness': 0,
 'liveness': 0.46,
 'valence': 0.457,
 'tempo': 116.735,
 'type': 'audio_features',
 'id': '696DnlkuDOXcMAnKlTgXXK',
 'uri': 'spotify:track:696DnlkuDOXcMAnKlTgXXK',
 'track_href': 'https://api.spotify.com/v1/tracks/696DnlkuDOXcMAnKlTgXXK',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/696DnlkuDOXcMAnKlTgXXK',
 'duration_ms': 163636,
 'time_signature': 5}

In [25]:
spotipy_au_feat_keys = list(spotipy_au_feat[0][0].keys())
spotipy_au_feat_keys.remove('type')
spotipy_au_feat_keys.remove('id')
spotipy_au_feat_keys.remove('uri')
spotipy_au_feat_keys.remove('track_href')
spotipy_au_feat_keys.remove('analysis_url')
#spotipy_au_feat_keys

['danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'duration_ms',
 'time_signature']

In [26]:
for key in spotipy_au_feat_keys:
    popsugar_df[key] = [feat[0][key] if feat != 'not-found' else 'not-found' for feat in spotipy_au_feat]

In [27]:
popsugar_df.head()

Unnamed: 0,song,artist,feat,spotify_id,popularity,release_date,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Roxanne,Arizona Zervas,,696DnlkuDOXcMAnKlTgXXK,89,2019-10-10,True,0.621,0.601,6,-5.616,0,0.148,0.0522,0.0,0.46,0.457,116.735,163636,5
1,Say So,Doja Cat,,3Dv1eDb0MEgF93GpLXlucZ,89,2019-11-07,True,0.787,0.673,11,-4.577,0,0.158,0.256,3.57e-06,0.0904,0.786,110.962,237893,4
2,My Oh My,Camila Cabello,DaBaby,3yOlyBJuViE2YSGn3nVE1K,83,2019-12-06,False,0.724,0.491,8,-6.024,1,0.0296,0.018,1.29e-05,0.0887,0.383,105.046,170746,4
3,Moon,Kid Francescoli,,0JP9xo3adEtGSdUEISiszL,86,2018-03-16,True,0.921,0.537,9,-5.723,0,0.0804,0.556,0.00404,0.102,0.711,128.009,135090,4
4,Vibe,Cookiee Kawaii,,0fySG6A6qLE8IvDpayb5bM,80,2019-09-27,True,0.768,0.652,1,-2.708,0,0.307,0.113,0.0,0.107,0.777,154.187,144935,4


In [None]:
# Search in the API wrapper
spotify_search = [sp.search(q=song, type='track') for song in tqdm(popsugar_df['song'])]

## Artists

In [228]:
df_artists = popsugar_df_raw.copy().artist.to_frame().drop_duplicates()
df_artists.head()

Unnamed: 0,artist
0,Arizona Zervas
1,Doja Cat
2,Camila Cabello
3,Kid Francescoli
4,Cookiee Kawaii


In [230]:
# Search in the API wrapper
spotify_artists = [sp.search(q=artist.lower().replace(' ', '+'), type='artist') for artist in df_artists['artist']]

In [None]:
spotify_artists[0]['artists']['items'][0]['id']
spotify_artists[0]['artists']['items'][0]['genres']
spotify_artists[0]['artists']['items'][0]['popularity']
spotify_artists[0]['artists']['items'][0]['followers']['total']

In [231]:
df_artists['spotify_id'] = [artist['artists']['items'][0]['id'] if len(artist['artists']['items']) != 0 
                            else 'not-found' for artist in spotify_artists]

In [243]:
not_found_artists = df_artists[df_artists['spotify_id'] == 'not-found'].artist.to_list()
not_found_artists

['Y2K and bbno$',
 'Meduza, Becky Hill, and Goodboys',
 'TELYKast and One True God',
 'Frank Ocean and L.Dre',
 'GRiZ and Subtronics',
 'iLL Wayno and Holla FyeSixWun',
 'Ellie Goulding and Juice WRLD',
 'KYLE Feat. Lil Yachty',
 'Becky G and Digital Farm Animals',
 'Machine Gun Kelly, YUNGBLUD, and Travis Barker']

In [251]:
spotify_search[23]

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Lalala&type=track&offset=0&limit=10',
  'items': [{'album': {'album_type': 'single',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6USMTwO0MNDnKte5a5h0xx'},
       'href': 'https://api.spotify.com/v1/artists/6USMTwO0MNDnKte5a5h0xx',
       'id': '6USMTwO0MNDnKte5a5h0xx',
       'name': 'Y2K',
       'type': 'artist',
       'uri': 'spotify:artist:6USMTwO0MNDnKte5a5h0xx'},
      {'external_urls': {'spotify': 'https://open.spotify.com/artist/41X1TR6hrK8Q2ZCpp2EqCz'},
       'href': 'https://api.spotify.com/v1/artists/41X1TR6hrK8Q2ZCpp2EqCz',
       'id': '41X1TR6hrK8Q2ZCpp2EqCz',
       'name': 'bbno$',
       'type': 'artist',
       'uri': 'spotify:artist:41X1TR6hrK8Q2ZCpp2EqCz'}],
     'available_markets': ['AD',
      'AE',
      'AL',
      'AR',
      'AT',
      'AU',
      'BA',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'BY',
      'CA',
      'CH',
      'CL',
 

In [248]:
popsugar_df[popsugar_df.artist.isin(not_found_artists)]

Unnamed: 0,song,artist,feat,spotify_id,popularity,release_date,explicit,danceability,energy,key,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lastfm_tags
23,Lalala,Y2K and bbno$,,51Fjme0JiitpyXKuyQiCDo,84,2019-06-28,True,0.843,0.391,2,...,1,0.0845,0.181,0,0.137,0.496,129.972,160627,4,[no-tag]
26,Lose Control,"Meduza, Becky Hill, and Goodboys",,7CHi4DtfK4heMlQaudCuHK,87,2019-10-11,False,0.598,0.526,10,...,0,0.0415,0.129,0,0.14,0.529,123.935,168387,4,[not-found]
35,DAYLIGHT,TELYKast and One True God,,6Ed1q0X8oSKSm4IIhiQbYg,81,2020-08-06,True,0.528,0.749,7,...,1,0.0479,0.00488,9.6e-05,0.0949,0.729,163.944,163906,4,[not-found]
36,Shibuya — Chanel Funk Remix,Frank Ocean and L.Dre,,not-found,not-found,not-found,not-found,not-found,not-found,not-found,...,not-found,not-found,not-found,not-found,not-found,not-found,not-found,not-found,not-found,[not-found]
41,Griztronics,GRiZ and Subtronics,,6OTClxme7EYRZGO6An3SMc,64,2019-08-14,False,0.607,0.941,11,...,0,0.481,0.037,0.0137,0.321,0.521,75.401,198400,4,[not-found]
43,Eyes. Lips. Face.,iLL Wayno and Holla FyeSixWun,,0UcTXmzMNrnS2rOLp94LlJ,61,2019-12-06,False,0.88,0.776,6,...,1,0.265,0.0512,0,0.115,0.931,159.964,139547,4,[not-found]
44,Hate Me,Ellie Goulding and Juice WRLD,,6kls8cSlUyHW2BUOkDJIZE,82,2019-06-26,False,0.657,0.768,8,...,1,0.0507,0.108,0,0.144,0.759,75.025,186223,4,[no-tag]
51,Hey Julie!,KYLE Feat. Lil Yachty,,3fVIFpU3VdSEoR82DaYe79,71,2019-08-23,True,0.803,0.499,11,...,1,0.306,0.0363,3.46e-06,0.129,0.749,161.042,156637,4,[no-tag]
59,Next to You,Becky G and Digital Farm Animals,Rvssian,4drTFbY9KJIvllrqVcJvLi,64,2020-01-17,False,0.53,0.394,8,...,1,0.0276,0.756,1.48e-05,0.099,0.194,135.981,246813,4,[not-found]
66,I Think I'm OKAY,"Machine Gun Kelly, YUNGBLUD, and Travis Barker",,2gTdDMpNxIRFSiu7HutMCg,81,2019-07-05,True,0.628,0.744,7,...,1,0.0379,0.0257,0,0.313,0.277,119.921,169397,4,[no-tag]


In [233]:
df_artists.head()

Unnamed: 0,artist,spotify_id
0,Arizona Zervas,0vRvGUQVUjytro0xpb26bs
1,Doja Cat,5cj0lLjcoR7YOSnhnX0Po5
2,Camila Cabello,4nDoRrQiYLoBzwC5BhVJzF
3,Kid Francescoli,2G7QgTep5IsJHGHm1hXygD
4,Cookiee Kawaii,0DbBBj0ScPumRqKXswGQH1


In [None]:
popsugar_df['spotify_id'] = [track['tracks']['items'][0]['id'] if len(track['tracks']['items']) != 0 else 'not-found' 
                             for track in spotify_search]

## Playlists

In [157]:
# Search in the API wrapper
spotify_tiktok = sp.search(q='tiktok', type='playlist')

In [169]:
spotify_tiktok['playlists']['items']

[{'collaborative': False,
  'description': 'Viral, trending and taking off. \n(Disclaimer: Spotify has no relationship or affiliation with TikTok)',
  'external_urls': {'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DX2L0iB23Enbq'},
  'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX2L0iB23Enbq',
  'id': '37i9dQZF1DX2L0iB23Enbq',
  'images': [{'height': None,
    'url': 'https://i.scdn.co/image/ab67706f00000003166e4736f6ffe9edb96a4768',
    'width': None}],
  'name': 'TikTok Hits',
  'owner': {'display_name': 'Spotify',
   'external_urls': {'spotify': 'https://open.spotify.com/user/spotify'},
   'href': 'https://api.spotify.com/v1/users/spotify',
   'id': 'spotify',
   'type': 'user',
   'uri': 'spotify:user:spotify'},
  'primary_color': None,
  'public': None,
  'snapshot_id': 'MTU5OTc1NTE2MCwwMDAwMDBmMDAwMDAwMTc0NzhkNjhiN2YwMDAwMDE3M2I3YzhiMTlj',
  'tracks': {'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX2L0iB23Enbq/tracks',
   'total': 113},
  'type': 'pla

### Track example

```python
[In]:
sp.search(q='My Oh My', type='track', limit=1)

[Out]:
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=My+Oh+My&type=track&offset=0&limit=1',
            # Track info
            # Album info
            'items': [{'album': {'album_type': 'album',
                                 # Main artist info (about the album)
                                 'artists': [{'external_urls': 
                                              {'spotify': 'https://open.spotify.com/artist/4nDoRrQiYLoBzwC5BhVJzF'},
                                              'href': 'https://api.spotify.com/v1/artists/4nDoRrQiYLoBzwC5BhVJzF',
                                              # Artist id
                                              'id': '4nDoRrQiYLoBzwC5BhVJzF',
                                              # Artist name
                                              'name': 'Camila Cabello',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:4nDoRrQiYLoBzwC5BhVJzF'}],
                                 # Where the album is available
                                 'available_markets': ['AD', 'AE', 'AL', 'AR', 'AT', 'AU', 'BA', 'BE', 'BG', 'BH', 'BO',
                                                       'BR', 'BY', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK',
                                                       'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT',
                                                       'HK', 'HN', 'HR', 'HU', 'ID', 'IE', 'IL', 'IN', 'IS', 'IT', 'JO',
                                                       'JP', 'KW', 'KZ', 'LB', 'LI', 'LT', 'LU', 'LV', 'MA', 'MC', 'MD',
                                                       'ME', 'MK', 'MT', 'MX', 'MY', 'NI', 'NL', 'NO', 'NZ', 'OM', 'PA',
                                                       'PE', 'PH', 'PL', 'PS', 'PT', 'PY', 'QA', 'RO', 'RS', 'RU', 'SA',
                                                       'SE', 'SG', 'SI', 'SK', 'SV', 'TH', 'TN', 'TR', 'TW', 'UA', 'US',
                                                       'UY', 'VN', 'XK', 'ZA'],
                                 'external_urls': {'spotify': 'https://open.spotify.com/album/3Vsbl0diFGw8HNSjG8ue9m'},
                                 'href': 'https://api.spotify.com/v1/albums/3Vsbl0diFGw8HNSjG8ue9m',
                                 # Album id
                                 'id': '3Vsbl0diFGw8HNSjG8ue9m',
                                 # Album images
                                 'images': [{'height': 640,
                                             'url': 'https://i.scdn.co/image/ab67616d0000b2735f53c0dbe5190a0af0fa28f3',
                                             'width': 640},
                                            {'height': 300,
                                             'url': 'https://i.scdn.co/image/ab67616d00001e025f53c0dbe5190a0af0fa28f3',
                                             'width': 300},
                                            {'height': 64,
                                             'url': 'https://i.scdn.co/image/ab67616d000048515f53c0dbe5190a0af0fa28f3',
                                             'width': 64}],
                                 # Album name
                                 'name': 'Romance',
                                 # Album release date
                                 'release_date': '2019-12-06',
                                 'release_date_precision': 'day',
                                 # Number of tracks in the album
                                 'total_tracks': 14,
                                 'type': 'album',
                                 'uri': 'spotify:album:3Vsbl0diFGw8HNSjG8ue9m'},
                       # Artists info (about the track)
                       'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4nDoRrQiYLoBzwC5BhVJzF'},
                                    'href': 'https://api.spotify.com/v1/artists/4nDoRrQiYLoBzwC5BhVJzF',
                                    'id': '4nDoRrQiYLoBzwC5BhVJzF',
                                    'name': 'Camila Cabello',
                                    'type': 'artist',
                                    'uri': 'spotify:artist:4nDoRrQiYLoBzwC5BhVJzF'},
                                   # Feat
                                   {'external_urls': {'spotify': 'https://open.spotify.com/artist/4r63FhuTkUYltbVAg5TQnk'},
                                    'href': 'https://api.spotify.com/v1/artists/4r63FhuTkUYltbVAg5TQnk',
                                    'id': '4r63FhuTkUYltbVAg5TQnk',
                                    'name': 'DaBaby',
                                    'type': 'artist',
                                    'uri': 'spotify:artist:4r63FhuTkUYltbVAg5TQnk'}],
                       # Where the track is available
                       'available_markets': ['AD', 'AE', 'AL', 'AR', 'AT', 'AU', 'BA', 'BE', 'BG', 'BH', 'BO', 'BR', 'BY',
                                             'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'DZ', 'EC', 'EE',
                                             'EG', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HR', 'HU', 'ID', 'IE',
                                             'IL', 'IN', 'IS', 'IT', 'JO', 'JP', 'KW', 'KZ', 'LB', 'LI', 'LT', 'LU', 'LV',
                                             'MA', 'MC', 'MD', 'ME', 'MK', 'MT', 'MX', 'MY', 'NI', 'NL', 'NO', 'NZ', 'OM',
                                             'PA', 'PE', 'PH', 'PL', 'PS', 'PT', 'PY', 'QA', 'RO', 'RS', 'RU', 'SA', 'SE',
                                             'SG', 'SI', 'SK', 'SV', 'TH', 'TN', 'TR', 'TW', 'UA', 'US', 'UY', 'VN', 'XK',
                                             'ZA'],
                       'disc_number': 1,
                       # Durantion of the track (ms)
                       'duration_ms': 170746,
                       # Explicit?
                       'explicit': False,
                       'external_ids': {'isrc': 'USSM11914257'},
                       'external_urls': {'spotify': 'https://open.spotify.com/track/3yOlyBJuViE2YSGn3nVE1K'},
                       'href': 'https://api.spotify.com/v1/tracks/3yOlyBJuViE2YSGn3nVE1K',
                       # Track id
                       'id': '3yOlyBJuViE2YSGn3nVE1K',
                       'is_local': False,
                       # Track name
                       'name': 'My Oh My (feat. DaBaby)',
                       # Track popularity
                       'popularity': 83,
                       'preview_url': 'https://p.scdn.co/mp3-preview/f4d0ff165605108542de7358547443d1a9f33fe3?cid=52d383b8c23d4cf987d4a0d509588d7a',
                       'track_number': 4,
                       'type': 'track',
                       'uri': 'spotify:track:3yOlyBJuViE2YSGn3nVE1K'}],
            'limit': 1,
            'next': 'https://api.spotify.com/v1/search?query=My+Oh+My&type=track&offset=1&limit=1',
            'offset': 0,
            'previous': None,
            'total': 25565}}```

### Artist example

```python
[In]:
sp.search(q='Camila Cabello', type='artist', limit=1)

[Out]:
# Artist info
{'artists': {'href': 'https://api.spotify.com/v1/search?query=Camila+Cabello&type=artist&offset=0&limit=1',
             'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4nDoRrQiYLoBzwC5BhVJzF'},
                        # Followers
                        'followers': {'href': None, 'total': 17921890},
                        # Genres
                        'genres': ['dance pop', 'pop', 'post-teen pop'],
                        'href': 'https://api.spotify.com/v1/artists/4nDoRrQiYLoBzwC5BhVJzF',
                        # Artist id
                        'id': '4nDoRrQiYLoBzwC5BhVJzF',
                        'images': [{'height': 640,
                                    'url': 'https://i.scdn.co/image/667daebca4856db3f4cbe8f51a4881091e62c401',
                                    'width': 640},
                                   {'height': 320,
                                    'url': 'https://i.scdn.co/image/6628c8b4acfa56206eaf83edd561f0549f8f23df',
                                    'width': 320},
                                   {'height': 160,
                                    'url': 'https://i.scdn.co/image/9beb36d55d0a4784f4adf06fb506d292c688b8bf',
                                    'width': 160}],
                        # Artist name
                        'name': 'Camila Cabello',
                        # Artist popularity
                        'popularity': 87,
                        'type': 'artist',
                        'uri': 'spotify:artist:4nDoRrQiYLoBzwC5BhVJzF'}],
             'limit': 1,
             'next': 'https://api.spotify.com/v1/search?query=Camila+Cabello&type=artist&offset=1&limit=1',
             'offset': 0,
             'previous': None,
             'total': 3}}```

### Audio features

```python
[In]:
sp.audio_features('3yOlyBJuViE2YSGn3nVE1K')

[Out]:
[{'danceability': 0.724,
  'energy': 0.491,
  'key': 8,
  'loudness': -6.024,
  'mode': 1,
  'speechiness': 0.0296,
  'acousticness': 0.018,
  'instrumentalness': 1.29e-05,
  'liveness': 0.0887,
  'valence': 0.383,
  'tempo': 105.046,
  'type': 'audio_features',
  'id': '3yOlyBJuViE2YSGn3nVE1K',
  'uri': 'spotify:track:3yOlyBJuViE2YSGn3nVE1K',
  'track_href': 'https://api.spotify.com/v1/tracks/3yOlyBJuViE2YSGn3nVE1K',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3yOlyBJuViE2YSGn3nVE1K',
  'duration_ms': 170746,
  'time_signature': 4}]```

# Last.fm

https://www.last.fm/api/show/track.getInfo



headers = {'user-agent': lastfm_user}

payload = {
    'api_key': lastfm_key,
    'method': 'track.getInfo'
}

response = requests.get('http://ws.audioscrobbler.com/2.0/', headers=headers, params=payload)
response

## Connecting to the API and collecting data

Example of how to connect to the API and collect data

```python
response = lastfm_get({
    'method': 'track.getInfo',
    'track': 'My Oh My',
    'artist': 'Camila Cabello'
})```

In [108]:
lastfm_key = os.getenv('lastfm_p03_key')
lastfm_user = 'gnakasato'

In [91]:
# Function to connect to the API and get data

def lastfm_get(payload):
    # Define the headers and the url
    headers = {'user-agent': lastfm_user}
    url = 'http://ws.audioscrobbler.com/2.0/'
    
    # Add API key and format to the payload
    payload['api_key'] = lastfm_key
    payload['format=json'] = lastfm_key
    
    response = requests.get(url, headers=headers, params=payload)
    
    return response

In [118]:
# Create an auxiliary list
lastfm_track_info = []

# Connect to the API and collect data

for index in tqdm(range(popsugar_df.shape[0])):
    
    # Connect to the API
    response = lastfm_get({'method': 'track.getInfo',
                           'track': popsugar_df.iloc[index, 0],
                           'artist': popsugar_df.iloc[index, 1],
                           'autocorrect[0|1]': '1'})
    
    # Convert the data into a more amicable format
    track_info = BeautifulSoup(response.text)
    
    # Add the data to the auxiliary list
    lastfm_track_info.append(track_info)
    
    # Wait 1 second to check the next track
    sleep(1)

In [137]:
lastfm_track_info[0]

<?xml version="1.0" encoding="UTF-8" ?><html><body><lfm status="ok"><track><name>ROXANNE</name>
<url>https://www.last.fm/music/Arizona+Zervas/_/ROXANNE</url>
<duration>164000</duration>
<streamable fulltrack="0">0</streamable>
<listeners>193475</listeners>
<playcount>1803316</playcount>
<artist><name>Arizona Zervas</name>
<url>https://www.last.fm/music/Arizona+Zervas</url>
</artist>
<album><artist>Various Artists</artist>
<title>Now That's What I Call Music! Vol. 74</title>
<url>https://www.last.fm/music/Various+Artists/Now+That%27s+What+I+Call+Music%21+Vol.+74</url>
<image size="small">https://lastfm.freetls.fastly.net/i/u/34s/415635b305c9f838970afb56eeb7fc9c.png</image>
<image size="medium">https://lastfm.freetls.fastly.net/i/u/64s/415635b305c9f838970afb56eeb7fc9c.png</image>
<image size="large">https://lastfm.freetls.fastly.net/i/u/174s/415635b305c9f838970afb56eeb7fc9c.png</image>
<image size="extralarge">https://lastfm.freetls.fastly.net/i/u/300x300/415635b305c9f838970afb56eeb7fc9c

In [120]:
# Create a list with the tags of each track
# If the track was not found in the API, the value to the corresponding track is 'not-found'
lastfm_tags = [track.find_all('tag') if track.text.replace('\n\n', '') != 'Track not found' else 'not-found' for track 
                in lastfm_track_info]

# Check the result for the first two tracks
lastfm_tags[:2]

[[<tag><name>2019</name>
  <url>https://www.last.fm/tag/2019</url>
  </tag>,
  <tag><name>2010s</name>
  <url>https://www.last.fm/tag/2010s</url>
  </tag>,
  <tag><name>arizona zervas</name>
  <url>https://www.last.fm/tag/arizona+zervas</url>
  </tag>,
  <tag><name>Hip-Hop</name>
  <url>https://www.last.fm/tag/Hip-Hop</url>
  </tag>,
  <tag><name>rap</name>
  <url>https://www.last.fm/tag/rap</url>
  </tag>],
 [<tag><name>pop</name>
  <url>https://www.last.fm/tag/pop</url>
  </tag>,
  <tag><name>Disco</name>
  <url>https://www.last.fm/tag/Disco</url>
  </tag>,
  <tag><name>rap</name>
  <url>https://www.last.fm/tag/rap</url>
  </tag>,
  <tag><name>Hip-Hop</name>
  <url>https://www.last.fm/tag/Hip-Hop</url>
  </tag>,
  <tag><name>female vocalists</name>
  <url>https://www.last.fm/tag/female+vocalists</url>
  </tag>]]

The idea is to create a nested list with lists of tags of each track

In [134]:
# Create auxiliary lists 
tracks_tags = []  # Final list with the tags
tags_exist = []  # List with all tags

# Get the tag names for each track from the raw (messy) data

for track_tag in lastfm_tags:
    
    # If the track was not found in the API
    if track_tag == 'not-found':
        tracks_tags.append(['not-found'])
    
    # If the track was found, but there is no tag related to the track
    elif len(track_tag) == 0:
        tracks_tags.append(['no-tag'])
    
    # If the track was found and there are tags related to the track
    else:
        
        # Create an auxiliary list 
        # Before checking each track, this list needs to be cleared, so it stores a list of messy tags for each track
        each_tag_lists = []
        
        # Each track has a list with messy tags, so it is necessary to clean this data, checking each tag for each track
        for each_tag in track_tag:
            
            # Get only the tag name, but this process creates a list with the tag name
            tag_list = each_tag.find('name')
            
            # Add each tag (messy data) in an auxiliary list
            each_tag_lists.append(tag_list)
            
            # Create an auxiliary list
            # Before checking each tag of a track, this list needs to be cleared, so it stores a list of clean data of
            # tags for each track
            each_track_tags = []
            
            # Each tag name of a track is inside a list
            for each_one in each_tag_lists:
                
                # Clean the data for each tag name
                tag = each_one.text.lower().replace('-', ' ')
                
                # Add the tag (clean data) with all lower case letters in an auxiliary list if the tag is one of the
                # genres listed in the 'musicbrainz_genre'
                if tag in musicbrainz_genre:
                    each_track_tags.append(tag)
                    tags_exist.append(tag)
                
        # Add each list of tags (clean) for one track in a final list
        tracks_tags.append(each_track_tags)
        #print(f'\n{tracks_tags}\n')
        
# Add a column in the 'popsugar_df' with the tags found in Last.fm API
popsugar_df['lastfm_tags'] = tracks_tags

In [135]:
popsugar_df.head()

Unnamed: 0,song,artist,feat,spotify_id,popularity,release_date,explicit,danceability,energy,key,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lastfm_tags
0,Roxanne,Arizona Zervas,,696DnlkuDOXcMAnKlTgXXK,89,2019-10-10,True,0.621,0.601,6,...,0,0.148,0.0522,0.0,0.46,0.457,116.735,163636,5,[hip hop]
1,Say So,Doja Cat,,3Dv1eDb0MEgF93GpLXlucZ,89,2019-11-07,True,0.787,0.673,11,...,0,0.158,0.256,3.57e-06,0.0904,0.786,110.962,237893,4,"[pop, disco, hip hop]"
2,My Oh My,Camila Cabello,DaBaby,3yOlyBJuViE2YSGn3nVE1K,83,2019-12-06,False,0.724,0.491,8,...,1,0.0296,0.018,1.29e-05,0.0887,0.383,105.046,170746,4,[pop]
3,Moon,Kid Francescoli,,0JP9xo3adEtGSdUEISiszL,86,2018-03-16,True,0.921,0.537,9,...,0,0.0804,0.556,0.00404,0.102,0.711,128.009,135090,4,"[chillout, indie pop]"
4,Vibe,Cookiee Kawaii,,0fySG6A6qLE8IvDpayb5bM,80,2019-09-27,True,0.768,0.652,1,...,0,0.307,0.113,0.0,0.107,0.777,154.187,144935,4,[no-tag]
