### Retrieving Top Songs & Lyrics for Artist
####  @author: Jyontika Kapoor

In [12]:
# pip install spotipy
# pip install lxml

### The below code uses the **spotipy** package to get the top tracks of an artist. Each artist has a spotify ID which we obtain using *sp.search()*. Then we get the top tracks through the *artist_top_tracks()* method. 

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import requests
import os
import pandas as pd
from bs4 import BeautifulSoup

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
## CLIENT ID AND CLIENT SECRET
### DO NOT PUT ON GIT

client_id = ''
client_secret = ''
genius_access_token = 'lOqXN-Ms2yerxQ84trh3ex5sV_QehWhhQNG2BecI0n3DcCuqV2Cq-Sb9py7U7ETO'


In [6]:

def get_top_tracks(artist_name, client_id, client_secret):
    # Initialize Spotify client with your credentials
    credentials = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    sp = spotipy.Spotify(client_credentials_manager=credentials)
    
    # search for the artist to get the Spotify ID through sp.search
    results = sp.search(q='artist:' + artist_name, type='artist') 
    items = results['artists']['items']
    if not items:
        return []
    
    
    artist_id = items[0]['id']
    
    top_tracks = sp.artist_top_tracks(artist_id)
    
    return [track['name'] for track in top_tracks['tracks']]


In [7]:
artist_name = 'Selena Gomez'
top_tracks = get_top_tracks(artist_name, client_id, client_secret)
top_tracks

SpotifyOauthError: No client_id. Pass it or set a SPOTIPY_CLIENT_ID environment variable.

#### Now we want to get the lyrics for each of the 10 tracks. We will do this using the **genius API** and the requests & bs4 library. The idea is we will search a song name on Genius through the requests library, find a match, and use bs4 to parse the HTML and extract the lyrics.

In [31]:
def get_lyrics(song_name, artist_name, genius_access_token):
    #set up the request using access token 
    base_url = "https://api.genius.com"
    headers = {'Authorization': 'Bearer ' + genius_access_token}
    search_url = base_url + "/search" #getting ready to search!
    data = {'q': song_name} #we want to search the song name!

    response = requests.get(search_url, params=data, headers=headers)
    json = response.json()
    path = None

    # loops through the search results returned by the Genius API to find a specific song that matches name of song and artist name

    for hit in json["response"]["hits"]:
        if artist_name.lower() in hit["result"]["primary_artist"]["name"].lower():
            path = hit["result"]["path"]
            break #assuming the first is right 

    if path:
        lyrics_url = "https://genius.com" + path
        lyrics_response = requests.get(lyrics_url)
        soup = BeautifulSoup(lyrics_response.text, 'lxml')

        # Check if the song is instrumental
        instrumental_div = soup.find('div', class_='LyricsPlaceholder__Message-uen8er-2')
        if instrumental_div:
            return "This song is an instrumental"

        lyrics_div = soup.find('div', {'data-lyrics-container': 'true'})
        lyrics = lyrics_div.get_text(separator='\n', strip=True) if lyrics_div else "Lyrics not found"
        return lyrics

    else:
        return "Lyrics not found"
    

In [8]:
artist_name = 'Kesha'
tracks = get_top_tracks(artist_name, client_id, client_secret)
tracks

['Timber',
 'TiK ToK',
 'Right Round',
 'Die Young',
 'We R Who We R',
 'Your Love Is My Drug',
 'Good Old Days',
 'Backstabber',
 'Blow',
 'Take It Off']

In [9]:
lyrics = get_lyrics(tracks[1], artist_name, genius_access_token)
print(lyrics)

[Verse 1]
Wake up in the morning feelin' like P. Diddy
(
Hey, what up, girl?
)
Grab my glasses, I'm out the door, I'm gonna hit this city
(
Let's go
)
Before I leave, brush my teeth with a bottle of Jack
'Cause when I leave for the night, I ain't coming back
[Pre-Chorus]
I'm talkin' pedicure on our toes, toes, tryin' on all our clothes, clothes
Boys blowin' up our phones, phones
Drop-toppin', playin' our favorite CDs, pullin' up to the parties
Tryna get a little bit tipsy
[Chorus]
Don't stop, make it pop, DJ, blow my speakers up
Tonight, I'ma fight 'til we see the sunlight
Tick tock on the clock, but the party don't stop, no
Oh, woah, woah, oh, oh, woah, woah, oh
Don't stop, make it pop, DJ, blow my speakers up
Tonight, I'ma fight 'til we see the sunlight
Tick tock on the clock, but the party don't stop, no
Oh, woah, woah, oh, oh, woah, woah, oh
[Verse 2]
Ain't got a care in the world, but got plenty of beer
Ain't got no money in my pocket, but I'm already here
And now the dudes are li

### Now we will retrive lyrics from the identified viral songs

In [31]:
cwd = os.getcwd()
viral_songs = f'{cwd}/data-analysis/viral.csv'
viral_songs = pd.read_csv(viral_songs)
viral_songs

Unnamed: 0,artists,songs
0,Kinfolk Thugs,Dumptruck
1,Cordelia,Little Life
2,"¥$, Kanye West, Ty Dolla $ign & Rich The Kid",CARNIVAL (feat. Playboi Carti)
3,Ariana Grande,"yes, and? (Mixed)"
4,Bobby Caldwell,What You Won't Do for Love
5,R.m.y,How I Love Being a Woman 6
6,Rihanna,Bitch Better Have My Money
7,"The Weeknd, JENNIE & Lily Rose Depp",One Of The Girls
8,Gold-Tiger,Funny
9,Beyoncé,Countdown


In [39]:
from collections import Counter

singers = []
artists = Counter(viral_songs['artists'])
for artist in artists: 
    singers.append(artist)

singers

['Kinfolk Thugs',
 'Cordelia',
 '¥$, Kanye West, Ty Dolla $ign & Rich The Kid',
 'Ariana Grande',
 'Bobby Caldwell',
 'R.m.y',
 'Rihanna',
 'The Weeknd, JENNIE & Lily Rose Depp',
 'Gold-Tiger',
 'Beyoncé',
 'Masego & FKJ',
 'Lana Del Rey',
 'Peso Pluma & Anitta',
 '21 Savage',
 'Flo Milli',
 'Muni Long',
 'ผดุง ทรงแสง (แจ๊ส)',
 'Lilithzplug']

### Add in Lyrics to our Data Frame

In [2]:
songs_data = pd.read_csv("/Users/jyontika/Documents/GitHub/CS315-Final-Project/test-shazam-api/songs_info.csv")

In [3]:
songs_data

Unnamed: 0,file_name,track_name,artist,track_subject
0,share_video_7309247847898090794_.mp3,TAKI TA TRIBALERO (feat. DJ Erandes),DJ Mecca,TAKI TA TRIBALERO (feat. DJ Erandes) - DJ Mecca
1,share_video_7329274198096973099_.mp3,Montagem Mysterious Game,LXNGVX,Montagem Mysterious Game - LXNGVX
2,share_video_7339685917759769899_.mp3,Dumptruck,Kinfolk Thugs,Dumptruck - Kinfolk Thugs
3,share_video_7329990500528753953_.mp3,Old Future,Koday Jackson,Old Future - Koday Jackson
4,share_video_7324083003284573482_.mp3,Little Life,Cordelia,Little Life - Cordelia
...,...,...,...,...
505,share_video_7331566337015237930_.mp3,"หลวงพี่แจ๊ส 4G (From ""ภาพยนตร์เรื่องหลวงพี่แจ๊...",ผดุง ทรงแสง (แจ๊ส),"หลวงพี่แจ๊ส 4G (From ""ภาพยนตร์เรื่องหลวงพี่แจ๊..."
506,share_video_7331565050198543659_.mp3,Show Me How,Men I Trust,Show Me How - Men I Trust
507,share_video_7341274575193378090_.mp3,"yes, and? (sped up)",Ariana Grande,"yes, and? (sped up) - Ariana Grande"
508,share_video_7322305748975848705_.mp3,Funny,Gold-Tiger,Funny - Gold-Tiger


In [48]:
songs_data.drop('track_subject', axis=1, inplace=True)

In [50]:
replacement_dict = {
    'yes, and? (sped up)': 'yes, and',
    'yes, and? (Mixed)': 'yes, and',
    'Made For Me (Sped Up Version)' : 'Made for Me',
    'Made For Me (Mixed)' : 'Made for Me',
    'Murder on the Dancefloor (Sped Up)' : 'Murder on the Dancefloor',
    'PROVENZA (Mixed)' : 'PROVENZA',
    'What Was I Made For? (Mixed)' : 'What Was I Made For?',
    'BIG GIRLS DONT CRY (workout mix)' :  'BIG GIRLS DONT CRY (workout mix)' 
}

In [51]:
songs_data['track_name'] = songs_data['track_name'].replace(replacement_dict)

In [52]:
##apply get_lyrics() to the data

songs_data['lyrics'] = [
    get_lyrics(row['track_name'], row['artist'], genius_access_token)
    for idx, row in songs_data.iterrows()
    if not print(f"Processing row {idx + 1}: {row['track_name']} by {row['artist']}")
]

Processing row 1: TAKI TA TRIBALERO (feat. DJ Erandes) by DJ Mecca
Processing row 2: Montagem Mysterious Game by LXNGVX
Processing row 3: Dumptruck by Kinfolk Thugs
Processing row 4: Old Future by Koday Jackson
Processing row 5: Little Life by Cordelia
Processing row 6: A Sky Full of Stars (Live at the Royal Albert Hall, London) by Coldplay
Processing row 7: How To Never Stop Being Sad by dandelion hands
Processing row 8: How To Press a Pill by Punchmade Dev
Processing row 9: CARNIVAL (feat. Playboi Carti) by ¥$, Kanye West, Ty Dolla $ign & Rich The Kid
Processing row 10: yes, and by Ariana Grande
Processing row 11: We Are the People (Burns Remix) by Empire of the Sun
Processing row 12: Oh No by Lil Lucky
Processing row 13: Blooket Freestyle Beat by cj icyy
Processing row 14: Hot X So Much Hype by Tre Savage
Processing row 15: I Love You by SINGHSANGER MALVINDER
Processing row 16: Ribs by Lorde
Processing row 17: Grey by Yung Filly
Processing row 18: Untitled #13 (Super Slowed) by glwz

In [53]:
songs_data['lyrics']

0                                       Lyrics not found
1                           This song is an instrumental
2      [Intro]\nAh-where they at?\n(K-K-K-Kin Folk Th...
3                                       Lyrics not found
4      [Verse 1]\nHow would you have me described?\nW...
                             ...                        
505                                     Lyrics not found
506    [Verse 1]\nShow me how you care\nTell me how y...
507    [Verse 1]\nIn case you haven't noticed\nWell, ...
508                                     Lyrics not found
509    [Verse 1: Camila]\nI ain't worried about nothi...
Name: lyrics, Length: 510, dtype: object

In [54]:
no_lyrics_found = songs_data[songs_data['lyrics'] == "Lyrics not found"]
no_lyrics_found = no_lyrics_found.iloc[:, 1:3]

if not no_lyrics_found.empty:
    print("Rows with no lyrics found:")
    print(no_lyrics_found)
else:
    print("Lyrics were found for all songs.")

Rows with no lyrics found:
                                            track_name              artist
0                 TAKI TA TRIBALERO (feat. DJ Erandes)            DJ Mecca
3                                           Old Future       Koday Jackson
11                                               Oh No           Lil Lucky
12                              Blooket Freestyle Beat             cj icyy
13                                  Hot X So Much Hype          Tre Savage
..                                                 ...                 ...
500                                              Dummy         DummyTheKid
501                                        Iam natural        Angel Amedro
504                                    Enamorado De Ti       los temerario
505  หลวงพี่แจ๊ส 4G (From "ภาพยนตร์เรื่องหลวงพี่แจ๊...  ผดุง ทรงแสง (แจ๊ส)
508                                              Funny          Gold-Tiger

[255 rows x 2 columns]


In [55]:
no_lyrics_found_unique = no_lyrics_found.drop_duplicates()

In [56]:
no_lyrics_found_unique
no_lyrics_found_unique.to_csv('no_lyrics_found_unique.csv', index=False)

In [57]:
no_lyrics_found.shape

(255, 2)

In [64]:
get_lyrics('BIG GIRLS DONT CRY', 'Toby Gad & Victoria Justice', genius_access_token)

'Lyrics not found'